jglaser committed on
Commit
ed3b8f2
·
1 Parent(s): 1b09a0a

Add new SentenceTransformer model.

Browse files
Files changed (35) hide show
  1. .gitattributes +3 -0
  2. 0_Asym/140438115853120_Transformer/config.json +26 -0
  3. 0_Asym/140438115853120_Transformer/pytorch_model.bin +3 -0
  4. 0_Asym/140438115853120_Transformer/sentence_bert_config.json +4 -0
  5. 0_Asym/140438115853120_Transformer/special_tokens_map.json +1 -0
  6. 0_Asym/140438115853120_Transformer/tokenizer.json +197 -0
  7. 0_Asym/140438115853120_Transformer/tokenizer_config.json +1 -0
  8. 0_Asym/140438115853120_Transformer/vocab.txt +30 -0
  9. 0_Asym/140438119132512_Transformer/config.json +27 -0
  10. 0_Asym/140438119132512_Transformer/pytorch_model.bin +3 -0
  11. 0_Asym/140438119132512_Transformer/sentence_bert_config.json +4 -0
  12. 0_Asym/140438119132512_Transformer/special_tokens_map.json +1 -0
  13. 0_Asym/140438119132512_Transformer/tokenizer.json +364 -0
  14. 0_Asym/140438119132512_Transformer/tokenizer_config.json +1 -0
  15. 0_Asym/140438119132512_Transformer/vocab.txt +201 -0
  16. 0_Asym/140442694833872_Pooling/config.json +7 -0
  17. 0_Asym/140442694833968_Dense/config.json +1 -0
  18. 0_Asym/140442694833968_Dense/pytorch_model.bin +3 -0
  19. 0_Asym/140442694834016_Pooling/config.json +7 -0
  20. 0_Asym/140442694834112_Dense/config.json +1 -0
  21. 0_Asym/140442694834112_Dense/pytorch_model.bin +3 -0
  22. 0_Asym/config.json +25 -0
  23. 1_Dense/config.json +1 -0
  24. 1_Dense/pytorch_model.bin +3 -0
  25. 2_Dense/config.json +1 -0
  26. 2_Dense/pytorch_model.bin +3 -0
  27. 3_Dense/config.json +1 -0
  28. 3_Dense/pytorch_model.bin +3 -0
  29. 4_Dense/config.json +1 -0
  30. 4_Dense/pytorch_model.bin +3 -0
  31. 5_Dense/config.json +1 -0
  32. 5_Dense/pytorch_model.bin +3 -0
  33. README.md +65 -0
  34. config_sentence_transformers.json +7 -0
  35. modules.json +38 -0
.gitattributes CHANGED
@@ -25,3 +25,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ 0_Asym/140438115853120_Transformer/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
29
+ 0_Asym/140438119132512_Transformer/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
30
+ 1_Dense/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
0_Asym/140438115853120_Transformer/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/netdisk/xvg/seq_1",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0,
10
+ "hidden_size": 1024,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4096,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 40000,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 30,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "tokenizer_class": "BertTokenizerFast",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.17.0.dev0",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30
26
+ }
0_Asym/140438115853120_Transformer/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcccb96489c856b161561a9be4f77e0f87f970d4448784bbb233c1eab7eebfd9
3
+ size 1680230449
0_Asym/140438115853120_Transformer/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 2048,
3
+ "do_lower_case": false
4
+ }
0_Asym/140438115853120_Transformer/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
0_Asym/140438115853120_Transformer/tokenizer.json ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "[PAD]",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 1,
17
+ "special": true,
18
+ "content": "[UNK]",
19
+ "single_word": false,
20
+ "lstrip": false,
21
+ "rstrip": false,
22
+ "normalized": false
23
+ },
24
+ {
25
+ "id": 2,
26
+ "special": true,
27
+ "content": "[CLS]",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 3,
35
+ "special": true,
36
+ "content": "[SEP]",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false
41
+ },
42
+ {
43
+ "id": 4,
44
+ "special": true,
45
+ "content": "[MASK]",
46
+ "single_word": false,
47
+ "lstrip": false,
48
+ "rstrip": false,
49
+ "normalized": false
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "Sequence",
54
+ "normalizers": [
55
+ {
56
+ "type": "Replace",
57
+ "pattern": {
58
+ "Regex": "[UZOB]"
59
+ },
60
+ "content": "X"
61
+ },
62
+ {
63
+ "type": "Replace",
64
+ "pattern": {
65
+ "Regex": "\\s"
66
+ },
67
+ "content": ""
68
+ }
69
+ ]
70
+ },
71
+ "pre_tokenizer": {
72
+ "type": "Split",
73
+ "pattern": {
74
+ "Regex": ""
75
+ },
76
+ "behavior": "Isolated",
77
+ "invert": false
78
+ },
79
+ "post_processor": {
80
+ "type": "TemplateProcessing",
81
+ "single": [
82
+ {
83
+ "SpecialToken": {
84
+ "id": "[CLS]",
85
+ "type_id": 0
86
+ }
87
+ },
88
+ {
89
+ "Sequence": {
90
+ "id": "A",
91
+ "type_id": 0
92
+ }
93
+ },
94
+ {
95
+ "SpecialToken": {
96
+ "id": "[SEP]",
97
+ "type_id": 0
98
+ }
99
+ }
100
+ ],
101
+ "pair": [
102
+ {
103
+ "SpecialToken": {
104
+ "id": "[CLS]",
105
+ "type_id": 0
106
+ }
107
+ },
108
+ {
109
+ "Sequence": {
110
+ "id": "A",
111
+ "type_id": 0
112
+ }
113
+ },
114
+ {
115
+ "SpecialToken": {
116
+ "id": "[SEP]",
117
+ "type_id": 0
118
+ }
119
+ },
120
+ {
121
+ "Sequence": {
122
+ "id": "B",
123
+ "type_id": 1
124
+ }
125
+ },
126
+ {
127
+ "SpecialToken": {
128
+ "id": "[SEP]",
129
+ "type_id": 1
130
+ }
131
+ }
132
+ ],
133
+ "special_tokens": {
134
+ "[CLS]": {
135
+ "id": "[CLS]",
136
+ "ids": [
137
+ 2
138
+ ],
139
+ "tokens": [
140
+ "[CLS]"
141
+ ]
142
+ },
143
+ "[SEP]": {
144
+ "id": "[SEP]",
145
+ "ids": [
146
+ 3
147
+ ],
148
+ "tokens": [
149
+ "[SEP]"
150
+ ]
151
+ }
152
+ }
153
+ },
154
+ "decoder": {
155
+ "type": "WordPiece",
156
+ "prefix": "##",
157
+ "cleanup": true
158
+ },
159
+ "model": {
160
+ "type": "WordPiece",
161
+ "unk_token": "[UNK]",
162
+ "continuing_subword_prefix": "##",
163
+ "max_input_chars_per_word": 100,
164
+ "vocab": {
165
+ "[PAD]": 0,
166
+ "[UNK]": 1,
167
+ "[CLS]": 2,
168
+ "[SEP]": 3,
169
+ "[MASK]": 4,
170
+ "L": 5,
171
+ "A": 6,
172
+ "G": 7,
173
+ "V": 8,
174
+ "E": 9,
175
+ "S": 10,
176
+ "I": 11,
177
+ "K": 12,
178
+ "R": 13,
179
+ "D": 14,
180
+ "T": 15,
181
+ "P": 16,
182
+ "N": 17,
183
+ "Q": 18,
184
+ "F": 19,
185
+ "Y": 20,
186
+ "M": 21,
187
+ "H": 22,
188
+ "C": 23,
189
+ "W": 24,
190
+ "X": 25,
191
+ "U": 26,
192
+ "B": 27,
193
+ "Z": 28,
194
+ "O": 29
195
+ }
196
+ }
197
+ }
0_Asym/140438115853120_Transformer/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "full_tokenizer_file": null, "name_or_path": "/netdisk/xvg/seq_tokenizer", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
0_Asym/140438115853120_Transformer/vocab.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [PAD]
2
+ [UNK]
3
+ [CLS]
4
+ [SEP]
5
+ [MASK]
6
+ L
7
+ A
8
+ G
9
+ V
10
+ E
11
+ S
12
+ I
13
+ K
14
+ R
15
+ D
16
+ T
17
+ P
18
+ N
19
+ Q
20
+ F
21
+ Y
22
+ M
23
+ H
24
+ C
25
+ W
26
+ X
27
+ U
28
+ B
29
+ Z
30
+ O
0_Asym/140438119132512_Transformer/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/netdisk/xvg/smiles_1",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "tokenizer_class": "BertTokenizerFast",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.17.0.dev0",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30000
27
+ }
0_Asym/140438119132512_Transformer/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:831cd69e183a06d9c7435e60c2b1bf2b50f5ca406a1cd6ff913517fdff7c8dae
3
+ size 436404785
0_Asym/140438119132512_Transformer/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
0_Asym/140438119132512_Transformer/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
0_Asym/140438119132512_Transformer/tokenizer.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "[PAD]",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 1,
17
+ "special": true,
18
+ "content": "[UNK]",
19
+ "single_word": false,
20
+ "lstrip": false,
21
+ "rstrip": false,
22
+ "normalized": false
23
+ },
24
+ {
25
+ "id": 2,
26
+ "special": true,
27
+ "content": "[CLS]",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 3,
35
+ "special": true,
36
+ "content": "[SEP]",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false
41
+ },
42
+ {
43
+ "id": 4,
44
+ "special": true,
45
+ "content": "[MASK]",
46
+ "single_word": false,
47
+ "lstrip": false,
48
+ "rstrip": false,
49
+ "normalized": false
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "BertNormalizer",
54
+ "clean_text": true,
55
+ "handle_chinese_chars": true,
56
+ "strip_accents": true,
57
+ "lowercase": false
58
+ },
59
+ "pre_tokenizer": {
60
+ "type": "Sequence",
61
+ "pretokenizers": [
62
+ {
63
+ "type": "WhitespaceSplit"
64
+ },
65
+ {
66
+ "type": "Split",
67
+ "pattern": {
68
+ "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])"
69
+ },
70
+ "behavior": "Isolated",
71
+ "invert": false
72
+ }
73
+ ]
74
+ },
75
+ "post_processor": {
76
+ "type": "TemplateProcessing",
77
+ "single": [
78
+ {
79
+ "SpecialToken": {
80
+ "id": "[CLS]",
81
+ "type_id": 0
82
+ }
83
+ },
84
+ {
85
+ "Sequence": {
86
+ "id": "A",
87
+ "type_id": 0
88
+ }
89
+ },
90
+ {
91
+ "SpecialToken": {
92
+ "id": "[SEP]",
93
+ "type_id": 0
94
+ }
95
+ }
96
+ ],
97
+ "pair": [
98
+ {
99
+ "SpecialToken": {
100
+ "id": "[CLS]",
101
+ "type_id": 0
102
+ }
103
+ },
104
+ {
105
+ "Sequence": {
106
+ "id": "A",
107
+ "type_id": 0
108
+ }
109
+ },
110
+ {
111
+ "SpecialToken": {
112
+ "id": "[SEP]",
113
+ "type_id": 0
114
+ }
115
+ },
116
+ {
117
+ "Sequence": {
118
+ "id": "B",
119
+ "type_id": 1
120
+ }
121
+ },
122
+ {
123
+ "SpecialToken": {
124
+ "id": "[SEP]",
125
+ "type_id": 1
126
+ }
127
+ }
128
+ ],
129
+ "special_tokens": {
130
+ "[CLS]": {
131
+ "id": "[CLS]",
132
+ "ids": [
133
+ 2
134
+ ],
135
+ "tokens": [
136
+ "[CLS]"
137
+ ]
138
+ },
139
+ "[SEP]": {
140
+ "id": "[SEP]",
141
+ "ids": [
142
+ 3
143
+ ],
144
+ "tokens": [
145
+ "[SEP]"
146
+ ]
147
+ }
148
+ }
149
+ },
150
+ "decoder": {
151
+ "type": "WordPiece",
152
+ "prefix": "##",
153
+ "cleanup": true
154
+ },
155
+ "model": {
156
+ "type": "WordPiece",
157
+ "unk_token": "[UNK]",
158
+ "continuing_subword_prefix": "##",
159
+ "max_input_chars_per_word": 100,
160
+ "vocab": {
161
+ "[PAD]": 0,
162
+ "[UNK]": 1,
163
+ "[CLS]": 2,
164
+ "[SEP]": 3,
165
+ "[MASK]": 4,
166
+ "=": 5,
167
+ "F": 6,
168
+ "N": 7,
169
+ "1": 8,
170
+ "n": 9,
171
+ "O": 10,
172
+ "C": 11,
173
+ "S": 12,
174
+ "c": 13,
175
+ "3": 14,
176
+ "Cl": 15,
177
+ "o": 16,
178
+ "[nH]": 17,
179
+ "s": 18,
180
+ "-": 19,
181
+ "2": 20,
182
+ "4": 21,
183
+ "[C@H]": 22,
184
+ ")": 23,
185
+ "(": 24,
186
+ "Br": 25,
187
+ "#": 26,
188
+ "[C@@H]": 27,
189
+ "[C@]": 28,
190
+ "[C@@]": 29,
191
+ "[O-]": 30,
192
+ "[N+]": 31,
193
+ "[C]": 32,
194
+ "I": 33,
195
+ "5": 34,
196
+ "[C-]": 35,
197
+ "[CH]": 36,
198
+ "/": 37,
199
+ "[S@]": 38,
200
+ "[S@@]": 39,
201
+ "[SH]": 40,
202
+ "[Si]": 41,
203
+ "[n+]": 42,
204
+ "[C+]": 43,
205
+ "[S@H]": 44,
206
+ "[Fe]": 45,
207
+ "[S@@H]": 46,
208
+ "B": 47,
209
+ "[O]": 48,
210
+ "[S-]": 49,
211
+ "[P@H]": 50,
212
+ "[PH]": 51,
213
+ "6": 52,
214
+ "[O+]": 53,
215
+ "[P@@H]": 54,
216
+ "[NH+]": 55,
217
+ "[S]": 56,
218
+ "\\": 57,
219
+ "[P@]": 58,
220
+ "[P@@]": 59,
221
+ "[N]": 60,
222
+ "P": 61,
223
+ "[S+]": 62,
224
+ "[P]": 63,
225
+ "[IH]": 64,
226
+ "[se]": 65,
227
+ "[C@+]": 66,
228
+ "[Si@]": 67,
229
+ "[c-]": 68,
230
+ "7": 69,
231
+ "[C@-]": 70,
232
+ "[Si@@]": 71,
233
+ "[Se]": 72,
234
+ "[Si@H]": 73,
235
+ "[S@+]": 74,
236
+ "[N-]": 75,
237
+ "[CnH]": 76,
238
+ "[c+]": 77,
239
+ "[P+]": 78,
240
+ "[Si@@H]": 79,
241
+ "[SiH]": 80,
242
+ "[P-]": 81,
243
+ "[I]": 82,
244
+ "[S@-]": 83,
245
+ "[CH+]": 84,
246
+ "[C@H+]": 85,
247
+ "[I+]": 86,
248
+ "[C@@-]": 87,
249
+ "8": 88,
250
+ "[Si-]": 89,
251
+ "[C@@H+]": 90,
252
+ "[I-]": 91,
253
+ "[CH-]": 92,
254
+ "[P@+]": 93,
255
+ "[Cn]": 94,
256
+ "[C@@+]": 95,
257
+ "[SnH]": 96,
258
+ "[Se@]": 97,
259
+ "[S@@+]": 98,
260
+ "[Se@@]": 99,
261
+ "[Si+]": 100,
262
+ "9": 101,
263
+ "[InH]": 102,
264
+ "[Ce]": 103,
265
+ "[I@@]": 104,
266
+ "[P@-]": 105,
267
+ "[Se-]": 106,
268
+ "[Sc]": 107,
269
+ "[SH+]": 108,
270
+ "[I@@H]": 109,
271
+ "[I@H]": 110,
272
+ "[c]": 111,
273
+ "[SH-]": 112,
274
+ "[N@]": 113,
275
+ "[I@]": 114,
276
+ "[N@@]": 115,
277
+ "[P@@-]": 116,
278
+ "%10": 117,
279
+ "[nH+]": 118,
280
+ "[Ne]": 119,
281
+ "[Si@+]": 120,
282
+ "[Cl+]": 121,
283
+ "[Br+]": 122,
284
+ "[N@H+]": 123,
285
+ "[S@@-]": 124,
286
+ "[N@+]": 125,
287
+ "[n-]": 126,
288
+ "[NH2+]": 127,
289
+ "[B]": 128,
290
+ "[Cn+]": 129,
291
+ "[N@@H+]": 130,
292
+ "[s+]": 131,
293
+ "[si]": 132,
294
+ "[N@@+]": 133,
295
+ "p": 134,
296
+ "[P@@+]": 135,
297
+ "[Sc@H]": 136,
298
+ "[cH+]": 137,
299
+ "[ScH]": 138,
300
+ "[Fe+]": 139,
301
+ "[o+]": 140,
302
+ "[cH-]": 141,
303
+ "[NH]": 142,
304
+ "[Se+]": 143,
305
+ "[Fe-]": 144,
306
+ "[I@+]": 145,
307
+ "[Sc@]": 146,
308
+ "[B-]": 147,
309
+ "[SeH]": 148,
310
+ "[Se@+]": 149,
311
+ "[FeH]": 150,
312
+ "[Fe@@]": 151,
313
+ "[Cn-]": 152,
314
+ "[S@@H+]": 153,
315
+ "%11": 154,
316
+ "[S@H-]": 155,
317
+ "[S@@H-]": 156,
318
+ "[CH2-]": 157,
319
+ "[CnH-]": 158,
320
+ "[In]": 159,
321
+ "[S@H+]": 160,
322
+ "[pH]": 161,
323
+ "[PH+]": 162,
324
+ "[Fe@@H]": 163,
325
+ "[PH-]": 164,
326
+ "b": 165,
327
+ "[Si@-]": 166,
328
+ "[si-]": 167,
329
+ "[sH+]": 168,
330
+ "[Fe@H]": 169,
331
+ "[P@@H-]": 170,
332
+ "[Sn+]": 171,
333
+ "[P@@H+]": 172,
334
+ "[P@H+]": 173,
335
+ "[P@H-]": 174,
336
+ "[Se@H]": 175,
337
+ "[Se@-]": 176,
338
+ "[Sc@@H]": 177,
339
+ "[I@-]": 178,
340
+ "[Sn]": 179,
341
+ "[IH-]": 180,
342
+ "[IH+]": 181,
343
+ "[F+]": 182,
344
+ "[Fe@]": 183,
345
+ "[Sn@]": 184,
346
+ "[CnH+]": 185,
347
+ "[Si@H-]": 186,
348
+ "[NH-]": 187,
349
+ "[Si@H+]": 188,
350
+ "[Sc-]": 189,
351
+ "[Se@@H]": 190,
352
+ "[Sn-]": 191,
353
+ "[se+]": 192,
354
+ "[Si@@+]": 193,
355
+ "[SiH+]": 194,
356
+ "[PH2-]": 195,
357
+ "[CeH]": 196,
358
+ "[SH3]": 197,
359
+ "[CH2]": 198,
360
+ "[Sc+]": 199,
361
+ "[Si@@-]": 200
362
+ }
363
+ }
364
+ }
0_Asym/140438119132512_Transformer/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": true, "model_max_length": 512, "name_or_path": "/netdisk/xvg/smiles_tokenizer", "model_type": "bert", "special_tokens_map_file": "/home/xvg/affinity_pred/train/tokenizer_regex/tokenizer/special_tokens_map.json", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
0_Asym/140438119132512_Transformer/vocab.txt ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [PAD]
2
+ [UNK]
3
+ [CLS]
4
+ [SEP]
5
+ [MASK]
6
+ =
7
+ F
8
+ N
9
+ 1
10
+ n
11
+ O
12
+ C
13
+ S
14
+ c
15
+ 3
16
+ Cl
17
+ o
18
+ [nH]
19
+ s
20
+ -
21
+ 2
22
+ 4
23
+ [C@H]
24
+ )
25
+ (
26
+ Br
27
+ #
28
+ [C@@H]
29
+ [C@]
30
+ [C@@]
31
+ [O-]
32
+ [N+]
33
+ [C]
34
+ I
35
+ 5
36
+ [C-]
37
+ [CH]
38
+ /
39
+ [S@]
40
+ [S@@]
41
+ [SH]
42
+ [Si]
43
+ [n+]
44
+ [C+]
45
+ [S@H]
46
+ [Fe]
47
+ [S@@H]
48
+ B
49
+ [O]
50
+ [S-]
51
+ [P@H]
52
+ [PH]
53
+ 6
54
+ [O+]
55
+ [P@@H]
56
+ [NH+]
57
+ [S]
58
+ \
59
+ [P@]
60
+ [P@@]
61
+ [N]
62
+ P
63
+ [S+]
64
+ [P]
65
+ [IH]
66
+ [se]
67
+ [C@+]
68
+ [Si@]
69
+ [c-]
70
+ 7
71
+ [C@-]
72
+ [Si@@]
73
+ [Se]
74
+ [Si@H]
75
+ [S@+]
76
+ [N-]
77
+ [CnH]
78
+ [c+]
79
+ [P+]
80
+ [Si@@H]
81
+ [SiH]
82
+ [P-]
83
+ [I]
84
+ [S@-]
85
+ [CH+]
86
+ [C@H+]
87
+ [I+]
88
+ [C@@-]
89
+ 8
90
+ [Si-]
91
+ [C@@H+]
92
+ [I-]
93
+ [CH-]
94
+ [P@+]
95
+ [Cn]
96
+ [C@@+]
97
+ [SnH]
98
+ [Se@]
99
+ [S@@+]
100
+ [Se@@]
101
+ [Si+]
102
+ 9
103
+ [InH]
104
+ [Ce]
105
+ [I@@]
106
+ [P@-]
107
+ [Se-]
108
+ [Sc]
109
+ [SH+]
110
+ [I@@H]
111
+ [I@H]
112
+ [c]
113
+ [SH-]
114
+ [N@]
115
+ [I@]
116
+ [N@@]
117
+ [P@@-]
118
+ %10
119
+ [nH+]
120
+ [Ne]
121
+ [Si@+]
122
+ [Cl+]
123
+ [Br+]
124
+ [N@H+]
125
+ [S@@-]
126
+ [N@+]
127
+ [n-]
128
+ [NH2+]
129
+ [B]
130
+ [Cn+]
131
+ [N@@H+]
132
+ [s+]
133
+ [si]
134
+ [N@@+]
135
+ p
136
+ [P@@+]
137
+ [Sc@H]
138
+ [cH+]
139
+ [ScH]
140
+ [Fe+]
141
+ [o+]
142
+ [cH-]
143
+ [NH]
144
+ [Se+]
145
+ [Fe-]
146
+ [I@+]
147
+ [Sc@]
148
+ [B-]
149
+ [SeH]
150
+ [Se@+]
151
+ [FeH]
152
+ [Fe@@]
153
+ [Cn-]
154
+ [S@@H+]
155
+ %11
156
+ [S@H-]
157
+ [S@@H-]
158
+ [CH2-]
159
+ [CnH-]
160
+ [In]
161
+ [S@H+]
162
+ [pH]
163
+ [PH+]
164
+ [Fe@@H]
165
+ [PH-]
166
+ b
167
+ [Si@-]
168
+ [si-]
169
+ [sH+]
170
+ [Fe@H]
171
+ [P@@H-]
172
+ [Sn+]
173
+ [P@@H+]
174
+ [P@H+]
175
+ [P@H-]
176
+ [Se@H]
177
+ [Se@-]
178
+ [Sc@@H]
179
+ [I@-]
180
+ [Sn]
181
+ [IH-]
182
+ [IH+]
183
+ [F+]
184
+ [Fe@]
185
+ [Sn@]
186
+ [CnH+]
187
+ [Si@H-]
188
+ [NH-]
189
+ [Si@H+]
190
+ [Sc-]
191
+ [Se@@H]
192
+ [Sn-]
193
+ [se+]
194
+ [Si@@+]
195
+ [SiH+]
196
+ [PH2-]
197
+ [CeH]
198
+ [SH3]
199
+ [CH2]
200
+ [Sc+]
201
+ [Si@@-]
0_Asym/140442694833872_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
0_Asym/140442694833968_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1024, "out_features": 1024, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
0_Asym/140442694833968_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2112cfc89b0cd85577f83263f374d84ee1e769934ac06fcff38f5fcfea09a3dc
3
+ size 4199463
0_Asym/140442694834016_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
0_Asym/140442694834112_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 768, "out_features": 768, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
0_Asym/140442694834112_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1faf07d8b923bbd985e3784ae89adc67e3e1b2763b35ec7dd7ee5185b2a0115
3
+ size 2363431
0_Asym/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "types": {
3
+ "140438115853120_Transformer": "sentence_transformers.models.Transformer",
4
+ "140442694833872_Pooling": "sentence_transformers.models.Pooling",
5
+ "140442694833968_Dense": "sentence_transformers.models.Dense",
6
+ "140438119132512_Transformer": "sentence_transformers.models.Transformer",
7
+ "140442694834016_Pooling": "sentence_transformers.models.Pooling",
8
+ "140442694834112_Dense": "sentence_transformers.models.Dense"
9
+ },
10
+ "structure": {
11
+ "protein": [
12
+ "140438115853120_Transformer",
13
+ "140442694833872_Pooling",
14
+ "140442694833968_Dense"
15
+ ],
16
+ "ligand": [
17
+ "140438119132512_Transformer",
18
+ "140442694834016_Pooling",
19
+ "140442694834112_Dense"
20
+ ]
21
+ },
22
+ "parameters": {
23
+ "allow_empty_key": true
24
+ }
25
+ }
1_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1792, "out_features": 1000, "bias": true, "activation_function": "torch.nn.modules.activation.GELU"}
1_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03baa628ca6617bf932ce2333a237369df8bc199a37966f6bbf00e0199134a81
3
+ size 7173031
2_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1000, "out_features": 1000, "bias": true, "activation_function": "torch.nn.modules.activation.GELU"}
2_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:785c5f35f21b156f9ec22718c2c992fa2d7b99fd274186af4bb102345cc0a8d9
3
+ size 4005031
3_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1000, "out_features": 1000, "bias": true, "activation_function": "torch.nn.modules.activation.GELU"}
3_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef917effce01462ffc6d989b53bb3987b60a2d724788037f38b1bed8ec5be15f
3
+ size 4005031
4_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1000, "out_features": 1, "bias": true, "activation_function": "torch.nn.modules.linear.Identity"}
4_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf4f3597e6ede01973f5a82ee16c2ee4d0dfac42f7eac52399faf4ef585b4b3
3
+ size 5095
5_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1, "out_features": 1, "bias": true, "activation_function": "torch.nn.modules.linear.Identity"}
5_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3c35313827b4d38e21bbf665cd5b9e2b261d24f4d9eafb31fe19a3b7f089c36
3
+ size 1127
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - sentence-transformers
5
+ - feature-extraction
6
+ - sentence-similarity
7
+ ---
8
+
9
+ # jglaser/protein-ligand-mlp-1
10
+
11
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 1-dimensional dense vector space and can be used for tasks like clustering or semantic search.
12
+
13
+ <!--- Describe your model here -->
14
+
15
+ ## Usage (Sentence-Transformers)
16
+
17
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
18
+
19
+ ```
20
+ pip install -U sentence-transformers
21
+ ```
22
+
23
+ Then you can use the model like this:
24
+
25
+ ```python
26
+ from sentence_transformers import SentenceTransformer
27
+ sentences = ["This is an example sentence", "Each sentence is converted"]
28
+
29
+ model = SentenceTransformer('jglaser/protein-ligand-mlp-1')
30
+ embeddings = model.encode(sentences)
31
+ print(embeddings)
32
+ ```
33
+
34
+
35
+
36
+ ## Evaluation Results
37
+
38
+ <!--- Describe how your model was evaluated -->
39
+
40
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=jglaser/protein-ligand-mlp-1)
41
+
42
+
43
+
44
+ ## Full Model Architecture
45
+ ```
46
+ SentenceTransformer(
47
+ (0): Asym(
48
+ (protein-0): Transformer({'max_seq_length': 2048, 'do_lower_case': False}) with Transformer model: BertModel
49
+ (protein-1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
50
+ (protein-2): Dense({'in_features': 1024, 'out_features': 1024, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
51
+ (ligand-0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
52
+ (ligand-1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
53
+ (ligand-2): Dense({'in_features': 768, 'out_features': 768, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
54
+ )
55
+ (1): Dense({'in_features': 1792, 'out_features': 1000, 'bias': True, 'activation_function': 'torch.nn.modules.activation.GELU'})
56
+ (2): Dense({'in_features': 1000, 'out_features': 1000, 'bias': True, 'activation_function': 'torch.nn.modules.activation.GELU'})
57
+ (3): Dense({'in_features': 1000, 'out_features': 1000, 'bias': True, 'activation_function': 'torch.nn.modules.activation.GELU'})
58
+ (4): Dense({'in_features': 1000, 'out_features': 1, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity'})
59
+ (5): Dense({'in_features': 1, 'out_features': 1, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity'})
60
+ )
61
+ ```
62
+
63
+ ## Citing & Authors
64
+
65
+ <!--- Describe where people can find more information -->
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.0",
4
+ "transformers": "4.17.0.dev0",
5
+ "pytorch": "1.10.2"
6
+ }
7
+ }
modules.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "0_Asym",
6
+ "type": "sentence_transformers.models.Asym"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Dense",
12
+ "type": "sentence_transformers.models.Dense"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ },
20
+ {
21
+ "idx": 3,
22
+ "name": "3",
23
+ "path": "3_Dense",
24
+ "type": "sentence_transformers.models.Dense"
25
+ },
26
+ {
27
+ "idx": 4,
28
+ "name": "4",
29
+ "path": "4_Dense",
30
+ "type": "sentence_transformers.models.Dense"
31
+ },
32
+ {
33
+ "idx": 5,
34
+ "name": "5",
35
+ "path": "5_Dense",
36
+ "type": "sentence_transformers.models.Dense"
37
+ }
38
+ ]