Muennighoff committed on
Commit
1e82af0
1 Parent(s): fa15541

Better model with bs=256

Browse files
README.md CHANGED
@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
14
 
15
  ## Evaluation Results
16
 
17
- For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
18
 
19
 
20
  ## Training
@@ -70,4 +70,4 @@ SentenceTransformer(
70
  journal={arXiv preprint arXiv:2202.08904},
71
  year={2022}
72
  }
73
- ```
 
14
 
15
  ## Evaluation Results
16
 
17
+ For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
18
 
19
 
20
  ## Training
 
70
  journal={arXiv preprint arXiv:2202.08904},
71
  year={2022}
72
  }
73
+ ```
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"{SOS}": 50258, "[SOS]": 50257}
 
1
+ {"[SOS]": 50257, "{SOS}": 50258}
config.json CHANGED
@@ -47,7 +47,7 @@
47
  "summary_type": "cls_index",
48
  "summary_use_proj": true,
49
  "torch_dtype": "float32",
50
- "transformers_version": "4.11.3",
51
  "use_cache": true,
52
  "vocab_size": 50259,
53
  "window_size": 256
 
47
  "summary_type": "cls_index",
48
  "summary_use_proj": true,
49
  "torch_dtype": "float32",
50
+ "transformers_version": "4.20.0.dev0",
51
  "use_cache": true,
52
  "vocab_size": 50259,
53
  "window_size": 256
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.1.0",
4
- "transformers": "4.11.3",
5
- "pytorch": "1.10.1"
6
  }
7
  }
 
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.1.0",
4
+ "transformers": "4.20.0.dev0",
5
+ "pytorch": "1.10.2"
6
  }
7
  }
eval/beir.json ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "SGPT-125M-weightedmean-msmarco-specb-bitfit": {
3
+ "bioasq": {
4
+ "NDCG@1": 0.316,
5
+ "NDCG@3": 0.28125,
6
+ "NDCG@5": 0.27137,
7
+ "NDCG@10": 0.27215,
8
+ "NDCG@100": 0.3218,
9
+ "NDCG@1000": 0.35137
10
+ },
11
+ "nq": {
12
+ "NDCG@1": 0.17439,
13
+ "NDCG@3": 0.23938,
14
+ "NDCG@5": 0.26684,
15
+ "NDCG@10": 0.29726,
16
+ "NDCG@100": 0.34802,
17
+ "NDCG@1000": 0.36888
18
+ },
19
+ "dbpedia-entity": {
20
+ "NDCG@1": 0.31875,
21
+ "NDCG@3": 0.25912,
22
+ "NDCG@5": 0.24217,
23
+ "NDCG@10": 0.22673,
24
+ "NDCG@100": 0.24325,
25
+ "NDCG@1000": 0.29203
26
+ },
27
+ "hotpotqa": {
28
+ "NDCG@1": 0.51101,
29
+ "NDCG@3": 0.37672,
30
+ "NDCG@5": 0.39295,
31
+ "NDCG@10": 0.4089,
32
+ "NDCG@100": 0.43765,
33
+ "NDCG@1000": 0.45428
34
+ },
35
+ "fever": {
36
+ "NDCG@1": 0.4703,
37
+ "NDCG@3": 0.55269,
38
+ "NDCG@5": 0.58116,
39
+ "NDCG@10": 0.60497,
40
+ "NDCG@100": 0.63091,
41
+ "NDCG@1000": 0.63755
42
+ },
43
+ "cqadupstack_android": {
44
+ "NDCG@1": 0.22604,
45
+ "NDCG@3": 0.2575,
46
+ "NDCG@5": 0.2753,
47
+ "NDCG@10": 0.29861,
48
+ "NDCG@100": 0.34719,
49
+ "NDCG@1000": 0.38073
50
+ },
51
+ "cqadupstack_english": {
52
+ "NDCG@1": 0.23885,
53
+ "NDCG@3": 0.27032,
54
+ "NDCG@5": 0.28385,
55
+ "NDCG@10": 0.30398,
56
+ "NDCG@100": 0.34259,
57
+ "NDCG@1000": 0.36855
58
+ },
59
+ "cqadupstack_gaming": {
60
+ "NDCG@1": 0.25956,
61
+ "NDCG@3": 0.31655,
62
+ "NDCG@5": 0.3402,
63
+ "NDCG@10": 0.35956,
64
+ "NDCG@100": 0.40891,
65
+ "NDCG@1000": 0.43256
66
+ },
67
+ "cqadupstack_gis": {
68
+ "NDCG@1": 0.16949,
69
+ "NDCG@3": 0.1979,
70
+ "NDCG@5": 0.21682,
71
+ "NDCG@10": 0.23385,
72
+ "NDCG@100": 0.27691,
73
+ "NDCG@1000": 0.30686
74
+ },
75
+ "cqadupstack_mathematica": {
76
+ "NDCG@1": 0.10821,
77
+ "NDCG@3": 0.14519,
78
+ "NDCG@5": 0.16025,
79
+ "NDCG@10": 0.18252,
80
+ "NDCG@100": 0.23551,
81
+ "NDCG@1000": 0.27061
82
+ },
83
+ "cqadupstack_physics": {
84
+ "NDCG@1": 0.20789,
85
+ "NDCG@3": 0.24259,
86
+ "NDCG@5": 0.26274,
87
+ "NDCG@10": 0.28917,
88
+ "NDCG@100": 0.34858,
89
+ "NDCG@1000": 0.37641
90
+ },
91
+ "cqadupstack_programmers": {
92
+ "NDCG@1": 0.18721,
93
+ "NDCG@3": 0.22957,
94
+ "NDCG@5": 0.24378,
95
+ "NDCG@10": 0.26469,
96
+ "NDCG@100": 0.31762,
97
+ "NDCG@1000": 0.3495
98
+ },
99
+ "cqadupstack_stats": {
100
+ "NDCG@1": 0.16718,
101
+ "NDCG@3": 0.19468,
102
+ "NDCG@5": 0.20881,
103
+ "NDCG@10": 0.22403,
104
+ "NDCG@100": 0.26555,
105
+ "NDCG@1000": 0.29332
106
+ },
107
+ "cqadupstack_wordpress": {
108
+ "NDCG@1": 0.12754,
109
+ "NDCG@3": 0.16302,
110
+ "NDCG@5": 0.18037,
111
+ "NDCG@10": 0.19715,
112
+ "NDCG@100": 0.23863,
113
+ "NDCG@1000": 0.27293
114
+ },
115
+ "cqadupstack_webmasters": {
116
+ "NDCG@1": 0.18972,
117
+ "NDCG@3": 0.2142,
118
+ "NDCG@5": 0.23037,
119
+ "NDCG@10": 0.24997,
120
+ "NDCG@100": 0.30509,
121
+ "NDCG@1000": 0.34226
122
+ },
123
+ "cqadupstack_unix": {
124
+ "NDCG@1": 0.16325,
125
+ "NDCG@3": 0.19538,
126
+ "NDCG@5": 0.20865,
127
+ "NDCG@10": 0.22792,
128
+ "NDCG@100": 0.2762,
129
+ "NDCG@1000": 0.30613
130
+ },
131
+ "cqadupstack_tex": {
132
+ "NDCG@1": 0.10392,
133
+ "NDCG@3": 0.12618,
134
+ "NDCG@5": 0.13715,
135
+ "NDCG@10": 0.15087,
136
+ "NDCG@100": 0.19361,
137
+ "NDCG@1000": 0.22519
138
+ },
139
+ "cqadupstack": {
140
+ "NDCG@1": 0.17907166666666666,
141
+ "NDCG@3": 0.21275666666666665,
142
+ "NDCG@5": 0.22902416666666667,
143
+ "NDCG@10": 0.24852666666666667,
144
+ "NDCG@100": 0.2963658333333334,
145
+ "NDCG@1000": 0.3270875
146
+ },
147
+ "signal1m": {
148
+ "NDCG@1": 0.34021,
149
+ "NDCG@3": 0.30594,
150
+ "NDCG@5": 0.2707,
151
+ "NDCG@10": 0.23598,
152
+ "NDCG@100": 0.22928,
153
+ "NDCG@1000": 0.28209
154
+ },
155
+ "trec-news": {
156
+ "NDCG@1": 0.37135,
157
+ "NDCG@3": 0.36728,
158
+ "NDCG@5": 0.3443,
159
+ "NDCG@10": 0.31943,
160
+ "NDCG@100": 0.32159,
161
+ "NDCG@1000": 0.41124
162
+ },
163
+ "arguana": {
164
+ "NDCG@1": 0.22404,
165
+ "NDCG@3": 0.3505,
166
+ "NDCG@5": 0.401,
167
+ "NDCG@10": 0.45468,
168
+ "NDCG@100": 0.50377,
169
+ "NDCG@1000": 0.50896
170
+ },
171
+ "webis-touche2020": {
172
+ "NDCG@1": 0.31633,
173
+ "NDCG@3": 0.26398,
174
+ "NDCG@5": 0.24418,
175
+ "NDCG@10": 0.22969,
176
+ "NDCG@100": 0.34778,
177
+ "NDCG@1000": 0.45638
178
+ },
179
+ "quora": {
180
+ "NDCG@1": 0.6251,
181
+ "NDCG@3": 0.68147,
182
+ "NDCG@5": 0.70473,
183
+ "NDCG@10": 0.72977,
184
+ "NDCG@100": 0.76027,
185
+ "NDCG@1000": 0.76517
186
+ },
187
+ "scifact": {
188
+ "NDCG@1": 0.45333,
189
+ "NDCG@3": 0.52618,
190
+ "NDCG@5": 0.54557,
191
+ "NDCG@10": 0.56923,
192
+ "NDCG@100": 0.60721,
193
+ "NDCG@1000": 0.62065
194
+ },
195
+ "scidocs": {
196
+ "NDCG@1": 0.145,
197
+ "NDCG@3": 0.12064,
198
+ "NDCG@5": 0.1016,
199
+ "NDCG@10": 0.12118,
200
+ "NDCG@100": 0.17639,
201
+ "NDCG@1000": 0.22638
202
+ },
203
+ "nfcorpus": {
204
+ "NDCG@1": 0.31579,
205
+ "NDCG@3": 0.2681,
206
+ "NDCG@5": 0.25146,
207
+ "NDCG@10": 0.22776,
208
+ "NDCG@100": 0.21672,
209
+ "NDCG@1000": 0.30891
210
+ },
211
+ "robust04": {
212
+ "NDCG@1": 0.41365,
213
+ "NDCG@3": 0.38515,
214
+ "NDCG@5": 0.35751,
215
+ "NDCG@10": 0.31325,
216
+ "NDCG@100": 0.25065,
217
+ "NDCG@1000": 0.32678
218
+ },
219
+ "climate-fever": {
220
+ "NDCG@1": 0.19023,
221
+ "NDCG@3": 0.17124,
222
+ "NDCG@5": 0.18861,
223
+ "NDCG@10": 0.21839,
224
+ "NDCG@100": 0.28314,
225
+ "NDCG@1000": 0.3155
226
+ },
227
+ "trec-covid": {
228
+ "NDCG@1": 0.87,
229
+ "NDCG@3": 0.82704,
230
+ "NDCG@5": 0.79357,
231
+ "NDCG@10": 0.73818,
232
+ "NDCG@100": 0.48765,
233
+ "NDCG@1000": 0.39645
234
+ },
235
+ "fiqa": {
236
+ "NDCG@1": 0.19753,
237
+ "NDCG@3": 0.18319,
238
+ "NDCG@5": 0.19438,
239
+ "NDCG@10": 0.21115,
240
+ "NDCG@100": 0.2733,
241
+ "NDCG@1000": 0.31315
242
+ },
243
+ "average": {
244
+ "NDCG@1": 0.3573378703703704,
245
+ "NDCG@3": 0.35403481481481475,
246
+ "NDCG@5": 0.3545068981481481,
247
+ "NDCG@10": 0.35706814814814813,
248
+ "NDCG@100": 0.3742081018518519,
249
+ "NDCG@1000": 0.4090476388888889
250
+ },
251
+ "subaverage": {
252
+ "NDCG@1": 0.40840090909090904,
253
+ "NDCG@3": 0.40547545454545453,
254
+ "NDCG@5": 0.4127072727272727,
255
+ "NDCG@10": 0.41995,
256
+ "NDCG@100": 0.43560454545454547,
257
+ "NDCG@1000": 0.46082090909090917
258
+ },
259
+ "subsubaverage": {
260
+ "NDCG@1": 0.267138,
261
+ "NDCG@3": 0.289722,
262
+ "NDCG@5": 0.298802,
263
+ "NDCG@10": 0.31679999999999997,
264
+ "NDCG@100": 0.35547799999999996,
265
+ "NDCG@1000": 0.39561
266
+ },
267
+ "msmarco": {
268
+ "NDCG@1": 0.14785,
269
+ "NDCG@3": 0.2189,
270
+ "NDCG@5": 0.24875,
271
+ "NDCG@10": 0.27945,
272
+ "NDCG@100": 0.34107,
273
+ "NDCG@1000": 0.36339
274
+ }
275
+ }
276
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff8eb75758c05dbb0d40d5e4fea9ec8b707b5bb50d3e439d05273bb0b6b0c16b
3
- size 551196689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f11c87e27372670924ad3b066f1159073753895cba56a30df56af485580521
3
+ size 551189073
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-125M", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-125M", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}