tomaarsen HF staff committed on
Commit
e440089
1 Parent(s): ca4c94e

Add new SentenceTransformer model.

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,770 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ library_name: sentence-transformers
6
+ tags:
7
+ - sentence-transformers
8
+ - sentence-similarity
9
+ - feature-extraction
10
+ - dataset_size:1M<n<10M
11
+ - loss:MultipleNegativesRankingLoss
12
+ base_model: microsoft/mpnet-base
13
+ datasets:
14
+ - sentence-transformers/gooaq
15
+ metrics:
16
+ - cosine_accuracy@1
17
+ - cosine_accuracy@3
18
+ - cosine_accuracy@5
19
+ - cosine_accuracy@10
20
+ - cosine_precision@1
21
+ - cosine_precision@3
22
+ - cosine_precision@5
23
+ - cosine_precision@10
24
+ - cosine_recall@1
25
+ - cosine_recall@3
26
+ - cosine_recall@5
27
+ - cosine_recall@10
28
+ - cosine_ndcg@10
29
+ - cosine_mrr@10
30
+ - cosine_map@100
31
+ - dot_accuracy@1
32
+ - dot_accuracy@3
33
+ - dot_accuracy@5
34
+ - dot_accuracy@10
35
+ - dot_precision@1
36
+ - dot_precision@3
37
+ - dot_precision@5
38
+ - dot_precision@10
39
+ - dot_recall@1
40
+ - dot_recall@3
41
+ - dot_recall@5
42
+ - dot_recall@10
43
+ - dot_ndcg@10
44
+ - dot_mrr@10
45
+ - dot_map@100
46
+ widget:
47
+ - source_sentence: nba 2k how to get vc?
48
+ sentences:
49
+ - where are the pvp vendors in legion?
50
+ - what is career development goals?
51
+ - is old passport required while travelling?
52
+ - source_sentence: 26 is how much of 52?
53
+ sentences:
54
+ - what is 52 to decimal?
55
+ - can osu still make the playoffs?
56
+ - light waves are electromagnetic waves?
57
+ - source_sentence: who is steve g jones?
58
+ sentences:
59
+ - is indya adrianna moore a man?
60
+ - how to change my password on fb?
61
+ - is lavender oil good for your hair?
62
+ - source_sentence: what is a coxal bone?
63
+ sentences:
64
+ - what causes knots in neck muscles?
65
+ - are tag heuer movements in house?
66
+ - is 80 battery health good?
67
+ - source_sentence: do oats make you fat?
68
+ sentences:
69
+ - can lattes make you fat?
70
+ - are u haul locations franchised?
71
+ - how is lyme disease treated?
72
+ pipeline_tag: sentence-similarity
73
+ co2_eq_emissions:
74
+ emissions: 636.2415070661234
75
+ energy_consumed: 1.636836206312608
76
+ source: codecarbon
77
+ training_type: fine-tuning
78
+ on_cloud: false
79
+ cpu_model: 13th Gen Intel(R) Core(TM) i7-13700K
80
+ ram_total_size: 31.777088165283203
81
+ hours_used: 4.514
82
+ hardware_used: 1 x NVIDIA GeForce RTX 3090
83
+ model-index:
84
+ - name: MPNet base trained on GooAQ triplets
85
+ results:
86
+ - task:
87
+ type: information-retrieval
88
+ name: Information Retrieval
89
+ dataset:
90
+ name: gooaq dev
91
+ type: gooaq-dev
92
+ metrics:
93
+ - type: cosine_accuracy@1
94
+ value: 0.7198
95
+ name: Cosine Accuracy@1
96
+ - type: cosine_accuracy@3
97
+ value: 0.884
98
+ name: Cosine Accuracy@3
99
+ - type: cosine_accuracy@5
100
+ value: 0.9305
101
+ name: Cosine Accuracy@5
102
+ - type: cosine_accuracy@10
103
+ value: 0.9709
104
+ name: Cosine Accuracy@10
105
+ - type: cosine_precision@1
106
+ value: 0.7198
107
+ name: Cosine Precision@1
108
+ - type: cosine_precision@3
109
+ value: 0.29466666666666663
110
+ name: Cosine Precision@3
111
+ - type: cosine_precision@5
112
+ value: 0.1861
113
+ name: Cosine Precision@5
114
+ - type: cosine_precision@10
115
+ value: 0.09709000000000002
116
+ name: Cosine Precision@10
117
+ - type: cosine_recall@1
118
+ value: 0.7198
119
+ name: Cosine Recall@1
120
+ - type: cosine_recall@3
121
+ value: 0.884
122
+ name: Cosine Recall@3
123
+ - type: cosine_recall@5
124
+ value: 0.9305
125
+ name: Cosine Recall@5
126
+ - type: cosine_recall@10
127
+ value: 0.9709
128
+ name: Cosine Recall@10
129
+ - type: cosine_ndcg@10
130
+ value: 0.8490972112228806
131
+ name: Cosine Ndcg@10
132
+ - type: cosine_mrr@10
133
+ value: 0.8095713888888812
134
+ name: Cosine Mrr@10
135
+ - type: cosine_map@100
136
+ value: 0.8111457785591406
137
+ name: Cosine Map@100
138
+ - type: dot_accuracy@1
139
+ value: 0.7073
140
+ name: Dot Accuracy@1
141
+ - type: dot_accuracy@3
142
+ value: 0.877
143
+ name: Dot Accuracy@3
144
+ - type: dot_accuracy@5
145
+ value: 0.9244
146
+ name: Dot Accuracy@5
147
+ - type: dot_accuracy@10
148
+ value: 0.9669
149
+ name: Dot Accuracy@10
150
+ - type: dot_precision@1
151
+ value: 0.7073
152
+ name: Dot Precision@1
153
+ - type: dot_precision@3
154
+ value: 0.2923333333333333
155
+ name: Dot Precision@3
156
+ - type: dot_precision@5
157
+ value: 0.18488000000000002
158
+ name: Dot Precision@5
159
+ - type: dot_precision@10
160
+ value: 0.09669000000000003
161
+ name: Dot Precision@10
162
+ - type: dot_recall@1
163
+ value: 0.7073
164
+ name: Dot Recall@1
165
+ - type: dot_recall@3
166
+ value: 0.877
167
+ name: Dot Recall@3
168
+ - type: dot_recall@5
169
+ value: 0.9244
170
+ name: Dot Recall@5
171
+ - type: dot_recall@10
172
+ value: 0.9669
173
+ name: Dot Recall@10
174
+ - type: dot_ndcg@10
175
+ value: 0.8412144933973646
176
+ name: Dot Ndcg@10
177
+ - type: dot_mrr@10
178
+ value: 0.8004067857142795
179
+ name: Dot Mrr@10
180
+ - type: dot_map@100
181
+ value: 0.8022667466578848
182
+ name: Dot Map@100
183
+ ---
184
+
185
+ # MPNet base trained on GooAQ triplets
186
+
187
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on the [sentence-transformers/gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
188
+
189
+ ## Model Details
190
+
191
+ ### Model Description
192
+ - **Model Type:** Sentence Transformer
193
+ - **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
194
+ - **Maximum Sequence Length:** 512 tokens
195
+ - **Output Dimensionality:** 768 dimensions
196
+ - **Similarity Function:** Cosine Similarity
197
+ - **Training Dataset:**
198
+ - [sentence-transformers/gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq)
199
+ - **Language:** en
200
+ - **License:** apache-2.0
201
+
202
+ ### Model Sources
203
+
204
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
205
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
206
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
207
+
208
+ ### Full Model Architecture
209
+
210
+ ```
211
+ SentenceTransformer(
212
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
213
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
214
+ )
215
+ ```
216
+
217
+ ## Usage
218
+
219
+ ### Direct Usage (Sentence Transformers)
220
+
221
+ First install the Sentence Transformers library:
222
+
223
+ ```bash
224
+ pip install -U sentence-transformers
225
+ ```
226
+
227
+ Then you can load this model and run inference.
228
+ ```python
229
+ from sentence_transformers import SentenceTransformer
230
+
231
+ # Download from the 🤗 Hub
232
+ model = SentenceTransformer("tomaarsen/mpnet-base-gooaq")
233
+ # Run inference
234
+ sentences = [
235
+ 'do oats make you fat?',
236
+ 'can lattes make you fat?',
237
+ 'are u haul locations franchised?',
238
+ ]
239
+ embeddings = model.encode(sentences)
240
+ print(embeddings.shape)
241
+ # [3, 768]
242
+
243
+ # Get the similarity scores for the embeddings
244
+ similarities = model.similarity(embeddings, embeddings)
245
+ print(similarities.shape)
246
+ # [3, 3]
247
+ ```
248
+
249
+ <!--
250
+ ### Direct Usage (Transformers)
251
+
252
+ <details><summary>Click to see the direct usage in Transformers</summary>
253
+
254
+ </details>
255
+ -->
256
+
257
+ <!--
258
+ ### Downstream Usage (Sentence Transformers)
259
+
260
+ You can finetune this model on your own dataset.
261
+
262
+ <details><summary>Click to expand</summary>
263
+
264
+ </details>
265
+ -->
266
+
267
+ <!--
268
+ ### Out-of-Scope Use
269
+
270
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
271
+ -->
272
+
273
+ ## Evaluation
274
+
275
+ ### Metrics
276
+
277
+ #### Information Retrieval
278
+ * Dataset: `gooaq-dev`
279
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
280
+
281
+ | Metric | Value |
282
+ |:--------------------|:-----------|
283
+ | cosine_accuracy@1 | 0.7198 |
284
+ | cosine_accuracy@3 | 0.884 |
285
+ | cosine_accuracy@5 | 0.9305 |
286
+ | cosine_accuracy@10 | 0.9709 |
287
+ | cosine_precision@1 | 0.7198 |
288
+ | cosine_precision@3 | 0.2947 |
289
+ | cosine_precision@5 | 0.1861 |
290
+ | cosine_precision@10 | 0.0971 |
291
+ | cosine_recall@1 | 0.7198 |
292
+ | cosine_recall@3 | 0.884 |
293
+ | cosine_recall@5 | 0.9305 |
294
+ | cosine_recall@10 | 0.9709 |
295
+ | cosine_ndcg@10 | 0.8491 |
296
+ | cosine_mrr@10 | 0.8096 |
297
+ | **cosine_map@100** | **0.8111** |
298
+ | dot_accuracy@1 | 0.7073 |
299
+ | dot_accuracy@3 | 0.877 |
300
+ | dot_accuracy@5 | 0.9244 |
301
+ | dot_accuracy@10 | 0.9669 |
302
+ | dot_precision@1 | 0.7073 |
303
+ | dot_precision@3 | 0.2923 |
304
+ | dot_precision@5 | 0.1849 |
305
+ | dot_precision@10 | 0.0967 |
306
+ | dot_recall@1 | 0.7073 |
307
+ | dot_recall@3 | 0.877 |
308
+ | dot_recall@5 | 0.9244 |
309
+ | dot_recall@10 | 0.9669 |
310
+ | dot_ndcg@10 | 0.8412 |
311
+ | dot_mrr@10 | 0.8004 |
312
+ | dot_map@100 | 0.8023 |
313
+
314
+ <!--
315
+ ## Bias, Risks and Limitations
316
+
317
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
318
+ -->
319
+
320
+ <!--
321
+ ### Recommendations
322
+
323
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
324
+ -->
325
+
326
+ ## Training Details
327
+
328
+ ### Training Dataset
329
+
330
+ #### sentence-transformers/gooaq
331
+
332
+ * Dataset: [sentence-transformers/gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq) at [b089f72](https://huggingface.co/datasets/sentence-transformers/gooaq/tree/b089f728748a068b7bc5234e5bcf5b25e3c8279c)
333
+ * Size: 3,002,496 training samples
334
+ * Columns: <code>question</code> and <code>answer</code>
335
+ * Approximate statistics based on the first 1000 samples:
336
+ | | question | answer |
337
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
338
+ | type | string | string |
339
+ | details | <ul><li>min: 8 tokens</li><li>mean: 11.89 tokens</li><li>max: 22 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 60.37 tokens</li><li>max: 147 tokens</li></ul> |
340
+ * Samples:
341
+ | question | answer |
342
+ |:-----------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
343
+ | <code>biotechnology is best defined as?</code> | <code>Biotechnology is best defined as_______________? The science that involves using living organisms to produce needed materials. Which of the following tools of biotechnology, to do investigation, is used when trying crime?</code> |
344
+ | <code>how to open xye file?</code> | <code>Firstly, use File then Open and make sure that you can see All Files (*. *) and not just Excel files (the default option!) in the folder containing the *. xye file: Select the file you wish to open and Excel will bring up a wizard menu for importing plain text data into Excel (as shown below).</code> |
345
+ | <code>how much does california spend?</code> | <code>Estimated 2016 expenditures The total estimated government spending in California in fiscal year 2016 was $265.9 billion. Per-capita figures are calculated by taking the state's total spending and dividing by the number of state residents according to United States Census Bureau estimates.</code> |
346
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
347
+ ```json
348
+ {
349
+ "scale": 20.0,
350
+ "similarity_fct": "cos_sim"
351
+ }
352
+ ```
353
+
354
+ ### Evaluation Dataset
355
+
356
+ #### sentence-transformers/gooaq
357
+
358
+ * Dataset: [sentence-transformers/gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq) at [b089f72](https://huggingface.co/datasets/sentence-transformers/gooaq/tree/b089f728748a068b7bc5234e5bcf5b25e3c8279c)
359
+ * Size: 10,000 evaluation samples
360
+ * Columns: <code>question</code> and <code>answer</code>
361
+ * Approximate statistics based on the first 1000 samples:
362
+ | | question | answer |
363
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
364
+ | type | string | string |
365
+ | details | <ul><li>min: 8 tokens</li><li>mean: 11.86 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 60.82 tokens</li><li>max: 166 tokens</li></ul> |
366
+ * Samples:
367
+ | question | answer |
368
+ |:--------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
369
+ | <code>how to open nx file?</code> | <code>['Click File > Open. The File Open dialog box opens.', 'Select NX File (*. prt) in the Type box. ... ', 'Select an NX . ... ', 'Select Import in the File Open dialog box. ... ', 'If you do not want to retain the import profile in use, select an import profile from the Profile list. ... ', 'Click OK in the Import New Model dialog box.']</code> |
370
+ | <code>how to recover deleted photos from blackberry priv?</code> | <code>['Run Android Data Recovery. ... ', 'Enable USB Debugging Mode. ... ', 'Scan Your BlackBerry PRIV to Find Deleted Photos. ... ', 'Recover Deleted Photos from BlackBerry PRIV.']</code> |
371
+ | <code>which subatomic particles are found within the nucleus of an atom?</code> | <code>In the middle of every atom is the nucleus. The nucleus contains two types of subatomic particles, protons and neutrons. The protons have a positive electrical charge and the neutrons have no electrical charge. A third type of subatomic particle, electrons, move around the nucleus.</code> |
372
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
373
+ ```json
374
+ {
375
+ "scale": 20.0,
376
+ "similarity_fct": "cos_sim"
377
+ }
378
+ ```
379
+
380
+ ### Training Hyperparameters
381
+ #### Non-Default Hyperparameters
382
+
383
+ - `eval_strategy`: steps
384
+ - `per_device_train_batch_size`: 64
385
+ - `per_device_eval_batch_size`: 64
386
+ - `learning_rate`: 2e-05
387
+ - `num_train_epochs`: 1
388
+ - `warmup_ratio`: 0.1
389
+ - `bf16`: True
390
+ - `batch_sampler`: no_duplicates
391
+
392
+ #### All Hyperparameters
393
+ <details><summary>Click to expand</summary>
394
+
395
+ - `overwrite_output_dir`: False
396
+ - `do_predict`: False
397
+ - `eval_strategy`: steps
398
+ - `prediction_loss_only`: True
399
+ - `per_device_train_batch_size`: 64
400
+ - `per_device_eval_batch_size`: 64
401
+ - `per_gpu_train_batch_size`: None
402
+ - `per_gpu_eval_batch_size`: None
403
+ - `gradient_accumulation_steps`: 1
404
+ - `eval_accumulation_steps`: None
405
+ - `learning_rate`: 2e-05
406
+ - `weight_decay`: 0.0
407
+ - `adam_beta1`: 0.9
408
+ - `adam_beta2`: 0.999
409
+ - `adam_epsilon`: 1e-08
410
+ - `max_grad_norm`: 1.0
411
+ - `num_train_epochs`: 1
412
+ - `max_steps`: -1
413
+ - `lr_scheduler_type`: linear
414
+ - `lr_scheduler_kwargs`: {}
415
+ - `warmup_ratio`: 0.1
416
+ - `warmup_steps`: 0
417
+ - `log_level`: passive
418
+ - `log_level_replica`: warning
419
+ - `log_on_each_node`: True
420
+ - `logging_nan_inf_filter`: True
421
+ - `save_safetensors`: True
422
+ - `save_on_each_node`: False
423
+ - `save_only_model`: False
424
+ - `restore_callback_states_from_checkpoint`: False
425
+ - `no_cuda`: False
426
+ - `use_cpu`: False
427
+ - `use_mps_device`: False
428
+ - `seed`: 42
429
+ - `data_seed`: None
430
+ - `jit_mode_eval`: False
431
+ - `use_ipex`: False
432
+ - `bf16`: True
433
+ - `fp16`: False
434
+ - `fp16_opt_level`: O1
435
+ - `half_precision_backend`: auto
436
+ - `bf16_full_eval`: False
437
+ - `fp16_full_eval`: False
438
+ - `tf32`: None
439
+ - `local_rank`: 0
440
+ - `ddp_backend`: None
441
+ - `tpu_num_cores`: None
442
+ - `tpu_metrics_debug`: False
443
+ - `debug`: []
444
+ - `dataloader_drop_last`: False
445
+ - `dataloader_num_workers`: 0
446
+ - `dataloader_prefetch_factor`: None
447
+ - `past_index`: -1
448
+ - `disable_tqdm`: False
449
+ - `remove_unused_columns`: True
450
+ - `label_names`: None
451
+ - `load_best_model_at_end`: False
452
+ - `ignore_data_skip`: False
453
+ - `fsdp`: []
454
+ - `fsdp_min_num_params`: 0
455
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
456
+ - `fsdp_transformer_layer_cls_to_wrap`: None
457
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
458
+ - `deepspeed`: None
459
+ - `label_smoothing_factor`: 0.0
460
+ - `optim`: adamw_torch
461
+ - `optim_args`: None
462
+ - `adafactor`: False
463
+ - `group_by_length`: False
464
+ - `length_column_name`: length
465
+ - `ddp_find_unused_parameters`: None
466
+ - `ddp_bucket_cap_mb`: None
467
+ - `ddp_broadcast_buffers`: False
468
+ - `dataloader_pin_memory`: True
469
+ - `dataloader_persistent_workers`: False
470
+ - `skip_memory_metrics`: True
471
+ - `use_legacy_prediction_loop`: False
472
+ - `push_to_hub`: False
473
+ - `resume_from_checkpoint`: None
474
+ - `hub_model_id`: None
475
+ - `hub_strategy`: every_save
476
+ - `hub_private_repo`: False
477
+ - `hub_always_push`: False
478
+ - `gradient_checkpointing`: False
479
+ - `gradient_checkpointing_kwargs`: None
480
+ - `include_inputs_for_metrics`: False
481
+ - `eval_do_concat_batches`: True
482
+ - `fp16_backend`: auto
483
+ - `push_to_hub_model_id`: None
484
+ - `push_to_hub_organization`: None
485
+ - `mp_parameters`:
486
+ - `auto_find_batch_size`: False
487
+ - `full_determinism`: False
488
+ - `torchdynamo`: None
489
+ - `ray_scope`: last
490
+ - `ddp_timeout`: 1800
491
+ - `torch_compile`: False
492
+ - `torch_compile_backend`: None
493
+ - `torch_compile_mode`: None
494
+ - `dispatch_batches`: None
495
+ - `split_batches`: None
496
+ - `include_tokens_per_second`: False
497
+ - `include_num_input_tokens_seen`: False
498
+ - `neftune_noise_alpha`: None
499
+ - `optim_target_modules`: None
500
+ - `batch_eval_metrics`: False
501
+ - `batch_sampler`: no_duplicates
502
+ - `multi_dataset_batch_sampler`: proportional
503
+
504
+ </details>
505
+
506
+ ### Training Logs
507
+ <details><summary>Click to expand</summary>
508
+
509
+ | Epoch | Step | Training Loss | loss | gooaq-dev_cosine_map@100 |
510
+ |:------:|:-----:|:-------------:|:------:|:------------------------:|
511
+ | 0 | 0 | - | - | 0.1379 |
512
+ | 0.0000 | 1 | 3.6452 | - | - |
513
+ | 0.0053 | 250 | 2.4418 | - | - |
514
+ | 0.0107 | 500 | 0.373 | - | - |
515
+ | 0.0160 | 750 | 0.183 | - | - |
516
+ | 0.0213 | 1000 | 0.1286 | 0.0805 | 0.6796 |
517
+ | 0.0266 | 1250 | 0.1099 | - | - |
518
+ | 0.0320 | 1500 | 0.091 | - | - |
519
+ | 0.0373 | 1750 | 0.0768 | - | - |
520
+ | 0.0426 | 2000 | 0.0665 | 0.0526 | 0.7162 |
521
+ | 0.0480 | 2250 | 0.0659 | - | - |
522
+ | 0.0533 | 2500 | 0.0602 | - | - |
523
+ | 0.0586 | 2750 | 0.0548 | - | - |
524
+ | 0.0639 | 3000 | 0.0543 | 0.0426 | 0.7328 |
525
+ | 0.0693 | 3250 | 0.0523 | - | - |
526
+ | 0.0746 | 3500 | 0.0494 | - | - |
527
+ | 0.0799 | 3750 | 0.0468 | - | - |
528
+ | 0.0853 | 4000 | 0.0494 | 0.0362 | 0.7450 |
529
+ | 0.0906 | 4250 | 0.048 | - | - |
530
+ | 0.0959 | 4500 | 0.0442 | - | - |
531
+ | 0.1012 | 4750 | 0.0442 | - | - |
532
+ | 0.1066 | 5000 | 0.0408 | 0.0332 | 0.7519 |
533
+ | 0.1119 | 5250 | 0.0396 | - | - |
534
+ | 0.1172 | 5500 | 0.0379 | - | - |
535
+ | 0.1226 | 5750 | 0.0392 | - | - |
536
+ | 0.1279 | 6000 | 0.0395 | 0.0300 | 0.7505 |
537
+ | 0.1332 | 6250 | 0.0349 | - | - |
538
+ | 0.1386 | 6500 | 0.0383 | - | - |
539
+ | 0.1439 | 6750 | 0.0335 | - | - |
540
+ | 0.1492 | 7000 | 0.0323 | 0.0253 | 0.7624 |
541
+ | 0.1545 | 7250 | 0.0342 | - | - |
542
+ | 0.1599 | 7500 | 0.0292 | - | - |
543
+ | 0.1652 | 7750 | 0.0309 | - | - |
544
+ | 0.1705 | 8000 | 0.0335 | 0.0249 | 0.7631 |
545
+ | 0.1759 | 8250 | 0.0304 | - | - |
546
+ | 0.1812 | 8500 | 0.0318 | - | - |
547
+ | 0.1865 | 8750 | 0.0271 | - | - |
548
+ | 0.1918 | 9000 | 0.029 | 0.0230 | 0.7615 |
549
+ | 0.1972 | 9250 | 0.0309 | - | - |
550
+ | 0.2025 | 9500 | 0.0305 | - | - |
551
+ | 0.2078 | 9750 | 0.0237 | - | - |
552
+ | 0.2132 | 10000 | 0.0274 | 0.0220 | 0.7667 |
553
+ | 0.2185 | 10250 | 0.0248 | - | - |
554
+ | 0.2238 | 10500 | 0.0249 | - | - |
555
+ | 0.2291 | 10750 | 0.0272 | - | - |
556
+ | 0.2345 | 11000 | 0.0289 | 0.0230 | 0.7664 |
557
+ | 0.2398 | 11250 | 0.027 | - | - |
558
+ | 0.2451 | 11500 | 0.0259 | - | - |
559
+ | 0.2505 | 11750 | 0.0237 | - | - |
560
+ | 0.2558 | 12000 | 0.0245 | 0.0220 | 0.7694 |
561
+ | 0.2611 | 12250 | 0.0251 | - | - |
562
+ | 0.2664 | 12500 | 0.0243 | - | - |
563
+ | 0.2718 | 12750 | 0.0229 | - | - |
564
+ | 0.2771 | 13000 | 0.0273 | 0.0201 | 0.7725 |
565
+ | 0.2824 | 13250 | 0.0244 | - | - |
566
+ | 0.2878 | 13500 | 0.0248 | - | - |
567
+ | 0.2931 | 13750 | 0.0255 | - | - |
568
+ | 0.2984 | 14000 | 0.0244 | 0.0192 | 0.7729 |
569
+ | 0.3037 | 14250 | 0.0242 | - | - |
570
+ | 0.3091 | 14500 | 0.0235 | - | - |
571
+ | 0.3144 | 14750 | 0.0231 | - | - |
572
+ | 0.3197 | 15000 | 0.0228 | 0.0190 | 0.7823 |
573
+ | 0.3251 | 15250 | 0.0229 | - | - |
574
+ | 0.3304 | 15500 | 0.0224 | - | - |
575
+ | 0.3357 | 15750 | 0.0216 | - | - |
576
+ | 0.3410 | 16000 | 0.0218 | 0.0186 | 0.7787 |
577
+ | 0.3464 | 16250 | 0.022 | - | - |
578
+ | 0.3517 | 16500 | 0.0233 | - | - |
579
+ | 0.3570 | 16750 | 0.0216 | - | - |
580
+ | 0.3624 | 17000 | 0.0226 | 0.0169 | 0.7862 |
581
+ | 0.3677 | 17250 | 0.0215 | - | - |
582
+ | 0.3730 | 17500 | 0.0212 | - | - |
583
+ | 0.3784 | 17750 | 0.0178 | - | - |
584
+ | 0.3837 | 18000 | 0.0217 | 0.0161 | 0.7813 |
585
+ | 0.3890 | 18250 | 0.0217 | - | - |
586
+ | 0.3943 | 18500 | 0.0191 | - | - |
587
+ | 0.3997 | 18750 | 0.0216 | - | - |
588
+ | 0.4050 | 19000 | 0.022 | 0.0157 | 0.7868 |
589
+ | 0.4103 | 19250 | 0.0223 | - | - |
590
+ | 0.4157 | 19500 | 0.021 | - | - |
591
+ | 0.4210 | 19750 | 0.0176 | - | - |
592
+ | 0.4263 | 20000 | 0.021 | 0.0162 | 0.7873 |
593
+ | 0.4316 | 20250 | 0.0206 | - | - |
594
+ | 0.4370 | 20500 | 0.0196 | - | - |
595
+ | 0.4423 | 20750 | 0.0186 | - | - |
596
+ | 0.4476 | 21000 | 0.0197 | 0.0158 | 0.7907 |
597
+ | 0.4530 | 21250 | 0.0156 | - | - |
598
+ | 0.4583 | 21500 | 0.0178 | - | - |
599
+ | 0.4636 | 21750 | 0.0175 | - | - |
600
+ | 0.4689 | 22000 | 0.0187 | 0.0151 | 0.7937 |
601
+ | 0.4743 | 22250 | 0.0182 | - | - |
602
+ | 0.4796 | 22500 | 0.0185 | - | - |
603
+ | 0.4849 | 22750 | 0.0217 | - | - |
604
+ | 0.4903 | 23000 | 0.0179 | 0.0156 | 0.7937 |
605
+ | 0.4956 | 23250 | 0.0193 | - | - |
606
+ | 0.5009 | 23500 | 0.015 | - | - |
607
+ | 0.5062 | 23750 | 0.0181 | - | - |
608
+ | 0.5116 | 24000 | 0.0173 | 0.0150 | 0.7924 |
609
+ | 0.5169 | 24250 | 0.0177 | - | - |
610
+ | 0.5222 | 24500 | 0.0183 | - | - |
611
+ | 0.5276 | 24750 | 0.0171 | - | - |
612
+ | 0.5329 | 25000 | 0.0185 | 0.0140 | 0.7955 |
613
+ | 0.5382 | 25250 | 0.0178 | - | - |
614
+ | 0.5435 | 25500 | 0.015 | - | - |
615
+ | 0.5489 | 25750 | 0.017 | - | - |
616
+ | 0.5542 | 26000 | 0.0171 | 0.0139 | 0.7931 |
617
+ | 0.5595 | 26250 | 0.0164 | - | - |
618
+ | 0.5649 | 26500 | 0.0175 | - | - |
619
+ | 0.5702 | 26750 | 0.0175 | - | - |
620
+ | 0.5755 | 27000 | 0.0163 | 0.0133 | 0.7954 |
621
+ | 0.5809 | 27250 | 0.0179 | - | - |
622
+ | 0.5862 | 27500 | 0.016 | - | - |
623
+ | 0.5915 | 27750 | 0.0155 | - | - |
624
+ | 0.5968 | 28000 | 0.0162 | 0.0138 | 0.7979 |
625
+ | 0.6022 | 28250 | 0.0164 | - | - |
626
+ | 0.6075 | 28500 | 0.0148 | - | - |
627
+ | 0.6128 | 28750 | 0.0152 | - | - |
628
+ | 0.6182 | 29000 | 0.0166 | 0.0134 | 0.7987 |
629
+ | 0.6235 | 29250 | 0.0159 | - | - |
630
+ | 0.6288 | 29500 | 0.0168 | - | - |
631
+ | 0.6341 | 29750 | 0.0187 | - | - |
632
+ | 0.6395 | 30000 | 0.017 | 0.0137 | 0.7980 |
633
+ | 0.6448 | 30250 | 0.0168 | - | - |
634
+ | 0.6501 | 30500 | 0.0149 | - | - |
635
+ | 0.6555 | 30750 | 0.0159 | - | - |
636
+ | 0.6608 | 31000 | 0.0149 | 0.0131 | 0.8017 |
637
+ | 0.6661 | 31250 | 0.0149 | - | - |
638
+ | 0.6714 | 31500 | 0.0147 | - | - |
639
+ | 0.6768 | 31750 | 0.0157 | - | - |
640
+ | 0.6821 | 32000 | 0.0151 | 0.0125 | 0.8011 |
641
+ | 0.6874 | 32250 | 0.015 | - | - |
642
+ | 0.6928 | 32500 | 0.0157 | - | - |
643
+ | 0.6981 | 32750 | 0.0153 | - | - |
644
+ | 0.7034 | 33000 | 0.0141 | 0.0123 | 0.8012 |
645
+ | 0.7087 | 33250 | 0.0143 | - | - |
646
+ | 0.7141 | 33500 | 0.0121 | - | - |
647
+ | 0.7194 | 33750 | 0.0164 | - | - |
648
+ | 0.7247 | 34000 | 0.014 | 0.0121 | 0.8014 |
649
+ | 0.7301 | 34250 | 0.0147 | - | - |
650
+ | 0.7354 | 34500 | 0.0149 | - | - |
651
+ | 0.7407 | 34750 | 0.014 | - | - |
652
+ | 0.7460 | 35000 | 0.0156 | 0.0117 | 0.8022 |
653
+ | 0.7514 | 35250 | 0.0153 | - | - |
654
+ | 0.7567 | 35500 | 0.0146 | - | - |
655
+ | 0.7620 | 35750 | 0.0144 | - | - |
656
+ | 0.7674 | 36000 | 0.0139 | 0.0111 | 0.8035 |
657
+ | 0.7727 | 36250 | 0.0134 | - | - |
658
+ | 0.7780 | 36500 | 0.013 | - | - |
659
+ | 0.7833 | 36750 | 0.0156 | - | - |
660
+ | 0.7887 | 37000 | 0.0144 | 0.0108 | 0.8048 |
661
+ | 0.7940 | 37250 | 0.0133 | - | - |
662
+ | 0.7993 | 37500 | 0.0154 | - | - |
663
+ | 0.8047 | 37750 | 0.0132 | - | - |
664
+ | 0.8100 | 38000 | 0.013 | 0.0108 | 0.8063 |
665
+ | 0.8153 | 38250 | 0.0126 | - | - |
666
+ | 0.8207 | 38500 | 0.0135 | - | - |
667
+ | 0.8260 | 38750 | 0.014 | - | - |
668
+ | 0.8313 | 39000 | 0.013 | 0.0109 | 0.8086 |
669
+ | 0.8366 | 39250 | 0.0136 | - | - |
670
+ | 0.8420 | 39500 | 0.0141 | - | - |
671
+ | 0.8473 | 39750 | 0.0155 | - | - |
672
+ | 0.8526 | 40000 | 0.0153 | 0.0106 | 0.8075 |
673
+ | 0.8580 | 40250 | 0.0131 | - | - |
674
+ | 0.8633 | 40500 | 0.0128 | - | - |
675
+ | 0.8686 | 40750 | 0.013 | - | - |
676
+ | 0.8739 | 41000 | 0.0133 | 0.0109 | 0.8060 |
677
+ | 0.8793 | 41250 | 0.0119 | - | - |
678
+ | 0.8846 | 41500 | 0.0144 | - | - |
679
+ | 0.8899 | 41750 | 0.0142 | - | - |
680
+ | 0.8953 | 42000 | 0.0138 | 0.0105 | 0.8083 |
681
+ | 0.9006 | 42250 | 0.014 | - | - |
682
+ | 0.9059 | 42500 | 0.0134 | - | - |
683
+ | 0.9112 | 42750 | 0.0134 | - | - |
684
+ | 0.9166 | 43000 | 0.0124 | 0.0106 | 0.8113 |
685
+ | 0.9219 | 43250 | 0.0122 | - | - |
686
+ | 0.9272 | 43500 | 0.0126 | - | - |
687
+ | 0.9326 | 43750 | 0.0121 | - | - |
688
+ | 0.9379 | 44000 | 0.0137 | 0.0103 | 0.8105 |
689
+ | 0.9432 | 44250 | 0.0132 | - | - |
690
+ | 0.9485 | 44500 | 0.012 | - | - |
691
+ | 0.9539 | 44750 | 0.0136 | - | - |
692
+ | 0.9592 | 45000 | 0.0133 | 0.0104 | 0.8112 |
693
+ | 0.9645 | 45250 | 0.0118 | - | - |
694
+ | 0.9699 | 45500 | 0.0132 | - | - |
695
+ | 0.9752 | 45750 | 0.0118 | - | - |
696
+ | 0.9805 | 46000 | 0.012 | 0.0102 | 0.8104 |
697
+ | 0.9858 | 46250 | 0.0127 | - | - |
698
+ | 0.9912 | 46500 | 0.0134 | - | - |
699
+ | 0.9965 | 46750 | 0.0121 | - | - |
700
+ | 1.0 | 46914 | - | - | 0.8111 |
701
+
702
+ </details>
703
+
704
+ ### Environmental Impact
705
+ Carbon emissions were measured using [CodeCarbon](https://github.com/mlco2/codecarbon).
706
+ - **Energy Consumed**: 1.637 kWh
707
+ - **Carbon Emitted**: 0.636 kg of CO2
708
+ - **Hours Used**: 4.514 hours
709
+
710
+ ### Training Hardware
711
+ - **On Cloud**: No
712
+ - **GPU Model**: 1 x NVIDIA GeForce RTX 3090
713
+ - **CPU Model**: 13th Gen Intel(R) Core(TM) i7-13700K
714
+ - **RAM Size**: 31.78 GB
715
+
716
+ ### Framework Versions
717
+ - Python: 3.11.6
718
+ - Sentence Transformers: 3.1.0.dev0
719
+ - Transformers: 4.41.2
720
+ - PyTorch: 2.3.0+cu121
721
+ - Accelerate: 0.30.1
722
+ - Datasets: 2.19.1
723
+ - Tokenizers: 0.19.1
724
+
725
+ ## Citation
726
+
727
+ ### BibTeX
728
+
729
+ #### Sentence Transformers
730
+ ```bibtex
731
+ @inproceedings{reimers-2019-sentence-bert,
732
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
733
+ author = "Reimers, Nils and Gurevych, Iryna",
734
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
735
+ month = "11",
736
+ year = "2019",
737
+ publisher = "Association for Computational Linguistics",
738
+ url = "https://arxiv.org/abs/1908.10084",
739
+ }
740
+ ```
741
+
742
+ #### MultipleNegativesRankingLoss
743
+ ```bibtex
744
+ @misc{henderson2017efficient,
745
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
746
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
747
+ year={2017},
748
+ eprint={1705.00652},
749
+ archivePrefix={arXiv},
750
+ primaryClass={cs.CL}
751
+ }
752
+ ```
753
+
754
+ <!--
755
+ ## Glossary
756
+
757
+ *Clearly define terms in order to be accessible across audiences.*
758
+ -->
759
+
760
+ <!--
761
+ ## Model Card Authors
762
+
763
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
764
+ -->
765
+
766
+ <!--
767
+ ## Model Card Contact
768
+
769
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
770
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/mpnet-base",
3
+ "architectures": [
4
+ "MPNetModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.41.2",
23
+ "vocab_size": 30527
24
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.0.dev0",
4
+ "transformers": "4.41.2",
5
+ "pytorch": "2.3.0+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9617a7021d0321644dadcc45204c1b06303f492f2f9ad905854b265c21d9e3
3
+ size 437967672
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": true,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "mask_token": "<mask>",
58
+ "model_max_length": 512,
59
+ "pad_token": "<pad>",
60
+ "sep_token": "</s>",
61
+ "strip_accents": null,
62
+ "tokenize_chinese_chars": true,
63
+ "tokenizer_class": "MPNetTokenizer",
64
+ "unk_token": "[UNK]"
65
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff