Commit 3399e41 by tomaarsen (HF staff)
1 Parent(s): c6b0c6b

Add new SentenceTransformer model.
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,793 @@
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: sentence-transformers
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - loss:OnlineContrastiveLoss
10
+ base_model: sentence-transformers/stsb-distilbert-base
11
+ metrics:
12
+ - cosine_accuracy
13
+ - cosine_accuracy_threshold
14
+ - cosine_f1
15
+ - cosine_f1_threshold
16
+ - cosine_precision
17
+ - cosine_recall
18
+ - cosine_ap
19
+ - dot_accuracy
20
+ - dot_accuracy_threshold
21
+ - dot_f1
22
+ - dot_f1_threshold
23
+ - dot_precision
24
+ - dot_recall
25
+ - dot_ap
26
+ - manhattan_accuracy
27
+ - manhattan_accuracy_threshold
28
+ - manhattan_f1
29
+ - manhattan_f1_threshold
30
+ - manhattan_precision
31
+ - manhattan_recall
32
+ - manhattan_ap
33
+ - euclidean_accuracy
34
+ - euclidean_accuracy_threshold
35
+ - euclidean_f1
36
+ - euclidean_f1_threshold
37
+ - euclidean_precision
38
+ - euclidean_recall
39
+ - euclidean_ap
40
+ - max_accuracy
41
+ - max_accuracy_threshold
42
+ - max_f1
43
+ - max_f1_threshold
44
+ - max_precision
45
+ - max_recall
46
+ - max_ap
47
+ - average_precision
48
+ - f1
49
+ - precision
50
+ - recall
51
+ - threshold
52
+ - cosine_accuracy@1
53
+ - cosine_accuracy@3
54
+ - cosine_accuracy@5
55
+ - cosine_accuracy@10
56
+ - cosine_precision@1
57
+ - cosine_precision@3
58
+ - cosine_precision@5
59
+ - cosine_precision@10
60
+ - cosine_recall@1
61
+ - cosine_recall@3
62
+ - cosine_recall@5
63
+ - cosine_recall@10
64
+ - cosine_ndcg@10
65
+ - cosine_mrr@10
66
+ - cosine_map@100
67
+ - dot_accuracy@1
68
+ - dot_accuracy@3
69
+ - dot_accuracy@5
70
+ - dot_accuracy@10
71
+ - dot_precision@1
72
+ - dot_precision@3
73
+ - dot_precision@5
74
+ - dot_precision@10
75
+ - dot_recall@1
76
+ - dot_recall@3
77
+ - dot_recall@5
78
+ - dot_recall@10
79
+ - dot_ndcg@10
80
+ - dot_mrr@10
81
+ - dot_map@100
82
+ widget:
83
+ - source_sentence: Why did he go MIA?
84
+ sentences:
85
+ - Why did Yahoo kill Konfabulator?
86
+ - Why do people get angry with me?
87
+ - What are the best waterproof guns?
88
+ - source_sentence: Who is a soulmate?
89
+ sentences:
90
+ - Is she the “One”?
91
+ - Who is Pakistan's biggest enemy?
92
+ - Will smoking weed help with my anxiety?
93
+ - source_sentence: Is this poem good?
94
+ sentences:
95
+ - Is my poem any good?
96
+ - How can I become a good speaker?
97
+ - What is feminism?
98
+ - source_sentence: Who invented Yoga?
99
+ sentences:
100
+ - How was yoga invented?
101
+ - Who owns this number 3152150252?
102
+ - What is Dynamics CRM Services?
103
+ - source_sentence: Is stretching bad?
104
+ sentences:
105
+ - Is stretching good for you?
106
+ - If i=0; what will i=i++ do to i?
107
+ - What is the Output of this C program ?
108
+ pipeline_tag: sentence-similarity
109
+ co2_eq_emissions:
110
+ emissions: 15.707175691967695
111
+ energy_consumed: 0.040409299905757354
112
+ source: codecarbon
113
+ training_type: fine-tuning
114
+ on_cloud: false
115
+ cpu_model: 13th Gen Intel(R) Core(TM) i7-13700K
116
+ ram_total_size: 31.777088165283203
117
+ hours_used: 0.202
118
+ hardware_used: 1 x NVIDIA GeForce RTX 3090
119
+ model-index:
120
+ - name: SentenceTransformer based on sentence-transformers/stsb-distilbert-base
121
+ results:
122
+ - task:
123
+ type: binary-classification
124
+ name: Binary Classification
125
+ dataset:
126
+ name: quora duplicates
127
+ type: quora-duplicates
128
+ metrics:
129
+ - type: cosine_accuracy
130
+ value: 0.86
131
+ name: Cosine Accuracy
132
+ - type: cosine_accuracy_threshold
133
+ value: 0.8104104995727539
134
+ name: Cosine Accuracy Threshold
135
+ - type: cosine_f1
136
+ value: 0.8250591016548463
137
+ name: Cosine F1
138
+ - type: cosine_f1_threshold
139
+ value: 0.7247534394264221
140
+ name: Cosine F1 Threshold
141
+ - type: cosine_precision
142
+ value: 0.7347368421052631
143
+ name: Cosine Precision
144
+ - type: cosine_recall
145
+ value: 0.9407008086253369
146
+ name: Cosine Recall
147
+ - type: cosine_ap
148
+ value: 0.887247904332921
149
+ name: Cosine Ap
150
+ - type: dot_accuracy
151
+ value: 0.828
152
+ name: Dot Accuracy
153
+ - type: dot_accuracy_threshold
154
+ value: 157.35491943359375
155
+ name: Dot Accuracy Threshold
156
+ - type: dot_f1
157
+ value: 0.7898550724637681
158
+ name: Dot F1
159
+ - type: dot_f1_threshold
160
+ value: 145.7113037109375
161
+ name: Dot F1 Threshold
162
+ - type: dot_precision
163
+ value: 0.7155361050328227
164
+ name: Dot Precision
165
+ - type: dot_recall
166
+ value: 0.8814016172506739
167
+ name: Dot Recall
168
+ - type: dot_ap
169
+ value: 0.8369433397850002
170
+ name: Dot Ap
171
+ - type: manhattan_accuracy
172
+ value: 0.868
173
+ name: Manhattan Accuracy
174
+ - type: manhattan_accuracy_threshold
175
+ value: 208.00347900390625
176
+ name: Manhattan Accuracy Threshold
177
+ - type: manhattan_f1
178
+ value: 0.8307692307692308
179
+ name: Manhattan F1
180
+ - type: manhattan_f1_threshold
181
+ value: 208.00347900390625
182
+ name: Manhattan F1 Threshold
183
+ - type: manhattan_precision
184
+ value: 0.7921760391198044
185
+ name: Manhattan Precision
186
+ - type: manhattan_recall
187
+ value: 0.8733153638814016
188
+ name: Manhattan Recall
189
+ - type: manhattan_ap
190
+ value: 0.8868217413983182
191
+ name: Manhattan Ap
192
+ - type: euclidean_accuracy
193
+ value: 0.867
194
+ name: Euclidean Accuracy
195
+ - type: euclidean_accuracy_threshold
196
+ value: 9.269388198852539
197
+ name: Euclidean Accuracy Threshold
198
+ - type: euclidean_f1
199
+ value: 0.8301404853128991
200
+ name: Euclidean F1
201
+ - type: euclidean_f1_threshold
202
+ value: 9.525729179382324
203
+ name: Euclidean F1 Threshold
204
+ - type: euclidean_precision
205
+ value: 0.7888349514563107
206
+ name: Euclidean Precision
207
+ - type: euclidean_recall
208
+ value: 0.876010781671159
209
+ name: Euclidean Recall
210
+ - type: euclidean_ap
211
+ value: 0.8884154240019244
212
+ name: Euclidean Ap
213
+ - type: max_accuracy
214
+ value: 0.868
215
+ name: Max Accuracy
216
+ - type: max_accuracy_threshold
217
+ value: 208.00347900390625
218
+ name: Max Accuracy Threshold
219
+ - type: max_f1
220
+ value: 0.8307692307692308
221
+ name: Max F1
222
+ - type: max_f1_threshold
223
+ value: 208.00347900390625
224
+ name: Max F1 Threshold
225
+ - type: max_precision
226
+ value: 0.7921760391198044
227
+ name: Max Precision
228
+ - type: max_recall
229
+ value: 0.9407008086253369
230
+ name: Max Recall
231
+ - type: max_ap
232
+ value: 0.8884154240019244
233
+ name: Max Ap
234
+ - task:
235
+ type: paraphrase-mining
236
+ name: Paraphrase Mining
237
+ dataset:
238
+ name: quora duplicates dev
239
+ type: quora-duplicates-dev
240
+ metrics:
241
+ - type: average_precision
242
+ value: 0.534436244125929
243
+ name: Average Precision
244
+ - type: f1
245
+ value: 0.5447997274541295
246
+ name: F1
247
+ - type: precision
248
+ value: 0.5311002514589362
249
+ name: Precision
250
+ - type: recall
251
+ value: 0.5592246590398161
252
+ name: Recall
253
+ - type: threshold
254
+ value: 0.8626040816307068
255
+ name: Threshold
256
+ - task:
257
+ type: information-retrieval
258
+ name: Information Retrieval
259
+ dataset:
260
+ name: Unknown
261
+ type: unknown
262
+ metrics:
263
+ - type: cosine_accuracy@1
264
+ value: 0.928
265
+ name: Cosine Accuracy@1
266
+ - type: cosine_accuracy@3
267
+ value: 0.9712
268
+ name: Cosine Accuracy@3
269
+ - type: cosine_accuracy@5
270
+ value: 0.9782
271
+ name: Cosine Accuracy@5
272
+ - type: cosine_accuracy@10
273
+ value: 0.9874
274
+ name: Cosine Accuracy@10
275
+ - type: cosine_precision@1
276
+ value: 0.928
277
+ name: Cosine Precision@1
278
+ - type: cosine_precision@3
279
+ value: 0.4151333333333334
280
+ name: Cosine Precision@3
281
+ - type: cosine_precision@5
282
+ value: 0.26656
283
+ name: Cosine Precision@5
284
+ - type: cosine_precision@10
285
+ value: 0.14166
286
+ name: Cosine Precision@10
287
+ - type: cosine_recall@1
288
+ value: 0.7993523853760618
289
+ name: Cosine Recall@1
290
+ - type: cosine_recall@3
291
+ value: 0.9341884771405065
292
+ name: Cosine Recall@3
293
+ - type: cosine_recall@5
294
+ value: 0.9560896250710075
295
+ name: Cosine Recall@5
296
+ - type: cosine_recall@10
297
+ value: 0.9766088525134997
298
+ name: Cosine Recall@10
299
+ - type: cosine_ndcg@10
300
+ value: 0.9516150309696244
301
+ name: Cosine Ndcg@10
302
+ - type: cosine_mrr@10
303
+ value: 0.9509392857142857
304
+ name: Cosine Mrr@10
305
+ - type: cosine_map@100
306
+ value: 0.9390263696194139
307
+ name: Cosine Map@100
308
+ - type: dot_accuracy@1
309
+ value: 0.8926
310
+ name: Dot Accuracy@1
311
+ - type: dot_accuracy@3
312
+ value: 0.9518
313
+ name: Dot Accuracy@3
314
+ - type: dot_accuracy@5
315
+ value: 0.9658
316
+ name: Dot Accuracy@5
317
+ - type: dot_accuracy@10
318
+ value: 0.9768
319
+ name: Dot Accuracy@10
320
+ - type: dot_precision@1
321
+ value: 0.8926
322
+ name: Dot Precision@1
323
+ - type: dot_precision@3
324
+ value: 0.40273333333333333
325
+ name: Dot Precision@3
326
+ - type: dot_precision@5
327
+ value: 0.26076
328
+ name: Dot Precision@5
329
+ - type: dot_precision@10
330
+ value: 0.13882
331
+ name: Dot Precision@10
332
+ - type: dot_recall@1
333
+ value: 0.7679620996617761
334
+ name: Dot Recall@1
335
+ - type: dot_recall@3
336
+ value: 0.9105756956997251
337
+ name: Dot Recall@3
338
+ - type: dot_recall@5
339
+ value: 0.9402185219519044
340
+ name: Dot Recall@5
341
+ - type: dot_recall@10
342
+ value: 0.9623418143294613
343
+ name: Dot Recall@10
344
+ - type: dot_ndcg@10
345
+ value: 0.9263520741106431
346
+ name: Dot Ndcg@10
347
+ - type: dot_mrr@10
348
+ value: 0.9243020634920638
349
+ name: Dot Mrr@10
350
+ - type: dot_map@100
351
+ value: 0.9094019438194247
352
+ name: Dot Map@100
353
+ ---
354
+
355
+ # SentenceTransformer based on sentence-transformers/stsb-distilbert-base
356
+
357
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/stsb-distilbert-base](https://huggingface.co/sentence-transformers/stsb-distilbert-base) on the [sentence-transformers/quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
358
+
359
+ ## Model Details
360
+
361
+ ### Model Description
362
+ - **Model Type:** Sentence Transformer
363
+ - **Base model:** [sentence-transformers/stsb-distilbert-base](https://huggingface.co/sentence-transformers/stsb-distilbert-base) <!-- at revision 82ad392c08f81be9be9bf065339670b23f2e1493 -->
364
+ - **Maximum Sequence Length:** 128 tokens
365
+ - **Output Dimensionality:** 768 dimensions
366
+ - **Similarity Function:** Cosine Similarity
367
+ - **Training Dataset:**
368
+ - [sentence-transformers/quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates)
369
+ - **Language:** en
370
+ <!-- - **License:** Unknown -->
371
+
372
+ ### Model Sources
373
+
374
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
375
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
376
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
377
+
378
+ ### Full Model Architecture
379
+
380
+ ```
381
+ SentenceTransformer(
382
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: DistilBertModel
383
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
384
+ )
385
+ ```
386
+
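+ The listed architecture is a plain two-module pipeline: a DistilBERT encoder (inputs truncated at 128 tokens) followed by mean pooling over the 768-dimensional token embeddings. As a rough sketch of how an equivalent pipeline could be assembled by hand with the `sentence_transformers.models` API (illustrative only, not how this checkpoint was produced):
+
+ ```python
+ from sentence_transformers import SentenceTransformer, models
+
+ # Module 0: the DistilBERT encoder, truncating inputs at 128 tokens
+ transformer = models.Transformer("sentence-transformers/stsb-distilbert-base", max_seq_length=128)
+ # Module 1: mean pooling over the 768-dimensional token embeddings
+ pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="mean")
+
+ model = SentenceTransformer(modules=[transformer, pooling])
+ ```
+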
387
+ ## Usage
388
+
389
+ ### Direct Usage (Sentence Transformers)
390
+
391
+ First install the Sentence Transformers library:
392
+
393
+ ```bash
394
+ pip install -U sentence-transformers
395
+ ```
396
+
397
+ Then you can load this model and run inference.
398
+ ```python
399
+ from sentence_transformers import SentenceTransformer
400
+
401
+ # Download from the 🤗 Hub
402
+ model = SentenceTransformer("tomaarsen/stsb-distilbert-base-ocl")
403
+ # Run inference
404
+ sentences = [
405
+ 'Is stretching bad?',
406
+ 'Is stretching good for you?',
407
+ 'If i=0; what will i=i++ do to i?',
408
+ ]
409
+ embeddings = model.encode(sentences)
410
+ print(embeddings.shape)
411
+ # [3, 768]
412
+
413
+ # Get the similarity scores for the embeddings
414
+ similarities = model.similarity(embeddings, embeddings)
415
+ print(similarities.shape)
416
+ # [3, 3]
417
+ ```
418
+
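+ Because the model was trained on duplicate-question pairs, the cosine similarity can be thresholded to flag likely duplicates. A hedged example continuing from the snippet above (the 0.72 cut-off simply reuses the cosine F1 threshold reported in the evaluation section below; pick a threshold that suits your own data):
+
+ ```python
+ query = "Is stretching bad?"
+ candidate = "Is stretching good for you?"
+ q_emb, c_emb = model.encode([query, candidate])
+
+ score = model.similarity(q_emb, c_emb).item()
+ is_duplicate = score >= 0.72  # ~cosine_f1_threshold from the Binary Classification results
+ print(score, is_duplicate)
+ ```
+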
419
+ <!--
420
+ ### Direct Usage (Transformers)
421
+
422
+ <details><summary>Click to see the direct usage in Transformers</summary>
423
+
424
+ </details>
425
+ -->
426
+
427
+ <!--
428
+ ### Downstream Usage (Sentence Transformers)
429
+
430
+ You can finetune this model on your own dataset.
431
+
432
+ <details><summary>Click to expand</summary>
433
+
434
+ </details>
435
+ -->
436
+
437
+ <!--
438
+ ### Out-of-Scope Use
439
+
440
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
441
+ -->
442
+
443
+ ## Evaluation
444
+
445
+ ### Metrics
446
+
447
+ #### Binary Classification
448
+ * Dataset: `quora-duplicates`
449
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
450
+
451
+ | Metric | Value |
452
+ |:-----------------------------|:-----------|
453
+ | cosine_accuracy | 0.86 |
454
+ | cosine_accuracy_threshold | 0.8104 |
455
+ | cosine_f1 | 0.8251 |
456
+ | cosine_f1_threshold | 0.7248 |
457
+ | cosine_precision | 0.7347 |
458
+ | cosine_recall | 0.9407 |
459
+ | cosine_ap | 0.8872 |
460
+ | dot_accuracy | 0.828 |
461
+ | dot_accuracy_threshold | 157.3549 |
462
+ | dot_f1 | 0.7899 |
463
+ | dot_f1_threshold | 145.7113 |
464
+ | dot_precision | 0.7155 |
465
+ | dot_recall | 0.8814 |
466
+ | dot_ap | 0.8369 |
467
+ | manhattan_accuracy | 0.868 |
468
+ | manhattan_accuracy_threshold | 208.0035 |
469
+ | manhattan_f1 | 0.8308 |
470
+ | manhattan_f1_threshold | 208.0035 |
471
+ | manhattan_precision | 0.7922 |
472
+ | manhattan_recall | 0.8733 |
473
+ | manhattan_ap | 0.8868 |
474
+ | euclidean_accuracy | 0.867 |
475
+ | euclidean_accuracy_threshold | 9.2694 |
476
+ | euclidean_f1 | 0.8301 |
477
+ | euclidean_f1_threshold | 9.5257 |
478
+ | euclidean_precision | 0.7888 |
479
+ | euclidean_recall | 0.876 |
480
+ | euclidean_ap | 0.8884 |
481
+ | max_accuracy | 0.868 |
482
+ | max_accuracy_threshold | 208.0035 |
483
+ | max_f1 | 0.8308 |
484
+ | max_f1_threshold | 208.0035 |
485
+ | max_precision | 0.7922 |
486
+ | max_recall | 0.9407 |
487
+ | **max_ap** | **0.8884** |
488
+
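+ A minimal sketch of how such a report can be produced with the evaluator named above (the three example pairs, their labels, and the `quora-duplicates` name are placeholders; the original evaluation used 1,000 held-out Quora pairs):
+
+ ```python
+ from sentence_transformers.evaluation import BinaryClassificationEvaluator
+
+ dev_evaluator = BinaryClassificationEvaluator(
+     sentences1=["Who invented Yoga?", "Is this poem good?", "Why did he go MIA?"],
+     sentences2=["How was yoga invented?", "Is my poem any good?", "Why do people get angry with me?"],
+     labels=[1, 1, 0],  # 1 = duplicate pair, 0 = not a duplicate
+     name="quora-duplicates",
+ )
+ results = dev_evaluator(model)  # accuracy/F1/precision/recall/AP per similarity function
+ ```
+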
489
+ #### Paraphrase Mining
490
+ * Dataset: `quora-duplicates-dev`
491
+ * Evaluated with [<code>ParaphraseMiningEvaluator</code>](https://sbert.net/docs/package_reference/evaluation.html#sentence_transformers.evaluation.ParaphraseMiningEvaluator)
492
+
493
+ | Metric | Value |
494
+ |:----------------------|:-----------|
495
+ | **average_precision** | **0.5344** |
496
+ | f1 | 0.5448 |
497
+ | precision | 0.5311 |
498
+ | recall | 0.5592 |
499
+ | threshold | 0.8626 |
500
+
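+ Paraphrase mining scores every sentence against a large pool rather than pre-paired inputs, which is why the absolute numbers are much lower than in the pairwise task above. A hedged sketch of the evaluator with a tiny made-up sentence pool (the real evaluation ran over the Quora development questions):
+
+ ```python
+ from sentence_transformers.evaluation import ParaphraseMiningEvaluator
+
+ sentences_map = {
+     "q1": "Is stretching bad?",
+     "q2": "Is stretching good for you?",
+     "q3": "What is feminism?",
+ }
+ duplicates_list = [("q1", "q2")]  # known duplicate id pairs
+
+ paraphrase_evaluator = ParaphraseMiningEvaluator(sentences_map, duplicates_list, name="quora-duplicates-dev")
+ results = paraphrase_evaluator(model)  # average precision, F1, precision, recall, threshold
+ ```
+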
501
+ #### Information Retrieval
502
+
503
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
504
+
505
+ | Metric | Value |
506
+ |:--------------------|:----------|
507
+ | cosine_accuracy@1 | 0.928 |
508
+ | cosine_accuracy@3 | 0.9712 |
509
+ | cosine_accuracy@5 | 0.9782 |
510
+ | cosine_accuracy@10 | 0.9874 |
511
+ | cosine_precision@1 | 0.928 |
512
+ | cosine_precision@3 | 0.4151 |
513
+ | cosine_precision@5 | 0.2666 |
514
+ | cosine_precision@10 | 0.1417 |
515
+ | cosine_recall@1 | 0.7994 |
516
+ | cosine_recall@3 | 0.9342 |
517
+ | cosine_recall@5 | 0.9561 |
518
+ | cosine_recall@10 | 0.9766 |
519
+ | cosine_ndcg@10 | 0.9516 |
520
+ | cosine_mrr@10 | 0.9509 |
521
+ | **cosine_map@100** | **0.939** |
522
+ | dot_accuracy@1 | 0.8926 |
523
+ | dot_accuracy@3 | 0.9518 |
524
+ | dot_accuracy@5 | 0.9658 |
525
+ | dot_accuracy@10 | 0.9768 |
526
+ | dot_precision@1 | 0.8926 |
527
+ | dot_precision@3 | 0.4027 |
528
+ | dot_precision@5 | 0.2608 |
529
+ | dot_precision@10 | 0.1388 |
530
+ | dot_recall@1 | 0.768 |
531
+ | dot_recall@3 | 0.9106 |
532
+ | dot_recall@5 | 0.9402 |
533
+ | dot_recall@10 | 0.9623 |
534
+ | dot_ndcg@10 | 0.9264 |
535
+ | dot_mrr@10 | 0.9243 |
536
+ | dot_map@100 | 0.9094 |
537
+
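+ The retrieval metrics come from an evaluator that embeds a query set and a corpus and checks whether the relevant corpus entries rank highly for each query. A minimal, hedged sketch with toy data (the real run used Quora questions as both queries and corpus):
+
+ ```python
+ from sentence_transformers.evaluation import InformationRetrievalEvaluator
+
+ queries = {"q1": "Who is a soulmate?"}
+ corpus = {"d1": "Is she the 'One'?", "d2": "What are the best waterproof guns?"}
+ relevant_docs = {"q1": {"d1"}}  # which corpus ids are relevant to each query
+
+ ir_evaluator = InformationRetrievalEvaluator(queries, corpus, relevant_docs)
+ results = ir_evaluator(model)  # accuracy@k, precision@k, recall@k, NDCG@10, MRR@10, MAP@100
+ ```
+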
538
+ <!--
539
+ ## Bias, Risks and Limitations
540
+
541
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
542
+ -->
543
+
544
+ <!--
545
+ ### Recommendations
546
+
547
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
548
+ -->
549
+
550
+ ## Training Details
551
+
552
+ ### Training Dataset
553
+
554
+ #### sentence-transformers/quora-duplicates
555
+
556
+ * Dataset: [sentence-transformers/quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb)
557
+ * Size: 100,000 training samples
558
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
559
+ * Approximate statistics based on the first 1000 samples:
560
+ | | sentence1 | sentence2 | label |
561
+ |:--------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
562
+ | type | string | string | int |
563
+ | details | <ul><li>min: 6 tokens</li><li>mean: 15.5 tokens</li><li>max: 45 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.46 tokens</li><li>max: 78 tokens</li></ul> | <ul><li>0: ~64.10%</li><li>1: ~35.90%</li></ul> |
564
+ * Samples:
565
+ | sentence1 | sentence2 | label |
566
+ |:---------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------|
567
+ | <code>What are the best ecommerce blogs to do guest posts on about SEO to gain new clients?</code> | <code>Interested in being a guest blogger for an ecommerce marketing blog?</code> | <code>0</code> |
568
+ | <code>How do I learn Informatica online training?</code> | <code>What is Informatica online training?</code> | <code>0</code> |
569
+ | <code>What effects does marijuana use have on the flu?</code> | <code>What effects does Marijuana use have on the common cold?</code> | <code>0</code> |
570
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/losses.html#onlinecontrastiveloss)
571
+
572
+ ### Evaluation Dataset
573
+
574
+ #### sentence-transformers/quora-duplicates
575
+
576
+ * Dataset: [sentence-transformers/quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb)
577
+ * Size: 1,000 evaluation samples
578
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
579
+ * Approximate statistics based on the first 1000 samples:
580
+ | | sentence1 | sentence2 | label |
581
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
582
+ | type | string | string | int |
583
+ | details | <ul><li>min: 6 tokens</li><li>mean: 15.82 tokens</li><li>max: 46 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.91 tokens</li><li>max: 72 tokens</li></ul> | <ul><li>0: ~62.90%</li><li>1: ~37.10%</li></ul> |
584
+ * Samples:
585
+ | sentence1 | sentence2 | label |
586
+ |:------------------------------------------------------|:---------------------------------------------------|:---------------|
587
+ | <code>How should I prepare for JEE Mains 2017?</code> | <code>How do I prepare for the JEE 2016?</code> | <code>0</code> |
588
+ | <code>What is the gate exam?</code> | <code>What is the GATE exam in engineering?</code> | <code>0</code> |
589
+ | <code>Where do IRS officers get posted?</code> | <code>Does IRS Officers get posted abroad?</code> | <code>0</code> |
590
+ * Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/losses.html#onlinecontrastiveloss)
591
+
592
+ ### Training Hyperparameters
593
+ #### Non-Default Hyperparameters
594
+
595
+ - `eval_strategy`: steps
596
+ - `per_device_train_batch_size`: 64
597
+ - `per_device_eval_batch_size`: 64
598
+ - `num_train_epochs`: 1
599
+ - `warmup_ratio`: 0.1
600
+ - `fp16`: True
601
+ - `batch_sampler`: no_duplicates
602
+
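+ The non-default values above translate fairly directly into a `SentenceTransformerTrainer` run on the dataset and loss described in the Training Dataset section. A hedged sketch of what such a script might look like (the split sizes, the `pair-class` subset name, and the output directory are assumptions, not taken from this repository):
+
+ ```python
+ from datasets import load_dataset
+ from sentence_transformers import (
+     SentenceTransformer,
+     SentenceTransformerTrainer,
+     SentenceTransformerTrainingArguments,
+ )
+ from sentence_transformers.losses import OnlineContrastiveLoss
+ from sentence_transformers.training_args import BatchSamplers
+
+ model = SentenceTransformer("sentence-transformers/stsb-distilbert-base")
+
+ # sentence1 / sentence2 / label columns, as described in the Training Dataset section
+ dataset = load_dataset("sentence-transformers/quora-duplicates", "pair-class", split="train")
+ dataset = dataset.train_test_split(test_size=1_000)
+
+ loss = OnlineContrastiveLoss(model)
+
+ args = SentenceTransformerTrainingArguments(
+     output_dir="stsb-distilbert-base-ocl",
+     num_train_epochs=1,
+     per_device_train_batch_size=64,
+     per_device_eval_batch_size=64,
+     warmup_ratio=0.1,
+     fp16=True,
+     eval_strategy="steps",
+     batch_sampler=BatchSamplers.NO_DUPLICATES,
+ )
+
+ trainer = SentenceTransformerTrainer(
+     model=model,
+     args=args,
+     train_dataset=dataset["train"],
+     eval_dataset=dataset["test"],
+     loss=loss,
+ )
+ trainer.train()
+ ```
+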
603
+ #### All Hyperparameters
604
+ <details><summary>Click to expand</summary>
605
+
606
+ - `overwrite_output_dir`: False
607
+ - `do_predict`: False
608
+ - `eval_strategy`: steps
609
+ - `prediction_loss_only`: False
610
+ - `per_device_train_batch_size`: 64
611
+ - `per_device_eval_batch_size`: 64
612
+ - `per_gpu_train_batch_size`: None
613
+ - `per_gpu_eval_batch_size`: None
614
+ - `gradient_accumulation_steps`: 1
615
+ - `eval_accumulation_steps`: None
616
+ - `learning_rate`: 5e-05
617
+ - `weight_decay`: 0.0
618
+ - `adam_beta1`: 0.9
619
+ - `adam_beta2`: 0.999
620
+ - `adam_epsilon`: 1e-08
621
+ - `max_grad_norm`: 1.0
622
+ - `num_train_epochs`: 1
623
+ - `max_steps`: -1
624
+ - `lr_scheduler_type`: linear
625
+ - `lr_scheduler_kwargs`: {}
626
+ - `warmup_ratio`: 0.1
627
+ - `warmup_steps`: 0
628
+ - `log_level`: passive
629
+ - `log_level_replica`: warning
630
+ - `log_on_each_node`: True
631
+ - `logging_nan_inf_filter`: True
632
+ - `save_safetensors`: True
633
+ - `save_on_each_node`: False
634
+ - `save_only_model`: False
635
+ - `no_cuda`: False
636
+ - `use_cpu`: False
637
+ - `use_mps_device`: False
638
+ - `seed`: 42
639
+ - `data_seed`: None
640
+ - `jit_mode_eval`: False
641
+ - `use_ipex`: False
642
+ - `bf16`: False
643
+ - `fp16`: True
644
+ - `fp16_opt_level`: O1
645
+ - `half_precision_backend`: auto
646
+ - `bf16_full_eval`: False
647
+ - `fp16_full_eval`: False
648
+ - `tf32`: None
649
+ - `local_rank`: 0
650
+ - `ddp_backend`: None
651
+ - `tpu_num_cores`: None
652
+ - `tpu_metrics_debug`: False
653
+ - `debug`: []
654
+ - `dataloader_drop_last`: False
655
+ - `dataloader_num_workers`: 0
656
+ - `dataloader_prefetch_factor`: None
657
+ - `past_index`: -1
658
+ - `disable_tqdm`: False
659
+ - `remove_unused_columns`: True
660
+ - `label_names`: None
661
+ - `load_best_model_at_end`: False
662
+ - `ignore_data_skip`: False
663
+ - `fsdp`: []
664
+ - `fsdp_min_num_params`: 0
665
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
666
+ - `fsdp_transformer_layer_cls_to_wrap`: None
667
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
668
+ - `deepspeed`: None
669
+ - `label_smoothing_factor`: 0.0
670
+ - `optim`: adamw_torch
671
+ - `optim_args`: None
672
+ - `adafactor`: False
673
+ - `group_by_length`: False
674
+ - `length_column_name`: length
675
+ - `ddp_find_unused_parameters`: None
676
+ - `ddp_bucket_cap_mb`: None
677
+ - `ddp_broadcast_buffers`: None
678
+ - `dataloader_pin_memory`: True
679
+ - `dataloader_persistent_workers`: False
680
+ - `skip_memory_metrics`: True
681
+ - `use_legacy_prediction_loop`: False
682
+ - `push_to_hub`: False
683
+ - `resume_from_checkpoint`: None
684
+ - `hub_model_id`: None
685
+ - `hub_strategy`: every_save
686
+ - `hub_private_repo`: False
687
+ - `hub_always_push`: False
688
+ - `gradient_checkpointing`: False
689
+ - `gradient_checkpointing_kwargs`: None
690
+ - `include_inputs_for_metrics`: False
691
+ - `eval_do_concat_batches`: True
692
+ - `fp16_backend`: auto
693
+ - `push_to_hub_model_id`: None
694
+ - `push_to_hub_organization`: None
695
+ - `mp_parameters`:
696
+ - `auto_find_batch_size`: False
697
+ - `full_determinism`: False
698
+ - `torchdynamo`: None
699
+ - `ray_scope`: last
700
+ - `ddp_timeout`: 1800
701
+ - `torch_compile`: False
702
+ - `torch_compile_backend`: None
703
+ - `torch_compile_mode`: None
704
+ - `dispatch_batches`: None
705
+ - `split_batches`: None
706
+ - `include_tokens_per_second`: False
707
+ - `include_num_input_tokens_seen`: False
708
+ - `neftune_noise_alpha`: None
709
+ - `optim_target_modules`: None
710
+ - `batch_sampler`: no_duplicates
711
+ - `multi_dataset_batch_sampler`: proportional
712
+
713
+ </details>
714
+
715
+ ### Training Logs
716
+ | Epoch | Step | Training Loss | loss | cosine_map@100 | quora-duplicates-dev_average_precision | quora-duplicates_max_ap |
717
+ |:------:|:----:|:-------------:|:------:|:--------------:|:--------------------------------------:|:-----------------------:|
718
+ | 0 | 0 | - | - | 0.9235 | 0.4200 | 0.7276 |
719
+ | 0.0640 | 100 | 2.5123 | - | - | - | - |
720
+ | 0.1280 | 200 | 2.0534 | - | - | - | - |
721
+ | 0.1599 | 250 | - | 1.7914 | 0.9127 | 0.4082 | 0.8301 |
722
+ | 0.1919 | 300 | 1.9505 | - | - | - | - |
723
+ | 0.2559 | 400 | 1.9836 | - | - | - | - |
724
+ | 0.3199 | 500 | 1.8462 | 1.5923 | 0.9190 | 0.4445 | 0.8688 |
725
+ | 0.3839 | 600 | 1.7734 | - | - | - | - |
726
+ | 0.4479 | 700 | 1.7918 | - | - | - | - |
727
+ | 0.4798 | 750 | - | 1.5461 | 0.9291 | 0.4943 | 0.8707 |
728
+ | 0.5118 | 800 | 1.6157 | - | - | - | - |
729
+ | 0.5758 | 900 | 1.7244 | - | - | - | - |
730
+ | 0.6398 | 1000 | 1.7322 | 1.5294 | 0.9309 | 0.5048 | 0.8808 |
731
+ | 0.7038 | 1100 | 1.6825 | - | - | - | - |
732
+ | 0.7678 | 1200 | 1.6823 | - | - | - | - |
733
+ | 0.7997 | 1250 | - | 1.4812 | 0.9351 | 0.5126 | 0.8865 |
734
+ | 0.8317 | 1300 | 1.5707 | - | - | - | - |
735
+ | 0.8957 | 1400 | 1.6145 | - | - | - | - |
736
+ | 0.9597 | 1500 | 1.5795 | 1.4705 | 0.9390 | 0.5344 | 0.8884 |
737
+
738
+
739
+ ### Environmental Impact
740
+ Carbon emissions were measured using [CodeCarbon](https://github.com/mlco2/codecarbon).
741
+ - **Energy Consumed**: 0.040 kWh
742
+ - **Carbon Emitted**: 0.016 kg of CO2
743
+ - **Hours Used**: 0.202 hours
744
+
745
+ ### Training Hardware
746
+ - **On Cloud**: No
747
+ - **GPU Model**: 1 x NVIDIA GeForce RTX 3090
748
+ - **CPU Model**: 13th Gen Intel(R) Core(TM) i7-13700K
749
+ - **RAM Size**: 31.78 GB
750
+
751
+ ### Framework Versions
752
+ - Python: 3.11.6
753
+ - Sentence Transformers: 3.0.0.dev0
754
+ - Transformers: 4.41.0.dev0
755
+ - PyTorch: 2.3.0+cu121
756
+ - Accelerate: 0.26.1
757
+ - Datasets: 2.18.0
758
+ - Tokenizers: 0.19.1
759
+
760
+ ## Citation
761
+
762
+ ### BibTeX
763
+
764
+ #### Sentence Transformers
765
+ ```bibtex
766
+ @inproceedings{reimers-2019-sentence-bert,
767
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
768
+ author = "Reimers, Nils and Gurevych, Iryna",
769
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
770
+ month = "11",
771
+ year = "2019",
772
+ publisher = "Association for Computational Linguistics",
773
+ url = "https://arxiv.org/abs/1908.10084",
774
+ }
775
+ ```
776
+
777
+ <!--
778
+ ## Glossary
779
+
780
+ *Clearly define terms in order to be accessible across audiences.*
781
+ -->
782
+
783
+ <!--
784
+ ## Model Card Authors
785
+
786
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
787
+ -->
788
+
789
+ <!--
790
+ ## Model Card Contact
791
+
792
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
793
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "_name_or_path": "sentence-transformers/stsb-distilbert-base",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertModel"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "qa_dropout": 0.1,
18
+ "seq_classif_dropout": 0.2,
19
+ "sinusoidal_pos_embds": false,
20
+ "tie_weights_": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.41.0.dev0",
23
+ "vocab_size": 30522
24
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.0.0",
4
+ "transformers": "4.7.0",
5
+ "pytorch": "1.9.0+cu102"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d03b2524152e39f65f10176d2e5fa7b0f261cf9b9a1e7f66c3d49829099318c
3
+ size 265462608
modules.json ADDED
@@ -0,0 +1,14 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 128,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "full_tokenizer_file": null,
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 128,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "DistilBertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff