Add new SentenceTransformer model.
Browse files
- 1_Pooling/config.json +10 -0
- README.md +356 -0
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
3 |
+
datasets: []
|
4 |
+
language: []
|
5 |
+
library_name: sentence-transformers
|
6 |
+
pipeline_tag: sentence-similarity
|
7 |
+
tags:
|
8 |
+
- sentence-transformers
|
9 |
+
- sentence-similarity
|
10 |
+
- feature-extraction
|
11 |
+
- generated_from_trainer
|
12 |
+
- dataset_size:2860
|
13 |
+
- loss:CosineSimilarityLoss
|
14 |
+
widget:
|
15 |
+
- source_sentence: No, it is not true. The sex chromosomes of the father determine
|
16 |
+
the sex of an unborn baby, not the mother.
|
17 |
+
sentences:
|
18 |
+
- The wall of the uterus expands outward like a balloon during ovum maturation.
|
19 |
+
- The mother's emotional state during pregnancy can influence the sex of the baby,
|
20 |
+
making her solely responsible for determining it.
|
21 |
+
- Six
|
22 |
+
- source_sentence: Answer not found in response.
|
23 |
+
sentences:
|
24 |
+
- nan
|
25 |
+
- In living organisms, cells are likened to bricks in a building due to their role
|
26 |
+
as structural components.
|
27 |
+
- Plant cells exclusively house chloroplasts as they play a crucial role in converting
|
28 |
+
sunlight into energy for plants through the process of photosynthesis. These specialized
|
29 |
+
organelles possess chlorophyll, a green pigment essential for absorbing light
|
30 |
+
energy.
|
31 |
+
- source_sentence: The organelles found in the cytoplasm of a cell include mitochondria,
|
32 |
+
golgi bodies, ribosomes, and other components.
|
33 |
+
sentences:
|
34 |
+
- Examples of diseases that vaccines offer protection from are cholera, tuberculosis,
|
35 |
+
smallpox, and hepatitis.
|
36 |
+
- Having a balanced diet helps regulate the levels of fairy dust in the body, which
|
37 |
+
indirectly impacts reproductive health.
|
38 |
+
- Mitochondria, golgi bodies, ribosomes, and various other structures are present
|
39 |
+
in the cytoplasm of a cell.
|
40 |
+
- source_sentence: The basic practices of crop production include preparation of soil,
|
41 |
+
sowing, adding manure and fertilizers, irrigation, protecting from weeds, harvesting,
|
42 |
+
and storage.
|
43 |
+
sentences:
|
44 |
+
- You can see miniature plants growing inside the water droplet.
|
45 |
+
- Changes in their natural surroundings, such as deforestation and desertification,
|
46 |
+
cause migratory birds to fly to distant areas, impacting their access to food,
|
47 |
+
places for breeding, and the overall ecosystem.
|
48 |
+
- Essential tasks involved in crop cultivation consist of priming the soil, planting
|
49 |
+
seeds, applying fertilizers and manure, providing water, preventing weed growth,
|
50 |
+
collecting the crops, and storing them.
|
51 |
+
- source_sentence: The embryo gets embedded in the wall of the uterus for further
|
52 |
+
development after fertilisation.
|
53 |
+
sentences:
|
54 |
+
- By recycling paper, the need for harvesting trees for paper production can be
|
55 |
+
significantly reduced, leading to conservation of trees, energy, and water, as
|
56 |
+
well as minimizing the use of harmful chemicals in the paper-making process.
|
57 |
+
- In the rainy season, if you examine moist bread, you may see greyish white spots
|
58 |
+
that are adorned with minuscule, black circular shapes, believed to be microorganisms
|
59 |
+
that have thrived on the bread.
|
60 |
+
- Following fertilization, the embryo attaches to the uterine wall to progress in
|
61 |
+
its development.
|
62 |
+
---
|
63 |
+
|
64 |
+
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
65 |
+
|
66 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
67 |
+
|
68 |
+
## Model Details
|
69 |
+
|
70 |
+
### Model Description
|
71 |
+
- **Model Type:** Sentence Transformer
|
72 |
+
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision 8b3219a92973c328a8e22fadcfa821b5dc75636a -->
|
73 |
+
- **Maximum Sequence Length:** 256 tokens
|
74 |
+
- **Output Dimensionality:** 384 dimensions
|
75 |
+
- **Similarity Function:** Cosine Similarity
|
76 |
+
<!-- - **Training Dataset:** Unknown -->
|
77 |
+
<!-- - **Language:** Unknown -->
|
78 |
+
<!-- - **License:** Unknown -->
|
79 |
+
|
80 |
+
### Model Sources
|
81 |
+
|
82 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
83 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
84 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
85 |
+
|
86 |
+
### Full Model Architecture
|
87 |
+
|
88 |
+
```
|
89 |
+
SentenceTransformer(
|
90 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
|
91 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
92 |
+
(2): Normalize()
|
93 |
+
)
|
94 |
+
```
|
95 |
+
|
96 |
+
## Usage
|
97 |
+
|
98 |
+
### Direct Usage (Sentence Transformers)
|
99 |
+
|
100 |
+
First install the Sentence Transformers library:
|
101 |
+
|
102 |
+
```bash
|
103 |
+
pip install -U sentence-transformers
|
104 |
+
```
|
105 |
+
|
106 |
+
Then you can load this model and run inference.
|
107 |
+
```python
|
108 |
+
from sentence_transformers import SentenceTransformer
|
109 |
+
|
110 |
+
# Download from the 🤗 Hub
|
111 |
+
model = SentenceTransformer("msamg/sts_qna_model")
|
112 |
+
# Run inference
|
113 |
+
sentences = [
|
114 |
+
'The embryo gets embedded in the wall of the uterus for further development after fertilisation.',
|
115 |
+
'Following fertilization, the embryo attaches to the uterine wall to progress in its development.',
|
116 |
+
'By recycling paper, the need for harvesting trees for paper production can be significantly reduced, leading to conservation of trees, energy, and water, as well as minimizing the use of harmful chemicals in the paper-making process.',
|
117 |
+
]
|
118 |
+
embeddings = model.encode(sentences)
|
119 |
+
print(embeddings.shape)
|
120 |
+
# (3, 384)
|
121 |
+
|
122 |
+
# Get the similarity scores for the embeddings
|
123 |
+
similarities = model.similarity(embeddings, embeddings)
|
124 |
+
print(similarities.shape)
|
125 |
+
# torch.Size([3, 3])
|
126 |
+
```
|
127 |
+
|
128 |
+
<!--
|
129 |
+
### Direct Usage (Transformers)
|
130 |
+
|
131 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
132 |
+
|
133 |
+
</details>
|
134 |
+
-->
|
135 |
+
|
136 |
+
<!--
|
137 |
+
### Downstream Usage (Sentence Transformers)
|
138 |
+
|
139 |
+
You can finetune this model on your own dataset.
|
140 |
+
|
141 |
+
<details><summary>Click to expand</summary>
|
142 |
+
|
143 |
+
</details>
|
144 |
+
-->
|
145 |
+
|
146 |
+
<!--
|
147 |
+
### Out-of-Scope Use
|
148 |
+
|
149 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
150 |
+
-->
|
151 |
+
|
152 |
+
<!--
|
153 |
+
## Bias, Risks and Limitations
|
154 |
+
|
155 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
156 |
+
-->
|
157 |
+
|
158 |
+
<!--
|
159 |
+
### Recommendations
|
160 |
+
|
161 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
162 |
+
-->
|
163 |
+
|
164 |
+
## Training Details
|
165 |
+
|
166 |
+
### Training Dataset
|
167 |
+
|
168 |
+
#### Unnamed Dataset
|
169 |
+
|
170 |
+
|
171 |
+
* Size: 2,860 training samples
|
172 |
+
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
|
173 |
+
* Approximate statistics based on the first 1000 samples:
|
174 |
+
| | sentence_0 | sentence_1 | label |
|
175 |
+
|:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:---------------------------------------------------------------|
|
176 |
+
| type | string | string | float |
|
177 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 40.09 tokens</li><li>max: 225 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 26.95 tokens</li><li>max: 112 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.41</li><li>max: 1.0</li></ul> |
|
178 |
+
* Samples:
|
179 |
+
| sentence_0 | sentence_1 | label |
|
180 |
+
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
|
181 |
+
| <code>To identify the cell membrane, cytoplasm, and nucleus under a microscope when observing cheek cells, you can look for the cell membrane as the outer boundary of the cell, the cytoplasm which is the jelly-like substance between the cell membrane and the nucleus, and the nucleus which is usually darker and located in the center of the cell. Additionally, remember that animal cells do not have a cell wall.</code> | <code>When examining cheek cells under a microscope, you should be able to distinguish the cell membrane, which forms the outer layer, the cytoplasm, which is a gel-like material surrounding the nucleus, and the nucleus, located centrally and typically darker in appearance. It's important to note that animal cells lack a cell wall.</code> | <code>1.0</code> |
|
182 |
+
| <code>The development of the embryo in oviparous animals takes place inside the egg shell.</code> | <code>The development of the embryo in oviparous animals takes place in the mother's pouch.</code> | <code>0.0</code> |
|
183 |
+
| <code>Answer not found in response.</code> | <code>nan</code> | <code>1.0</code> |
|
184 |
+
* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
|
185 |
+
```json
|
186 |
+
{
|
187 |
+
"loss_fct": "torch.nn.modules.loss.MSELoss"
|
188 |
+
}
|
189 |
+
```
|
190 |
+
|
191 |
+
### Training Hyperparameters
|
192 |
+
#### Non-Default Hyperparameters
|
193 |
+
|
194 |
+
- `per_device_train_batch_size`: 16
|
195 |
+
- `per_device_eval_batch_size`: 16
|
196 |
+
- `num_train_epochs`: 1
|
197 |
+
- `multi_dataset_batch_sampler`: round_robin
|
198 |
+
|
199 |
+
#### All Hyperparameters
|
200 |
+
<details><summary>Click to expand</summary>
|
201 |
+
|
202 |
+
- `overwrite_output_dir`: False
|
203 |
+
- `do_predict`: False
|
204 |
+
- `eval_strategy`: no
|
205 |
+
- `prediction_loss_only`: True
|
206 |
+
- `per_device_train_batch_size`: 16
|
207 |
+
- `per_device_eval_batch_size`: 16
|
208 |
+
- `per_gpu_train_batch_size`: None
|
209 |
+
- `per_gpu_eval_batch_size`: None
|
210 |
+
- `gradient_accumulation_steps`: 1
|
211 |
+
- `eval_accumulation_steps`: None
|
212 |
+
- `learning_rate`: 5e-05
|
213 |
+
- `weight_decay`: 0.0
|
214 |
+
- `adam_beta1`: 0.9
|
215 |
+
- `adam_beta2`: 0.999
|
216 |
+
- `adam_epsilon`: 1e-08
|
217 |
+
- `max_grad_norm`: 1
|
218 |
+
- `num_train_epochs`: 1
|
219 |
+
- `max_steps`: -1
|
220 |
+
- `lr_scheduler_type`: linear
|
221 |
+
- `lr_scheduler_kwargs`: {}
|
222 |
+
- `warmup_ratio`: 0.0
|
223 |
+
- `warmup_steps`: 0
|
224 |
+
- `log_level`: passive
|
225 |
+
- `log_level_replica`: warning
|
226 |
+
- `log_on_each_node`: True
|
227 |
+
- `logging_nan_inf_filter`: True
|
228 |
+
- `save_safetensors`: True
|
229 |
+
- `save_on_each_node`: False
|
230 |
+
- `save_only_model`: False
|
231 |
+
- `restore_callback_states_from_checkpoint`: False
|
232 |
+
- `no_cuda`: False
|
233 |
+
- `use_cpu`: False
|
234 |
+
- `use_mps_device`: False
|
235 |
+
- `seed`: 42
|
236 |
+
- `data_seed`: None
|
237 |
+
- `jit_mode_eval`: False
|
238 |
+
- `use_ipex`: False
|
239 |
+
- `bf16`: False
|
240 |
+
- `fp16`: False
|
241 |
+
- `fp16_opt_level`: O1
|
242 |
+
- `half_precision_backend`: auto
|
243 |
+
- `bf16_full_eval`: False
|
244 |
+
- `fp16_full_eval`: False
|
245 |
+
- `tf32`: None
|
246 |
+
- `local_rank`: 0
|
247 |
+
- `ddp_backend`: None
|
248 |
+
- `tpu_num_cores`: None
|
249 |
+
- `tpu_metrics_debug`: False
|
250 |
+
- `debug`: []
|
251 |
+
- `dataloader_drop_last`: False
|
252 |
+
- `dataloader_num_workers`: 0
|
253 |
+
- `dataloader_prefetch_factor`: None
|
254 |
+
- `past_index`: -1
|
255 |
+
- `disable_tqdm`: False
|
256 |
+
- `remove_unused_columns`: True
|
257 |
+
- `label_names`: None
|
258 |
+
- `load_best_model_at_end`: False
|
259 |
+
- `ignore_data_skip`: False
|
260 |
+
- `fsdp`: []
|
261 |
+
- `fsdp_min_num_params`: 0
|
262 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
263 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
264 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
265 |
+
- `deepspeed`: None
|
266 |
+
- `label_smoothing_factor`: 0.0
|
267 |
+
- `optim`: adamw_torch
|
268 |
+
- `optim_args`: None
|
269 |
+
- `adafactor`: False
|
270 |
+
- `group_by_length`: False
|
271 |
+
- `length_column_name`: length
|
272 |
+
- `ddp_find_unused_parameters`: None
|
273 |
+
- `ddp_bucket_cap_mb`: None
|
274 |
+
- `ddp_broadcast_buffers`: False
|
275 |
+
- `dataloader_pin_memory`: True
|
276 |
+
- `dataloader_persistent_workers`: False
|
277 |
+
- `skip_memory_metrics`: True
|
278 |
+
- `use_legacy_prediction_loop`: False
|
279 |
+
- `push_to_hub`: False
|
280 |
+
- `resume_from_checkpoint`: None
|
281 |
+
- `hub_model_id`: None
|
282 |
+
- `hub_strategy`: every_save
|
283 |
+
- `hub_private_repo`: False
|
284 |
+
- `hub_always_push`: False
|
285 |
+
- `gradient_checkpointing`: False
|
286 |
+
- `gradient_checkpointing_kwargs`: None
|
287 |
+
- `include_inputs_for_metrics`: False
|
288 |
+
- `eval_do_concat_batches`: True
|
289 |
+
- `fp16_backend`: auto
|
290 |
+
- `push_to_hub_model_id`: None
|
291 |
+
- `push_to_hub_organization`: None
|
292 |
+
- `mp_parameters`:
|
293 |
+
- `auto_find_batch_size`: False
|
294 |
+
- `full_determinism`: False
|
295 |
+
- `torchdynamo`: None
|
296 |
+
- `ray_scope`: last
|
297 |
+
- `ddp_timeout`: 1800
|
298 |
+
- `torch_compile`: False
|
299 |
+
- `torch_compile_backend`: None
|
300 |
+
- `torch_compile_mode`: None
|
301 |
+
- `dispatch_batches`: None
|
302 |
+
- `split_batches`: None
|
303 |
+
- `include_tokens_per_second`: False
|
304 |
+
- `include_num_input_tokens_seen`: False
|
305 |
+
- `neftune_noise_alpha`: None
|
306 |
+
- `optim_target_modules`: None
|
307 |
+
- `batch_eval_metrics`: False
|
308 |
+
- `eval_on_start`: False
|
309 |
+
- `batch_sampler`: batch_sampler
|
310 |
+
- `multi_dataset_batch_sampler`: round_robin
|
311 |
+
|
312 |
+
</details>
|
313 |
+
|
314 |
+
### Framework Versions
|
315 |
+
- Python: 3.11.3
|
316 |
+
- Sentence Transformers: 3.0.1
|
317 |
+
- Transformers: 4.42.4
|
318 |
+
- PyTorch: 2.3.1+cpu
|
319 |
+
- Accelerate: 0.32.1
|
320 |
+
- Datasets: 2.20.0
|
321 |
+
- Tokenizers: 0.19.1
|
322 |
+
|
323 |
+
## Citation
|
324 |
+
|
325 |
+
### BibTeX
|
326 |
+
|
327 |
+
#### Sentence Transformers
|
328 |
+
```bibtex
|
329 |
+
@inproceedings{reimers-2019-sentence-bert,
|
330 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
331 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
332 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
333 |
+
month = "11",
|
334 |
+
year = "2019",
|
335 |
+
publisher = "Association for Computational Linguistics",
|
336 |
+
url = "https://arxiv.org/abs/1908.10084",
|
337 |
+
}
|
338 |
+
```
|
339 |
+
|
340 |
+
<!--
|
341 |
+
## Glossary
|
342 |
+
|
343 |
+
*Clearly define terms in order to be accessible across audiences.*
|
344 |
+
-->
|
345 |
+
|
346 |
+
<!--
|
347 |
+
## Model Card Authors
|
348 |
+
|
349 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
350 |
+
-->
|
351 |
+
|
352 |
+
<!--
|
353 |
+
## Model Card Contact
|
354 |
+
|
355 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
356 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "trained_models/all-MiniLM-L6-v2_finetuned",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 6,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.42.4",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.42.4",
|
5 |
+
"pytorch": "2.3.1+cpu"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d29ce07c452c799f9edace807804ced2dd5903e9bf29f6c75a664f6acb579943
|
3 |
+
size 90864192
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 256,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|