lealdaniel
/

comp-embedding-matching

@@ -4,35 +4,35 @@ tags:
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
-- dataset_size:5005
 - loss:MultipleNegativesRankingLoss
 base_model: sentence-transformers/all-mpnet-base-v2
 widget:
-- source_sentence: especialista de risco e prevenção a fraudes
-  sentences:
-  - risk & compliance
-  - internal communication
-  - accounting
-- source_sentence: coord integracao do cliente ii
-  sentences:
-  - strategic planning
-  - customer experience
-  - não encontrado (adicione nas observações)
-- source_sentence: gerente sr. marketing e performance
   sentences:
   - business operations
-  - d&i
-  - performance marketing
-- source_sentence: gerente executivo de operacoes
   sentences:
-  - business operations
-  - sdr
   - product management
-- source_sentence: sr designer
   sentences:
-  - product design
-  - talent acquisition
-  - lawyer
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
@@ -51,21 +51,6 @@ metrics:
 - cosine_ndcg@10
 - cosine_mrr@10
 - cosine_map@100
-- dot_accuracy@1
-- dot_accuracy@3
-- dot_accuracy@5
-- dot_accuracy@10
-- dot_precision@1
-- dot_precision@3
-- dot_precision@5
-- dot_precision@10
-- dot_recall@1
-- dot_recall@3
-- dot_recall@5
-- dot_recall@10
-- dot_ndcg@10
-- dot_mrr@10
-- dot_map@100
 model-index:
 - name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
   results:
@@ -77,95 +62,50 @@ model-index:
       type: unknown
     metrics:
     - type: cosine_accuracy@1
-      value: 0.6245583038869258
       name: Cosine Accuracy@1
     - type: cosine_accuracy@3
-      value: 0.8206713780918727
       name: Cosine Accuracy@3
     - type: cosine_accuracy@5
-      value: 0.8754416961130742
       name: Cosine Accuracy@5
     - type: cosine_accuracy@10
-      value: 0.926678445229682
       name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.6245583038869258
       name: Cosine Precision@1
     - type: cosine_precision@3
-      value: 0.2735571260306242
       name: Cosine Precision@3
     - type: cosine_precision@5
-      value: 0.17508833922261482
       name: Cosine Precision@5
     - type: cosine_precision@10
-      value: 0.0926678445229682
       name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.6245583038869258
       name: Cosine Recall@1
     - type: cosine_recall@3
-      value: 0.8206713780918727
       name: Cosine Recall@3
     - type: cosine_recall@5
-      value: 0.8754416961130742
       name: Cosine Recall@5
     - type: cosine_recall@10
-      value: 0.926678445229682
       name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.7790196193570564
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.7312496494475299
       name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.7347864977321262
       name: Cosine Map@100
-    - type: dot_accuracy@1
-      value: 0.6245583038869258
-      name: Dot Accuracy@1
-    - type: dot_accuracy@3
-      value: 0.8206713780918727
-      name: Dot Accuracy@3
-    - type: dot_accuracy@5
-      value: 0.8754416961130742
-      name: Dot Accuracy@5
-    - type: dot_accuracy@10
-      value: 0.926678445229682
-      name: Dot Accuracy@10
-    - type: dot_precision@1
-      value: 0.6245583038869258
-      name: Dot Precision@1
-    - type: dot_precision@3
-      value: 0.2735571260306242
-      name: Dot Precision@3
-    - type: dot_precision@5
-      value: 0.17508833922261482
-      name: Dot Precision@5
-    - type: dot_precision@10
-      value: 0.0926678445229682
-      name: Dot Precision@10
-    - type: dot_recall@1
-      value: 0.6245583038869258
-      name: Dot Recall@1
-    - type: dot_recall@3
-      value: 0.8206713780918727
-      name: Dot Recall@3
-    - type: dot_recall@5
-      value: 0.8754416961130742
-      name: Dot Recall@5
-    - type: dot_recall@10
-      value: 0.926678445229682
-      name: Dot Recall@10
-    - type: dot_ndcg@10
-      value: 0.7790196193570564
-      name: Dot Ndcg@10
-    - type: dot_mrr@10
-      value: 0.7312496494475299
-      name: Dot Mrr@10
-    - type: dot_map@100
-      value: 0.7347864977321262
-      name: Dot Map@100
 ---
 # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
@@ -178,7 +118,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
 - **Model Type:** Sentence Transformer
 - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 9a3225965996d404b775526de6dbfe85d3368642 -->
 - **Maximum Sequence Length:** 384 tokens
-- **Output Dimensionality:** 768 tokens
 - **Similarity Function:** Cosine Similarity
 <!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
@@ -218,9 +158,9 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
-    'sr designer',
-    'product design',
-    'talent acquisition',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
@@ -266,36 +206,21 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| cosine_accuracy@1   | 0.6246     |
-| cosine_accuracy@3   | 0.8207     |
-| cosine_accuracy@5   | 0.8754     |
-| cosine_accuracy@10  | 0.9267     |
-| cosine_precision@1  | 0.6246     |
-| cosine_precision@3  | 0.2736     |
-| cosine_precision@5  | 0.1751     |
-| cosine_precision@10 | 0.0927     |
-| cosine_recall@1     | 0.6246     |
-| cosine_recall@3     | 0.8207     |
-| cosine_recall@5     | 0.8754     |
-| cosine_recall@10    | 0.9267     |
-| cosine_ndcg@10      | 0.779      |
-| cosine_mrr@10       | 0.7312     |
-| **cosine_map@100**  | **0.7348** |
-| dot_accuracy@1      | 0.6246     |
-| dot_accuracy@3      | 0.8207     |
-| dot_accuracy@5      | 0.8754     |
-| dot_accuracy@10     | 0.9267     |
-| dot_precision@1     | 0.6246     |
-| dot_precision@3     | 0.2736     |
-| dot_precision@5     | 0.1751     |
-| dot_precision@10    | 0.0927     |
-| dot_recall@1        | 0.6246     |
-| dot_recall@3        | 0.8207     |
-| dot_recall@5        | 0.8754     |
-| dot_recall@10       | 0.9267     |
-| dot_ndcg@10         | 0.779      |
-| dot_mrr@10          | 0.7312     |
-| dot_map@100         | 0.7348     |
 <!--
 ## Bias, Risks and Limitations
@@ -316,19 +241,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 5,005 training samples
 * Columns: <code>input</code> and <code>output</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | input                                                                            | output                                                                           |
-  |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
-  | type    | string                                                                           | string                                                                           |
-  | details | <ul><li>min: 3 tokens</li><li>mean: 8.83 tokens</li><li>max: 21 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 7.21 tokens</li><li>max: 18 tokens</li></ul> |
 * Samples:
-  | input                                       | output                                                 |
-  |:--------------------------------------------|:-------------------------------------------------------|
-  | <code>fresador mecanico ii</code>           | <code>não encontrado (adicione nas observações)</code> |
-  | <code>analista de sistemas ui ux iii</code> | <code>product design</code>                            |
-  | <code>devops</code>                         | <code>devops engineering</code>                        |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
@@ -342,19 +267,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 1,132 evaluation samples
 * Columns: <code>input</code> and <code>output</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | input                                                                            | output                                                                           |
-  |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
-  | type    | string                                                                           | string                                                                           |
-  | details | <ul><li>min: 3 tokens</li><li>mean: 8.76 tokens</li><li>max: 20 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 7.08 tokens</li><li>max: 18 tokens</li></ul> |
 * Samples:
-  | input                                    | output                                                 |
-  |:-----------------------------------------|:-------------------------------------------------------|
-  | <code>produtor (a) de video pleno</code> | <code>não encontrado (adicione nas observações)</code> |
-  | <code>ai staff software engineer</code>  | <code>software engineering</code>                      |
-  | <code>montador digital i</code>          | <code>não encontrado (adicione nas observações)</code> |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
@@ -368,6 +293,8 @@ You can finetune this model on your own dataset.
 - `eval_strategy`: steps
 - `warmup_ratio`: 0.1
 #### All Hyperparameters
 <details><summary>Click to expand</summary>
@@ -389,7 +316,7 @@ You can finetune this model on your own dataset.
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`: 3.0
 - `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
@@ -429,7 +356,7 @@ You can finetune this model on your own dataset.
 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
-- `load_best_model_at_end`: False
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
@@ -459,6 +386,7 @@ You can finetune this model on your own dataset.
 - `gradient_checkpointing`: False
 - `gradient_checkpointing_kwargs`: None
 - `include_inputs_for_metrics`: False
 - `eval_do_concat_batches`: True
 - `fp16_backend`: auto
 - `push_to_hub_model_id`: None
@@ -482,35 +410,26 @@ You can finetune this model on your own dataset.
 - `eval_on_start`: False
 - `use_liger_kernel`: False
 - `eval_use_gather_object`: False
-- `batch_sampler`: batch_sampler
 - `multi_dataset_batch_sampler`: proportional
 </details>
 ### Training Logs
-| Epoch  | Step | Training Loss | loss   | cosine_map@100 |
-|:------:|:----:|:-------------:|:------:|:--------------:|
-| 0      | 0    | -             | -      | 0.3578         |
-| 0.3195 | 200  | -             | 0.9975 | 0.5035         |
-| 0.6390 | 400  | -             | 0.8471 | 0.5845         |
-| 0.7987 | 500  | 1.0355        | -      | -              |
-| 0.9585 | 600  | -             | 0.7569 | 0.6157         |
-| 1.2780 | 800  | -             | 0.7542 | 0.6565         |
-| 1.5974 | 1000 | 0.648         | 0.6835 | 0.6786         |
-| 1.9169 | 1200 | -             | 0.6569 | 0.6851         |
-| 2.2364 | 1400 | -             | 0.6480 | 0.7167         |
-| 2.3962 | 1500 | 0.5253        | -      | -              |
-| 2.5559 | 1600 | -             | 0.6506 | 0.7110         |
-| 2.8754 | 1800 | -             | 0.6391 | 0.7348         |
 ### Framework Versions
-- Python: 3.11.6
-- Sentence Transformers: 3.1.1
-- Transformers: 4.45.2
-- PyTorch: 2.5.1+cu124
 - Accelerate: 1.1.1
-- Datasets: 2.14.4
 - Tokenizers: 0.20.3
 ## Citation

 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
+- dataset_size:4372
 - loss:MultipleNegativesRankingLoss
 base_model: sentence-transformers/all-mpnet-base-v2
 widget:
+- source_sentence: analista de produtos pl
   sentences:
+  - product management
   - business operations
+  - logistic management generalist
+- source_sentence: product analyst ii
   sentences:
   - product management
+  - business development (bizdev)
+  - compliance
+- source_sentence: analista de gestão de gente pl
+  sentences:
+  - data engineering
+  - hr generalist
+  - data analysis
+- source_sentence: general services
+  sentences:
+  - financial planning and analysis (fp&a)
+  - customer success
+  - general services
+- source_sentence: const parceria de negocio ii
   sentences:
+  - hr generalist
+  - copywriter
+  - business development (bizdev)
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
 - cosine_ndcg@10
 - cosine_mrr@10
 - cosine_map@100
 model-index:
 - name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
   results:
       type: unknown
     metrics:
     - type: cosine_accuracy@1
+      value: 0.3202195791399817
       name: Cosine Accuracy@1
     - type: cosine_accuracy@3
+      value: 0.454711802378774
       name: Cosine Accuracy@3
     - type: cosine_accuracy@5
+      value: 0.5224153705397987
       name: Cosine Accuracy@5
     - type: cosine_accuracy@10
+      value: 0.6184812442817932
       name: Cosine Accuracy@10
     - type: cosine_precision@1
+      value: 0.3202195791399817
       name: Cosine Precision@1
     - type: cosine_precision@3
+      value: 0.15157060079292467
       name: Cosine Precision@3
     - type: cosine_precision@5
+      value: 0.10448307410795975
       name: Cosine Precision@5
     - type: cosine_precision@10
+      value: 0.061848124428179316
       name: Cosine Precision@10
     - type: cosine_recall@1
+      value: 0.3202195791399817
       name: Cosine Recall@1
     - type: cosine_recall@3
+      value: 0.454711802378774
       name: Cosine Recall@3
     - type: cosine_recall@5
+      value: 0.5224153705397987
       name: Cosine Recall@5
     - type: cosine_recall@10
+      value: 0.6184812442817932
       name: Cosine Recall@10
     - type: cosine_ndcg@10
+      value: 0.45577270813945114
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
+      value: 0.4052037496913979
       name: Cosine Mrr@10
     - type: cosine_map@100
+      value: 0.4178228611548902
       name: Cosine Map@100
 ---
 # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
 - **Model Type:** Sentence Transformer
 - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 9a3225965996d404b775526de6dbfe85d3368642 -->
 - **Maximum Sequence Length:** 384 tokens
+- **Output Dimensionality:** 768 dimensions
 - **Similarity Function:** Cosine Similarity
 <!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
 model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
+    'const parceria de negocio ii',
+    'business development (bizdev)',
+    'hr generalist',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
 | Metric              | Value      |
 |:--------------------|:-----------|
+| cosine_accuracy@1   | 0.3202     |
+| cosine_accuracy@3   | 0.4547     |
+| cosine_accuracy@5   | 0.5224     |
+| cosine_accuracy@10  | 0.6185     |
+| cosine_precision@1  | 0.3202     |
+| cosine_precision@3  | 0.1516     |
+| cosine_precision@5  | 0.1045     |
+| cosine_precision@10 | 0.0618     |
+| cosine_recall@1     | 0.3202     |
+| cosine_recall@3     | 0.4547     |
+| cosine_recall@5     | 0.5224     |
+| cosine_recall@10    | 0.6185     |
+| **cosine_ndcg@10**  | **0.4558** |
+| cosine_mrr@10       | 0.4052     |
+| cosine_map@100      | 0.4178     |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 4,372 training samples
 * Columns: <code>input</code> and <code>output</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | input                                                                              | output                                                                           |
+  |:--------|:-----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                           |
+  | details | <ul><li>min: 3 tokens</li><li>mean: 10.55 tokens</li><li>max: 141 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 5.03 tokens</li><li>max: 12 tokens</li></ul> |
 * Samples:
+  | input                                                   | output                              |
+  |:--------------------------------------------------------|:------------------------------------|
+  | <code>analista de desenvolvimento organizacional</code> | <code>learning & development</code> |
+  | <code>software engineer sr</code>                       | <code>software engineering</code>   |
+  | <code>gerente de grupo de produtos i</code>             | <code>product management</code>     |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
 #### Unnamed Dataset
+* Size: 1,093 evaluation samples
 * Columns: <code>input</code> and <code>output</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | input                                                                             | output                                                                           |
+  |:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
+  | type    | string                                                                            | string                                                                           |
+  | details | <ul><li>min: 3 tokens</li><li>mean: 9.91 tokens</li><li>max: 122 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 4.97 tokens</li><li>max: 12 tokens</li></ul> |
 * Samples:
+  | input                                          | output                              |
+  |:-----------------------------------------------|:------------------------------------|
+  | <code>analista de student experience ii</code> | <code>customer support</code>       |
+  | <code>legal support</code>                     | <code>legal support</code>          |
+  | <code>analista de dho</code>                   | <code>learning & development</code> |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
 - `eval_strategy`: steps
 - `warmup_ratio`: 0.1
+- `load_best_model_at_end`: True
+- `batch_sampler`: no_duplicates
 #### All Hyperparameters
 <details><summary>Click to expand</summary>
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
+- `num_train_epochs`: 3
 - `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
+- `load_best_model_at_end`: True
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
 - `gradient_checkpointing`: False
 - `gradient_checkpointing_kwargs`: None
 - `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
 - `eval_do_concat_batches`: True
 - `fp16_backend`: auto
 - `push_to_hub_model_id`: None
 - `eval_on_start`: False
 - `use_liger_kernel`: False
 - `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: no_duplicates
 - `multi_dataset_batch_sampler`: proportional
 </details>
 ### Training Logs
+| Epoch | Step | cosine_ndcg@10 |
+|:-----:|:----:|:--------------:|
+| 0     | 0    | 0.4558         |
 ### Framework Versions
+- Python: 3.11.0
+- Sentence Transformers: 3.3.1
+- Transformers: 4.46.3
+- PyTorch: 2.2.2
 - Accelerate: 1.1.1
+- Datasets: 3.1.0
 - Tokenizers: 0.20.3
 ## Citation

config.json CHANGED Viewed

@@ -19,6 +19,6 @@
   "pad_token_id": 1,
   "relative_attention_num_buckets": 32,
   "torch_dtype": "float32",
-  "transformers_version": "4.45.2",
   "vocab_size": 30527
 }

   "pad_token_id": 1,
   "relative_attention_num_buckets": 32,
   "torch_dtype": "float32",
+  "transformers_version": "4.46.3",
   "vocab_size": 30527
 }

config_sentence_transformers.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "__version__": {
-    "sentence_transformers": "3.1.1",
-    "transformers": "4.45.2",
-    "pytorch": "2.5.1+cu124"
   },
   "prompts": {},
   "default_prompt_name": null,
-  "similarity_fn_name": null
 }

 {
   "__version__": {
+    "sentence_transformers": "3.3.1",
+    "transformers": "4.46.3",
+    "pytorch": "2.2.2"
   },
   "prompts": {},
   "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5855a55cd3835eec991b1c6b1d902581ed783c5a6ac097472f3296a3e642cc6
 size 437967672

 version https://git-lfs.github.com/spec/v1
+oid sha256:b12db7f02b40be2f96f0917beaaf9462baea0bc46b6ca85a26613d5db4d792d4
 size 437967672

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1dab884e2c5d7c8d23955392573b1b67fdafe15fd6f1a52d4dbe0eaf6ab1baf
+size 5560