CocoRoF committed on
Commit
983116d
·
verified ·
1 Parent(s): be99fd0

Training in progress, step 5, checkpoint

Browse files
last-checkpoint/2_Dense/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af8e000e75d8c67d7ccd238e1d436d7fc316b12360b6482472768cd1f1560787
3
  size 3149984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6f5e9bbb0278bccd5f7111dde1eafe0778c20560c5cba593530ada9a35b6af8
3
  size 3149984
last-checkpoint/README.md CHANGED
@@ -58,34 +58,34 @@ model-index:
58
  type: sts_dev
59
  metrics:
60
  - type: pearson_cosine
61
- value: 0.7501719739320529
62
  name: Pearson Cosine
63
  - type: spearman_cosine
64
- value: 0.7479806577356133
65
  name: Spearman Cosine
66
  - type: pearson_euclidean
67
- value: 0.6950119317484778
68
  name: Pearson Euclidean
69
  - type: spearman_euclidean
70
- value: 0.6856175380175804
71
  name: Spearman Euclidean
72
  - type: pearson_manhattan
73
- value: 0.6972701521824715
74
  name: Pearson Manhattan
75
  - type: spearman_manhattan
76
- value: 0.6882826037704075
77
  name: Spearman Manhattan
78
  - type: pearson_dot
79
- value: 0.6587791423576534
80
  name: Pearson Dot
81
  - type: spearman_dot
82
- value: 0.6501434858309123
83
  name: Spearman Dot
84
  - type: pearson_max
85
- value: 0.7501719739320529
86
  name: Pearson Max
87
  - type: spearman_max
88
- value: 0.7479806577356133
89
  name: Spearman Max
90
  ---
91
 
@@ -186,18 +186,18 @@ You can finetune this model on your own dataset.
186
  * Dataset: `sts_dev`
187
  * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
188
 
189
- | Metric | Value |
190
- |:-------------------|:----------|
191
- | pearson_cosine | 0.7502 |
192
- | spearman_cosine | 0.748 |
193
- | pearson_euclidean | 0.695 |
194
- | spearman_euclidean | 0.6856 |
195
- | pearson_manhattan | 0.6973 |
196
- | spearman_manhattan | 0.6883 |
197
- | pearson_dot | 0.6588 |
198
- | spearman_dot | 0.6501 |
199
- | pearson_max | 0.7502 |
200
- | **spearman_max** | **0.748** |
201
 
202
  <!--
203
  ## Bias, Risks and Limitations
@@ -266,10 +266,11 @@ You can finetune this model on your own dataset.
266
 
267
  - `overwrite_output_dir`: True
268
  - `eval_strategy`: steps
269
- - `per_device_train_batch_size`: 16
270
- - `per_device_eval_batch_size`: 16
271
- - `gradient_accumulation_steps`: 8
272
  - `learning_rate`: 8e-05
 
273
  - `warmup_ratio`: 0.2
274
  - `push_to_hub`: True
275
  - `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03-retry
@@ -283,11 +284,11 @@ You can finetune this model on your own dataset.
283
  - `do_predict`: False
284
  - `eval_strategy`: steps
285
  - `prediction_loss_only`: True
286
- - `per_device_train_batch_size`: 16
287
- - `per_device_eval_batch_size`: 16
288
  - `per_gpu_train_batch_size`: None
289
  - `per_gpu_eval_batch_size`: None
290
- - `gradient_accumulation_steps`: 8
291
  - `eval_accumulation_steps`: None
292
  - `torch_empty_cache_steps`: None
293
  - `learning_rate`: 8e-05
@@ -296,7 +297,7 @@ You can finetune this model on your own dataset.
296
  - `adam_beta2`: 0.999
297
  - `adam_epsilon`: 1e-08
298
  - `max_grad_norm`: 1.0
299
- - `num_train_epochs`: 3.0
300
  - `max_steps`: -1
301
  - `lr_scheduler_type`: linear
302
  - `lr_scheduler_kwargs`: {}
@@ -400,7 +401,7 @@ You can finetune this model on your own dataset.
400
  ### Training Logs
401
  | Epoch | Step | Validation Loss | sts_dev_spearman_max |
402
  |:------:|:----:|:---------------:|:--------------------:|
403
- | 0.5455 | 3 | 0.0373 | 0.7480 |
404
 
405
 
406
  ### Framework Versions
 
58
  type: sts_dev
59
  metrics:
60
  - type: pearson_cosine
61
+ value: 0.7494115429773479
62
  name: Pearson Cosine
63
  - type: spearman_cosine
64
+ value: 0.7470700524367354
65
  name: Spearman Cosine
66
  - type: pearson_euclidean
67
+ value: 0.6941454281465765
68
  name: Pearson Euclidean
69
  - type: spearman_euclidean
70
+ value: 0.684590776689316
71
  name: Spearman Euclidean
72
  - type: pearson_manhattan
73
+ value: 0.6964259759684527
74
  name: Pearson Manhattan
75
  - type: spearman_manhattan
76
+ value: 0.6873610947323412
77
  name: Spearman Manhattan
78
  - type: pearson_dot
79
+ value: 0.6583752142885668
80
  name: Pearson Dot
81
  - type: spearman_dot
82
+ value: 0.6497928276890669
83
  name: Spearman Dot
84
  - type: pearson_max
85
+ value: 0.7494115429773479
86
  name: Pearson Max
87
  - type: spearman_max
88
+ value: 0.7470700524367354
89
  name: Spearman Max
90
  ---
91
 
 
186
  * Dataset: `sts_dev`
187
  * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
188
 
189
+ | Metric | Value |
190
+ |:-------------------|:-----------|
191
+ | pearson_cosine | 0.7494 |
192
+ | spearman_cosine | 0.7471 |
193
+ | pearson_euclidean | 0.6941 |
194
+ | spearman_euclidean | 0.6846 |
195
+ | pearson_manhattan | 0.6964 |
196
+ | spearman_manhattan | 0.6874 |
197
+ | pearson_dot | 0.6584 |
198
+ | spearman_dot | 0.6498 |
199
+ | pearson_max | 0.7494 |
200
+ | **spearman_max** | **0.7471** |
201
 
202
  <!--
203
  ## Bias, Risks and Limitations
 
266
 
267
  - `overwrite_output_dir`: True
268
  - `eval_strategy`: steps
269
+ - `per_device_train_batch_size`: 1
270
+ - `per_device_eval_batch_size`: 1
271
+ - `gradient_accumulation_steps`: 16
272
  - `learning_rate`: 8e-05
273
+ - `num_train_epochs`: 10.0
274
  - `warmup_ratio`: 0.2
275
  - `push_to_hub`: True
276
  - `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03-retry
 
284
  - `do_predict`: False
285
  - `eval_strategy`: steps
286
  - `prediction_loss_only`: True
287
+ - `per_device_train_batch_size`: 1
288
+ - `per_device_eval_batch_size`: 1
289
  - `per_gpu_train_batch_size`: None
290
  - `per_gpu_eval_batch_size`: None
291
+ - `gradient_accumulation_steps`: 16
292
  - `eval_accumulation_steps`: None
293
  - `torch_empty_cache_steps`: None
294
  - `learning_rate`: 8e-05
 
297
  - `adam_beta2`: 0.999
298
  - `adam_epsilon`: 1e-08
299
  - `max_grad_norm`: 1.0
300
+ - `num_train_epochs`: 10.0
301
  - `max_steps`: -1
302
  - `lr_scheduler_type`: linear
303
  - `lr_scheduler_kwargs`: {}
 
401
  ### Training Logs
402
  | Epoch | Step | Validation Loss | sts_dev_spearman_max |
403
  |:------:|:----:|:---------------:|:--------------------:|
404
+ | 0.1114 | 5 | 0.0377 | 0.7471 |
405
 
406
 
407
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb41efa187e981ffdf2d70aa9cead5df9133631b64effed4a884aa80b5e50c60
3
  size 735216376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef8ba139bbf8cc3e37ff551c0583eb18ad410203eff7083c9ca251338c80b6a
3
  size 735216376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c5b38dee965f0b4e99b14be42584a5416c2d8795490aece812d9112dc52ac0
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f451df94acc29ee1b5aee563e0dd7baf84f28529b714825cae75dff52ae97e44
3
  size 1476823354
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f13302c77db72d951d015b8f23d8c87919e14738fb30ddfb20a6c2417dd1d21
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adeff075bf4ae4999268687af80c7d60cbf988ed52c8f4810d47686a7b79442e
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,37 +1,37 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5454545454545454,
5
- "eval_steps": 3,
6
- "global_step": 3,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.5454545454545454,
13
- "eval_loss": 0.037283755838871,
14
- "eval_runtime": 2.8675,
15
- "eval_samples_per_second": 523.109,
16
- "eval_steps_per_second": 4.185,
17
- "eval_sts_dev_pearson_cosine": 0.7501719739320529,
18
- "eval_sts_dev_pearson_dot": 0.6587791423576534,
19
- "eval_sts_dev_pearson_euclidean": 0.6950119317484778,
20
- "eval_sts_dev_pearson_manhattan": 0.6972701521824715,
21
- "eval_sts_dev_pearson_max": 0.7501719739320529,
22
- "eval_sts_dev_spearman_cosine": 0.7479806577356133,
23
- "eval_sts_dev_spearman_dot": 0.6501434858309123,
24
- "eval_sts_dev_spearman_euclidean": 0.6856175380175804,
25
- "eval_sts_dev_spearman_manhattan": 0.6882826037704075,
26
- "eval_sts_dev_spearman_max": 0.7479806577356133,
27
- "step": 3
28
  }
29
  ],
30
  "logging_steps": 10,
31
- "max_steps": 15,
32
  "num_input_tokens_seen": 0,
33
- "num_train_epochs": 3,
34
- "save_steps": 3,
35
  "stateful_callbacks": {
36
  "TrainerControl": {
37
  "args": {
@@ -45,7 +45,7 @@
45
  }
46
  },
47
  "total_flos": 0.0,
48
- "train_batch_size": 16,
49
  "trial_name": null,
50
  "trial_params": null
51
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.11142061281337047,
5
+ "eval_steps": 5,
6
+ "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.11142061281337047,
13
+ "eval_loss": 0.037675488740205765,
14
+ "eval_runtime": 6.7342,
15
+ "eval_samples_per_second": 222.743,
16
+ "eval_steps_per_second": 27.917,
17
+ "eval_sts_dev_pearson_cosine": 0.7494115429773479,
18
+ "eval_sts_dev_pearson_dot": 0.6583752142885668,
19
+ "eval_sts_dev_pearson_euclidean": 0.6941454281465765,
20
+ "eval_sts_dev_pearson_manhattan": 0.6964259759684527,
21
+ "eval_sts_dev_pearson_max": 0.7494115429773479,
22
+ "eval_sts_dev_spearman_cosine": 0.7470700524367354,
23
+ "eval_sts_dev_spearman_dot": 0.6497928276890669,
24
+ "eval_sts_dev_spearman_euclidean": 0.684590776689316,
25
+ "eval_sts_dev_spearman_manhattan": 0.6873610947323412,
26
+ "eval_sts_dev_spearman_max": 0.7470700524367354,
27
+ "step": 5
28
  }
29
  ],
30
  "logging_steps": 10,
31
+ "max_steps": 440,
32
  "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 10,
34
+ "save_steps": 5,
35
  "stateful_callbacks": {
36
  "TrainerControl": {
37
  "args": {
 
45
  }
46
  },
47
  "total_flos": 0.0,
48
+ "train_batch_size": 1,
49
  "trial_name": null,
50
  "trial_params": null
51
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ca14ea9a349dd9557ee3240159b578fbc0d4351af07ba2f771c21465a6b6687
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12391e60c366f91d109aa8b1d895de31abafef60746621c5321108d15de493f1
3
  size 5688