hungnm committed on
Commit
8f98e8d
·
verified ·
1 Parent(s): d5823f9

Model save

Browse files
README.md CHANGED
@@ -1,3 +1,72 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: answerdotai/ModernBERT-base
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - f1
9
+ - precision
10
+ - recall
11
+ model-index:
12
+ - name: modernBERT-base-multilingual-sentiment
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # modernBERT-base-multilingual-sentiment
20
+
21
+ This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on an unknown dataset.
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 1.8330
24
+ - F1: 0.1291
25
+ - Precision: 0.1650
26
+ - Recall: 0.1890
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 6e-05
46
+ - train_batch_size: 1024
47
+ - eval_batch_size: 1024
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 2
51
+ - total_train_batch_size: 2048
52
+ - total_eval_batch_size: 2048
53
+ - optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
54
+ - lr_scheduler_type: cosine
55
+ - lr_scheduler_warmup_ratio: 0.01
56
+ - num_epochs: 2.0
57
+ - mixed_precision_training: Native AMP
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall |
62
+ |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
63
+ | 1.8373 | 1.0 | 8 | 1.8330 | 0.1291 | 0.1650 | 0.1890 |
64
+ | 1.8364 | 2.0 | 16 | 1.8330 | 0.1291 | 0.1650 | 0.1890 |
65
+
66
+
67
+ ### Framework versions
68
+
69
+ - Transformers 4.48.0.dev0
70
+ - Pytorch 2.4.0+cu121
71
+ - Datasets 3.2.0
72
+ - Tokenizers 0.21.0
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 5.0,
3
  "eval_f1": 0.5005918962125724,
4
  "eval_loss": 0.916015625,
5
  "eval_precision": 0.5713280704919877,
@@ -14,8 +14,8 @@
14
  "test_runtime": 0.2719,
15
  "test_samples_per_second": 7355.241,
16
  "test_steps_per_second": 3.678,
17
- "train_loss": 1.565679931640625,
18
- "train_runtime": 80.5078,
19
- "train_samples_per_second": 993.693,
20
- "train_steps_per_second": 0.497
21
  }
 
1
  {
2
+ "epoch": 2.0,
3
  "eval_f1": 0.5005918962125724,
4
  "eval_loss": 0.916015625,
5
  "eval_precision": 0.5713280704919877,
 
14
  "test_runtime": 0.2719,
15
  "test_samples_per_second": 7355.241,
16
  "test_steps_per_second": 3.678,
17
+ "train_loss": 1.836273193359375,
18
+ "train_runtime": 55.5777,
19
+ "train_samples_per_second": 575.77,
20
+ "train_steps_per_second": 0.288
21
  }
runs/Dec31_23-40-45_hn-fornix-testing-gpu-platform-2/events.out.tfevents.1735688460.hn-fornix-testing-gpu-platform-2.1028595.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:287d08406c1997c6520f7cf436397cbb03265b773d3bbbaeb4210f850067a7d0
3
- size 6644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dad1fffc7ba82d9a265fbef176283a5ba7e5c45c1c7c0790d7a059195ce79e46
3
+ size 7818
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 1.565679931640625,
4
- "train_runtime": 80.5078,
5
- "train_samples_per_second": 993.693,
6
- "train_steps_per_second": 0.497
7
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "train_loss": 1.836273193359375,
4
+ "train_runtime": 55.5777,
5
+ "train_samples_per_second": 575.77,
6
+ "train_steps_per_second": 0.288
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.5005918962125724,
3
- "best_model_checkpoint": "./output/modernBERT-base-sentiment-v2/checkpoint-40",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21,9 +21,9 @@
21
  "eval_loss": 1.8330078125,
22
  "eval_precision": 0.16504066117321736,
23
  "eval_recall": 0.1890018282051825,
24
- "eval_runtime": 8.9078,
25
- "eval_samples_per_second": 224.523,
26
- "eval_steps_per_second": 0.112,
27
  "step": 8
28
  },
29
  {
@@ -46,93 +46,25 @@
46
  "eval_loss": 1.8330078125,
47
  "eval_precision": 0.16504066117321736,
48
  "eval_recall": 0.1890018282051825,
49
- "eval_runtime": 0.2841,
50
- "eval_samples_per_second": 7040.66,
51
- "eval_steps_per_second": 3.52,
52
  "step": 16
53
  },
54
  {
55
- "epoch": 2.5,
56
- "grad_norm": 5.113753318786621,
57
- "learning_rate": 5.961150787913738e-05,
58
- "loss": 1.7937,
59
- "step": 20
60
- },
61
- {
62
- "epoch": 3.0,
63
- "eval_f1": 0.17970771787293252,
64
- "eval_loss": 1.53515625,
65
- "eval_precision": 0.22861304458868634,
66
- "eval_recall": 0.23211174242424246,
67
- "eval_runtime": 0.2207,
68
- "eval_samples_per_second": 9062.407,
69
- "eval_steps_per_second": 4.531,
70
- "step": 24
71
- },
72
- {
73
- "epoch": 3.125,
74
- "grad_norm": 1.7188981771469116,
75
- "learning_rate": 5.535570256631384e-05,
76
- "loss": 1.5896,
77
- "step": 25
78
- },
79
- {
80
- "epoch": 3.75,
81
- "grad_norm": 1.9793833494186401,
82
- "learning_rate": 4.704194240193467e-05,
83
- "loss": 1.4145,
84
- "step": 30
85
- },
86
- {
87
- "epoch": 4.0,
88
- "eval_f1": 0.31283295562034524,
89
- "eval_loss": 1.15625,
90
- "eval_precision": 0.5734842356008,
91
- "eval_recall": 0.33733943473317013,
92
- "eval_runtime": 0.1992,
93
- "eval_samples_per_second": 10041.198,
94
- "eval_steps_per_second": 5.021,
95
- "step": 32
96
- },
97
- {
98
- "epoch": 4.375,
99
- "grad_norm": 11.112994194030762,
100
- "learning_rate": 3.6000770813281334e-05,
101
- "loss": 1.2037,
102
- "step": 35
103
- },
104
- {
105
- "epoch": 5.0,
106
- "grad_norm": 2.136584758758545,
107
- "learning_rate": 2.399922918671867e-05,
108
- "loss": 1.0167,
109
- "step": 40
110
- },
111
- {
112
- "epoch": 5.0,
113
- "eval_f1": 0.5005918962125724,
114
- "eval_loss": 0.916015625,
115
- "eval_precision": 0.5713280704919877,
116
- "eval_recall": 0.49814126531372666,
117
- "eval_runtime": 0.2727,
118
- "eval_samples_per_second": 7333.058,
119
- "eval_steps_per_second": 3.667,
120
- "step": 40
121
- },
122
- {
123
- "epoch": 5.0,
124
- "step": 40,
125
- "total_flos": 3738898811322368.0,
126
- "train_loss": 1.565679931640625,
127
- "train_runtime": 80.5078,
128
- "train_samples_per_second": 993.693,
129
- "train_steps_per_second": 0.497
130
  }
131
  ],
132
  "logging_steps": 5,
133
- "max_steps": 40,
134
  "num_input_tokens_seen": 0,
135
- "num_train_epochs": 5,
136
  "save_steps": 5.0,
137
  "stateful_callbacks": {
138
  "TrainerControl": {
@@ -146,7 +78,7 @@
146
  "attributes": {}
147
  }
148
  },
149
- "total_flos": 3738898811322368.0,
150
  "train_batch_size": 1024,
151
  "trial_name": null,
152
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.12910686958067819,
3
+ "best_model_checkpoint": "./output/modernBERT-base-sentiment-v2/checkpoint-8",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21
  "eval_loss": 1.8330078125,
22
  "eval_precision": 0.16504066117321736,
23
  "eval_recall": 0.1890018282051825,
24
+ "eval_runtime": 8.3613,
25
+ "eval_samples_per_second": 239.198,
26
+ "eval_steps_per_second": 0.12,
27
  "step": 8
28
  },
29
  {
 
46
  "eval_loss": 1.8330078125,
47
  "eval_precision": 0.16504066117321736,
48
  "eval_recall": 0.1890018282051825,
49
+ "eval_runtime": 0.2784,
50
+ "eval_samples_per_second": 7185.079,
51
+ "eval_steps_per_second": 3.593,
52
  "step": 16
53
  },
54
  {
55
+ "epoch": 2.0,
56
+ "step": 16,
57
+ "total_flos": 1489153141243904.0,
58
+ "train_loss": 1.836273193359375,
59
+ "train_runtime": 55.5777,
60
+ "train_samples_per_second": 575.77,
61
+ "train_steps_per_second": 0.288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 5,
65
+ "max_steps": 16,
66
  "num_input_tokens_seen": 0,
67
+ "num_train_epochs": 2,
68
  "save_steps": 5.0,
69
  "stateful_callbacks": {
70
  "TrainerControl": {
 
78
  "attributes": {}
79
  }
80
  },
81
+ "total_flos": 1489153141243904.0,
82
  "train_batch_size": 1024,
83
  "trial_name": null,
84
  "trial_params": null