akahana committed on
Commit
1b500c4
1 Parent(s): 7d6fc86

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: smallbert-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # smallbert-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: smallbert-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.1432211125795332
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # smallbert-javanese
28
 
29
+ This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 6.2766
32
+ - Accuracy: 0.1432
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.14000572230472635,
4
- "eval_loss": 6.5218329429626465,
5
- "eval_runtime": 5.6501,
6
  "eval_samples": 963,
7
- "eval_samples_per_second": 170.44,
8
- "eval_steps_per_second": 10.796,
9
- "perplexity": 679.8233214153153,
10
- "total_flos": 7568723438714880.0,
11
- "train_loss": 6.792878454773869,
12
- "train_runtime": 2074.686,
13
  "train_samples": 19092,
14
- "train_samples_per_second": 92.024,
15
- "train_steps_per_second": 2.878
16
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.1432211125795332,
4
+ "eval_loss": 6.276556968688965,
5
+ "eval_runtime": 5.1628,
6
  "eval_samples": 963,
7
+ "eval_samples_per_second": 186.528,
8
+ "eval_steps_per_second": 11.815,
9
+ "perplexity": 531.9539730039461,
10
+ "total_flos": 1.513744687742976e+16,
11
+ "train_loss": 3.172654545207319,
12
+ "train_runtime": 2061.7586,
13
  "train_samples": 19092,
14
+ "train_samples_per_second": 185.201,
15
+ "train_steps_per_second": 5.791
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.14000572230472635,
4
- "eval_loss": 6.5218329429626465,
5
- "eval_runtime": 5.6501,
6
  "eval_samples": 963,
7
- "eval_samples_per_second": 170.44,
8
- "eval_steps_per_second": 10.796,
9
- "perplexity": 679.8233214153153
10
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.1432211125795332,
4
+ "eval_loss": 6.276556968688965,
5
+ "eval_runtime": 5.1628,
6
  "eval_samples": 963,
7
+ "eval_samples_per_second": 186.528,
8
+ "eval_steps_per_second": 11.815,
9
+ "perplexity": 531.9539730039461
10
  }
runs/Jul31_01-46-32_390be55cffea/events.out.tfevents.1722392562.390be55cffea.12947.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ed4206e894a4950a42304840c4ec467fc9e7fb8d9609aff118963914b41321
3
+ size 411
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 7568723438714880.0,
4
- "train_loss": 6.792878454773869,
5
- "train_runtime": 2074.686,
6
  "train_samples": 19092,
7
- "train_samples_per_second": 92.024,
8
- "train_steps_per_second": 2.878
9
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.513744687742976e+16,
4
+ "train_loss": 3.172654545207319,
5
+ "train_runtime": 2061.7586,
6
  "train_samples": 19092,
7
+ "train_samples_per_second": 185.201,
8
+ "train_steps_per_second": 5.791
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 5970,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -93,12 +93,105 @@
93
  "train_runtime": 2074.686,
94
  "train_samples_per_second": 92.024,
95
  "train_steps_per_second": 2.878
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "logging_steps": 500,
99
- "max_steps": 5970,
100
  "num_input_tokens_seen": 0,
101
- "num_train_epochs": 10,
102
  "save_steps": 1000,
103
  "stateful_callbacks": {
104
  "TrainerControl": {
@@ -112,7 +205,7 @@
112
  "attributes": {}
113
  }
114
  },
115
- "total_flos": 7568723438714880.0,
116
  "train_batch_size": 32,
117
  "trial_name": null,
118
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 11940,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
93
  "train_runtime": 2074.686,
94
  "train_samples_per_second": 92.024,
95
  "train_steps_per_second": 2.878
96
+ },
97
+ {
98
+ "epoch": 10.050251256281408,
99
+ "grad_norm": 1.6812041997909546,
100
+ "learning_rate": 4.9874371859296486e-05,
101
+ "loss": 6.5362,
102
+ "step": 6000
103
+ },
104
+ {
105
+ "epoch": 10.887772194304858,
106
+ "grad_norm": 1.7040314674377441,
107
+ "learning_rate": 4.778056951423786e-05,
108
+ "loss": 6.542,
109
+ "step": 6500
110
+ },
111
+ {
112
+ "epoch": 11.725293132328309,
113
+ "grad_norm": 1.5005360841751099,
114
+ "learning_rate": 4.568676716917923e-05,
115
+ "loss": 6.4812,
116
+ "step": 7000
117
+ },
118
+ {
119
+ "epoch": 12.56281407035176,
120
+ "grad_norm": 2.1963107585906982,
121
+ "learning_rate": 4.35929648241206e-05,
122
+ "loss": 6.4312,
123
+ "step": 7500
124
+ },
125
+ {
126
+ "epoch": 13.40033500837521,
127
+ "grad_norm": 1.4487565755844116,
128
+ "learning_rate": 4.1499162479061974e-05,
129
+ "loss": 6.4049,
130
+ "step": 8000
131
+ },
132
+ {
133
+ "epoch": 14.23785594639866,
134
+ "grad_norm": 1.7677375078201294,
135
+ "learning_rate": 3.940536013400335e-05,
136
+ "loss": 6.3664,
137
+ "step": 8500
138
+ },
139
+ {
140
+ "epoch": 15.075376884422111,
141
+ "grad_norm": 1.8846427202224731,
142
+ "learning_rate": 3.731155778894472e-05,
143
+ "loss": 6.3339,
144
+ "step": 9000
145
+ },
146
+ {
147
+ "epoch": 15.912897822445562,
148
+ "grad_norm": 1.711972713470459,
149
+ "learning_rate": 3.5217755443886095e-05,
150
+ "loss": 6.3093,
151
+ "step": 9500
152
+ },
153
+ {
154
+ "epoch": 16.75041876046901,
155
+ "grad_norm": 1.8777658939361572,
156
+ "learning_rate": 3.312395309882747e-05,
157
+ "loss": 6.2826,
158
+ "step": 10000
159
+ },
160
+ {
161
+ "epoch": 17.587939698492463,
162
+ "grad_norm": 1.5985726118087769,
163
+ "learning_rate": 3.103015075376884e-05,
164
+ "loss": 6.2612,
165
+ "step": 10500
166
+ },
167
+ {
168
+ "epoch": 18.42546063651591,
169
+ "grad_norm": 1.8098769187927246,
170
+ "learning_rate": 2.8936348408710216e-05,
171
+ "loss": 6.2504,
172
+ "step": 11000
173
+ },
174
+ {
175
+ "epoch": 19.262981574539364,
176
+ "grad_norm": 1.4644687175750732,
177
+ "learning_rate": 2.684254606365159e-05,
178
+ "loss": 6.2358,
179
+ "step": 11500
180
+ },
181
+ {
182
+ "epoch": 20.0,
183
+ "step": 11940,
184
+ "total_flos": 1.513744687742976e+16,
185
+ "train_loss": 3.172654545207319,
186
+ "train_runtime": 2061.7586,
187
+ "train_samples_per_second": 185.201,
188
+ "train_steps_per_second": 5.791
189
  }
190
  ],
191
  "logging_steps": 500,
192
+ "max_steps": 11940,
193
  "num_input_tokens_seen": 0,
194
+ "num_train_epochs": 20,
195
  "save_steps": 1000,
196
  "stateful_callbacks": {
197
  "TrainerControl": {
 
205
  "attributes": {}
206
  }
207
  },
208
+ "total_flos": 1.513744687742976e+16,
209
  "train_batch_size": 32,
210
  "trial_name": null,
211
  "trial_params": null