kanishka committed on
Commit
4789d90
·
verified ·
1 Parent(s): e1b948b

End of training

Browse files
Files changed (5) hide show
  1. README.md +14 -2
  2. all_results.json +13 -13
  3. eval_results.json +7 -7
  4. train_results.json +6 -6
  5. trainer_state.json +0 -0
README.md CHANGED
@@ -2,11 +2,23 @@
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
 
 
5
  metrics:
6
  - accuracy
7
  model-index:
8
  - name: opt-babylm2-clean-spacy-32k_seed-42_1e-3
9
- results: []
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,7 +26,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # opt-babylm2-clean-spacy-32k_seed-42_1e-3
16
 
17
- This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 3.0380
20
  - Accuracy: 0.4233
 
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
+ datasets:
6
+ - kanishka/babylm2-clean-spacy
7
  metrics:
8
  - accuracy
9
  model-index:
10
  - name: opt-babylm2-clean-spacy-32k_seed-42_1e-3
11
+ results:
12
+ - task:
13
+ name: Causal Language Modeling
14
+ type: text-generation
15
+ dataset:
16
+ name: kanishka/babylm2-clean-spacy
17
+ type: kanishka/babylm2-clean-spacy
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.4232865945768086
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # opt-babylm2-clean-spacy-32k_seed-42_1e-3
28
 
29
+ This model was trained from scratch on the kanishka/babylm2-clean-spacy dataset.
30
  It achieves the following results on the evaluation set:
31
  - Loss: 3.0380
32
  - Accuracy: 0.4233
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.4239814649263961,
4
- "eval_loss": 3.035736322402954,
5
- "eval_runtime": 112.5567,
6
- "eval_samples": 52640,
7
- "eval_samples_per_second": 467.675,
8
- "eval_steps_per_second": 7.312,
9
- "perplexity": 20.816299765730864,
10
- "total_flos": 1.30265052217344e+18,
11
- "train_loss": 2.6546336687942524,
12
- "train_runtime": 44210.8645,
13
- "train_samples": 498542,
14
- "train_samples_per_second": 225.529,
15
- "train_steps_per_second": 7.048
16
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.4232865945768086,
4
+ "eval_loss": 3.037959337234497,
5
+ "eval_runtime": 112.5812,
6
+ "eval_samples": 52440,
7
+ "eval_samples_per_second": 465.797,
8
+ "eval_steps_per_second": 7.284,
9
+ "perplexity": 20.862626181925105,
10
+ "total_flos": 1.29957250203648e+18,
11
+ "train_loss": 2.656636161733025,
12
+ "train_runtime": 44071.5037,
13
+ "train_samples": 497364,
14
+ "train_samples_per_second": 225.708,
15
+ "train_steps_per_second": 7.054
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.4239814649263961,
4
- "eval_loss": 3.035736322402954,
5
- "eval_runtime": 112.5567,
6
- "eval_samples": 52640,
7
- "eval_samples_per_second": 467.675,
8
- "eval_steps_per_second": 7.312,
9
- "perplexity": 20.816299765730864
10
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.4232865945768086,
4
+ "eval_loss": 3.037959337234497,
5
+ "eval_runtime": 112.5812,
6
+ "eval_samples": 52440,
7
+ "eval_samples_per_second": 465.797,
8
+ "eval_steps_per_second": 7.284,
9
+ "perplexity": 20.862626181925105
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 1.30265052217344e+18,
4
- "train_loss": 2.6546336687942524,
5
- "train_runtime": 44210.8645,
6
- "train_samples": 498542,
7
- "train_samples_per_second": 225.529,
8
- "train_steps_per_second": 7.048
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 1.29957250203648e+18,
4
+ "train_loss": 2.656636161733025,
5
+ "train_runtime": 44071.5037,
6
+ "train_samples": 497364,
7
+ "train_samples_per_second": 225.708,
8
+ "train_steps_per_second": 7.054
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff