DatPySci committed on
Commit
f0e4d92
1 Parent(s): 9f01cdd

End of training

Browse files
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.2172593233757727,
4
- "train_runtime": 11063.2306,
5
- "train_samples_per_second": 9.039,
6
  "train_steps_per_second": 0.141
7
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.2182784026735346,
4
+ "train_runtime": 11066.0236,
5
+ "train_samples_per_second": 9.037,
6
  "train_steps_per_second": 0.141
7
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.39.3",
24
- "use_cache": false,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.39.3",
24
+ "use_cache": true,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef17aa4c80bd9ca3d731f0c96e3df89e2ef439e97f893f5c44edfb8db1efe8d
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b13ae82e52a899adfb0184d801b21907e3504264787aec1892d5723402cb11
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69f78ea39ee2628a4d4a5235badb8d4d423f23985bd002f126207693903be40f
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5843c86e9c3b143d0e4bac066f9d10b22a437579f986872741d4423947fc10dd
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6a6df3e8f410d437165dc11d02ce307e5bf6c3f3360b543bccbdb3cb28214db
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fb2d89fced370d5ec07b3993e246ee7d73582123062eb66cd0b005f9517eff5
3
  size 4540516344
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.2172593233757727,
4
- "train_runtime": 11063.2306,
5
- "train_samples_per_second": 9.039,
6
  "train_steps_per_second": 0.141
7
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.2182784026735346,
4
+ "train_runtime": 11066.0236,
5
+ "train_samples_per_second": 9.037,
6
  "train_steps_per_second": 0.141
7
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff