Training in progress, step 10, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 891644712
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab8d748fd8f20e14a3cda2d877038ce59887130178c068dcafb79817213c24e1
|
3 |
size 891644712
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1783444794
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00bca8fedb1bc6c25e6e507abe70894fe1954894c658291feb63df23505cbe3e
|
3 |
size 1783444794
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70357fb90a1dcb5ff229b8259118cfc08b286f367541b00f95c942e294080e49
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -7,8 +7,44 @@
|
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
-
"log_history": [
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
"max_steps": 3606,
|
13 |
"num_input_tokens_seen": 0,
|
14 |
"num_train_epochs": 2,
|
|
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.0011092623405435386,
|
13 |
+
"grad_norm": 80.64155578613281,
|
14 |
+
"learning_rate": 3.6697247706422022e-06,
|
15 |
+
"loss": 13.4594,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.0022185246810870773,
|
20 |
+
"grad_norm": 66.91927337646484,
|
21 |
+
"learning_rate": 7.3394495412844045e-06,
|
22 |
+
"loss": 13.8607,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.0033277870216306157,
|
27 |
+
"grad_norm": 63.7406005859375,
|
28 |
+
"learning_rate": 1.1009174311926607e-05,
|
29 |
+
"loss": 13.0338,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.004437049362174155,
|
34 |
+
"grad_norm": 61.71236801147461,
|
35 |
+
"learning_rate": 1.4678899082568809e-05,
|
36 |
+
"loss": 12.9429,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.005546311702717693,
|
41 |
+
"grad_norm": 63.84341812133789,
|
42 |
+
"learning_rate": 1.834862385321101e-05,
|
43 |
+
"loss": 12.5906,
|
44 |
+
"step": 10
|
45 |
+
}
|
46 |
+
],
|
47 |
+
"logging_steps": 2,
|
48 |
"max_steps": 3606,
|
49 |
"num_input_tokens_seen": 0,
|
50 |
"num_train_epochs": 2,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:626d772bac197cb38cc6281159e89ab89ec3be67c70cfa94c2b35d721cf3214f
|
3 |
size 5304
|