pilotj committed on
Commit
c1f7414
·
verified ·
1 Parent(s): 3d67ecd

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:321c691282333574ae3ecfaac8058bd49f9bec725ef06e38a74bd79d46540459
3
  size 267906392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f17a8842bd1dda8817521c87dca7399f6b82c2feb14dca8cf129cb37c7f19f0
3
  size 267906392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c3bece3f1734101ab0d8e91bc4e0dd89a6c2c183c7b697ab6eea4ca99735d89
3
  size 535874874
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0460150bc9761f35800218de11f1ece2a43cc8e686546af08a8463710e5137c
3
  size 535874874
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:189e0d8819c0e53e229fbb1c3c52bec727218a1a7a06af8e13193a34e6249e58
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232ad4134ceeb1880defec30bd4e24accc658768c7d22234560a854be95a4e10
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5639930963516235,
3
  "best_model_checkpoint": "/kaggle/working/results/checkpoint-2000",
4
  "epoch": 0.09845911485255747,
5
  "eval_steps": 2000,
@@ -10,45 +10,45 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.024614778713139367,
13
- "grad_norm": 16.002241134643555,
14
- "learning_rate": 7.901540885147443e-05,
15
- "loss": 0.6917,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.049229557426278735,
20
- "grad_norm": 4.50161075592041,
21
- "learning_rate": 7.803081770294886e-05,
22
- "loss": 0.7013,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.07384433613941811,
27
- "grad_norm": 5.799191474914551,
28
- "learning_rate": 7.704622655442329e-05,
29
- "loss": 0.6597,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.09845911485255747,
34
- "grad_norm": 4.640450954437256,
35
- "learning_rate": 7.60616354058977e-05,
36
- "loss": 0.6615,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.09845911485255747,
41
- "eval_loss": 0.5639930963516235,
42
- "eval_runtime": 250.2708,
43
- "eval_samples_per_second": 113.941,
44
- "eval_steps_per_second": 1.782,
45
  "step": 2000
46
  }
47
  ],
48
  "logging_steps": 500,
49
- "max_steps": 40626,
50
  "num_input_tokens_seen": 0,
51
- "num_train_epochs": 2,
52
  "save_steps": 2000,
53
  "stateful_callbacks": {
54
  "TrainerControl": {
 
1
  {
2
+ "best_metric": 0.5796988010406494,
3
  "best_model_checkpoint": "/kaggle/working/results/checkpoint-2000",
4
  "epoch": 0.09845911485255747,
5
  "eval_steps": 2000,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.024614778713139367,
13
+ "grad_norm": 5.313698768615723,
14
+ "learning_rate": 7.803081770294886e-05,
15
+ "loss": 0.4406,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.049229557426278735,
20
+ "grad_norm": 4.412301540374756,
21
+ "learning_rate": 7.60616354058977e-05,
22
+ "loss": 0.4528,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.07384433613941811,
27
+ "grad_norm": 12.368891716003418,
28
+ "learning_rate": 7.409245310884655e-05,
29
+ "loss": 0.4101,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.09845911485255747,
34
+ "grad_norm": 6.372128486633301,
35
+ "learning_rate": 7.21232708117954e-05,
36
+ "loss": 0.4215,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.09845911485255747,
41
+ "eval_loss": 0.5796988010406494,
42
+ "eval_runtime": 249.9087,
43
+ "eval_samples_per_second": 114.106,
44
+ "eval_steps_per_second": 1.785,
45
  "step": 2000
46
  }
47
  ],
48
  "logging_steps": 500,
49
+ "max_steps": 20313,
50
  "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 1,
52
  "save_steps": 2000,
53
  "stateful_callbacks": {
54
  "TrainerControl": {
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:767e084587bf938119ad3da54ed8e841827f12fa1412f0a5d94c3b189a2d5c5e
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:847456b1bcbfbb9e8249408ddaf4758461104fe8cd1038a47b32746b92b87822
3
  size 5240