pilotj committed on
Commit
0cd5d5c
1 Parent(s): ce60a6d

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d313059b45d865ac1762a583ec400ea36589e519bddaa42409b4ddbc65e4a24
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e2391ae33ca96af19ca2ace841dcda574fc1790b53792580d65d3c9e3026ce
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60939877eb10186c82cb742ececcd86b3b89dcf23d63cc34abe9190235e49c56
3
  size 876185978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f0443fe1f70c7a42de40f82586eb67ee731508f7158f8ba6d83bc10d863a3b
3
  size 876185978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb70df0ad07747f808cfdb28d3f00ff5a1cfdb2f6a944e3debcc3c5e5f27b429
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26edcd7164d1d17c537fa422a9cdc58f953d55d66feed6db98fc01a04f19406
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec9786334357a4fb2218d9067b799b12938c6b355b5990de5c82cd4cb64ba426
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352d1cb8705a18d1870fdd06ca30240b7780be1ac5622a15d434bd83b9f8b0e9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.3857233226299286,
3
- "best_model_checkpoint": "results/checkpoint-1000",
4
- "epoch": 0.09552010698251982,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -37,6 +37,21 @@
37
  "eval_samples_per_second": 236.622,
38
  "eval_steps_per_second": 3.701,
39
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
  ],
42
  "logging_steps": 500,
@@ -56,7 +71,7 @@
56
  "attributes": {}
57
  }
58
  },
59
- "total_flos": 1.6842736140288e+16,
60
  "train_batch_size": 64,
61
  "trial_name": null,
62
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3843745291233063,
3
+ "best_model_checkpoint": "results/checkpoint-1500",
4
+ "epoch": 0.14328016047377973,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
37
  "eval_samples_per_second": 236.622,
38
  "eval_steps_per_second": 3.701,
39
  "step": 1000
40
+ },
41
+ {
42
+ "epoch": 0.14328016047377973,
43
+ "grad_norm": 3.55123233795166,
44
+ "learning_rate": 4.641799598815551e-05,
45
+ "loss": 0.4412,
46
+ "step": 1500
47
+ },
48
+ {
49
+ "epoch": 0.14328016047377973,
50
+ "eval_loss": 0.3843745291233063,
51
+ "eval_runtime": 108.5384,
52
+ "eval_samples_per_second": 240.938,
53
+ "eval_steps_per_second": 3.768,
54
+ "step": 1500
55
  }
56
  ],
57
  "logging_steps": 500,
 
71
  "attributes": {}
72
  }
73
  },
74
+ "total_flos": 2.5264104210432e+16,
75
  "train_batch_size": 64,
76
  "trial_name": null,
77
  "trial_params": null