biggy-smiley commited on
Commit
e2c6cac
1 Parent(s): e2859ff

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77bb905d9236e55a1c8048367641832d0432d5423ec7cf04357b1b736054ee1a
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca22e06e41773986bf4cda69e78b19dae8d6c06562e9f7bf57bb6bc359c523cd
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14512c44f1f37bd3c3d83693760eaaaba30f33ea26360258c85a62aec0a0ca30
3
  size 876185914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b50feb2e626b83408c0d4116c1f1ed25f6686b487bbb937489f8b52410e72e5a
3
  size 876185914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a703767195d880aff1d5c2dbc4d23286715c3798d1940a8fd2720bd4a5a675ff
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b8d6aadceec387bdf2d000535b59c081063c48e8711fc23a19e65f0d3a3992
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51bb83752794024a27eef10009054d3313b178287743eebf62418a466a4bf8b4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22814798963634fbc95aa4159dff2ff1c962c3cd6bef60971d52789c5165d71
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2087372541427612,
3
- "best_model_checkpoint": "/kaggle/working/results/checkpoint-600",
4
- "epoch": 0.043437341634691956,
5
  "eval_steps": 200,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -38,6 +38,14 @@
38
  "eval_samples_per_second": 63.757,
39
  "eval_steps_per_second": 0.499,
40
  "step": 600
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "logging_steps": 500,
@@ -57,7 +65,7 @@
57
  "attributes": {}
58
  }
59
  },
60
- "total_flos": 5052820842086400.0,
61
  "train_batch_size": 32,
62
  "trial_name": null,
63
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1206928491592407,
3
+ "best_model_checkpoint": "/kaggle/working/results/checkpoint-800",
4
+ "epoch": 0.057916455512922606,
5
  "eval_steps": 200,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
38
  "eval_samples_per_second": 63.757,
39
  "eval_steps_per_second": 0.499,
40
  "step": 600
41
+ },
42
+ {
43
+ "epoch": 0.057916455512922606,
44
+ "eval_loss": 1.1206928491592407,
45
+ "eval_runtime": 122.0821,
46
+ "eval_samples_per_second": 63.891,
47
+ "eval_steps_per_second": 0.5,
48
+ "step": 800
49
  }
50
  ],
51
  "logging_steps": 500,
 
65
  "attributes": {}
66
  }
67
  },
68
+ "total_flos": 6737094456115200.0,
69
  "train_batch_size": 32,
70
  "trial_name": null,
71
  "trial_params": null