biggy-smiley committed on
Commit
417919e
1 Parent(s): 9908c3a

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca22e06e41773986bf4cda69e78b19dae8d6c06562e9f7bf57bb6bc359c523cd
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c45df39d9c4dc5152faefbdf45711d1aa62e64d0c23e774085865f21bf591f9b
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b50feb2e626b83408c0d4116c1f1ed25f6686b487bbb937489f8b52410e72e5a
3
  size 876185914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45efbd0d9e7c22e8c7835bae847bddd5fc3f03cc6671fcd7cf49c7ad364c0d9f
3
  size 876185914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55b8d6aadceec387bdf2d000535b59c081063c48e8711fc23a19e65f0d3a3992
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a1a360bd35da073058490dcfac520d1611dcc1964a4ae6df1f2feddb4ce673
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e22814798963634fbc95aa4159dff2ff1c962c3cd6bef60971d52789c5165d71
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:331283a215653945c9f574623ef0726272c0b72361a70435936fcaea7a9b3ec5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,58 +1,34 @@
1
  {
2
- "best_metric": 1.1206928491592407,
3
- "best_model_checkpoint": "/kaggle/working/results/checkpoint-800",
4
- "epoch": 0.057916455512922606,
5
- "eval_steps": 200,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.014479113878230651,
13
- "eval_loss": 1.4317502975463867,
14
- "eval_runtime": 122.3776,
15
- "eval_samples_per_second": 63.737,
16
- "eval_steps_per_second": 0.498,
17
- "step": 200
18
- },
19
- {
20
- "epoch": 0.028958227756461303,
21
- "eval_loss": 1.2947503328323364,
22
- "eval_runtime": 122.3265,
23
- "eval_samples_per_second": 63.764,
24
- "eval_steps_per_second": 0.499,
25
- "step": 400
26
- },
27
  {
28
  "epoch": 0.03619778469557663,
29
- "grad_norm": 10.083489418029785,
30
- "learning_rate": 0.00019276044306088468,
31
- "loss": 1.6511,
32
  "step": 500
33
  },
34
  {
35
- "epoch": 0.043437341634691956,
36
- "eval_loss": 1.2087372541427612,
37
- "eval_runtime": 122.3404,
38
- "eval_samples_per_second": 63.757,
39
  "eval_steps_per_second": 0.499,
40
- "step": 600
41
- },
42
- {
43
- "epoch": 0.057916455512922606,
44
- "eval_loss": 1.1206928491592407,
45
- "eval_runtime": 122.0821,
46
- "eval_samples_per_second": 63.891,
47
- "eval_steps_per_second": 0.5,
48
- "step": 800
49
  }
50
  ],
51
  "logging_steps": 500,
52
  "max_steps": 13813,
53
  "num_input_tokens_seen": 0,
54
  "num_train_epochs": 1,
55
- "save_steps": 200,
56
  "stateful_callbacks": {
57
  "TrainerControl": {
58
  "args": {
@@ -65,7 +41,7 @@
65
  "attributes": {}
66
  }
67
  },
68
- "total_flos": 6737094456115200.0,
69
  "train_batch_size": 32,
70
  "trial_name": null,
71
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.0014784336090088,
3
+ "best_model_checkpoint": "/kaggle/working/results/checkpoint-500",
4
+ "epoch": 0.03619778469557663,
5
+ "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.03619778469557663,
13
+ "grad_norm": 8.067161560058594,
14
+ "learning_rate": 9.638022153044234e-05,
15
+ "loss": 0.873,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.03619778469557663,
20
+ "eval_loss": 1.0014784336090088,
21
+ "eval_runtime": 122.228,
22
+ "eval_samples_per_second": 63.815,
23
  "eval_steps_per_second": 0.499,
24
+ "step": 500
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 500,
28
  "max_steps": 13813,
29
  "num_input_tokens_seen": 0,
30
  "num_train_epochs": 1,
31
+ "save_steps": 500,
32
  "stateful_callbacks": {
33
  "TrainerControl": {
34
  "args": {
 
41
  "attributes": {}
42
  }
43
  },
44
+ "total_flos": 4210684035072000.0,
45
  "train_batch_size": 32,
46
  "trial_name": null,
47
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c7dc33602494f80ff9a206330dadebab61837d70a684cddb89f45ca6ed45e1c
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6994db69eb509e6cbee2ca91744d3ce71fc56a2d02ee0a28edcd732f1eea33bc
3
  size 5240