dalle2 commited on
Commit
44f5643
1 Parent(s): 7691e48

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ab5db3993fff6bfef113bb7ce934d9e7921403f60f33d0c213b7953b6e526a2
3
  size 2283652852
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe61c30fb1859076bb7e7bb5d429b44a52c3556e9438b60b8a414fd8ac15606
3
  size 2283652852
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afc440de8bd791363d8b856fa31330ed582eb260f6f70e14d3fe91d4e5ee0bfb
3
  size 4550170737
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6378dd29bf82ea5a0b3367c62d87bebd129d2d9229a826842314387426cdfb
3
  size 4550170737
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cca5995e1c99590b9eb14846be54161c10a96568f6182c0d5960bfcdfdc7881
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0046d0fdcb58af9aa23af567ef64e74f1d8b2a40853fefe0d870e0884438c05c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab8935b579d5547eb6c7c8c3bf6883b4682c791d2ebf6a0e9a040f1ef1b4b330
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6999eebde348427dc9a087843dcc87da500631a1194e084f76cd498c0a38b33
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9976019184652278,
5
  "eval_steps": 500,
6
- "global_step": 104,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21,6 +21,13 @@
21
  "learning_rate": 1.3164556962025317e-05,
22
  "loss": 6.4196,
23
  "step": 104
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "logging_steps": 2,
@@ -35,12 +42,12 @@
35
  "should_evaluate": false,
36
  "should_log": false,
37
  "should_save": true,
38
- "should_training_stop": false
39
  },
40
  "attributes": {}
41
  }
42
  },
43
- "total_flos": 4731035212480512.0,
44
  "train_batch_size": 1,
45
  "trial_name": null,
46
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9952038369304557,
5
  "eval_steps": 500,
6
+ "global_step": 208,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21
  "learning_rate": 1.3164556962025317e-05,
22
  "loss": 6.4196,
23
  "step": 104
24
+ },
25
+ {
26
+ "epoch": 1.9952038369304557,
27
+ "grad_norm": 5.404126167297363,
28
+ "learning_rate": 0.0,
29
+ "loss": 5.713,
30
+ "step": 208
31
  }
32
  ],
33
  "logging_steps": 2,
 
42
  "should_evaluate": false,
43
  "should_log": false,
44
  "should_save": true,
45
+ "should_training_stop": true
46
  },
47
  "attributes": {}
48
  }
49
  },
50
+ "total_flos": 9439352575352832.0,
51
  "train_batch_size": 1,
52
  "trial_name": null,
53
  "trial_params": null