bhuvanmdev committed on
Commit
cad008f
1 Parent(s): a7da53a

Training in progress, step 180, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ded6ea93dca5b528b272b56d0dba78bfb48686ef712e6aee97e12ad754ec51a
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81750cf4351c62f467fa31cd7b9d64a5c6b54b220ad076d6d668eb9844803aca
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7fcfe6bdcb982add10fed5d7c4599c6d6b8da496a1bdce1e099c1f92ef34189
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6796a4fe299f2d46b565219a8d5ab5b44e38bb09dfe9c205cdfd25ad869b6fcb
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e40ba6383d47c8ec509ed3abd3668fd616332aaf4bb58238305f857b037e170
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:921505c38e3140072fcb25b091248df87486cd572d25945bf43989c3adec66d2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7815a0f425c279b798b8a01de0a05716135e29d85893e062dbb79cc33f526eb8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7caa9fa897837d1407d3cee463cfb4344d4ea9bf72b6aacc7133003f48d23ba
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.057245080500894455,
5
  "eval_steps": 500,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -135,14 +135,30 @@
135
  "loss": 0.4445,
136
  "num_input_tokens_seen": 107467,
137
  "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
  ],
140
  "logging_steps": 10,
141
  "max_steps": 2795,
142
- "num_input_tokens_seen": 107467,
143
  "num_train_epochs": 1,
144
  "save_steps": 20,
145
- "total_flos": 2416553321895936.0,
146
  "train_batch_size": 1,
147
  "trial_name": null,
148
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06440071556350627,
5
  "eval_steps": 500,
6
+ "global_step": 180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
135
  "loss": 0.4445,
136
  "num_input_tokens_seen": 107467,
137
  "step": 160
138
+ },
139
+ {
140
+ "epoch": 0.06082289803220036,
141
+ "grad_norm": 0.5721924304962158,
142
+ "learning_rate": 0.00018783542039355994,
143
+ "loss": 0.4304,
144
+ "num_input_tokens_seen": 113612,
145
+ "step": 170
146
+ },
147
+ {
148
+ "epoch": 0.06440071556350627,
149
+ "grad_norm": 0.2883516848087311,
150
+ "learning_rate": 0.00018711985688729877,
151
+ "loss": 0.4525,
152
+ "num_input_tokens_seen": 121416,
153
+ "step": 180
154
  }
155
  ],
156
  "logging_steps": 10,
157
  "max_steps": 2795,
158
+ "num_input_tokens_seen": 121416,
159
  "num_train_epochs": 1,
160
  "save_steps": 20,
161
+ "total_flos": 2730217072508928.0,
162
  "train_batch_size": 1,
163
  "trial_name": null,
164
  "trial_params": null