ncbateman committed on
Commit
87b4c3f
1 Parent(s): be3607a

Training in progress, step 135, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f4ab89bfd08067012042eb1067028e346fc22d43bcdede78423832cb1b25414
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b53757c450e806fa0582f6da980220020345670b993e63bf8d59758c647ec13e
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:881de5d933722541efaecd471328000119ad52ffaca50c9c1c58ee44dd5dccab
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a62f2d146799cdd29e89bf7d951426b0237e01e6e5606f9b041f19bfe0a9ae8c
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec2dba37f17e925003f4d281a6f44eca5d39774813ad0c3726b50a85b542608e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d8ffd5d0306b34d29b48a6526b5741dc24dc3acdbe564b95d57b68075e66c1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccb887ab76e166b8621433fbbca8d1978648c385cddb0300477edcffe4035a1a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e900207fcc2d3955802905e12eea3ef81eb975d535a9ef6fb02c7c84a229aca9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16823034616628924,
5
  "eval_steps": 386,
6
- "global_step": 130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -925,6 +925,41 @@
925
  "learning_rate": 9.989125425114638e-05,
926
  "loss": 1.0888,
927
  "step": 130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
  }
929
  ],
930
  "logging_steps": 1,
@@ -944,7 +979,7 @@
944
  "attributes": {}
945
  }
946
  },
947
- "total_flos": 1.4533183232016384e+17,
948
  "train_batch_size": 4,
949
  "trial_name": null,
950
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1747007440957619,
5
  "eval_steps": 386,
6
+ "global_step": 135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
925
  "learning_rate": 9.989125425114638e-05,
926
  "loss": 1.0888,
927
  "step": 130
928
+ },
929
+ {
930
+ "epoch": 0.16952442575218377,
931
+ "grad_norm": 0.9069584608078003,
932
+ "learning_rate": 9.988851963291522e-05,
933
+ "loss": 0.8579,
934
+ "step": 131
935
+ },
936
+ {
937
+ "epoch": 0.1708185053380783,
938
+ "grad_norm": 0.8150789141654968,
939
+ "learning_rate": 9.988575109511026e-05,
940
+ "loss": 0.7622,
941
+ "step": 132
942
+ },
943
+ {
944
+ "epoch": 0.17211258492397283,
945
+ "grad_norm": 1.0844395160675049,
946
+ "learning_rate": 9.988294863961387e-05,
947
+ "loss": 0.9284,
948
+ "step": 133
949
+ },
950
+ {
951
+ "epoch": 0.17340666450986736,
952
+ "grad_norm": 1.0463049411773682,
953
+ "learning_rate": 9.988011226833146e-05,
954
+ "loss": 0.9185,
955
+ "step": 134
956
+ },
957
+ {
958
+ "epoch": 0.1747007440957619,
959
+ "grad_norm": 0.9481234550476074,
960
+ "learning_rate": 9.987724198319148e-05,
961
+ "loss": 0.8631,
962
+ "step": 135
963
  }
964
  ],
965
  "logging_steps": 1,
 
979
  "attributes": {}
980
  }
981
  },
982
+ "total_flos": 1.5092151817863168e+17,
983
  "train_batch_size": 4,
984
  "trial_name": null,
985
  "trial_params": null