Rakhman16 committed
Commit 5a0e918 · verified · 1 Parent(s): 445bc2e

Training in progress, step 4500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:708bb2c9822b1d19cfc1f711a1426a731d98c1d2dfede98878aeb66a671c5d3d
+oid sha256:8556a691c4ab5fb89b6e13fbc99580121d0e3f94363e6ac2a01dd331ba85836b
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3e460fa778d644588069da89534b6210f9e1b28688b004d2b112bcf63437271
+oid sha256:8eaf3ec360621c1ce85b720dd55783852c2df291d2c2dee679c14211f9f7d6ad
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c62c604dba9a000f0608f1abd7fa224ff8ff313b4269759d1f9ec8d5f614e346
+oid sha256:c8a5efc71021ffa065719e3167f9e1a963daf4640f03e0c3b7abc98600034804
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35a2f311d7dd68d3021f647c351f057c0b1803907e19da4973f0e7cb6dd34438
+oid sha256:13e5f31b24159dd112a4c253836ceb74f5ca9e31a7bde4abe64305021c33f511
 size 1064
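
The four diffs above touch only Git LFS pointer files; the checkpoint binaries themselves live in LFS storage, and the pointer records their sha256 oid and size. A minimal sketch for checking that a downloaded file matches the oid recorded in its pointer; the local path last-checkpoint/model.safetensors mirrors the path in this commit, and everything else is Python standard library:

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file through sha256 so a ~890 MB checkpoint is never fully in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid from the updated model.safetensors pointer in this commit
expected = "8556a691c4ab5fb89b6e13fbc99580121d0e3f94363e6ac2a01dd331ba85836b"
actual = sha256_of(Path("last-checkpoint/model.safetensors"))
print("match" if actual == expected else f"mismatch: {actual}")

A mismatch usually means the pointer file was downloaded instead of the resolved LFS object.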
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.11094118654727936,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-4000",
-  "epoch": 0.7025555457978396,
+  "best_metric": 0.11007058620452881,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-4500",
+  "epoch": 0.7903749890225696,
   "eval_steps": 100,
-  "global_step": 4000,
+  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -887,6 +887,116 @@
       "eval_samples_per_second": 25.506,
       "eval_steps_per_second": 3.191,
       "step": 4000
+    },
+    {
+      "epoch": 0.7113374901203127,
+      "grad_norm": 10273.943359375,
+      "learning_rate": 2.4664500263481468e-05,
+      "loss": 0.1122,
+      "step": 4050
+    },
+    {
+      "epoch": 0.7201194344427856,
+      "grad_norm": 9615.3408203125,
+      "learning_rate": 2.4598629896363958e-05,
+      "loss": 0.1178,
+      "step": 4100
+    },
+    {
+      "epoch": 0.7201194344427856,
+      "eval_loss": 0.11066293716430664,
+      "eval_runtime": 176.4782,
+      "eval_samples_per_second": 25.272,
+      "eval_steps_per_second": 3.162,
+      "step": 4100
+    },
+    {
+      "epoch": 0.7289013787652586,
+      "grad_norm": 9801.9638671875,
+      "learning_rate": 2.4532759529246444e-05,
+      "loss": 0.1235,
+      "step": 4150
+    },
+    {
+      "epoch": 0.7376833230877317,
+      "grad_norm": 14902.2216796875,
+      "learning_rate": 2.446688916212893e-05,
+      "loss": 0.1178,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7376833230877317,
+      "eval_loss": 0.1100853979587555,
+      "eval_runtime": 176.5072,
+      "eval_samples_per_second": 25.268,
+      "eval_steps_per_second": 3.161,
+      "step": 4200
+    },
+    {
+      "epoch": 0.7464652674102046,
+      "grad_norm": 11338.7451171875,
+      "learning_rate": 2.4401018795011417e-05,
+      "loss": 0.1139,
+      "step": 4250
+    },
+    {
+      "epoch": 0.7552472117326776,
+      "grad_norm": 27772.08203125,
+      "learning_rate": 2.4335148427893904e-05,
+      "loss": 0.1165,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7552472117326776,
+      "eval_loss": 0.11026333272457123,
+      "eval_runtime": 176.5703,
+      "eval_samples_per_second": 25.259,
+      "eval_steps_per_second": 3.16,
+      "step": 4300
+    },
+    {
+      "epoch": 0.7640291560551506,
+      "grad_norm": 15169.2919921875,
+      "learning_rate": 2.4269278060776394e-05,
+      "loss": 0.1223,
+      "step": 4350
+    },
+    {
+      "epoch": 0.7728111003776236,
+      "grad_norm": 9459.4482421875,
+      "learning_rate": 2.4203407693658877e-05,
+      "loss": 0.1139,
+      "step": 4400
+    },
+    {
+      "epoch": 0.7728111003776236,
+      "eval_loss": 0.11010286957025528,
+      "eval_runtime": 176.8628,
+      "eval_samples_per_second": 25.217,
+      "eval_steps_per_second": 3.155,
+      "step": 4400
+    },
+    {
+      "epoch": 0.7815930447000966,
+      "grad_norm": 10122.501953125,
+      "learning_rate": 2.4137537326541367e-05,
+      "loss": 0.1143,
+      "step": 4450
+    },
+    {
+      "epoch": 0.7903749890225696,
+      "grad_norm": 8584.2705078125,
+      "learning_rate": 2.4071666959423854e-05,
+      "loss": 0.1104,
+      "step": 4500
+    },
+    {
+      "epoch": 0.7903749890225696,
+      "eval_loss": 0.11007058620452881,
+      "eval_runtime": 176.6787,
+      "eval_samples_per_second": 25.244,
+      "eval_steps_per_second": 3.158,
+      "step": 4500
     }
   ],
   "logging_steps": 50,
@@ -906,7 +1016,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.948665249792e+16,
+  "total_flos": 2.192248406016e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null