ToastyPigeon committed
Commit 379e18c
1 Parent(s): ad1a73d

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2b3445853db2177d069f37119bdbd70bd0ba40586305619d4889c69fce58f4e2
+ oid sha256:7020135db63cb4ce205e55f2bf1b54a6e092538d96adc09673d817259d9dc072
  size 763470136
last-checkpoint/global_step150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:262f79b2db927260dc457209d15ec0579d1534c4799b7c168d64e942f3fc5fc7
+ size 1152331664
last-checkpoint/global_step150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7e7c223c729493e6dabb1c815c65e64e29201c34b3cfe789fe76c63994b6d8b
+ size 1152331664
last-checkpoint/global_step150/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5448b04a18520b61a9edc7b5031e5cf83d93e804a7cfdbc7f2970c6eda915613
+ size 348711830
last-checkpoint/global_step150/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b38473f7237b72ed42ae392b743ddc0885459c27eb14262736b1d2573ef06b11
+ size 348711830
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step120
+ global_step150
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6e581c9b63b766f1f594238a5cca71c1532d5b91bc7cec6c92f09a1bc37dbbb3
+ oid sha256:3dbf6ae6ebcd74cbebff77f00d6f2ad11df94d5b1fbbe75814b940bac064e081
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d032b1956d2c5ccbb0c6a5d0103db1f906cfb41ee1cfca2520e952693334eddb
+ oid sha256:4f04cbedb984b29722aaa95ebf3b42954331b486654dbc47f4293b3287172d95
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:05b4ad7a9c00749c967e1489fcc0f9309722f8de3a5b956bd6a3ec0903f70bf4
+ oid sha256:5d931f30b34023e9634aa7bafa5dd596bd4eabae6ccf87c8eeba542f2e7de26e
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.8013355592654424,
+ "epoch": 1.001669449081803,
  "eval_steps": 30,
- "global_step": 120,
+ "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -887,6 +887,224 @@
  "eval_samples_per_second": 0.313,
  "eval_steps_per_second": 0.157,
  "step": 120
+ },
+ {
+ "epoch": 0.8080133555926544,
+ "grad_norm": 0.8360077319999617,
+ "learning_rate": 9.59844765015993e-06,
+ "loss": 1.302,
+ "step": 121
+ },
+ {
+ "epoch": 0.8146911519198664,
+ "grad_norm": 0.6446987309963299,
+ "learning_rate": 9.297117626563687e-06,
+ "loss": 1.3057,
+ "step": 122
+ },
+ {
+ "epoch": 0.8213689482470785,
+ "grad_norm": 0.5858848837222707,
+ "learning_rate": 9.00495330242203e-06,
+ "loss": 1.1906,
+ "step": 123
+ },
+ {
+ "epoch": 0.8280467445742905,
+ "grad_norm": 10.398018166396698,
+ "learning_rate": 8.722101791270692e-06,
+ "loss": 1.3564,
+ "step": 124
+ },
+ {
+ "epoch": 0.8347245409015025,
+ "grad_norm": 0.8235165842943503,
+ "learning_rate": 8.448705517363609e-06,
+ "loss": 1.389,
+ "step": 125
+ },
+ {
+ "epoch": 0.8414023372287145,
+ "grad_norm": 0.5842066460612957,
+ "learning_rate": 8.184902143958014e-06,
+ "loss": 1.2718,
+ "step": 126
+ },
+ {
+ "epoch": 0.8480801335559266,
+ "grad_norm": 0.5373622797430296,
+ "learning_rate": 7.930824503996856e-06,
+ "loss": 1.1989,
+ "step": 127
+ },
+ {
+ "epoch": 0.8547579298831386,
+ "grad_norm": 0.5663037814900599,
+ "learning_rate": 7.686600533223395e-06,
+ "loss": 1.3443,
+ "step": 128
+ },
+ {
+ "epoch": 0.8614357262103506,
+ "grad_norm": 0.6476062047630753,
+ "learning_rate": 7.452353205761725e-06,
+ "loss": 1.1721,
+ "step": 129
+ },
+ {
+ "epoch": 0.8681135225375626,
+ "grad_norm": 0.710228017165703,
+ "learning_rate": 7.228200472195573e-06,
+ "loss": 1.3234,
+ "step": 130
+ },
+ {
+ "epoch": 0.8747913188647746,
+ "grad_norm": 0.7248764502921456,
+ "learning_rate": 7.014255200176644e-06,
+ "loss": 1.2848,
+ "step": 131
+ },
+ {
+ "epoch": 0.8814691151919867,
+ "grad_norm": 0.6695466576508436,
+ "learning_rate": 6.810625117592363e-06,
+ "loss": 1.4459,
+ "step": 132
+ },
+ {
+ "epoch": 0.8881469115191987,
+ "grad_norm": 0.7056169144488292,
+ "learning_rate": 6.617412758321627e-06,
+ "loss": 1.1978,
+ "step": 133
+ },
+ {
+ "epoch": 0.8948247078464107,
+ "grad_norm": 1.043195690391376,
+ "learning_rate": 6.434715410605914e-06,
+ "loss": 1.0937,
+ "step": 134
+ },
+ {
+ "epoch": 0.9015025041736227,
+ "grad_norm": 0.7994791814367975,
+ "learning_rate": 6.26262506806173e-06,
+ "loss": 1.3846,
+ "step": 135
+ },
+ {
+ "epoch": 0.9081803005008348,
+ "grad_norm": 0.49914896448919255,
+ "learning_rate": 6.1012283833590465e-06,
+ "loss": 1.4143,
+ "step": 136
+ },
+ {
+ "epoch": 0.9148580968280468,
+ "grad_norm": 0.6079970548926052,
+ "learning_rate": 5.950606624589065e-06,
+ "loss": 1.2448,
+ "step": 137
+ },
+ {
+ "epoch": 0.9215358931552587,
+ "grad_norm": 0.5776796506615264,
+ "learning_rate": 5.810835634343303e-06,
+ "loss": 1.1914,
+ "step": 138
+ },
+ {
+ "epoch": 0.9282136894824707,
+ "grad_norm": 1.0613164868436895,
+ "learning_rate": 5.681985791524546e-06,
+ "loss": 1.1904,
+ "step": 139
+ },
+ {
+ "epoch": 0.9348914858096828,
+ "grad_norm": 1.0019397876061995,
+ "learning_rate": 5.564121975908968e-06,
+ "loss": 1.3183,
+ "step": 140
+ },
+ {
+ "epoch": 0.9415692821368948,
+ "grad_norm": 0.7332315794754085,
+ "learning_rate": 5.457303535477202e-06,
+ "loss": 1.1111,
+ "step": 141
+ },
+ {
+ "epoch": 0.9482470784641068,
+ "grad_norm": 0.6707038470625136,
+ "learning_rate": 5.361584256530833e-06,
+ "loss": 1.2783,
+ "step": 142
+ },
+ {
+ "epoch": 0.9549248747913188,
+ "grad_norm": 0.7509454433889542,
+ "learning_rate": 5.277012336609403e-06,
+ "loss": 1.3497,
+ "step": 143
+ },
+ {
+ "epoch": 0.9616026711185309,
+ "grad_norm": 0.6892654980291844,
+ "learning_rate": 5.20363036022147e-06,
+ "loss": 1.2239,
+ "step": 144
+ },
+ {
+ "epoch": 0.9682804674457429,
+ "grad_norm": 2.6238164871857976,
+ "learning_rate": 5.141475277402042e-06,
+ "loss": 1.3766,
+ "step": 145
+ },
+ {
+ "epoch": 0.9749582637729549,
+ "grad_norm": 0.6105324893762883,
+ "learning_rate": 5.0905783851071216e-06,
+ "loss": 1.4916,
+ "step": 146
+ },
+ {
+ "epoch": 0.9816360601001669,
+ "grad_norm": 0.7216628681832833,
+ "learning_rate": 5.050965311454739e-06,
+ "loss": 1.3969,
+ "step": 147
+ },
+ {
+ "epoch": 0.988313856427379,
+ "grad_norm": 0.696061379100503,
+ "learning_rate": 5.022656002820422e-06,
+ "loss": 1.1597,
+ "step": 148
+ },
+ {
+ "epoch": 0.994991652754591,
+ "grad_norm": 0.7061572174055314,
+ "learning_rate": 5.005664713793603e-06,
+ "loss": 1.1694,
+ "step": 149
+ },
+ {
+ "epoch": 1.001669449081803,
+ "grad_norm": 1.5340986852071277,
+ "learning_rate": 5e-06,
+ "loss": 1.5343,
+ "step": 150
+ },
+ {
+ "epoch": 1.001669449081803,
+ "eval_loss": 0.9879436492919922,
+ "eval_runtime": 319.078,
+ "eval_samples_per_second": 0.313,
+ "eval_steps_per_second": 0.157,
+ "step": 150
  }
  ],
  "logging_steps": 1,
@@ -901,12 +1119,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.4315053922648064e+16,
+ "total_flos": 1.7893851528167424e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null