g4rg committed
Commit d83e0ab
1 Parent(s): ff9c7a7

Training in progress, step 156, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8da66d7ae6c07456dfdb2566c5efbc9cb757f30489aab971f6c4fa69c36c8240
+ oid sha256:0fe1ad4171d926386230d848a46310ce0b399fc2595309237966d052326b2c91
  size 763470136
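The changed files in this commit are Git LFS pointers: each records only the object's sha256 and byte size, while the payload itself lives in LFS storage. Below is a minimal sketch for checking that a locally downloaded adapter_model.safetensors matches the new pointer; the local path is illustrative and assumes the repo was cloned with LFS objects pulled.

import hashlib
import os

def lfs_digest(path: str, chunk_size: int = 1 << 20) -> tuple[str, int]:
    """Return (sha256 hex digest, size in bytes) for a local file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest(), os.path.getsize(path)

# Hypothetical checkout path; compare against the pointer values above.
oid, size = lfs_digest("last-checkpoint/adapter_model.safetensors")
assert oid == "0fe1ad4171d926386230d848a46310ce0b399fc2595309237966d052326b2c91"
assert size == 763470136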
last-checkpoint/global_step156/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9af0a20f9aab8b1961501dfd88a27fedcadef853a290dd52fda76c6f4f58f69b
+ size 385019984
last-checkpoint/global_step156/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b57622b769e1a5f2ed8d89bfeca21f571a422c3049648849cbe1440752e8699d
+ size 385019984
last-checkpoint/global_step156/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d2cf3c76a91f08fb756d56f8034186f477de3e75d33b8c66e6a55dbf6b36315
+ size 385019984
last-checkpoint/global_step156/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be26379bfbd821ca7e767990e1cb7d6dc7bf4ba16bb9072ce5c6b6d6f31a7e0f
+ size 385019984
last-checkpoint/global_step156/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68df53721671c8a3db9ed1c3fdfe63cd2a3d62198e59eb82e9ef29a2fccfd1cc
+ size 385019984
last-checkpoint/global_step156/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:165d49d0943d415996f0708a528cf54bbe0cdb750774f3d0cd46302800b28d19
+ size 385019984
last-checkpoint/global_step156/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:521d9bb07a8747668fb10f1d71c93bd17ad48055712d7f58cfca2d81d9623159
+ size 348711830
last-checkpoint/global_step156/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:815dcb46d147aabf203590daeda1f3bbeb3f7d970f655e34b881af3aae863b28
+ size 348711830
last-checkpoint/global_step156/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db9206c07c0be00a83dfc25dc652b29d1e36f9fbe9fcd84718dceeefc43ce8d2
+ size 348711830
last-checkpoint/global_step156/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:638509ff7ba6288482e9c7ce0785567ced0304b4f4156dcd5c3c77f9c0205fff
+ size 348711830
last-checkpoint/global_step156/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0a856df646bc6bb599758efa053864cd1eccc29d51b174bff471e839cdb374f
+ size 348711830
last-checkpoint/global_step156/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60cd8f2bf30b93d5c24768728861029d7230edeb8a6ea86069fe671de809ff81
+ size 348711830
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step128
+ global_step156
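The new global_step156 directory holds DeepSpeed ZeRO partitioned states (one bf16 optimizer-state shard and one model-states shard per data-parallel rank, six ranks here), and last-checkpoint/latest now points at that tag so resume logic picks it up. A minimal sketch of consolidating the shards into a single fp32 state dict, assuming DeepSpeed is installed and the checkpoint keeps its standard layout; the paths are illustrative.

# Sketch: merge ZeRO shards into one fp32 state dict (assumes `pip install deepspeed`).
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

checkpoint_dir = "last-checkpoint"  # directory containing `latest` and global_step156/
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag="global_step156")
print(len(state_dict), "tensors reconstructed")

The same conversion is typically available offline via the zero_to_fp32.py helper that DeepSpeed writes alongside its checkpoints.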
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:74fbe9c3428ed3e9c35b612dab93cb88760e9a705b6c000851dabad16e459b72
+ oid sha256:b8600cbed3923c306f0963183049a7010cb62ef282ba4a5d15afec51a4e6ae8c
  size 15472
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09696d37ecf847e753dcf238b8abaa5cd29c004c3225bbd9bb36e502bbd3e1d2
+ oid sha256:6c24f9f422a75f3cb9ebdf8ab104ee47eb4b9af16da24aa9ae507d6b8d156644
  size 15472
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8fddcb8b3b958d20b652b04c28b148d9888da159024072edc127f9894cb6961c
+ oid sha256:f691f8bf929b3388ad5dc17400a623eefa09e469d41c7a2446d4119c42804026
  size 15472
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9f28a47ba026f0903768f763280985efa3436a6168461fd9e14c78c1da328d9c
+ oid sha256:5cb572ec4e38872993b94ddd3029a6d241c1e90580b219e6b116893fe14f3080
  size 15472
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:183a725b6dcc281ea67c71bdd66ade19c4182db0091e458e1ec13520873d8d61
+ oid sha256:45ce4c33ab631efebf26d1508c9e0fc8d5f1f8a71d03cbf0f5b8d59fdd6f3126
  size 15472
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ea090671e9e0107c57eb483ad64dfda3f8f03477d846083978fd325481ad13ae
+ oid sha256:d4c8f45a3245a65d7841da3bddd87ab402a2b064d40da551488e9bdaaafe5d6b
  size 15472
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e3b8cc489c9f7d5c810d95c1d7bed07638ecb58d3cdf988fa074bd79e5349fe0
+ oid sha256:881bf6de9276e1519f4ce875bf9d33f3046848320cfb9914c4c1daef1db2313d
  size 1064
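The rng_state_{0..5}.pth and scheduler.pt files are the Trainer's resume artifacts: per-rank RNG snapshots and the learning-rate scheduler state. A small sketch for inspecting them locally with PyTorch; the paths assume a local clone with LFS objects pulled.

import torch

# These files contain pickled Python objects, so weights_only=False is needed on
# recent PyTorch versions; only do this for checkpoints you trust.
sched = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng0 = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu", weights_only=False)
print(sched)        # scheduler state dict (e.g. last LR, step count)
print(rng0.keys())  # typically python / numpy / cpu / cuda RNG states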
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.8258064516129032,
+ "epoch": 1.0064516129032257,
  "eval_steps": 32,
- "global_step": 128,
+ "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -943,6 +943,202 @@
  "eval_samples_per_second": 1.606,
  "eval_steps_per_second": 0.064,
  "step": 128
+ },
+ {
+ "epoch": 0.832258064516129,
+ "grad_norm": 0.2930692349121967,
+ "learning_rate": 9.236211973370124e-06,
+ "loss": 1.2804,
+ "step": 129
+ },
+ {
+ "epoch": 0.8387096774193549,
+ "grad_norm": 0.3514011035647982,
+ "learning_rate": 8.937556821538201e-06,
+ "loss": 1.3527,
+ "step": 130
+ },
+ {
+ "epoch": 0.8451612903225807,
+ "grad_norm": 0.3509271601664881,
+ "learning_rate": 8.64880628824269e-06,
+ "loss": 1.2336,
+ "step": 131
+ },
+ {
+ "epoch": 0.8516129032258064,
+ "grad_norm": 0.369286535470622,
+ "learning_rate": 8.370114446083686e-06,
+ "loss": 1.2204,
+ "step": 132
+ },
+ {
+ "epoch": 0.8580645161290322,
+ "grad_norm": 0.3376899684032205,
+ "learning_rate": 8.101630000506864e-06,
+ "loss": 1.114,
+ "step": 133
+ },
+ {
+ "epoch": 0.864516129032258,
+ "grad_norm": 0.34528372468606205,
+ "learning_rate": 7.843496210456687e-06,
+ "loss": 1.2915,
+ "step": 134
+ },
+ {
+ "epoch": 0.8709677419354839,
+ "grad_norm": 0.3271748537414322,
+ "learning_rate": 7.595850811935759e-06,
+ "loss": 1.2242,
+ "step": 135
+ },
+ {
+ "epoch": 0.8774193548387097,
+ "grad_norm": 0.34552044795509895,
+ "learning_rate": 7.358825944511101e-06,
+ "loss": 1.2238,
+ "step": 136
+ },
+ {
+ "epoch": 0.8838709677419355,
+ "grad_norm": 0.3645405834936748,
+ "learning_rate": 7.132548080806653e-06,
+ "loss": 1.1925,
+ "step": 137
+ },
+ {
+ "epoch": 0.8903225806451613,
+ "grad_norm": 0.39117823625181364,
+ "learning_rate": 6.917137959019528e-06,
+ "loss": 1.1295,
+ "step": 138
+ },
+ {
+ "epoch": 0.896774193548387,
+ "grad_norm": 0.3256830351093455,
+ "learning_rate": 6.712710518496049e-06,
+ "loss": 1.2506,
+ "step": 139
+ },
+ {
+ "epoch": 0.9032258064516129,
+ "grad_norm": 0.4262467981624931,
+ "learning_rate": 6.519374838401997e-06,
+ "loss": 1.1759,
+ "step": 140
+ },
+ {
+ "epoch": 0.9096774193548387,
+ "grad_norm": 0.35503437951993716,
+ "learning_rate": 6.337234079519728e-06,
+ "loss": 1.1777,
+ "step": 141
+ },
+ {
+ "epoch": 0.9161290322580645,
+ "grad_norm": 0.3897540509188695,
+ "learning_rate": 6.166385429203269e-06,
+ "loss": 1.1239,
+ "step": 142
+ },
+ {
+ "epoch": 0.9225806451612903,
+ "grad_norm": 0.36016445939620884,
+ "learning_rate": 6.006920049520701e-06,
+ "loss": 1.2692,
+ "step": 143
+ },
+ {
+ "epoch": 0.9290322580645162,
+ "grad_norm": 0.4413576798023392,
+ "learning_rate": 5.858923028611572e-06,
+ "loss": 1.1879,
+ "step": 144
+ },
+ {
+ "epoch": 0.9354838709677419,
+ "grad_norm": 0.37955599088497055,
+ "learning_rate": 5.722473335285244e-06,
+ "loss": 1.205,
+ "step": 145
+ },
+ {
+ "epoch": 0.9419354838709677,
+ "grad_norm": 0.35919500181972724,
+ "learning_rate": 5.597643776884412e-06,
+ "loss": 1.1617,
+ "step": 146
+ },
+ {
+ "epoch": 0.9483870967741935,
+ "grad_norm": 0.3022686971058462,
+ "learning_rate": 5.4845009604363e-06,
+ "loss": 1.2059,
+ "step": 147
+ },
+ {
+ "epoch": 0.9548387096774194,
+ "grad_norm": 0.30291369490101205,
+ "learning_rate": 5.38310525711221e-06,
+ "loss": 1.2672,
+ "step": 148
+ },
+ {
+ "epoch": 0.9612903225806452,
+ "grad_norm": 0.33599320279905975,
+ "learning_rate": 5.293510770014475e-06,
+ "loss": 1.2755,
+ "step": 149
+ },
+ {
+ "epoch": 0.967741935483871,
+ "grad_norm": 0.2903929279243622,
+ "learning_rate": 5.215765305307886e-06,
+ "loss": 1.1675,
+ "step": 150
+ },
+ {
+ "epoch": 0.9741935483870968,
+ "grad_norm": 0.3305110382050327,
+ "learning_rate": 5.149910346711126e-06,
+ "loss": 1.2342,
+ "step": 151
+ },
+ {
+ "epoch": 0.9806451612903225,
+ "grad_norm": 0.33304378208594904,
+ "learning_rate": 5.095981033361725e-06,
+ "loss": 1.1312,
+ "step": 152
+ },
+ {
+ "epoch": 0.9870967741935484,
+ "grad_norm": 0.3479102720763047,
+ "learning_rate": 5.05400614106637e-06,
+ "loss": 1.1753,
+ "step": 153
+ },
+ {
+ "epoch": 0.9935483870967742,
+ "grad_norm": 0.31384042987234395,
+ "learning_rate": 5.024008066946621e-06,
+ "loss": 1.2077,
+ "step": 154
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 0.5248637716000059,
+ "learning_rate": 5.006002817488162e-06,
+ "loss": 1.1639,
+ "step": 155
+ },
+ {
+ "epoch": 1.0064516129032257,
+ "grad_norm": 0.359683648131272,
+ "learning_rate": 5e-06,
+ "loss": 1.2093,
+ "step": 156
  }
  ],
  "logging_steps": 1,
@@ -957,12 +1153,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 262078904401920.0,
+ "total_flos": 319408664739840.0,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null