philschmid (HF staff) committed
Commit a10d0a3 • 1 Parent(s): 73f4306

Training in progress, step 2200

Files changed (39)
  1. checkpoint-1800/latest +0 -1
  2. {checkpoint-1800 β†’ checkpoint-2200}/config.json +0 -0
  3. {checkpoint-1800 β†’ checkpoint-2200}/generation_config.json +0 -0
  4. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-2200/latest +1 -0
  21. {checkpoint-1800 β†’ checkpoint-2200}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1800 β†’ checkpoint-2200}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1800 β†’ checkpoint-2200}/model.safetensors.index.json +0 -0
  24. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_0.pth +0 -0
  25. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_1.pth +0 -0
  26. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_2.pth +0 -0
  27. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_3.pth +0 -0
  28. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_4.pth +0 -0
  29. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_5.pth +0 -0
  30. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_6.pth +0 -0
  31. {checkpoint-1800 β†’ checkpoint-2200}/rng_state_7.pth +0 -0
  32. {checkpoint-1800 β†’ checkpoint-2200}/special_tokens_map.json +0 -0
  33. {checkpoint-1800 β†’ checkpoint-2200}/tokenizer.json +0 -0
  34. {checkpoint-1800 β†’ checkpoint-2200}/tokenizer.model +0 -0
  35. {checkpoint-1800 β†’ checkpoint-2200}/tokenizer_config.json +0 -0
  36. {checkpoint-1800 β†’ checkpoint-2200}/trainer_state.json +243 -3
  37. {checkpoint-1800 β†’ checkpoint-2200}/training_args.bin +0 -0
  38. {checkpoint-1800 β†’ checkpoint-2200}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
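Since each training checkpoint lives in its own checkpoint-* folder, a consumer rarely needs the whole repository. A minimal sketch for fetching only the files added in this commit with huggingface_hub; the repo id is a hypothetical placeholder:

# Download only the checkpoint-2200 folder instead of the full repo.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="philschmid/example-training-run",   # hypothetical placeholder, substitute the real repo
    allow_patterns=["checkpoint-2200/*"],        # restrict the download to this commit's checkpoint
)
print(local_dir)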
checkpoint-1800/latest DELETED
@@ -1 +0,0 @@
- global_step1800
{checkpoint-1800 β†’ checkpoint-2200}/config.json RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/generation_config.json RENAMED
File without changes
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:65ac051edb6a7231e7ff80bf7b6ff872456d98c899aa44c7f0de57ec100e241a
+ oid sha256:4e3de1f6ed0c91905320e4f3d6eb8149a7321097c6e422a5e66e96dd3ead20bb
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f63f4158c79b9097827e52efe21c448978712c950ba30906df8d4e81779f0705
+ oid sha256:0d6049942a6346ee62050a04a487e2ff6996de4173c7e6709642eecd59ca8846
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a06a42dbae3be95f0f13a0c6a7e3d8fff63f06a1b88e67198a327c7c3f2b4fdf
+ oid sha256:2c785db79058e106e14d7f10c31104d1b15877f6f2dad07cffa86889822ae089
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a2d439a4859cf52b20fb93dc23b4dba8c8083d3d58b8bd2b35129a82e4f9847f
+ oid sha256:84ef7caf982c837d86319b4809266bfe0034dfaac1fcaab8707e65f30a77434b
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:56e9a0ca5c754b3520440fc3506fe5a4e46f2a2eade8194719f6c07e5ee7b32e
+ oid sha256:5c7d71042fe43a020fe99829afa84900dd6b64aacf374a041aa004d70782d3ed
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ea9a156039d51fadfd2736deea73e96bf8dfb0dbf11a6c3b10eb075a358c9b8f
+ oid sha256:96a6f35eb2a5d74873f0d52cf6d8e3ee340a8cd41bb6876c29e3f05076efdef6
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:103ef36796553ded7aa4fea4a7e84fb84384939f9f20cda0a94ecf5087a56b7d
+ oid sha256:cb589add5394037a80e2aa7ad1684a3712834d5ec6f3e01faf0c7d59e847b8f8
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:478d4c670c95787a5e189a35cf100f0c722538933effcc1efdf43d7a3a79ec87
+ oid sha256:bb1df2b451f5442f92b20ec0c6aa921c45b654d615d6258d8b20a775c914db58
  size 10107626487
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1a3559441b3540dc1b5a49a8fb7253c34731a3906e17ecc6f79b18ee56a4ba0e
+ oid sha256:bc5beb79e0c17f4883e73fc1d86c62e293d253a4b710f34bdf075ea8d3c0948f
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2c7f165ac77d9d755737a9b47099cbc30144ddc12bbdf2a30877c884fe0aa613
+ oid sha256:5292ecd35a4d6b299ce503f942577436b9490e59d5037c6e7d934eedbb1ff589
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1d7943fb9f5e3008adcd3ed98b51af946f70d290a7337c4ffbe0eb7a9b97be6c
+ oid sha256:524b59f333eff994650fc2adca7b3a3643aacb6d7c3c6dc093db0c146d03f489
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:734fbce19436e77d545003a57bdb0fcc5fbd70f67630be95f23d1f26b813fc8b
+ oid sha256:20c0eb77991260ad958007ca687b20a0afd9d26b1fed86c679ca1358dd3d2d8b
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb7a3c3a029b300ee81735e4b5d95db67561d27acbcc726de134c1749f3d5442
+ oid sha256:d3b45f67a5c87020b1de5c66b0f84170840ae2bc56c3c0100ed1555919ac3cac
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:afac81b2fa8bcebadbac9d41f1d41c618419c5aa49f521a8b868d88affb1b81b
+ oid sha256:8913e88466a2a0af695b1a2df7aabd5441bd4f010bbb7ae1395d71c3971dc259
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0c36b334041e232c5d22af305a776884cf9a86554d427f772411f35d878d40e0
+ oid sha256:c46c8008a47e8e9afa844bf69d37341e7113bf2c75914e1ef052c526b2e95fd9
  size 168086
{checkpoint-1800/global_step1800 β†’ checkpoint-2200/global_step2200}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4741731558644f602064546705ffa83f9879c68a3314de93ea623d611af243e5
+ oid sha256:8879c52948ce81ce49a3864313623dd93244d519b021a17b8f9b0dab3f3bec72
  size 168086
checkpoint-2200/latest ADDED
@@ -0,0 +1 @@
+ global_step2200
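The one-line latest file added here is the DeepSpeed resume tag: when training restarts from this folder, DeepSpeed reads it to locate the matching global_step2200 shard directory. A minimal sketch of that lookup, assuming the checkpoint directory has been downloaded locally:

# Resolve the ZeRO shard directory the same way a resume would.
from pathlib import Path

ckpt_dir = Path("checkpoint-2200")                 # local path, assumed downloaded
tag = (ckpt_dir / "latest").read_text().strip()    # -> "global_step2200"
print("ZeRO shard directory:", ckpt_dir / tag)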
{checkpoint-1800 β†’ checkpoint-2200}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc8b8d03a8fe3e6b5c85db79b65e4586478a90485d98555e56952666cdae15e5
+ oid sha256:85d5e4fe559d56cec1b1fa412e29b6a90bbf8a69fb1db74a0c739a225f6b302f
  size 9976576392
{checkpoint-1800 β†’ checkpoint-2200}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ad1f107946442c1ba1bd23d3ab4b3b2906ea95617d47c6b2f8fe6b989084b50d
+ oid sha256:0c31f01aa8839468adecb9155e8925979cd25a6c002a11bac6b7027931452e12
  size 3500296504
{checkpoint-1800 β†’ checkpoint-2200}/model.safetensors.index.json RENAMED
File without changes
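The two safetensors shards above plus model.safetensors.index.json form a standard sharded transformers checkpoint, so the folder can be loaded directly once the files are local. A minimal sketch, assuming a locally downloaded checkpoint-2200 directory whose config.json describes a causal LM:

# Load the sharded bf16 weights; from_pretrained follows model.safetensors.index.json
# to pull in both model-0000*-of-00002.safetensors shards.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "checkpoint-2200"                          # local path, assumed downloaded
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.bfloat16)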
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/tokenizer.json RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/tokenizer.model RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6,
- "global_step": 1800,
+ "epoch": 0.7333333333333333,
+ "global_step": 2200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1086,11 +1086,251 @@
  "learning_rate": 0.0003,
  "loss": 0.0525,
  "step": 1800
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 0.0003,
+ "loss": 0.0519,
+ "step": 1810
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0003,
+ "loss": 0.0478,
+ "step": 1820
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0003,
+ "loss": 0.0473,
+ "step": 1830
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 0.0003,
+ "loss": 0.0473,
+ "step": 1840
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0003,
+ "loss": 0.0492,
+ "step": 1850
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0003,
+ "loss": 0.0451,
+ "step": 1860
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 0.0003,
+ "loss": 0.0453,
+ "step": 1870
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.0003,
+ "loss": 0.0439,
+ "step": 1880
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.0003,
+ "loss": 0.0435,
+ "step": 1890
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 0.0003,
+ "loss": 0.0435,
+ "step": 1900
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0003,
+ "loss": 0.0412,
+ "step": 1910
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0003,
+ "loss": 0.0441,
+ "step": 1920
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 0.0003,
+ "loss": 0.04,
+ "step": 1930
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0003,
+ "loss": 0.041,
+ "step": 1940
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0003,
+ "loss": 0.1391,
+ "step": 1950
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 0.0003,
+ "loss": 0.1502,
+ "step": 1960
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0003,
+ "loss": 0.0767,
+ "step": 1970
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0003,
+ "loss": 0.0563,
+ "step": 1980
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 0.0003,
+ "loss": 0.0487,
+ "step": 1990
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.0003,
+ "loss": 0.042,
+ "step": 2000
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.0003,
+ "loss": 0.0388,
+ "step": 2010
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 0.0003,
+ "loss": 0.0424,
+ "step": 2020
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0003,
+ "loss": 0.0413,
+ "step": 2030
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0003,
+ "loss": 0.036,
+ "step": 2040
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 0.0003,
+ "loss": 0.0333,
+ "step": 2050
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0003,
+ "loss": 0.0334,
+ "step": 2060
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0003,
+ "loss": 0.0298,
+ "step": 2070
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 0.0003,
+ "loss": 0.0302,
+ "step": 2080
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0003,
+ "loss": 0.0302,
+ "step": 2090
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0003,
+ "loss": 0.0293,
+ "step": 2100
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 0.0003,
+ "loss": 0.0286,
+ "step": 2110
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0003,
+ "loss": 0.0279,
+ "step": 2120
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0003,
+ "loss": 0.0258,
+ "step": 2130
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 0.0003,
+ "loss": 0.0259,
+ "step": 2140
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0003,
+ "loss": 0.0273,
+ "step": 2150
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0003,
+ "loss": 0.0253,
+ "step": 2160
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 0.0003,
+ "loss": 0.0247,
+ "step": 2170
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0003,
+ "loss": 0.0237,
+ "step": 2180
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0003,
+ "loss": 0.0231,
+ "step": 2190
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 0.0003,
+ "loss": 0.0213,
+ "step": 2200
  }
  ],
  "max_steps": 3000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 753766760448000.0,
+ "total_flos": 921270484992000.0,
  "trial_name": null,
  "trial_params": null
  }
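The trainer_state.json diff above appends one log entry per 10 steps (1810 through 2200). A minimal sketch for inspecting those entries, assuming the standard transformers Trainer schema in which they are stored under the log_history key and a locally downloaded checkpoint:

# Print the loss curve segment added by this commit (steps after 1800).
import json

with open("checkpoint-2200/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if entry.get("step", 0) > 1800 and "loss" in entry:
        print(entry["step"], entry["loss"])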
{checkpoint-1800 β†’ checkpoint-2200}/training_args.bin RENAMED
File without changes
{checkpoint-1800 β†’ checkpoint-2200}/zero_to_fp32.py RENAMED
File without changes
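zero_to_fp32.py is the helper DeepSpeed writes next to each checkpoint for consolidating the per-rank ZeRO shards (the bf16_zero_pp_rank_*_optim_states.pt files above) into a single fp32 state dict. A minimal sketch of invoking it, assuming its usual two-positional-argument CLI (checkpoint directory, output file); the exact interface can vary between DeepSpeed versions, and the output filename is a hypothetical choice:

# Consolidate the ZeRO partitions referenced by checkpoint-2200/latest into one fp32 file.
import subprocess

subprocess.run(
    [
        "python",
        "checkpoint-2200/zero_to_fp32.py",
        "checkpoint-2200",                          # directory containing the 'latest' tag file
        "checkpoint-2200/pytorch_model_fp32.bin",   # output path (hypothetical name)
    ],
    check=True,
)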
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b3476f29a411313ed060c01b35f1409f573b4dc1eb8ce083996033c57780434b
- size 37121
+ oid sha256:5a541254ae717180ce583645ab2027b88252e5c9429179141c869f80155d67f8
+ size 38691