philschmid (HF staff) committed
Commit 747b2be • 1 parent: 3dd94b9

Training in progress, step 2000

Files changed (39)
  1. checkpoint-1600/latest +0 -1
  2. {checkpoint-1600 → checkpoint-2000}/config.json +0 -0
  3. {checkpoint-1600 → checkpoint-2000}/generation_config.json +0 -0
  4. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-2000/latest +1 -0
  21. {checkpoint-1600 → checkpoint-2000}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1600 → checkpoint-2000}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1600 → checkpoint-2000}/model.safetensors.index.json +0 -0
  24. {checkpoint-1600 → checkpoint-2000}/rng_state_0.pth +0 -0
  25. {checkpoint-1600 → checkpoint-2000}/rng_state_1.pth +0 -0
  26. {checkpoint-1600 → checkpoint-2000}/rng_state_2.pth +0 -0
  27. {checkpoint-1600 → checkpoint-2000}/rng_state_3.pth +0 -0
  28. {checkpoint-1600 → checkpoint-2000}/rng_state_4.pth +0 -0
  29. {checkpoint-1600 → checkpoint-2000}/rng_state_5.pth +0 -0
  30. {checkpoint-1600 → checkpoint-2000}/rng_state_6.pth +0 -0
  31. {checkpoint-1600 → checkpoint-2000}/rng_state_7.pth +0 -0
  32. {checkpoint-1600 → checkpoint-2000}/special_tokens_map.json +0 -0
  33. {checkpoint-1600 → checkpoint-2000}/tokenizer.json +0 -0
  34. {checkpoint-1600 → checkpoint-2000}/tokenizer.model +0 -0
  35. {checkpoint-1600 → checkpoint-2000}/tokenizer_config.json +0 -0
  36. {checkpoint-1600 → checkpoint-2000}/trainer_state.json +243 -3
  37. {checkpoint-1600 → checkpoint-2000}/training_args.bin +0 -0
  38. {checkpoint-1600 → checkpoint-2000}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
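
For anyone tracking the run, checkpoint-2000 is a standard Hugging Face Trainer + DeepSpeed ZeRO checkpoint: sharded safetensors weights, tokenizer files, per-rank RNG states, the ZeRO optimizer/model partitions under global_step2000, trainer_state.json, and the zero_to_fp32.py helper. A minimal sketch for fetching just this folder from the Hub follows; the repo id is a placeholder, since the repository name is not part of this commit view.

```python
from huggingface_hub import snapshot_download

# Placeholder repo id -- substitute the actual repository this commit belongs to.
REPO_ID = "philschmid/your-model-repo"

# Download only the step-2000 checkpoint folder (weights, tokenizer,
# trainer_state.json, and the DeepSpeed global_step2000 partitions).
local_dir = snapshot_download(
    repo_id=REPO_ID,
    allow_patterns=["checkpoint-2000/*"],
)
print(f"checkpoint saved under {local_dir}/checkpoint-2000")
```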
checkpoint-1600/latest DELETED
@@ -1 +0,0 @@
- global_step1600
{checkpoint-1600 → checkpoint-2000}/config.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/generation_config.json RENAMED
File without changes
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1a340884351df9f228d3ba5317543b112e21edc4d1572228a4abd7118e419a6b
+ oid sha256:b86230a2ff339a4eed9c61f759f6d801d3fae222c0ffec184fef39b202862127
  size 10107626487
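
The entries above are Git LFS pointer files, which is why each ~10 GB optimizer shard diffs as just three lines: the LFS spec version, the sha256 oid of the blob, and its size in bytes. A small sketch for reading such a pointer, assuming exactly the three-line format shown in these diffs:

```python
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields.

    Expected format (as in the diffs above):
        version https://git-lfs.github.com/spec/v1
        oid sha256:<64 hex characters>
        size <bytes>
    """
    fields = {}
    for line in Path(path).read_text().splitlines():
        if line.strip():
            key, _, value = line.partition(" ")
            fields[key] = value
    return fields

# Illustrative usage (path taken from this commit's file list):
# ptr = read_lfs_pointer(
#     "checkpoint-2000/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt"
# )
# print(ptr["oid"], int(ptr["size"]))
```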
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:27f75017ef2953071a44cde3f1155976fb76dde43b6395726f2683ee1ec2c250
+ oid sha256:d5b8bb19172786edf6574e43a9c4b464efc2951d5c0b65458d07fa6afe9df15f
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fdfad233cd4ea82dc4f02a8e5b074984bb6a29a18f6262838ec7b7f1630e0ac8
+ oid sha256:2f8d3afd87e522208354f159d281125efbbfd8595bf981ba6223257b2944354c
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5919bc8e571e808ff3b2bcb9ad7014597b2ee31e29993555c14491a33d11f095
+ oid sha256:47c7a4c8faae05cd37eb1a20d114c300a5a013bdda9679c80bebf1fd441dd432
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8c354eb1ceeeb29fb3a5f70328aa4850898429dd56b0f49cfb1b15ac4000f975
+ oid sha256:a588583656067de4b1e87507cb2c2e1244a005b5443c82f0d75aeaa3760cdfb6
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fead0347839e08434f2eed3165dad3e70691836ea6e9cf64ef56551331997bca
+ oid sha256:75a237a297019da5c7a00ad5dda55296958bc2c3ecdf3182fc2edda8a27e481a
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f8ffe9fbb7a126ec39112724337caecc4eae7d8e492d63490d8567816ba07929
+ oid sha256:991dcd9a7d7a82983e239fcf4412f5dc61102f27b8319a599c7d11680903f3a0
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96e21de250ee92a08fe926befd4f33f363a7c9bd7ca8bbb6aef12bce9df04133
+ oid sha256:510ff51bd274ede579cc39b4be0069bccd126470cf05c2334d71122857119a8a
  size 10107626487
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:91599707db75d7b03814882cbcbb8b854875d9e88102c0831463b1e42ed49ab1
+ oid sha256:6f06d86e91ff84582e5816e499e839bf1d2883228dba79604295f18fd7dc4d81
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6831020de473339b10b00488798cd0193af1763454bfa118f482faf07d70a44d
+ oid sha256:1f902162193bb7dfeb7d489c7c1d9e7ec375c73e0052cf4393daec6d3b6c9c64
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ddb9abf6f8ac61756727e9fcc585fd5a1a70c1a20ca3eb82c79388ae9fe06779
+ oid sha256:a13900136a358589c9e182e7d7ea628457d78ead8ac21e620ba07662be53e5fa
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a24b271888c79b57824d2821f84a48dcb6381041a8b0c6c06463ebd491bb032d
+ oid sha256:4ce2c58d0dd9d4730b644313621a4f6f77a9f14d57207002392948064896ba6b
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:93af96588d797250ee8a47897e3e963aa391dabb815f6714e949496001e60f03
+ oid sha256:c93cd97fb74752201ba28f0a3e3726ae8e0100f95455685923fea8410ee07442
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:288e31b1bddda038ec9427bc5bb7b953b1d58f1e7c11dbbf18e74dc37512d377
+ oid sha256:bb802fdb973c3617bf87161de7a1023dfeb05b9680bad7d2d12b9f6200cab168
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bab9e0ea1b5660b9e2221c0e11c6a20f979a0b1641b3514c45386900d1001699
+ oid sha256:1c8c558e62de075e88c652f7102d1bdc2487792643685bd9e42b6cdbecc59342
  size 168086
{checkpoint-1600/global_step1600 → checkpoint-2000/global_step2000}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:122e850bb9d0ec838955d8d4d6eabbb7c4437e56754813e6645e6ab2a32d64c8
+ oid sha256:3f43a35e30efe0e59291ccc9e8f22e871c034af657186e34c47cc9def82c0d4a
  size 168086
checkpoint-2000/latest ADDED
@@ -0,0 +1 @@
+ global_step2000
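
The new latest file is DeepSpeed's tag marker: a single line naming the partition directory to load (here global_step2000), which is how checkpoint loaders and zero_to_fp32.py find the right global_stepN folder. A trivial sketch of that lookup, using the paths from this commit:

```python
from pathlib import Path

ckpt_dir = Path("checkpoint-2000")

# `latest` contains one line with the current tag, e.g. "global_step2000".
tag = (ckpt_dir / "latest").read_text().strip()
step_dir = ckpt_dir / tag

print(f"tag = {tag}")
print("partition files:", sorted(p.name for p in step_dir.glob("*_states.pt")))
```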
{checkpoint-1600 → checkpoint-2000}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1bb8059dad24ddb21134712cb2d7c09e74afdb9c0ad8407d026104190bec0ae3
+ oid sha256:b53aa1baeaa6461a12b6d0712d6ab35ff574e6e5104ae96a3c3727e12d90eabc
  size 9976576392
{checkpoint-1600 → checkpoint-2000}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bf47ea3281e028d797e91e4208dafdd29c9912946218b26b9fc740a42513cda2
+ oid sha256:210e45fb0fb9b67984e732f88ec7a9c4718cc45bf8c06ac1085aacf91a12f6e4
  size 3500296504
{checkpoint-1600 → checkpoint-2000}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/tokenizer.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/tokenizer.model RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5333333333333333,
-  "global_step": 1600,
+  "epoch": 0.6666666666666666,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -966,11 +966,251 @@
       "learning_rate": 0.0003,
       "loss": 0.0885,
       "step": 1600
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0003,
+      "loss": 0.0795,
+      "step": 1610
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0003,
+      "loss": 0.0843,
+      "step": 1620
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0003,
+      "loss": 0.0734,
+      "step": 1630
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.0003,
+      "loss": 0.0744,
+      "step": 1640
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.0003,
+      "loss": 0.0794,
+      "step": 1650
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.0003,
+      "loss": 0.0708,
+      "step": 1660
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.0003,
+      "loss": 0.1133,
+      "step": 1670
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.0003,
+      "loss": 0.1582,
+      "step": 1680
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.0003,
+      "loss": 0.1106,
+      "step": 1690
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0003,
+      "loss": 0.0904,
+      "step": 1700
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0003,
+      "loss": 0.0785,
+      "step": 1710
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0003,
+      "loss": 0.0747,
+      "step": 1720
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 0.0003,
+      "loss": 0.0704,
+      "step": 1730
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 0.0003,
+      "loss": 0.0634,
+      "step": 1740
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 0.0003,
+      "loss": 0.0629,
+      "step": 1750
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 0.0003,
+      "loss": 0.057,
+      "step": 1760
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 0.0003,
+      "loss": 0.0563,
+      "step": 1770
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 0.0003,
+      "loss": 0.054,
+      "step": 1780
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.0003,
+      "loss": 0.0532,
+      "step": 1790
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.0003,
+      "loss": 0.0525,
+      "step": 1800
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.0003,
+      "loss": 0.0519,
+      "step": 1810
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 0.0003,
+      "loss": 0.0478,
+      "step": 1820
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 0.0003,
+      "loss": 0.0473,
+      "step": 1830
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 0.0003,
+      "loss": 0.0473,
+      "step": 1840
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.0003,
+      "loss": 0.0492,
+      "step": 1850
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.0003,
+      "loss": 0.0451,
+      "step": 1860
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.0003,
+      "loss": 0.0453,
+      "step": 1870
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 0.0003,
+      "loss": 0.0439,
+      "step": 1880
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 0.0003,
+      "loss": 0.0435,
+      "step": 1890
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 0.0003,
+      "loss": 0.0435,
+      "step": 1900
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 0.0003,
+      "loss": 0.0412,
+      "step": 1910
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 0.0003,
+      "loss": 0.0441,
+      "step": 1920
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 0.0003,
+      "loss": 0.04,
+      "step": 1930
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.0003,
+      "loss": 0.041,
+      "step": 1940
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.0003,
+      "loss": 0.1391,
+      "step": 1950
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.0003,
+      "loss": 0.1502,
+      "step": 1960
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.0003,
+      "loss": 0.0767,
+      "step": 1970
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.0003,
+      "loss": 0.0563,
+      "step": 1980
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.0003,
+      "loss": 0.0487,
+      "step": 1990
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 0.0003,
+      "loss": 0.042,
+      "step": 2000
     }
   ],
   "max_steps": 3000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 670014898176000.0,
+  "total_flos": 837518622720000.0,
   "trial_name": null,
   "trial_params": null
 }
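
The trainer_state.json diff above appends one log entry every 10 optimizer steps (1610 through 2000) and advances epoch, global_step, and total_flos accordingly. A quick way to inspect the tail of the loss curve from this file, assuming the standard Trainer log_history layout visible in the diff:

```python
import json

# Path follows the checkpoint layout in this commit.
with open("checkpoint-2000/trainer_state.json") as f:
    state = json.load(f)

print(f"global_step={state['global_step']}  epoch={state['epoch']:.4f}")

# Each log_history entry carries epoch, learning_rate, loss and step.
for entry in state["log_history"][-5:]:
    print(f"step {entry['step']:>5}: loss={entry['loss']:.4f}  lr={entry['learning_rate']}")
```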
{checkpoint-1600 → checkpoint-2000}/training_args.bin RENAMED
File without changes
{checkpoint-1600 → checkpoint-2000}/zero_to_fp32.py RENAMED
File without changes
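
zero_to_fp32.py is the stock DeepSpeed helper that the Trainer copies into each checkpoint; it merges the per-rank ZeRO partitions under the tag named in latest (global_step2000) into a single fp32 state dict, e.g. by running `python zero_to_fp32.py . pytorch_model_fp32.bin` inside checkpoint-2000. A sketch of the same consolidation through DeepSpeed's importable helper; treat the call as illustrative rather than part of this commit:

```python
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Reads the `latest` tag, loads the global_step2000 partitions and
# reconstructs a consolidated fp32 state dict on CPU.
state_dict = get_fp32_state_dict_from_zero_checkpoint("checkpoint-2000")

# Save a merged fp32 copy for inference or further conversion.
torch.save(state_dict, "checkpoint-2000/pytorch_model_fp32.bin")
```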
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1542e5f72a0deb50ea56f278bcbfb04cc7ef3686cad25fabc136934d1b2ae0e6
- size 33981
+ oid sha256:140ad3bdd050ce33f869362645d2fe35a1d764c22645cccd1674c2d206ead24c
+ size 35551
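
The growing tfevents file is the TensorBoard log for this run (the same loss curve that trainer_state.json records). The scalars can be read back without the TensorBoard UI via the event-accumulator API; the "train/loss" tag below is an assumption about how the Trainer logged it, so check the printed tag list first:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the run directory from this commit.
acc = EventAccumulator("runs/Aug22_18-42-03_ip-26-0-150-12")
acc.Reload()

print("scalar tags:", acc.Tags()["scalars"])

# "train/loss" is the usual HF Trainer tag, but it is not visible in this diff.
if "train/loss" in acc.Tags()["scalars"]:
    for event in acc.Scalars("train/loss")[-5:]:
        print(f"step {event.step}: loss={event.value:.4f}")
```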