philschmid HF staff committed on
Commit
73f4306
•
1 Parent(s): 747b2be

Training in progress, step 2100

Browse files
Files changed (39) hide show
  1. checkpoint-1700/latest +0 -1
  2. {checkpoint-1700 → checkpoint-2100}/config.json +0 -0
  3. {checkpoint-1700 → checkpoint-2100}/generation_config.json +0 -0
  4. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-2100/latest +1 -0
  21. {checkpoint-1700 → checkpoint-2100}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-1700 → checkpoint-2100}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-1700 → checkpoint-2100}/model.safetensors.index.json +0 -0
  24. {checkpoint-1700 → checkpoint-2100}/rng_state_0.pth +0 -0
  25. {checkpoint-1700 → checkpoint-2100}/rng_state_1.pth +0 -0
  26. {checkpoint-1700 → checkpoint-2100}/rng_state_2.pth +0 -0
  27. {checkpoint-1700 → checkpoint-2100}/rng_state_3.pth +0 -0
  28. {checkpoint-1700 → checkpoint-2100}/rng_state_4.pth +0 -0
  29. {checkpoint-1700 → checkpoint-2100}/rng_state_5.pth +0 -0
  30. {checkpoint-1700 → checkpoint-2100}/rng_state_6.pth +0 -0
  31. {checkpoint-1700 → checkpoint-2100}/rng_state_7.pth +0 -0
  32. {checkpoint-1700 → checkpoint-2100}/special_tokens_map.json +0 -0
  33. {checkpoint-1700 → checkpoint-2100}/tokenizer.json +0 -0
  34. {checkpoint-1700 → checkpoint-2100}/tokenizer.model +0 -0
  35. {checkpoint-1700 → checkpoint-2100}/tokenizer_config.json +0 -0
  36. {checkpoint-1700 → checkpoint-2100}/trainer_state.json +243 -3
  37. {checkpoint-1700 → checkpoint-2100}/training_args.bin +0 -0
  38. {checkpoint-1700 → checkpoint-2100}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1700/latest DELETED
@@ -1 +0,0 @@
1
- global_step1700
 
 
{checkpoint-1700 → checkpoint-2100}/config.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/generation_config.json RENAMED
File without changes
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:739358ce4d0db4ffe39ed9682bd0746d64e877475f82b11cc197a2d1776a95bf
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e17adae64d158ed7b80018b62d4f64f76a717739f3667ab868bd08b5546e36
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eddeb7918d9da8d945eff4226bad6433fb778ad8ac595d07ec9e6b727cc1fc88
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90103633ebee5c418c077e4b0d6fc4c2936161788ab990a5b63b49058bf980cd
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be571f757a3b36b7770415b18d750383e66b8f61adee886e8fd35b9f511d00a5
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac402f4e9dfd0a9cdeedc219939c8d2854a425000535a845a391b5a61be5fa21
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f82328ed0b0f3007255807c4af4a3f22fc4a6160d94d1fcab8c2a6cf33febf
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0587a206f02896afd81610085e44cc69b3fd43d70cc64144dda25c7471d6c721
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38205fee9c0d3d5fe1f62fed51614a5f8494d171cbf3a9980100ae29b0455e2f
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d431dee2fe9c6c5124ad116f289b7ae04ae192823760eb75d95eb02f500ec778
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:494e2e6979df9d5a6e1c501ca19a5d391eb72b5b7e453ef352b99c95af7e30c0
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de5485088c9b41156808d8346e4617be253fea5de5cd5d4b91f71a26e2e24ea1
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acd19a1452e5c563fdb4431f7f48bbae00d8854ca2069793f4fbc0beceda5cf1
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eed860602ba38c80b647d3f2412ee1d6b4778dbfce7131878373bbfbacb187c
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e74f03870306d16790aa672096848730b1af5bba987a74a58a386247c7d46d0a
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a484f7976af13384e667021a7316ea630924e973e29a67120db984ff15076c2
3
  size 10107626487
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b0de3709868caed6aec7042aa13cc5ff5a7cdecddaf21ef01d70778485299e0
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11aac4363e618fa55c62977722ae406ff69bb54c12e53e2fdeffa5f1632855c8
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:289994d8f0d45c07b722e692498d3d2b48d42f6d5773734bc0b1427168bcb03c
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dfa5bde2a410bd133271a27c7be77ad69ce3015fcdc26b7f58ff280eea1d359
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:580fb5ca74492c6b71ff5b8bce34f5d31d862c3f96df7887c8a48b3f6241d58d
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2acccdd6e28633f3c199afe57b8d9b80e0d128bae92176d2309e4102e3de50e0
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa2370bff44b992e28f59a7625c475f616252f1a45d67c27bf62dfef50a94e25
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ce5abe052a95d79d00ceac0272d90efa7b18a68a220d8345df706b98c600cf1
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:122914f6c795eec77e2d9459857b6ba0d3f66a9e407f4ac2889abd3c8353699a
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a2ce519ef75a9502353294f95984f83b8c1473ae7ea64a1828949d6d3e0e41
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95c4796941ff69c9089277ccc210ed66a53aa2aa27fd08592f1477f03ff51105
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851aceef9941342286cbd797e3b61362e8d088deabe520a249a37e2be273cc80
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d83660c9172cbdd43bb2f7680c46745082730200dd6e9ca742ce0059c3df8ee
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc4e5fa62b4b71b517895a983df2fc1b618aef52984e9caa8fe7898198a2e10
3
  size 168086
{checkpoint-1700/global_step1700 → checkpoint-2100/global_step2100}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31494943d94985534fd92f9d7ee175a61321dda3900fc4acafdd4b55c8335f81
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40d7b508c8727eadc3ef7115d9b998796e9f503c498fdea49536a01996484fe
3
  size 168086
checkpoint-2100/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step2100
{checkpoint-1700 → checkpoint-2100}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e03a32d4279f008ea227183bce3bea02bdc787efdefe57a690ce1bce74dce1e1
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6adc6b1233e38d5ffa6e2dd4bc49283f9ba67f0e6796f4957275ba8eac5007fa
3
  size 9976576392
{checkpoint-1700 → checkpoint-2100}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ff5280ad459b4473324fb3ecebb46301b963d7f24d4e3457da984689305500
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb05fce5d6454ad204985cd2264d190114a5a59a137bd596e9372925ffd56913
3
  size 3500296504
{checkpoint-1700 → checkpoint-2100}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_0.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_1.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_2.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_3.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_4.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_5.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_6.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/rng_state_7.pth RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/special_tokens_map.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/tokenizer.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/tokenizer.model RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/tokenizer_config.json RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5666666666666667,
5
- "global_step": 1700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1026,11 +1026,251 @@
1026
  "learning_rate": 0.0003,
1027
  "loss": 0.0904,
1028
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  }
1030
  ],
1031
  "max_steps": 3000,
1032
  "num_train_epochs": 9223372036854775807,
1033
- "total_flos": 711890829312000.0,
1034
  "trial_name": null,
1035
  "trial_params": null
1036
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7,
5
+ "global_step": 2100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1026
  "learning_rate": 0.0003,
1027
  "loss": 0.0904,
1028
  "step": 1700
1029
+ },
1030
+ {
1031
+ "epoch": 0.57,
1032
+ "learning_rate": 0.0003,
1033
+ "loss": 0.0785,
1034
+ "step": 1710
1035
+ },
1036
+ {
1037
+ "epoch": 0.57,
1038
+ "learning_rate": 0.0003,
1039
+ "loss": 0.0747,
1040
+ "step": 1720
1041
+ },
1042
+ {
1043
+ "epoch": 0.58,
1044
+ "learning_rate": 0.0003,
1045
+ "loss": 0.0704,
1046
+ "step": 1730
1047
+ },
1048
+ {
1049
+ "epoch": 0.58,
1050
+ "learning_rate": 0.0003,
1051
+ "loss": 0.0634,
1052
+ "step": 1740
1053
+ },
1054
+ {
1055
+ "epoch": 0.58,
1056
+ "learning_rate": 0.0003,
1057
+ "loss": 0.0629,
1058
+ "step": 1750
1059
+ },
1060
+ {
1061
+ "epoch": 0.59,
1062
+ "learning_rate": 0.0003,
1063
+ "loss": 0.057,
1064
+ "step": 1760
1065
+ },
1066
+ {
1067
+ "epoch": 0.59,
1068
+ "learning_rate": 0.0003,
1069
+ "loss": 0.0563,
1070
+ "step": 1770
1071
+ },
1072
+ {
1073
+ "epoch": 0.59,
1074
+ "learning_rate": 0.0003,
1075
+ "loss": 0.054,
1076
+ "step": 1780
1077
+ },
1078
+ {
1079
+ "epoch": 0.6,
1080
+ "learning_rate": 0.0003,
1081
+ "loss": 0.0532,
1082
+ "step": 1790
1083
+ },
1084
+ {
1085
+ "epoch": 0.6,
1086
+ "learning_rate": 0.0003,
1087
+ "loss": 0.0525,
1088
+ "step": 1800
1089
+ },
1090
+ {
1091
+ "epoch": 0.6,
1092
+ "learning_rate": 0.0003,
1093
+ "loss": 0.0519,
1094
+ "step": 1810
1095
+ },
1096
+ {
1097
+ "epoch": 0.61,
1098
+ "learning_rate": 0.0003,
1099
+ "loss": 0.0478,
1100
+ "step": 1820
1101
+ },
1102
+ {
1103
+ "epoch": 0.61,
1104
+ "learning_rate": 0.0003,
1105
+ "loss": 0.0473,
1106
+ "step": 1830
1107
+ },
1108
+ {
1109
+ "epoch": 0.61,
1110
+ "learning_rate": 0.0003,
1111
+ "loss": 0.0473,
1112
+ "step": 1840
1113
+ },
1114
+ {
1115
+ "epoch": 0.62,
1116
+ "learning_rate": 0.0003,
1117
+ "loss": 0.0492,
1118
+ "step": 1850
1119
+ },
1120
+ {
1121
+ "epoch": 0.62,
1122
+ "learning_rate": 0.0003,
1123
+ "loss": 0.0451,
1124
+ "step": 1860
1125
+ },
1126
+ {
1127
+ "epoch": 0.62,
1128
+ "learning_rate": 0.0003,
1129
+ "loss": 0.0453,
1130
+ "step": 1870
1131
+ },
1132
+ {
1133
+ "epoch": 0.63,
1134
+ "learning_rate": 0.0003,
1135
+ "loss": 0.0439,
1136
+ "step": 1880
1137
+ },
1138
+ {
1139
+ "epoch": 0.63,
1140
+ "learning_rate": 0.0003,
1141
+ "loss": 0.0435,
1142
+ "step": 1890
1143
+ },
1144
+ {
1145
+ "epoch": 0.63,
1146
+ "learning_rate": 0.0003,
1147
+ "loss": 0.0435,
1148
+ "step": 1900
1149
+ },
1150
+ {
1151
+ "epoch": 0.64,
1152
+ "learning_rate": 0.0003,
1153
+ "loss": 0.0412,
1154
+ "step": 1910
1155
+ },
1156
+ {
1157
+ "epoch": 0.64,
1158
+ "learning_rate": 0.0003,
1159
+ "loss": 0.0441,
1160
+ "step": 1920
1161
+ },
1162
+ {
1163
+ "epoch": 0.64,
1164
+ "learning_rate": 0.0003,
1165
+ "loss": 0.04,
1166
+ "step": 1930
1167
+ },
1168
+ {
1169
+ "epoch": 0.65,
1170
+ "learning_rate": 0.0003,
1171
+ "loss": 0.041,
1172
+ "step": 1940
1173
+ },
1174
+ {
1175
+ "epoch": 0.65,
1176
+ "learning_rate": 0.0003,
1177
+ "loss": 0.1391,
1178
+ "step": 1950
1179
+ },
1180
+ {
1181
+ "epoch": 0.65,
1182
+ "learning_rate": 0.0003,
1183
+ "loss": 0.1502,
1184
+ "step": 1960
1185
+ },
1186
+ {
1187
+ "epoch": 0.66,
1188
+ "learning_rate": 0.0003,
1189
+ "loss": 0.0767,
1190
+ "step": 1970
1191
+ },
1192
+ {
1193
+ "epoch": 0.66,
1194
+ "learning_rate": 0.0003,
1195
+ "loss": 0.0563,
1196
+ "step": 1980
1197
+ },
1198
+ {
1199
+ "epoch": 0.66,
1200
+ "learning_rate": 0.0003,
1201
+ "loss": 0.0487,
1202
+ "step": 1990
1203
+ },
1204
+ {
1205
+ "epoch": 0.67,
1206
+ "learning_rate": 0.0003,
1207
+ "loss": 0.042,
1208
+ "step": 2000
1209
+ },
1210
+ {
1211
+ "epoch": 0.67,
1212
+ "learning_rate": 0.0003,
1213
+ "loss": 0.0388,
1214
+ "step": 2010
1215
+ },
1216
+ {
1217
+ "epoch": 0.67,
1218
+ "learning_rate": 0.0003,
1219
+ "loss": 0.0424,
1220
+ "step": 2020
1221
+ },
1222
+ {
1223
+ "epoch": 0.68,
1224
+ "learning_rate": 0.0003,
1225
+ "loss": 0.0413,
1226
+ "step": 2030
1227
+ },
1228
+ {
1229
+ "epoch": 0.68,
1230
+ "learning_rate": 0.0003,
1231
+ "loss": 0.036,
1232
+ "step": 2040
1233
+ },
1234
+ {
1235
+ "epoch": 0.68,
1236
+ "learning_rate": 0.0003,
1237
+ "loss": 0.0333,
1238
+ "step": 2050
1239
+ },
1240
+ {
1241
+ "epoch": 0.69,
1242
+ "learning_rate": 0.0003,
1243
+ "loss": 0.0334,
1244
+ "step": 2060
1245
+ },
1246
+ {
1247
+ "epoch": 0.69,
1248
+ "learning_rate": 0.0003,
1249
+ "loss": 0.0298,
1250
+ "step": 2070
1251
+ },
1252
+ {
1253
+ "epoch": 0.69,
1254
+ "learning_rate": 0.0003,
1255
+ "loss": 0.0302,
1256
+ "step": 2080
1257
+ },
1258
+ {
1259
+ "epoch": 0.7,
1260
+ "learning_rate": 0.0003,
1261
+ "loss": 0.0302,
1262
+ "step": 2090
1263
+ },
1264
+ {
1265
+ "epoch": 0.7,
1266
+ "learning_rate": 0.0003,
1267
+ "loss": 0.0293,
1268
+ "step": 2100
1269
  }
1270
  ],
1271
  "max_steps": 3000,
1272
  "num_train_epochs": 9223372036854775807,
1273
+ "total_flos": 879394553856000.0,
1274
  "trial_name": null,
1275
  "trial_params": null
1276
  }
{checkpoint-1700 → checkpoint-2100}/training_args.bin RENAMED
File without changes
{checkpoint-1700 → checkpoint-2100}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:140ad3bdd050ce33f869362645d2fe35a1d764c22645cccd1674c2d206ead24c
3
- size 35551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3476f29a411313ed060c01b35f1409f573b4dc1eb8ce083996033c57780434b
3
+ size 37121