philschmid HF staff committed on
Commit
8560e14
•
1 Parent(s): 6a12802

Training in progress, step 500

Browse files
Files changed (40) hide show
  1. checkpoint-100/latest +0 -1
  2. checkpoint-100/trainer_state.json +0 -76
  3. {checkpoint-100 → checkpoint-500}/config.json +0 -0
  4. {checkpoint-100 → checkpoint-500}/generation_config.json +0 -0
  5. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  13. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  20. {checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  21. checkpoint-500/latest +1 -0
  22. {checkpoint-100 → checkpoint-500}/model-00001-of-00002.safetensors +1 -1
  23. {checkpoint-100 → checkpoint-500}/model-00002-of-00002.safetensors +1 -1
  24. {checkpoint-100 → checkpoint-500}/model.safetensors.index.json +0 -0
  25. {checkpoint-100 → checkpoint-500}/rng_state_0.pth +0 -0
  26. {checkpoint-100 → checkpoint-500}/rng_state_1.pth +0 -0
  27. {checkpoint-100 → checkpoint-500}/rng_state_2.pth +0 -0
  28. {checkpoint-100 → checkpoint-500}/rng_state_3.pth +0 -0
  29. {checkpoint-100 → checkpoint-500}/rng_state_4.pth +0 -0
  30. {checkpoint-100 → checkpoint-500}/rng_state_5.pth +0 -0
  31. {checkpoint-100 → checkpoint-500}/rng_state_6.pth +0 -0
  32. {checkpoint-100 → checkpoint-500}/rng_state_7.pth +0 -0
  33. {checkpoint-100 → checkpoint-500}/special_tokens_map.json +0 -0
  34. {checkpoint-100 → checkpoint-500}/tokenizer.json +0 -0
  35. {checkpoint-100 → checkpoint-500}/tokenizer.model +0 -0
  36. {checkpoint-100 → checkpoint-500}/tokenizer_config.json +0 -0
  37. checkpoint-500/trainer_state.json +316 -0
  38. {checkpoint-100 → checkpoint-500}/training_args.bin +0 -0
  39. {checkpoint-100 → checkpoint-500}/zero_to_fp32.py +0 -0
  40. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-100/latest DELETED
@@ -1 +0,0 @@
1
- global_step100
 
 
checkpoint-100/trainer_state.json DELETED
@@ -1,76 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.03333333333333333,
5
- "global_step": 100,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 0.0003,
13
- "loss": 9.4994,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.01,
18
- "learning_rate": 0.0003,
19
- "loss": 7.7223,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.01,
24
- "learning_rate": 0.0003,
25
- "loss": 7.3713,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.01,
30
- "learning_rate": 0.0003,
31
- "loss": 7.2695,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.02,
36
- "learning_rate": 0.0003,
37
- "loss": 7.2811,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.02,
42
- "learning_rate": 0.0003,
43
- "loss": 7.2291,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.02,
48
- "learning_rate": 0.0003,
49
- "loss": 7.1263,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.03,
54
- "learning_rate": 0.0003,
55
- "loss": 7.0881,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.03,
60
- "learning_rate": 0.0003,
61
- "loss": 6.5989,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.03,
66
- "learning_rate": 0.0003,
67
- "loss": 6.0091,
68
- "step": 100
69
- }
70
- ],
71
- "max_steps": 3000,
72
- "num_train_epochs": 9223372036854775807,
73
- "total_flos": 41875931136000.0,
74
- "trial_name": null,
75
- "trial_params": null
76
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{checkpoint-100 → checkpoint-500}/config.json RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/generation_config.json RENAMED
File without changes
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fe1a55e447470a69df52a08a8a6c31e9a156978a4ba2a625c8a30702b84488d
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784c0410b2a330cc433f981f1f5121bebabb1f3c1d781da96b8ef18c2e0db0c5
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bd87bf4c0c7c127cd0eb3ca94ae8ce5f8df7c07968f2625452f262034d79b04
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7a186e1928203b48ef1c8148819f63ee35e5e289f3d284193685d064f813fb
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38e715b0c78de79c8c4274d18035541dc36006f12d7eb04db4fb4d051be9c65a
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d6ee17959a269d6d3e3b64d1b432370d3b086eb26b333e9a61a532db3253be
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c4a28cf894bb5c203707bb5f458ca5180f20e561bf60496dac39b246ec8ab60
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c55b861ec085a2534503a9e5de7134d033ee8cb3625c35db2f3528370ae95c8d
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d812020511b6b6863b40a143452d94b2d9019f6bae0b386cfce3bdc9a8b522b5
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c905c2c37c0f2f94f4a5ae2d10d79032961adea61c10e1cbb2614f6e20d67fda
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:074a945d31ce70971139ebc0522d768a0e93721d8c011cac7f0a70070cd32ede
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4824286c51d845b36a99e1044983ef94a086ad5d007203581ec617fd89edb112
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95eb8c9c22f6e04c73e889c9064eaf020d163a78b9d9bfbde0fddb0d1e3c77cd
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70ae89087b135570ef769d71356ac53cb1162eeecd4e647c2ea4069b8e19094b
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41217fdfa05706b0ba5ec07a6668750369d829aa48488053279099d2c42dda93
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83612a4a188eb3093e946a69b95f9a0c3ddbdfe58f70767b69309388acb7cc83
3
  size 10107626487
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c132881e587ed89040fabcb6e0487cda037e7e63cc0e402e2256ac72d9df210
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f31bdab7ff163280101e1923f64561cf4a6d974c46612977fe58c68ac04e22
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bfaad6c2340d432686efd4237fadcfea88c5e8917fc21b82322ef3c50fb33aa
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3129eceffca8659953e6eac5b948129a0984740d232358cc99e9dab7d4426c27
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e8660f1738925a1b5009f1c37b11854cf879e34ece11e4da2b8a18ccc95dec9
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:251ecd563c2937490b8ba62da01d1c0bdd9597f24d5d788954b9c7073385b0ba
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9df8089958af2d5c7ac712c33503ecd4ad01b734acb1bedd9160ca10db525a7
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541652ce8ea40c93f93845b200b752fd459ea34dc30b036e763b7c0bbfe00041
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:462e29cf69d3535709cabb7bfb58d3ca268590048ac7fc4c2ae1f326058e58c3
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ea0ff60f07ad502cd98ed64c92b5bf21728b26f8ec4ad810862d72b85b770f
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c85beff6bc0047809de0ea778d57221d319ac97e0628ff2449ab761c4a2a0a59
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e23b7fcb735641a5db63cc2d01383e208488fea01050c3b293389297ce7410
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebca5e9a54db9920ece454e1e56264f9066a382467c5ed9d1dc9224615abca56
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80c0240d46abab60a4dda1cd710e6cd7d2379e125e99d7f698dc8732ee786ed
3
  size 168086
{checkpoint-100/global_step100 → checkpoint-500/global_step500}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5a4e8e62fb989ae6d935bce6357e8482a8b690b389444481585b41b2ee56251
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdeed66d696bbb7c4c3335a09232fd9fad0c780a4b1c78f93d28188e80446add
3
  size 168086
checkpoint-500/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step500
{checkpoint-100 → checkpoint-500}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3930d3a7fa35b76e46303c3755740429a6f81598f49260a3a5d24ba4072cc58
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1ee9759da30984c76dadddd1d8c7c94c3758ab4a57cf347cff3fa275944df4
3
  size 9976576392
{checkpoint-100 → checkpoint-500}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3375a48a87312ddf013e22779c80f47d877865d88ac39cfc1d7a625d513af7d8
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c8789b1f2769d0cd20c1574875ba9b08d058a97d8713b8a2a36e33e481dd3d1
3
  size 3500296504
{checkpoint-100 → checkpoint-500}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_0.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_1.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_2.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_3.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_4.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_5.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_6.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/rng_state_7.pth RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/special_tokens_map.json RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/tokenizer.json RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/tokenizer.model RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/tokenizer_config.json RENAMED
File without changes
checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.16666666666666666,
5
+ "global_step": 500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 0.0003,
13
+ "loss": 9.4994,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.01,
18
+ "learning_rate": 0.0003,
19
+ "loss": 7.7223,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.01,
24
+ "learning_rate": 0.0003,
25
+ "loss": 7.3713,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.01,
30
+ "learning_rate": 0.0003,
31
+ "loss": 7.2695,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.02,
36
+ "learning_rate": 0.0003,
37
+ "loss": 7.2811,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.02,
42
+ "learning_rate": 0.0003,
43
+ "loss": 7.2291,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.02,
48
+ "learning_rate": 0.0003,
49
+ "loss": 7.1263,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.03,
54
+ "learning_rate": 0.0003,
55
+ "loss": 7.0881,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.03,
60
+ "learning_rate": 0.0003,
61
+ "loss": 6.5989,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.03,
66
+ "learning_rate": 0.0003,
67
+ "loss": 6.0091,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.04,
72
+ "learning_rate": 0.0003,
73
+ "loss": 5.5511,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.04,
78
+ "learning_rate": 0.0003,
79
+ "loss": 5.3413,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.04,
84
+ "learning_rate": 0.0003,
85
+ "loss": 5.1775,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.05,
90
+ "learning_rate": 0.0003,
91
+ "loss": 5.104,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.05,
96
+ "learning_rate": 0.0003,
97
+ "loss": 5.0133,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 0.05,
102
+ "learning_rate": 0.0003,
103
+ "loss": 4.9843,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 0.06,
108
+ "learning_rate": 0.0003,
109
+ "loss": 4.9256,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 0.06,
114
+ "learning_rate": 0.0003,
115
+ "loss": 4.8692,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 0.06,
120
+ "learning_rate": 0.0003,
121
+ "loss": 4.9233,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 0.07,
126
+ "learning_rate": 0.0003,
127
+ "loss": 4.9139,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 0.07,
132
+ "learning_rate": 0.0003,
133
+ "loss": 4.8974,
134
+ "step": 210
135
+ },
136
+ {
137
+ "epoch": 0.07,
138
+ "learning_rate": 0.0003,
139
+ "loss": 4.9036,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 0.08,
144
+ "learning_rate": 0.0003,
145
+ "loss": 4.8635,
146
+ "step": 230
147
+ },
148
+ {
149
+ "epoch": 0.08,
150
+ "learning_rate": 0.0003,
151
+ "loss": 4.7883,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.08,
156
+ "learning_rate": 0.0003,
157
+ "loss": 4.7681,
158
+ "step": 250
159
+ },
160
+ {
161
+ "epoch": 0.09,
162
+ "learning_rate": 0.0003,
163
+ "loss": 4.7572,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 0.09,
168
+ "learning_rate": 0.0003,
169
+ "loss": 4.7552,
170
+ "step": 270
171
+ },
172
+ {
173
+ "epoch": 0.09,
174
+ "learning_rate": 0.0003,
175
+ "loss": 4.706,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 0.1,
180
+ "learning_rate": 0.0003,
181
+ "loss": 4.7015,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 0.1,
186
+ "learning_rate": 0.0003,
187
+ "loss": 4.695,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.1,
192
+ "learning_rate": 0.0003,
193
+ "loss": 4.6808,
194
+ "step": 310
195
+ },
196
+ {
197
+ "epoch": 0.11,
198
+ "learning_rate": 0.0003,
199
+ "loss": 4.6423,
200
+ "step": 320
201
+ },
202
+ {
203
+ "epoch": 0.11,
204
+ "learning_rate": 0.0003,
205
+ "loss": 4.613,
206
+ "step": 330
207
+ },
208
+ {
209
+ "epoch": 0.11,
210
+ "learning_rate": 0.0003,
211
+ "loss": 4.5851,
212
+ "step": 340
213
+ },
214
+ {
215
+ "epoch": 0.12,
216
+ "learning_rate": 0.0003,
217
+ "loss": 4.5882,
218
+ "step": 350
219
+ },
220
+ {
221
+ "epoch": 0.12,
222
+ "learning_rate": 0.0003,
223
+ "loss": 4.6228,
224
+ "step": 360
225
+ },
226
+ {
227
+ "epoch": 0.12,
228
+ "learning_rate": 0.0003,
229
+ "loss": 4.6269,
230
+ "step": 370
231
+ },
232
+ {
233
+ "epoch": 0.13,
234
+ "learning_rate": 0.0003,
235
+ "loss": 4.5364,
236
+ "step": 380
237
+ },
238
+ {
239
+ "epoch": 0.13,
240
+ "learning_rate": 0.0003,
241
+ "loss": 4.4992,
242
+ "step": 390
243
+ },
244
+ {
245
+ "epoch": 0.13,
246
+ "learning_rate": 0.0003,
247
+ "loss": 4.4799,
248
+ "step": 400
249
+ },
250
+ {
251
+ "epoch": 0.14,
252
+ "learning_rate": 0.0003,
253
+ "loss": 4.3733,
254
+ "step": 410
255
+ },
256
+ {
257
+ "epoch": 0.14,
258
+ "learning_rate": 0.0003,
259
+ "loss": 4.1788,
260
+ "step": 420
261
+ },
262
+ {
263
+ "epoch": 0.14,
264
+ "learning_rate": 0.0003,
265
+ "loss": 3.6706,
266
+ "step": 430
267
+ },
268
+ {
269
+ "epoch": 0.15,
270
+ "learning_rate": 0.0003,
271
+ "loss": 2.8767,
272
+ "step": 440
273
+ },
274
+ {
275
+ "epoch": 0.15,
276
+ "learning_rate": 0.0003,
277
+ "loss": 2.3927,
278
+ "step": 450
279
+ },
280
+ {
281
+ "epoch": 0.15,
282
+ "learning_rate": 0.0003,
283
+ "loss": 2.3062,
284
+ "step": 460
285
+ },
286
+ {
287
+ "epoch": 0.16,
288
+ "learning_rate": 0.0003,
289
+ "loss": 2.2299,
290
+ "step": 470
291
+ },
292
+ {
293
+ "epoch": 0.16,
294
+ "learning_rate": 0.0003,
295
+ "loss": 2.1739,
296
+ "step": 480
297
+ },
298
+ {
299
+ "epoch": 0.16,
300
+ "learning_rate": 0.0003,
301
+ "loss": 2.1303,
302
+ "step": 490
303
+ },
304
+ {
305
+ "epoch": 0.17,
306
+ "learning_rate": 0.0003,
307
+ "loss": 2.1092,
308
+ "step": 500
309
+ }
310
+ ],
311
+ "max_steps": 3000,
312
+ "num_train_epochs": 9223372036854775807,
313
+ "total_flos": 209379655680000.0,
314
+ "trial_name": null,
315
+ "trial_params": null
316
+ }
{checkpoint-100 → checkpoint-500}/training_args.bin RENAMED
File without changes
{checkpoint-100 → checkpoint-500}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbac0f36a764288570496a92b7a47a3fb69f7d7bbb9586a63782011ab9d573a0
3
- size 10431
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7125e059489f5648613066e3ee25f5b3db395f89a0c3ea565999ab8326362e4e
3
+ size 12001