pilotj committed on
Commit
07a741b
1 Parent(s): f782e62

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c5ec040b12517fdd7deecc06ae8726c116052e27f3cc8dfa3cd5c93c96e7566
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f79e495154da67b877bea3f63a4a67b50b5bd6dc9f526699f399304facad62e
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bebd41dd436d0a96f8f404dd2a9a10c9b7be31469235ba2818e32114795ba445
3
  size 876185978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90d453de31c2419e4e349d4a4f435ab05d64a46c66c09d9b10ac7145a578d1f
3
  size 876185978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9302dd11ca8cf3c2eadc2e2dd7547119f45305e0e26866eb1c66b0a7bf115ba8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18250a969c7e8b744e0a81968b85799869d2e007063948d0e72541065db3bc16
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fbc19acf0a0a927f8be2149598258a3f317e7393a952a331dd9a912902ffcc3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdb930721cbebdf735b7bba6276bd5739b55ef721382393eeafb8577701beb4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.3724469542503357,
3
- "best_model_checkpoint": "results/checkpoint-2000",
4
- "epoch": 0.19104021396503965,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -67,6 +67,66 @@
67
  "eval_samples_per_second": 249.181,
68
  "eval_steps_per_second": 3.897,
69
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
71
  ],
72
  "logging_steps": 500,
@@ -86,7 +146,7 @@
86
  "attributes": {}
87
  }
88
  },
89
- "total_flos": 3.3685472280576e+16,
90
  "train_batch_size": 64,
91
  "trial_name": null,
92
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3715578615665436,
3
+ "best_model_checkpoint": "results/checkpoint-4000",
4
+ "epoch": 0.3820804279300793,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
67
  "eval_samples_per_second": 249.181,
68
  "eval_steps_per_second": 3.897,
69
  "step": 2000
70
+ },
71
+ {
72
+ "epoch": 0.23880026745629956,
73
+ "grad_norm": 8.174205780029297,
74
+ "learning_rate": 4.402999331359251e-05,
75
+ "loss": 0.4269,
76
+ "step": 2500
77
+ },
78
+ {
79
+ "epoch": 0.23880026745629956,
80
+ "eval_loss": 0.3822018504142761,
81
+ "eval_runtime": 109.3926,
82
+ "eval_samples_per_second": 239.056,
83
+ "eval_steps_per_second": 3.739,
84
+ "step": 2500
85
+ },
86
+ {
87
+ "epoch": 0.28656032094755945,
88
+ "grad_norm": 8.617950439453125,
89
+ "learning_rate": 4.2835991976311015e-05,
90
+ "loss": 0.4417,
91
+ "step": 3000
92
+ },
93
+ {
94
+ "epoch": 0.28656032094755945,
95
+ "eval_loss": 0.37791815400123596,
96
+ "eval_runtime": 110.9874,
97
+ "eval_samples_per_second": 235.621,
98
+ "eval_steps_per_second": 3.685,
99
+ "step": 3000
100
+ },
101
+ {
102
+ "epoch": 0.33432037443881935,
103
+ "grad_norm": 4.857789993286133,
104
+ "learning_rate": 4.164199063902952e-05,
105
+ "loss": 0.4324,
106
+ "step": 3500
107
+ },
108
+ {
109
+ "epoch": 0.33432037443881935,
110
+ "eval_loss": 0.37730905413627625,
111
+ "eval_runtime": 110.4008,
112
+ "eval_samples_per_second": 236.873,
113
+ "eval_steps_per_second": 3.705,
114
+ "step": 3500
115
+ },
116
+ {
117
+ "epoch": 0.3820804279300793,
118
+ "grad_norm": 4.581517219543457,
119
+ "learning_rate": 4.044798930174802e-05,
120
+ "loss": 0.4184,
121
+ "step": 4000
122
+ },
123
+ {
124
+ "epoch": 0.3820804279300793,
125
+ "eval_loss": 0.3715578615665436,
126
+ "eval_runtime": 111.0466,
127
+ "eval_samples_per_second": 235.496,
128
+ "eval_steps_per_second": 3.683,
129
+ "step": 4000
130
  }
131
  ],
132
  "logging_steps": 500,
 
146
  "attributes": {}
147
  }
148
  },
149
+ "total_flos": 6.7370944561152e+16,
150
  "train_batch_size": 64,
151
  "trial_name": null,
152
  "trial_params": null