jflotz committed on
Commit
5025aaa
·
1 Parent(s): 0d5e7f4

Training in progress, step 310000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db51f55bf1a16f0aa27746b583a094e054e9a9dfc4763b285a75a463ec28ba4e
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b24e15aaa4e24fe327e8a8cce89f20e5c9f2d77bf1d531b089aac8a736f1ed
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daf63bd5c42184ed6348a862f43229e825f00ce4dfcc6c106b3192d2a71460f8
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4758b3d99119d3a14a23fd0aec29ffaa12fdc4ff244891bef5326c956e766005
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc2096dd2c2f53f22fb4b877201d9217020b1902bfdb0e198941bfd92059ed8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d41d8b5030d0b0c384b52902b63df577c91ad4b52f45f72ce22b77b0fec1a92
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7914b9f1e3709b1198ec189eb9bb9105cd6b88dedbcbbdd4128934a703cf33e3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8c10dab6d3ee824fc8fe4628d3bf3ceea806ce0d2fbe513f32af4d508ab89e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.691648822269808,
5
- "global_step": 300000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6006,11 +6006,211 @@
6006
  "eval_samples_per_second": 1167.773,
6007
  "eval_steps_per_second": 18.302,
6008
  "step": 300000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6009
  }
6010
  ],
6011
  "max_steps": 500000,
6012
  "num_train_epochs": 12,
6013
- "total_flos": 9.584443437666508e+21,
6014
  "trial_name": null,
6015
  "trial_params": null
6016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.9147037830121345,
5
+ "global_step": 310000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6006
  "eval_samples_per_second": 1167.773,
6007
  "eval_steps_per_second": 18.302,
6008
  "step": 300000
6009
+ },
6010
+ {
6011
+ "epoch": 6.7,
6012
+ "learning_rate": 0.00011893996636109606,
6013
+ "loss": 0.2728,
6014
+ "step": 300500
6015
+ },
6016
+ {
6017
+ "epoch": 6.71,
6018
+ "learning_rate": 0.00011847572270435852,
6019
+ "loss": 0.2726,
6020
+ "step": 301000
6021
+ },
6022
+ {
6023
+ "epoch": 6.71,
6024
+ "eval_loss": 0.2552023231983185,
6025
+ "eval_runtime": 5.3426,
6026
+ "eval_samples_per_second": 429.941,
6027
+ "eval_steps_per_second": 6.738,
6028
+ "step": 301000
6029
+ },
6030
+ {
6031
+ "epoch": 6.73,
6032
+ "learning_rate": 0.00011801187847126579,
6033
+ "loss": 0.2696,
6034
+ "step": 301500
6035
+ },
6036
+ {
6037
+ "epoch": 6.74,
6038
+ "learning_rate": 0.00011754843873434411,
6039
+ "loss": 0.2691,
6040
+ "step": 302000
6041
+ },
6042
+ {
6043
+ "epoch": 6.74,
6044
+ "eval_loss": 0.25297147035598755,
6045
+ "eval_runtime": 1.9927,
6046
+ "eval_samples_per_second": 1152.684,
6047
+ "eval_steps_per_second": 18.066,
6048
+ "step": 302000
6049
+ },
6050
+ {
6051
+ "epoch": 6.75,
6052
+ "learning_rate": 0.00011708540856169612,
6053
+ "loss": 0.2702,
6054
+ "step": 302500
6055
+ },
6056
+ {
6057
+ "epoch": 6.76,
6058
+ "learning_rate": 0.00011662279301694567,
6059
+ "loss": 0.2698,
6060
+ "step": 303000
6061
+ },
6062
+ {
6063
+ "epoch": 6.76,
6064
+ "eval_loss": 0.253802090883255,
6065
+ "eval_runtime": 1.9502,
6066
+ "eval_samples_per_second": 1177.813,
6067
+ "eval_steps_per_second": 18.459,
6068
+ "step": 303000
6069
+ },
6070
+ {
6071
+ "epoch": 6.77,
6072
+ "learning_rate": 0.0001161605971591822,
6073
+ "loss": 0.2697,
6074
+ "step": 303500
6075
+ },
6076
+ {
6077
+ "epoch": 6.78,
6078
+ "learning_rate": 0.00011569882604290559,
6079
+ "loss": 0.2706,
6080
+ "step": 304000
6081
+ },
6082
+ {
6083
+ "epoch": 6.78,
6084
+ "eval_loss": 0.252233624458313,
6085
+ "eval_runtime": 2.0068,
6086
+ "eval_samples_per_second": 1144.633,
6087
+ "eval_steps_per_second": 17.939,
6088
+ "step": 304000
6089
+ },
6090
+ {
6091
+ "epoch": 6.79,
6092
+ "learning_rate": 0.00011523748471797075,
6093
+ "loss": 0.2704,
6094
+ "step": 304500
6095
+ },
6096
+ {
6097
+ "epoch": 6.8,
6098
+ "learning_rate": 0.00011477657822953255,
6099
+ "loss": 0.2705,
6100
+ "step": 305000
6101
+ },
6102
+ {
6103
+ "epoch": 6.8,
6104
+ "eval_loss": 0.25286948680877686,
6105
+ "eval_runtime": 2.0237,
6106
+ "eval_samples_per_second": 1135.028,
6107
+ "eval_steps_per_second": 17.789,
6108
+ "step": 305000
6109
+ },
6110
+ {
6111
+ "epoch": 6.81,
6112
+ "learning_rate": 0.00011431611161799043,
6113
+ "loss": 0.2732,
6114
+ "step": 305500
6115
+ },
6116
+ {
6117
+ "epoch": 6.83,
6118
+ "learning_rate": 0.0001138560899189335,
6119
+ "loss": 0.2707,
6120
+ "step": 306000
6121
+ },
6122
+ {
6123
+ "epoch": 6.83,
6124
+ "eval_loss": 0.2536955773830414,
6125
+ "eval_runtime": 2.005,
6126
+ "eval_samples_per_second": 1145.645,
6127
+ "eval_steps_per_second": 17.955,
6128
+ "step": 306000
6129
+ },
6130
+ {
6131
+ "epoch": 6.84,
6132
+ "learning_rate": 0.00011339651816308543,
6133
+ "loss": 0.271,
6134
+ "step": 306500
6135
+ },
6136
+ {
6137
+ "epoch": 6.85,
6138
+ "learning_rate": 0.00011293740137624925,
6139
+ "loss": 0.2713,
6140
+ "step": 307000
6141
+ },
6142
+ {
6143
+ "epoch": 6.85,
6144
+ "eval_loss": 0.25167515873908997,
6145
+ "eval_runtime": 1.9694,
6146
+ "eval_samples_per_second": 1166.352,
6147
+ "eval_steps_per_second": 18.28,
6148
+ "step": 307000
6149
+ },
6150
+ {
6151
+ "epoch": 6.86,
6152
+ "learning_rate": 0.00011247874457925261,
6153
+ "loss": 0.2716,
6154
+ "step": 307500
6155
+ },
6156
+ {
6157
+ "epoch": 6.87,
6158
+ "learning_rate": 0.0001120205527878927,
6159
+ "loss": 0.2696,
6160
+ "step": 308000
6161
+ },
6162
+ {
6163
+ "epoch": 6.87,
6164
+ "eval_loss": 0.25594934821128845,
6165
+ "eval_runtime": 1.9543,
6166
+ "eval_samples_per_second": 1175.377,
6167
+ "eval_steps_per_second": 18.421,
6168
+ "step": 308000
6169
+ },
6170
+ {
6171
+ "epoch": 6.88,
6172
+ "learning_rate": 0.00011156283101288165,
6173
+ "loss": 0.2701,
6174
+ "step": 308500
6175
+ },
6176
+ {
6177
+ "epoch": 6.89,
6178
+ "learning_rate": 0.00011110558425979132,
6179
+ "loss": 0.2702,
6180
+ "step": 309000
6181
+ },
6182
+ {
6183
+ "epoch": 6.89,
6184
+ "eval_loss": 0.2543644607067108,
6185
+ "eval_runtime": 1.9412,
6186
+ "eval_samples_per_second": 1183.27,
6187
+ "eval_steps_per_second": 18.545,
6188
+ "step": 309000
6189
+ },
6190
+ {
6191
+ "epoch": 6.9,
6192
+ "learning_rate": 0.00011064881752899906,
6193
+ "loss": 0.2698,
6194
+ "step": 309500
6195
+ },
6196
+ {
6197
+ "epoch": 6.91,
6198
+ "learning_rate": 0.00011019253581563262,
6199
+ "loss": 0.2695,
6200
+ "step": 310000
6201
+ },
6202
+ {
6203
+ "epoch": 6.91,
6204
+ "eval_loss": 0.2494657188653946,
6205
+ "eval_runtime": 2.0069,
6206
+ "eval_samples_per_second": 1144.556,
6207
+ "eval_steps_per_second": 17.938,
6208
+ "step": 310000
6209
  }
6210
  ],
6211
  "max_steps": 500000,
6212
  "num_train_epochs": 12,
6213
+ "total_flos": 9.903930576441008e+21,
6214
  "trial_name": null,
6215
  "trial_params": null
6216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daf63bd5c42184ed6348a862f43229e825f00ce4dfcc6c106b3192d2a71460f8
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4758b3d99119d3a14a23fd0aec29ffaa12fdc4ff244891bef5326c956e766005
3
  size 102501541