jflotz commited on
Commit
9a3da60
·
1 Parent(s): 3cd36fa

Training in progress, step 360000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e2593684e29fbdab18ceae698c9db337e08a0411716ce5539ab2231b908aa6e
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbce419654e8e44df2eb4f9682536b881548d65724339ba4c292532cb71337f7
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaa6c11004791ef4e8c50d4145229be69aa0fee3306608bd403b2eb4d637219a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf27ccfc5825e3575b2b31b80d5eae840d89a2e45fea29d5a456ebd076f43b0c
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2250a0c64e809cea437709ab2245b0ac11e166c80e52d01a00381d2aba90145d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aecc6465a886ff601ea303358a61f89a30e07c965e206675258095a3d963058
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.806923625981442,
5
- "global_step": 350000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7006,11 +7006,211 @@
7006
  "eval_samples_per_second": 1146.703,
7007
  "eval_steps_per_second": 17.972,
7008
  "step": 350000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7009
  }
7010
  ],
7011
  "max_steps": 500000,
7012
  "num_train_epochs": 12,
7013
- "total_flos": 1.1181850677277593e+22,
7014
  "trial_name": null,
7015
  "trial_params": null
7016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.029978586723768,
5
+ "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7006
  "eval_samples_per_second": 1146.703,
7007
  "eval_steps_per_second": 17.972,
7008
  "step": 350000
7009
+ },
7010
+ {
7011
+ "epoch": 7.82,
7012
+ "learning_rate": 7.529152489465592e-05,
7013
+ "loss": 0.2638,
7014
+ "step": 350500
7015
+ },
7016
+ {
7017
+ "epoch": 7.83,
7018
+ "learning_rate": 7.489140439617708e-05,
7019
+ "loss": 0.2632,
7020
+ "step": 351000
7021
+ },
7022
+ {
7023
+ "epoch": 7.83,
7024
+ "eval_loss": 0.2466663420200348,
7025
+ "eval_runtime": 1.9675,
7026
+ "eval_samples_per_second": 1167.492,
7027
+ "eval_steps_per_second": 18.298,
7028
+ "step": 351000
7029
+ },
7030
+ {
7031
+ "epoch": 7.84,
7032
+ "learning_rate": 7.449215995246522e-05,
7033
+ "loss": 0.263,
7034
+ "step": 351500
7035
+ },
7036
+ {
7037
+ "epoch": 7.85,
7038
+ "learning_rate": 7.409379592959367e-05,
7039
+ "loss": 0.2631,
7040
+ "step": 352000
7041
+ },
7042
+ {
7043
+ "epoch": 7.85,
7044
+ "eval_loss": 0.24644367396831512,
7045
+ "eval_runtime": 1.9998,
7046
+ "eval_samples_per_second": 1148.612,
7047
+ "eval_steps_per_second": 18.002,
7048
+ "step": 352000
7049
+ },
7050
+ {
7051
+ "epoch": 7.86,
7052
+ "learning_rate": 7.369631668400746e-05,
7053
+ "loss": 0.2632,
7054
+ "step": 352500
7055
+ },
7056
+ {
7057
+ "epoch": 7.87,
7058
+ "learning_rate": 7.3299726562476e-05,
7059
+ "loss": 0.2629,
7060
+ "step": 353000
7061
+ },
7062
+ {
7063
+ "epoch": 7.87,
7064
+ "eval_loss": 0.24639040231704712,
7065
+ "eval_runtime": 1.996,
7066
+ "eval_samples_per_second": 1150.775,
7067
+ "eval_steps_per_second": 18.036,
7068
+ "step": 353000
7069
+ },
7070
+ {
7071
+ "epoch": 7.88,
7072
+ "learning_rate": 7.290402990204531e-05,
7073
+ "loss": 0.2628,
7074
+ "step": 353500
7075
+ },
7076
+ {
7077
+ "epoch": 7.9,
7078
+ "learning_rate": 7.250923102999073e-05,
7079
+ "loss": 0.2629,
7080
+ "step": 354000
7081
+ },
7082
+ {
7083
+ "epoch": 7.9,
7084
+ "eval_loss": 0.24618536233901978,
7085
+ "eval_runtime": 1.9784,
7086
+ "eval_samples_per_second": 1161.045,
7087
+ "eval_steps_per_second": 18.197,
7088
+ "step": 354000
7089
+ },
7090
+ {
7091
+ "epoch": 7.91,
7092
+ "learning_rate": 7.211533426376934e-05,
7093
+ "loss": 0.2629,
7094
+ "step": 354500
7095
+ },
7096
+ {
7097
+ "epoch": 7.92,
7098
+ "learning_rate": 7.172234391097317e-05,
7099
+ "loss": 0.2625,
7100
+ "step": 355000
7101
+ },
7102
+ {
7103
+ "epoch": 7.92,
7104
+ "eval_loss": 0.24593985080718994,
7105
+ "eval_runtime": 2.059,
7106
+ "eval_samples_per_second": 1115.596,
7107
+ "eval_steps_per_second": 17.484,
7108
+ "step": 355000
7109
+ },
7110
+ {
7111
+ "epoch": 7.93,
7112
+ "learning_rate": 7.133026426928173e-05,
7113
+ "loss": 0.2626,
7114
+ "step": 355500
7115
+ },
7116
+ {
7117
+ "epoch": 7.94,
7118
+ "learning_rate": 7.093909962641514e-05,
7119
+ "loss": 0.2626,
7120
+ "step": 356000
7121
+ },
7122
+ {
7123
+ "epoch": 7.94,
7124
+ "eval_loss": 0.24694356322288513,
7125
+ "eval_runtime": 1.9849,
7126
+ "eval_samples_per_second": 1157.222,
7127
+ "eval_steps_per_second": 18.137,
7128
+ "step": 356000
7129
+ },
7130
+ {
7131
+ "epoch": 7.95,
7132
+ "learning_rate": 7.054885426008737e-05,
7133
+ "loss": 0.2624,
7134
+ "step": 356500
7135
+ },
7136
+ {
7137
+ "epoch": 7.96,
7138
+ "learning_rate": 7.015953243795907e-05,
7139
+ "loss": 0.2625,
7140
+ "step": 357000
7141
+ },
7142
+ {
7143
+ "epoch": 7.96,
7144
+ "eval_loss": 0.24489082396030426,
7145
+ "eval_runtime": 2.0439,
7146
+ "eval_samples_per_second": 1123.842,
7147
+ "eval_steps_per_second": 17.614,
7148
+ "step": 357000
7149
+ },
7150
+ {
7151
+ "epoch": 7.97,
7152
+ "learning_rate": 6.97711384175914e-05,
7153
+ "loss": 0.2623,
7154
+ "step": 357500
7155
+ },
7156
+ {
7157
+ "epoch": 7.99,
7158
+ "learning_rate": 6.938367644639911e-05,
7159
+ "loss": 0.4432,
7160
+ "step": 358000
7161
+ },
7162
+ {
7163
+ "epoch": 7.99,
7164
+ "eval_loss": 0.6783205270767212,
7165
+ "eval_runtime": 2.0172,
7166
+ "eval_samples_per_second": 1138.683,
7167
+ "eval_steps_per_second": 17.846,
7168
+ "step": 358000
7169
+ },
7170
+ {
7171
+ "epoch": 8.0,
7172
+ "learning_rate": 6.899715076160425e-05,
7173
+ "loss": 0.6785,
7174
+ "step": 358500
7175
+ },
7176
+ {
7177
+ "epoch": 8.01,
7178
+ "learning_rate": 6.861156559018986e-05,
7179
+ "loss": 0.6774,
7180
+ "step": 359000
7181
+ },
7182
+ {
7183
+ "epoch": 8.01,
7184
+ "eval_loss": 0.6766601204872131,
7185
+ "eval_runtime": 2.0317,
7186
+ "eval_samples_per_second": 1130.554,
7187
+ "eval_steps_per_second": 17.719,
7188
+ "step": 359000
7189
+ },
7190
+ {
7191
+ "epoch": 8.02,
7192
+ "learning_rate": 6.822692514885346e-05,
7193
+ "loss": 0.6773,
7194
+ "step": 359500
7195
+ },
7196
+ {
7197
+ "epoch": 8.03,
7198
+ "learning_rate": 6.784323364396135e-05,
7199
+ "loss": 0.6773,
7200
+ "step": 360000
7201
+ },
7202
+ {
7203
+ "epoch": 8.03,
7204
+ "eval_loss": 0.6772929430007935,
7205
+ "eval_runtime": 2.0141,
7206
+ "eval_samples_per_second": 1140.463,
7207
+ "eval_steps_per_second": 17.874,
7208
+ "step": 360000
7209
  }
7210
  ],
7211
  "max_steps": 500000,
7212
  "num_train_epochs": 12,
7213
+ "total_flos": 1.1501309361790678e+22,
7214
  "trial_name": null,
7215
  "trial_params": null
7216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaa6c11004791ef4e8c50d4145229be69aa0fee3306608bd403b2eb4d637219a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf27ccfc5825e3575b2b31b80d5eae840d89a2e45fea29d5a456ebd076f43b0c
3
  size 102501541