plip committed on
Commit
ab06d98
1 Parent(s): c33f429

Training in progress, step 160000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:115ffe163e25d965deaf1e95234c158ffd00fbc71531098c0b9ab43a32b44422
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68579b000c67ae02c93fef507e910ee24187e399ca1591a2c497efffe8459dac
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f5f5ae4e2c1f14e613311f65c9deaf0995f26ac33b108067f68da4920fbd182
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce1b6fb3fb5d372a1bae3a15433a06de19284c7aa2bc4d3c1962d8f51884be2
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970f5edc45b72267b07b2b29a2f000865ed4d4ecdc6c514dcf59f57e03814ca9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6516d5b46fda40bf4c056933976209142d5376bc9d4025a968296f31b52bf24b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684030441e546f328363202be7e7a1e6d60b5494506eab9e81487ca712343e2e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc071f88617eb6afe60581ffbadbc2441a73aeec527e5556ce742e0646660ab9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.191176470588236,
5
- "global_step": 150000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3006,11 +3006,211 @@
3006
  "eval_samples_per_second": 750.39,
3007
  "eval_steps_per_second": 12.006,
3008
  "step": 150000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3009
  }
3010
  ],
3011
  "max_steps": 250000,
3012
  "num_train_epochs": 16,
3013
- "total_flos": 2.4024299484545255e+21,
3014
  "trial_name": null,
3015
  "trial_params": null
3016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.803921568627452,
5
+ "global_step": 160000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3006
  "eval_samples_per_second": 750.39,
3007
  "eval_steps_per_second": 12.006,
3008
  "step": 150000
3009
+ },
3010
+ {
3011
+ "epoch": 9.22,
3012
+ "learning_rate": 0.00023069198757093631,
3013
+ "loss": 0.4555,
3014
+ "step": 150500
3015
+ },
3016
+ {
3017
+ "epoch": 9.25,
3018
+ "learning_rate": 0.00022880544432159663,
3019
+ "loss": 0.4551,
3020
+ "step": 151000
3021
+ },
3022
+ {
3023
+ "epoch": 9.25,
3024
+ "eval_loss": 0.8021363615989685,
3025
+ "eval_runtime": 1.3155,
3026
+ "eval_samples_per_second": 760.176,
3027
+ "eval_steps_per_second": 12.163,
3028
+ "step": 151000
3029
+ },
3030
+ {
3031
+ "epoch": 9.28,
3032
+ "learning_rate": 0.00022692223406895848,
3033
+ "loss": 0.4554,
3034
+ "step": 151500
3035
+ },
3036
+ {
3037
+ "epoch": 9.31,
3038
+ "learning_rate": 0.000225042439190739,
3039
+ "loss": 0.4549,
3040
+ "step": 152000
3041
+ },
3042
+ {
3043
+ "epoch": 9.31,
3044
+ "eval_loss": 0.7971563339233398,
3045
+ "eval_runtime": 1.2684,
3046
+ "eval_samples_per_second": 788.389,
3047
+ "eval_steps_per_second": 12.614,
3048
+ "step": 152000
3049
+ },
3050
+ {
3051
+ "epoch": 9.34,
3052
+ "learning_rate": 0.00022316614191525587,
3053
+ "loss": 0.4546,
3054
+ "step": 152500
3055
+ },
3056
+ {
3057
+ "epoch": 9.38,
3058
+ "learning_rate": 0.00022129342431783026,
3059
+ "loss": 0.4545,
3060
+ "step": 153000
3061
+ },
3062
+ {
3063
+ "epoch": 9.38,
3064
+ "eval_loss": 0.8002565503120422,
3065
+ "eval_runtime": 1.2924,
3066
+ "eval_samples_per_second": 773.739,
3067
+ "eval_steps_per_second": 12.38,
3068
+ "step": 153000
3069
+ },
3070
+ {
3071
+ "epoch": 9.41,
3072
+ "learning_rate": 0.00021942436831719677,
3073
+ "loss": 0.4542,
3074
+ "step": 153500
3075
+ },
3076
+ {
3077
+ "epoch": 9.44,
3078
+ "learning_rate": 0.00021755905567191967,
3079
+ "loss": 0.4542,
3080
+ "step": 154000
3081
+ },
3082
+ {
3083
+ "epoch": 9.44,
3084
+ "eval_loss": 0.7999687790870667,
3085
+ "eval_runtime": 1.2876,
3086
+ "eval_samples_per_second": 776.658,
3087
+ "eval_steps_per_second": 12.427,
3088
+ "step": 154000
3089
+ },
3090
+ {
3091
+ "epoch": 9.47,
3092
+ "learning_rate": 0.00021569756797681686,
3093
+ "loss": 0.4538,
3094
+ "step": 154500
3095
+ },
3096
+ {
3097
+ "epoch": 9.5,
3098
+ "learning_rate": 0.00021383998665939054,
3099
+ "loss": 0.4539,
3100
+ "step": 155000
3101
+ },
3102
+ {
3103
+ "epoch": 9.5,
3104
+ "eval_loss": 0.7960466146469116,
3105
+ "eval_runtime": 1.3325,
3106
+ "eval_samples_per_second": 750.454,
3107
+ "eval_steps_per_second": 12.007,
3108
+ "step": 155000
3109
+ },
3110
+ {
3111
+ "epoch": 9.53,
3112
+ "learning_rate": 0.00021198639297626516,
3113
+ "loss": 0.4537,
3114
+ "step": 155500
3115
+ },
3116
+ {
3117
+ "epoch": 9.56,
3118
+ "learning_rate": 0.0002101368680096334,
3119
+ "loss": 0.4533,
3120
+ "step": 156000
3121
+ },
3122
+ {
3123
+ "epoch": 9.56,
3124
+ "eval_loss": 0.8035251498222351,
3125
+ "eval_runtime": 1.3033,
3126
+ "eval_samples_per_second": 767.279,
3127
+ "eval_steps_per_second": 12.276,
3128
+ "step": 156000
3129
+ },
3130
+ {
3131
+ "epoch": 9.59,
3132
+ "learning_rate": 0.00020829149266370862,
3133
+ "loss": 0.4532,
3134
+ "step": 156500
3135
+ },
3136
+ {
3137
+ "epoch": 9.62,
3138
+ "learning_rate": 0.00020645034766118703,
3139
+ "loss": 0.453,
3140
+ "step": 157000
3141
+ },
3142
+ {
3143
+ "epoch": 9.62,
3144
+ "eval_loss": 0.7953096628189087,
3145
+ "eval_runtime": 1.3024,
3146
+ "eval_samples_per_second": 767.789,
3147
+ "eval_steps_per_second": 12.285,
3148
+ "step": 157000
3149
+ },
3150
+ {
3151
+ "epoch": 9.65,
3152
+ "learning_rate": 0.00020461351353971526,
3153
+ "loss": 0.4527,
3154
+ "step": 157500
3155
+ },
3156
+ {
3157
+ "epoch": 9.68,
3158
+ "learning_rate": 0.00020278107064836847,
3159
+ "loss": 0.4527,
3160
+ "step": 158000
3161
+ },
3162
+ {
3163
+ "epoch": 9.68,
3164
+ "eval_loss": 0.7937498688697815,
3165
+ "eval_runtime": 1.2999,
3166
+ "eval_samples_per_second": 769.267,
3167
+ "eval_steps_per_second": 12.308,
3168
+ "step": 158000
3169
+ },
3170
+ {
3171
+ "epoch": 9.71,
3172
+ "learning_rate": 0.00020095309914413485,
3173
+ "loss": 0.4526,
3174
+ "step": 158500
3175
+ },
3176
+ {
3177
+ "epoch": 9.74,
3178
+ "learning_rate": 0.00019912967898840997,
3179
+ "loss": 0.4524,
3180
+ "step": 159000
3181
+ },
3182
+ {
3183
+ "epoch": 9.74,
3184
+ "eval_loss": 0.8021422624588013,
3185
+ "eval_runtime": 1.3181,
3186
+ "eval_samples_per_second": 758.64,
3187
+ "eval_steps_per_second": 12.138,
3188
+ "step": 159000
3189
+ },
3190
+ {
3191
+ "epoch": 9.77,
3192
+ "learning_rate": 0.00019731088994349834,
3193
+ "loss": 0.4524,
3194
+ "step": 159500
3195
+ },
3196
+ {
3197
+ "epoch": 9.8,
3198
+ "learning_rate": 0.0001954968115691248,
3199
+ "loss": 0.4519,
3200
+ "step": 160000
3201
+ },
3202
+ {
3203
+ "epoch": 9.8,
3204
+ "eval_loss": 0.8028143644332886,
3205
+ "eval_runtime": 1.282,
3206
+ "eval_samples_per_second": 780.028,
3207
+ "eval_steps_per_second": 12.48,
3208
+ "step": 160000
3209
  }
3210
  ],
3211
  "max_steps": 250000,
3212
  "num_train_epochs": 16,
3213
+ "total_flos": 2.5625979513203296e+21,
3214
  "trial_name": null,
3215
  "trial_params": null
3216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f5f5ae4e2c1f14e613311f65c9deaf0995f26ac33b108067f68da4920fbd182
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce1b6fb3fb5d372a1bae3a15433a06de19284c7aa2bc4d3c1962d8f51884be2
3
  size 25761253