ssunggun2 commited on
Commit
1961ec8
1 Parent(s): 4aec561

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +5 -2855
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c040bd2c72181ac32d9eca911ad3511e17a00b7f7692f948f0128919daeeca25
3
  size 13648432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a39d07526ad4c47fcf0ed20eb901c709f826a40aea5a2ba5a7eb336482a344
3
  size 13648432
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43bbe3ae6b244b1777e6da7982cbf7a2e9cb8022fbeddcd423589d85a6101664
3
  size 27338810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96f4e1fc406e0ef3079999964fd2e8c98f4ab679496de7aff06c9b03585f1ba4
3
  size 27338810
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e73f434e8535656b41b1d5bfa3067fcbe838f652992a4fe71ac08e37659f183f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169aa60243c3fe8bcd221891833b2694719c7c5c130939d89d62ad2ab552d909
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c29143e0a84c1bbf5f7d6c4cdd96647b5d99b5c3882bb49ef9e6bdc5f0abca94
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7448b75ce12f86e3d3b951700cc1d23b4d30c97166c1f777930e49ec35d82b42
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.072087287902832,
3
- "best_model_checkpoint": "/home/sunggeunan/data/ICL/outputs/lora/SKIML-ICL_mrqa_nq_v3/Meta-Llama-3-8B-Instruct-unanswerable-3Q-0U-0C-qa_first/checkpoint-811",
4
- "epoch": 1.9993836671802774,
5
  "eval_steps": 500,
6
- "global_step": 811,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2850,2856 +2850,6 @@
2850
  "eval_samples_per_second": 1.234,
2851
  "eval_steps_per_second": 0.308,
2852
  "step": 405
2853
- },
2854
- {
2855
- "epoch": 1.000924499229584,
2856
- "grad_norm": 0.5109534859657288,
2857
- "learning_rate": 3.5051993067590985e-07,
2858
- "loss": 2.0975,
2859
- "step": 406
2860
- },
2861
- {
2862
- "epoch": 1.0033898305084745,
2863
- "grad_norm": 0.5240735411643982,
2864
- "learning_rate": 3.500866551126516e-07,
2865
- "loss": 2.1655,
2866
- "step": 407
2867
- },
2868
- {
2869
- "epoch": 1.0058551617873652,
2870
- "grad_norm": 0.5256450772285461,
2871
- "learning_rate": 3.496533795493934e-07,
2872
- "loss": 2.1388,
2873
- "step": 408
2874
- },
2875
- {
2876
- "epoch": 1.0083204930662557,
2877
- "grad_norm": 0.5329455733299255,
2878
- "learning_rate": 3.4922010398613514e-07,
2879
- "loss": 2.089,
2880
- "step": 409
2881
- },
2882
- {
2883
- "epoch": 1.0107858243451464,
2884
- "grad_norm": 0.5201774835586548,
2885
- "learning_rate": 3.4878682842287693e-07,
2886
- "loss": 2.0812,
2887
- "step": 410
2888
- },
2889
- {
2890
- "epoch": 1.013251155624037,
2891
- "grad_norm": 0.482138454914093,
2892
- "learning_rate": 3.483535528596187e-07,
2893
- "loss": 2.1011,
2894
- "step": 411
2895
- },
2896
- {
2897
- "epoch": 1.0157164869029276,
2898
- "grad_norm": 0.5111163258552551,
2899
- "learning_rate": 3.479202772963605e-07,
2900
- "loss": 2.1351,
2901
- "step": 412
2902
- },
2903
- {
2904
- "epoch": 1.018181818181818,
2905
- "grad_norm": 0.5070258975028992,
2906
- "learning_rate": 3.474870017331022e-07,
2907
- "loss": 2.0726,
2908
- "step": 413
2909
- },
2910
- {
2911
- "epoch": 1.0206471494607088,
2912
- "grad_norm": 0.5213172435760498,
2913
- "learning_rate": 3.47053726169844e-07,
2914
- "loss": 2.0709,
2915
- "step": 414
2916
- },
2917
- {
2918
- "epoch": 1.0231124807395995,
2919
- "grad_norm": 0.5116674304008484,
2920
- "learning_rate": 3.4662045060658576e-07,
2921
- "loss": 2.1098,
2922
- "step": 415
2923
- },
2924
- {
2925
- "epoch": 1.02557781201849,
2926
- "grad_norm": 0.5342626571655273,
2927
- "learning_rate": 3.4618717504332756e-07,
2928
- "loss": 2.0572,
2929
- "step": 416
2930
- },
2931
- {
2932
- "epoch": 1.0280431432973807,
2933
- "grad_norm": 0.5366671085357666,
2934
- "learning_rate": 3.457538994800693e-07,
2935
- "loss": 2.1119,
2936
- "step": 417
2937
- },
2938
- {
2939
- "epoch": 1.0305084745762711,
2940
- "grad_norm": 0.5127258896827698,
2941
- "learning_rate": 3.4532062391681105e-07,
2942
- "loss": 2.1222,
2943
- "step": 418
2944
- },
2945
- {
2946
- "epoch": 1.0329738058551619,
2947
- "grad_norm": 0.5240333080291748,
2948
- "learning_rate": 3.448873483535528e-07,
2949
- "loss": 2.0789,
2950
- "step": 419
2951
- },
2952
- {
2953
- "epoch": 1.0354391371340523,
2954
- "grad_norm": 0.529655933380127,
2955
- "learning_rate": 3.444540727902946e-07,
2956
- "loss": 2.1118,
2957
- "step": 420
2958
- },
2959
- {
2960
- "epoch": 1.037904468412943,
2961
- "grad_norm": 0.5909092426300049,
2962
- "learning_rate": 3.4402079722703634e-07,
2963
- "loss": 2.0552,
2964
- "step": 421
2965
- },
2966
- {
2967
- "epoch": 1.0403697996918335,
2968
- "grad_norm": 0.5189386010169983,
2969
- "learning_rate": 3.4358752166377814e-07,
2970
- "loss": 2.0904,
2971
- "step": 422
2972
- },
2973
- {
2974
- "epoch": 1.0428351309707242,
2975
- "grad_norm": 0.5365529656410217,
2976
- "learning_rate": 3.431542461005199e-07,
2977
- "loss": 2.0861,
2978
- "step": 423
2979
- },
2980
- {
2981
- "epoch": 1.0453004622496147,
2982
- "grad_norm": 0.5217022895812988,
2983
- "learning_rate": 3.427209705372617e-07,
2984
- "loss": 2.0698,
2985
- "step": 424
2986
- },
2987
- {
2988
- "epoch": 1.0477657935285054,
2989
- "grad_norm": 0.5230218172073364,
2990
- "learning_rate": 3.4228769497400343e-07,
2991
- "loss": 2.0571,
2992
- "step": 425
2993
- },
2994
- {
2995
- "epoch": 1.0502311248073959,
2996
- "grad_norm": 0.5243125557899475,
2997
- "learning_rate": 3.418544194107452e-07,
2998
- "loss": 2.1333,
2999
- "step": 426
3000
- },
3001
- {
3002
- "epoch": 1.0526964560862866,
3003
- "grad_norm": 0.5268332958221436,
3004
- "learning_rate": 3.4142114384748697e-07,
3005
- "loss": 2.0533,
3006
- "step": 427
3007
- },
3008
- {
3009
- "epoch": 1.0551617873651773,
3010
- "grad_norm": 0.5369420051574707,
3011
- "learning_rate": 3.4098786828422877e-07,
3012
- "loss": 2.0523,
3013
- "step": 428
3014
- },
3015
- {
3016
- "epoch": 1.0576271186440678,
3017
- "grad_norm": 0.5415328145027161,
3018
- "learning_rate": 3.405545927209705e-07,
3019
- "loss": 2.1297,
3020
- "step": 429
3021
- },
3022
- {
3023
- "epoch": 1.0600924499229585,
3024
- "grad_norm": 0.5068016648292542,
3025
- "learning_rate": 3.401213171577123e-07,
3026
- "loss": 2.1055,
3027
- "step": 430
3028
- },
3029
- {
3030
- "epoch": 1.062557781201849,
3031
- "grad_norm": 0.5332845449447632,
3032
- "learning_rate": 3.3968804159445406e-07,
3033
- "loss": 2.0956,
3034
- "step": 431
3035
- },
3036
- {
3037
- "epoch": 1.0650231124807397,
3038
- "grad_norm": 0.5464420318603516,
3039
- "learning_rate": 3.3925476603119585e-07,
3040
- "loss": 2.1394,
3041
- "step": 432
3042
- },
3043
- {
3044
- "epoch": 1.0674884437596301,
3045
- "grad_norm": 0.5103365778923035,
3046
- "learning_rate": 3.388214904679376e-07,
3047
- "loss": 2.0665,
3048
- "step": 433
3049
- },
3050
- {
3051
- "epoch": 1.0699537750385208,
3052
- "grad_norm": 0.516782820224762,
3053
- "learning_rate": 3.3838821490467934e-07,
3054
- "loss": 2.0944,
3055
- "step": 434
3056
- },
3057
- {
3058
- "epoch": 1.0724191063174113,
3059
- "grad_norm": 0.518387496471405,
3060
- "learning_rate": 3.379549393414211e-07,
3061
- "loss": 2.071,
3062
- "step": 435
3063
- },
3064
- {
3065
- "epoch": 1.074884437596302,
3066
- "grad_norm": 0.5234736204147339,
3067
- "learning_rate": 3.375216637781629e-07,
3068
- "loss": 2.1083,
3069
- "step": 436
3070
- },
3071
- {
3072
- "epoch": 1.0773497688751925,
3073
- "grad_norm": 0.5194358229637146,
3074
- "learning_rate": 3.3708838821490463e-07,
3075
- "loss": 2.0615,
3076
- "step": 437
3077
- },
3078
- {
3079
- "epoch": 1.0798151001540832,
3080
- "grad_norm": 0.4968649744987488,
3081
- "learning_rate": 3.3665511265164643e-07,
3082
- "loss": 2.0345,
3083
- "step": 438
3084
- },
3085
- {
3086
- "epoch": 1.0822804314329737,
3087
- "grad_norm": 0.5469282865524292,
3088
- "learning_rate": 3.362218370883882e-07,
3089
- "loss": 2.1193,
3090
- "step": 439
3091
- },
3092
- {
3093
- "epoch": 1.0847457627118644,
3094
- "grad_norm": 0.509821891784668,
3095
- "learning_rate": 3.3578856152512997e-07,
3096
- "loss": 2.092,
3097
- "step": 440
3098
- },
3099
- {
3100
- "epoch": 1.087211093990755,
3101
- "grad_norm": 0.5226166248321533,
3102
- "learning_rate": 3.353552859618717e-07,
3103
- "loss": 2.0753,
3104
- "step": 441
3105
- },
3106
- {
3107
- "epoch": 1.0896764252696456,
3108
- "grad_norm": 0.5321833491325378,
3109
- "learning_rate": 3.349220103986135e-07,
3110
- "loss": 2.0423,
3111
- "step": 442
3112
- },
3113
- {
3114
- "epoch": 1.0921417565485363,
3115
- "grad_norm": 0.5013507604598999,
3116
- "learning_rate": 3.3448873483535526e-07,
3117
- "loss": 2.0561,
3118
- "step": 443
3119
- },
3120
- {
3121
- "epoch": 1.0946070878274268,
3122
- "grad_norm": 0.5189869999885559,
3123
- "learning_rate": 3.3405545927209706e-07,
3124
- "loss": 2.074,
3125
- "step": 444
3126
- },
3127
- {
3128
- "epoch": 1.0970724191063175,
3129
- "grad_norm": 0.5598823428153992,
3130
- "learning_rate": 3.336221837088388e-07,
3131
- "loss": 2.082,
3132
- "step": 445
3133
- },
3134
- {
3135
- "epoch": 1.099537750385208,
3136
- "grad_norm": 0.553289532661438,
3137
- "learning_rate": 3.331889081455806e-07,
3138
- "loss": 2.1035,
3139
- "step": 446
3140
- },
3141
- {
3142
- "epoch": 1.1020030816640987,
3143
- "grad_norm": 0.5163353681564331,
3144
- "learning_rate": 3.3275563258232235e-07,
3145
- "loss": 2.0894,
3146
- "step": 447
3147
- },
3148
- {
3149
- "epoch": 1.1044684129429891,
3150
- "grad_norm": 0.5245928168296814,
3151
- "learning_rate": 3.3232235701906414e-07,
3152
- "loss": 2.0705,
3153
- "step": 448
3154
- },
3155
- {
3156
- "epoch": 1.1069337442218798,
3157
- "grad_norm": 0.5174081921577454,
3158
- "learning_rate": 3.3188908145580584e-07,
3159
- "loss": 2.1132,
3160
- "step": 449
3161
- },
3162
- {
3163
- "epoch": 1.1093990755007703,
3164
- "grad_norm": 0.5218523740768433,
3165
- "learning_rate": 3.3145580589254763e-07,
3166
- "loss": 2.0536,
3167
- "step": 450
3168
- },
3169
- {
3170
- "epoch": 1.111864406779661,
3171
- "grad_norm": 0.525867760181427,
3172
- "learning_rate": 3.310225303292894e-07,
3173
- "loss": 2.1543,
3174
- "step": 451
3175
- },
3176
- {
3177
- "epoch": 1.1143297380585517,
3178
- "grad_norm": 0.5667258501052856,
3179
- "learning_rate": 3.305892547660312e-07,
3180
- "loss": 2.0583,
3181
- "step": 452
3182
- },
3183
- {
3184
- "epoch": 1.1167950693374422,
3185
- "grad_norm": 0.541770875453949,
3186
- "learning_rate": 3.301559792027729e-07,
3187
- "loss": 2.1327,
3188
- "step": 453
3189
- },
3190
- {
3191
- "epoch": 1.119260400616333,
3192
- "grad_norm": 0.5466485023498535,
3193
- "learning_rate": 3.297227036395147e-07,
3194
- "loss": 2.0528,
3195
- "step": 454
3196
- },
3197
- {
3198
- "epoch": 1.1217257318952234,
3199
- "grad_norm": 0.5483329892158508,
3200
- "learning_rate": 3.2928942807625647e-07,
3201
- "loss": 2.1536,
3202
- "step": 455
3203
- },
3204
- {
3205
- "epoch": 1.124191063174114,
3206
- "grad_norm": 0.5235666632652283,
3207
- "learning_rate": 3.2885615251299826e-07,
3208
- "loss": 2.0838,
3209
- "step": 456
3210
- },
3211
- {
3212
- "epoch": 1.1266563944530046,
3213
- "grad_norm": 0.5424934029579163,
3214
- "learning_rate": 3.2842287694974e-07,
3215
- "loss": 2.113,
3216
- "step": 457
3217
- },
3218
- {
3219
- "epoch": 1.1291217257318953,
3220
- "grad_norm": 0.5532853007316589,
3221
- "learning_rate": 3.279896013864818e-07,
3222
- "loss": 2.0799,
3223
- "step": 458
3224
- },
3225
- {
3226
- "epoch": 1.1315870570107858,
3227
- "grad_norm": 0.5158779621124268,
3228
- "learning_rate": 3.2755632582322355e-07,
3229
- "loss": 2.0674,
3230
- "step": 459
3231
- },
3232
- {
3233
- "epoch": 1.1340523882896765,
3234
- "grad_norm": 0.5670775175094604,
3235
- "learning_rate": 3.2712305025996535e-07,
3236
- "loss": 2.0898,
3237
- "step": 460
3238
- },
3239
- {
3240
- "epoch": 1.136517719568567,
3241
- "grad_norm": 0.5700893402099609,
3242
- "learning_rate": 3.266897746967071e-07,
3243
- "loss": 2.0833,
3244
- "step": 461
3245
- },
3246
- {
3247
- "epoch": 1.1389830508474577,
3248
- "grad_norm": 0.5165070295333862,
3249
- "learning_rate": 3.262564991334489e-07,
3250
- "loss": 2.1227,
3251
- "step": 462
3252
- },
3253
- {
3254
- "epoch": 1.1414483821263481,
3255
- "grad_norm": 0.5242537260055542,
3256
- "learning_rate": 3.2582322357019064e-07,
3257
- "loss": 2.0674,
3258
- "step": 463
3259
- },
3260
- {
3261
- "epoch": 1.1439137134052388,
3262
- "grad_norm": 0.5270352363586426,
3263
- "learning_rate": 3.253899480069324e-07,
3264
- "loss": 2.0815,
3265
- "step": 464
3266
- },
3267
- {
3268
- "epoch": 1.1463790446841293,
3269
- "grad_norm": 0.5376419425010681,
3270
- "learning_rate": 3.2495667244367413e-07,
3271
- "loss": 2.0699,
3272
- "step": 465
3273
- },
3274
- {
3275
- "epoch": 1.14884437596302,
3276
- "grad_norm": 0.5110929012298584,
3277
- "learning_rate": 3.245233968804159e-07,
3278
- "loss": 2.0393,
3279
- "step": 466
3280
- },
3281
- {
3282
- "epoch": 1.1513097072419107,
3283
- "grad_norm": 0.5514217019081116,
3284
- "learning_rate": 3.2409012131715767e-07,
3285
- "loss": 2.0484,
3286
- "step": 467
3287
- },
3288
- {
3289
- "epoch": 1.1537750385208012,
3290
- "grad_norm": 0.5122043490409851,
3291
- "learning_rate": 3.2365684575389947e-07,
3292
- "loss": 2.0707,
3293
- "step": 468
3294
- },
3295
- {
3296
- "epoch": 1.156240369799692,
3297
- "grad_norm": 0.5329337120056152,
3298
- "learning_rate": 3.232235701906412e-07,
3299
- "loss": 2.1176,
3300
- "step": 469
3301
- },
3302
- {
3303
- "epoch": 1.1587057010785824,
3304
- "grad_norm": 0.546440064907074,
3305
- "learning_rate": 3.22790294627383e-07,
3306
- "loss": 2.0814,
3307
- "step": 470
3308
- },
3309
- {
3310
- "epoch": 1.161171032357473,
3311
- "grad_norm": 0.5156552791595459,
3312
- "learning_rate": 3.2235701906412476e-07,
3313
- "loss": 2.0764,
3314
- "step": 471
3315
- },
3316
- {
3317
- "epoch": 1.1636363636363636,
3318
- "grad_norm": 0.5265877842903137,
3319
- "learning_rate": 3.2192374350086656e-07,
3320
- "loss": 2.0558,
3321
- "step": 472
3322
- },
3323
- {
3324
- "epoch": 1.1661016949152543,
3325
- "grad_norm": 0.5469325184822083,
3326
- "learning_rate": 3.214904679376083e-07,
3327
- "loss": 2.1109,
3328
- "step": 473
3329
- },
3330
- {
3331
- "epoch": 1.1685670261941448,
3332
- "grad_norm": 0.5088624358177185,
3333
- "learning_rate": 3.210571923743501e-07,
3334
- "loss": 2.1014,
3335
- "step": 474
3336
- },
3337
- {
3338
- "epoch": 1.1710323574730355,
3339
- "grad_norm": 0.5592899322509766,
3340
- "learning_rate": 3.2062391681109184e-07,
3341
- "loss": 2.0925,
3342
- "step": 475
3343
- },
3344
- {
3345
- "epoch": 1.173497688751926,
3346
- "grad_norm": 0.5526501536369324,
3347
- "learning_rate": 3.2019064124783364e-07,
3348
- "loss": 2.0594,
3349
- "step": 476
3350
- },
3351
- {
3352
- "epoch": 1.1759630200308167,
3353
- "grad_norm": 0.5420464277267456,
3354
- "learning_rate": 3.197573656845754e-07,
3355
- "loss": 2.1364,
3356
- "step": 477
3357
- },
3358
- {
3359
- "epoch": 1.1784283513097074,
3360
- "grad_norm": 0.53159099817276,
3361
- "learning_rate": 3.193240901213172e-07,
3362
- "loss": 2.0358,
3363
- "step": 478
3364
- },
3365
- {
3366
- "epoch": 1.1808936825885978,
3367
- "grad_norm": 0.5268188118934631,
3368
- "learning_rate": 3.188908145580589e-07,
3369
- "loss": 2.0514,
3370
- "step": 479
3371
- },
3372
- {
3373
- "epoch": 1.1833590138674885,
3374
- "grad_norm": 0.5499210953712463,
3375
- "learning_rate": 3.184575389948007e-07,
3376
- "loss": 2.0831,
3377
- "step": 480
3378
- },
3379
- {
3380
- "epoch": 1.185824345146379,
3381
- "grad_norm": 0.5099960565567017,
3382
- "learning_rate": 3.180242634315424e-07,
3383
- "loss": 2.0724,
3384
- "step": 481
3385
- },
3386
- {
3387
- "epoch": 1.1882896764252697,
3388
- "grad_norm": 0.5238215327262878,
3389
- "learning_rate": 3.175909878682842e-07,
3390
- "loss": 2.0665,
3391
- "step": 482
3392
- },
3393
- {
3394
- "epoch": 1.1907550077041602,
3395
- "grad_norm": 0.5423519611358643,
3396
- "learning_rate": 3.1715771230502596e-07,
3397
- "loss": 2.1059,
3398
- "step": 483
3399
- },
3400
- {
3401
- "epoch": 1.193220338983051,
3402
- "grad_norm": 0.5435311198234558,
3403
- "learning_rate": 3.1672443674176776e-07,
3404
- "loss": 2.1405,
3405
- "step": 484
3406
- },
3407
- {
3408
- "epoch": 1.1956856702619414,
3409
- "grad_norm": 0.5252048373222351,
3410
- "learning_rate": 3.162911611785095e-07,
3411
- "loss": 2.0534,
3412
- "step": 485
3413
- },
3414
- {
3415
- "epoch": 1.198151001540832,
3416
- "grad_norm": 0.5369661450386047,
3417
- "learning_rate": 3.158578856152513e-07,
3418
- "loss": 2.1207,
3419
- "step": 486
3420
- },
3421
- {
3422
- "epoch": 1.2006163328197226,
3423
- "grad_norm": 0.5454829931259155,
3424
- "learning_rate": 3.1542461005199305e-07,
3425
- "loss": 2.086,
3426
- "step": 487
3427
- },
3428
- {
3429
- "epoch": 1.2030816640986133,
3430
- "grad_norm": 0.5264859199523926,
3431
- "learning_rate": 3.1499133448873485e-07,
3432
- "loss": 2.0628,
3433
- "step": 488
3434
- },
3435
- {
3436
- "epoch": 1.2055469953775038,
3437
- "grad_norm": 0.5218833684921265,
3438
- "learning_rate": 3.145580589254766e-07,
3439
- "loss": 2.0481,
3440
- "step": 489
3441
- },
3442
- {
3443
- "epoch": 1.2080123266563945,
3444
- "grad_norm": 0.552793025970459,
3445
- "learning_rate": 3.141247833622184e-07,
3446
- "loss": 2.0154,
3447
- "step": 490
3448
- },
3449
- {
3450
- "epoch": 1.210477657935285,
3451
- "grad_norm": 0.5695677995681763,
3452
- "learning_rate": 3.1369150779896013e-07,
3453
- "loss": 2.0963,
3454
- "step": 491
3455
- },
3456
- {
3457
- "epoch": 1.2129429892141756,
3458
- "grad_norm": 0.5433192849159241,
3459
- "learning_rate": 3.1325823223570193e-07,
3460
- "loss": 2.1519,
3461
- "step": 492
3462
- },
3463
- {
3464
- "epoch": 1.2154083204930664,
3465
- "grad_norm": 0.5215097069740295,
3466
- "learning_rate": 3.128249566724437e-07,
3467
- "loss": 2.0963,
3468
- "step": 493
3469
- },
3470
- {
3471
- "epoch": 1.2178736517719568,
3472
- "grad_norm": 0.5091007351875305,
3473
- "learning_rate": 3.123916811091854e-07,
3474
- "loss": 2.1167,
3475
- "step": 494
3476
- },
3477
- {
3478
- "epoch": 1.2203389830508475,
3479
- "grad_norm": 0.500039279460907,
3480
- "learning_rate": 3.1195840554592717e-07,
3481
- "loss": 2.0676,
3482
- "step": 495
3483
- },
3484
- {
3485
- "epoch": 1.222804314329738,
3486
- "grad_norm": 0.550313413143158,
3487
- "learning_rate": 3.1152512998266897e-07,
3488
- "loss": 2.141,
3489
- "step": 496
3490
- },
3491
- {
3492
- "epoch": 1.2252696456086287,
3493
- "grad_norm": 0.5415179133415222,
3494
- "learning_rate": 3.110918544194107e-07,
3495
- "loss": 2.0414,
3496
- "step": 497
3497
- },
3498
- {
3499
- "epoch": 1.2277349768875192,
3500
- "grad_norm": 0.5273166298866272,
3501
- "learning_rate": 3.106585788561525e-07,
3502
- "loss": 2.0847,
3503
- "step": 498
3504
- },
3505
- {
3506
- "epoch": 1.23020030816641,
3507
- "grad_norm": 0.5227318406105042,
3508
- "learning_rate": 3.1022530329289425e-07,
3509
- "loss": 2.074,
3510
- "step": 499
3511
- },
3512
- {
3513
- "epoch": 1.2326656394453004,
3514
- "grad_norm": 0.5437159538269043,
3515
- "learning_rate": 3.0979202772963605e-07,
3516
- "loss": 2.072,
3517
- "step": 500
3518
- },
3519
- {
3520
- "epoch": 1.235130970724191,
3521
- "grad_norm": 0.5285058617591858,
3522
- "learning_rate": 3.093587521663778e-07,
3523
- "loss": 2.0242,
3524
- "step": 501
3525
- },
3526
- {
3527
- "epoch": 1.2375963020030816,
3528
- "grad_norm": 0.5310484766960144,
3529
- "learning_rate": 3.089254766031196e-07,
3530
- "loss": 2.0797,
3531
- "step": 502
3532
- },
3533
- {
3534
- "epoch": 1.2400616332819723,
3535
- "grad_norm": 0.5042904019355774,
3536
- "learning_rate": 3.0849220103986134e-07,
3537
- "loss": 2.0703,
3538
- "step": 503
3539
- },
3540
- {
3541
- "epoch": 1.242526964560863,
3542
- "grad_norm": 0.5337966084480286,
3543
- "learning_rate": 3.0805892547660314e-07,
3544
- "loss": 2.0958,
3545
- "step": 504
3546
- },
3547
- {
3548
- "epoch": 1.2449922958397535,
3549
- "grad_norm": 0.5376964211463928,
3550
- "learning_rate": 3.076256499133449e-07,
3551
- "loss": 2.0587,
3552
- "step": 505
3553
- },
3554
- {
3555
- "epoch": 1.2474576271186442,
3556
- "grad_norm": 0.5448833703994751,
3557
- "learning_rate": 3.071923743500867e-07,
3558
- "loss": 2.0371,
3559
- "step": 506
3560
- },
3561
- {
3562
- "epoch": 1.2499229583975346,
3563
- "grad_norm": 0.5545293688774109,
3564
- "learning_rate": 3.067590987868284e-07,
3565
- "loss": 2.1334,
3566
- "step": 507
3567
- },
3568
- {
3569
- "epoch": 1.2523882896764253,
3570
- "grad_norm": 0.5596605539321899,
3571
- "learning_rate": 3.063258232235702e-07,
3572
- "loss": 2.1101,
3573
- "step": 508
3574
- },
3575
- {
3576
- "epoch": 1.2548536209553158,
3577
- "grad_norm": 0.5680539011955261,
3578
- "learning_rate": 3.058925476603119e-07,
3579
- "loss": 2.1224,
3580
- "step": 509
3581
- },
3582
- {
3583
- "epoch": 1.2573189522342065,
3584
- "grad_norm": 0.5574333071708679,
3585
- "learning_rate": 3.054592720970537e-07,
3586
- "loss": 2.0774,
3587
- "step": 510
3588
- },
3589
- {
3590
- "epoch": 1.259784283513097,
3591
- "grad_norm": 0.5248346924781799,
3592
- "learning_rate": 3.0502599653379546e-07,
3593
- "loss": 2.0657,
3594
- "step": 511
3595
- },
3596
- {
3597
- "epoch": 1.2622496147919877,
3598
- "grad_norm": 0.5491170883178711,
3599
- "learning_rate": 3.0459272097053726e-07,
3600
- "loss": 2.0696,
3601
- "step": 512
3602
- },
3603
- {
3604
- "epoch": 1.2647149460708782,
3605
- "grad_norm": 0.5188306570053101,
3606
- "learning_rate": 3.04159445407279e-07,
3607
- "loss": 2.0528,
3608
- "step": 513
3609
- },
3610
- {
3611
- "epoch": 1.267180277349769,
3612
- "grad_norm": 0.5485676527023315,
3613
- "learning_rate": 3.037261698440208e-07,
3614
- "loss": 2.0722,
3615
- "step": 514
3616
- },
3617
- {
3618
- "epoch": 1.2696456086286596,
3619
- "grad_norm": 0.5163741707801819,
3620
- "learning_rate": 3.0329289428076254e-07,
3621
- "loss": 2.0316,
3622
- "step": 515
3623
- },
3624
- {
3625
- "epoch": 1.27211093990755,
3626
- "grad_norm": 0.5422912836074829,
3627
- "learning_rate": 3.0285961871750434e-07,
3628
- "loss": 2.131,
3629
- "step": 516
3630
- },
3631
- {
3632
- "epoch": 1.2745762711864406,
3633
- "grad_norm": 0.5590441823005676,
3634
- "learning_rate": 3.024263431542461e-07,
3635
- "loss": 2.136,
3636
- "step": 517
3637
- },
3638
- {
3639
- "epoch": 1.2770416024653313,
3640
- "grad_norm": 0.5339657068252563,
3641
- "learning_rate": 3.019930675909879e-07,
3642
- "loss": 2.0866,
3643
- "step": 518
3644
- },
3645
- {
3646
- "epoch": 1.279506933744222,
3647
- "grad_norm": 0.5249587893486023,
3648
- "learning_rate": 3.0155979202772963e-07,
3649
- "loss": 2.0505,
3650
- "step": 519
3651
- },
3652
- {
3653
- "epoch": 1.2819722650231125,
3654
- "grad_norm": 0.5352839827537537,
3655
- "learning_rate": 3.0112651646447143e-07,
3656
- "loss": 2.0719,
3657
- "step": 520
3658
- },
3659
- {
3660
- "epoch": 1.2844375963020032,
3661
- "grad_norm": 0.521338939666748,
3662
- "learning_rate": 3.0069324090121317e-07,
3663
- "loss": 2.1264,
3664
- "step": 521
3665
- },
3666
- {
3667
- "epoch": 1.2869029275808936,
3668
- "grad_norm": 0.5433731079101562,
3669
- "learning_rate": 3.0025996533795497e-07,
3670
- "loss": 2.1272,
3671
- "step": 522
3672
- },
3673
- {
3674
- "epoch": 1.2893682588597843,
3675
- "grad_norm": 0.5479230880737305,
3676
- "learning_rate": 2.998266897746967e-07,
3677
- "loss": 2.0996,
3678
- "step": 523
3679
- },
3680
- {
3681
- "epoch": 1.2918335901386748,
3682
- "grad_norm": 0.5389401912689209,
3683
- "learning_rate": 2.993934142114385e-07,
3684
- "loss": 2.1212,
3685
- "step": 524
3686
- },
3687
- {
3688
- "epoch": 1.2942989214175655,
3689
- "grad_norm": 0.5670454502105713,
3690
- "learning_rate": 2.989601386481802e-07,
3691
- "loss": 2.0662,
3692
- "step": 525
3693
- },
3694
- {
3695
- "epoch": 1.296764252696456,
3696
- "grad_norm": 0.5215103030204773,
3697
- "learning_rate": 2.9852686308492195e-07,
3698
- "loss": 2.0437,
3699
- "step": 526
3700
- },
3701
- {
3702
- "epoch": 1.2992295839753467,
3703
- "grad_norm": 0.5562092065811157,
3704
- "learning_rate": 2.9809358752166375e-07,
3705
- "loss": 2.0364,
3706
- "step": 527
3707
- },
3708
- {
3709
- "epoch": 1.3016949152542372,
3710
- "grad_norm": 0.5172645449638367,
3711
- "learning_rate": 2.976603119584055e-07,
3712
- "loss": 2.0302,
3713
- "step": 528
3714
- },
3715
- {
3716
- "epoch": 1.304160246533128,
3717
- "grad_norm": 0.5652945041656494,
3718
- "learning_rate": 2.972270363951473e-07,
3719
- "loss": 2.1963,
3720
- "step": 529
3721
- },
3722
- {
3723
- "epoch": 1.3066255778120186,
3724
- "grad_norm": 0.5477254986763,
3725
- "learning_rate": 2.9679376083188904e-07,
3726
- "loss": 2.0815,
3727
- "step": 530
3728
- },
3729
- {
3730
- "epoch": 1.309090909090909,
3731
- "grad_norm": 0.5283427834510803,
3732
- "learning_rate": 2.9636048526863084e-07,
3733
- "loss": 2.0962,
3734
- "step": 531
3735
- },
3736
- {
3737
- "epoch": 1.3115562403697996,
3738
- "grad_norm": 0.5537866950035095,
3739
- "learning_rate": 2.959272097053726e-07,
3740
- "loss": 2.0885,
3741
- "step": 532
3742
- },
3743
- {
3744
- "epoch": 1.3140215716486903,
3745
- "grad_norm": 0.5142833590507507,
3746
- "learning_rate": 2.954939341421144e-07,
3747
- "loss": 2.0664,
3748
- "step": 533
3749
- },
3750
- {
3751
- "epoch": 1.316486902927581,
3752
- "grad_norm": 0.5292583107948303,
3753
- "learning_rate": 2.950606585788561e-07,
3754
- "loss": 2.0065,
3755
- "step": 534
3756
- },
3757
- {
3758
- "epoch": 1.3189522342064715,
3759
- "grad_norm": 0.5204624533653259,
3760
- "learning_rate": 2.946273830155979e-07,
3761
- "loss": 2.0522,
3762
- "step": 535
3763
- },
3764
- {
3765
- "epoch": 1.3214175654853622,
3766
- "grad_norm": 0.534831702709198,
3767
- "learning_rate": 2.9419410745233967e-07,
3768
- "loss": 2.1378,
3769
- "step": 536
3770
- },
3771
- {
3772
- "epoch": 1.3238828967642526,
3773
- "grad_norm": 0.5467716455459595,
3774
- "learning_rate": 2.9376083188908146e-07,
3775
- "loss": 2.0894,
3776
- "step": 537
3777
- },
3778
- {
3779
- "epoch": 1.3263482280431433,
3780
- "grad_norm": 0.536545991897583,
3781
- "learning_rate": 2.933275563258232e-07,
3782
- "loss": 2.0737,
3783
- "step": 538
3784
- },
3785
- {
3786
- "epoch": 1.3288135593220338,
3787
- "grad_norm": 0.5458997488021851,
3788
- "learning_rate": 2.92894280762565e-07,
3789
- "loss": 2.0601,
3790
- "step": 539
3791
- },
3792
- {
3793
- "epoch": 1.3312788906009245,
3794
- "grad_norm": 0.5358441472053528,
3795
- "learning_rate": 2.924610051993067e-07,
3796
- "loss": 2.0644,
3797
- "step": 540
3798
- },
3799
- {
3800
- "epoch": 1.3337442218798152,
3801
- "grad_norm": 0.5691521763801575,
3802
- "learning_rate": 2.920277296360485e-07,
3803
- "loss": 2.1359,
3804
- "step": 541
3805
- },
3806
- {
3807
- "epoch": 1.3362095531587057,
3808
- "grad_norm": 0.5218629837036133,
3809
- "learning_rate": 2.9159445407279024e-07,
3810
- "loss": 2.058,
3811
- "step": 542
3812
- },
3813
- {
3814
- "epoch": 1.3386748844375962,
3815
- "grad_norm": 0.5232541561126709,
3816
- "learning_rate": 2.9116117850953204e-07,
3817
- "loss": 2.0103,
3818
- "step": 543
3819
- },
3820
- {
3821
- "epoch": 1.341140215716487,
3822
- "grad_norm": 0.4909007251262665,
3823
- "learning_rate": 2.907279029462738e-07,
3824
- "loss": 2.0264,
3825
- "step": 544
3826
- },
3827
- {
3828
- "epoch": 1.3436055469953776,
3829
- "grad_norm": 0.5056614875793457,
3830
- "learning_rate": 2.902946273830156e-07,
3831
- "loss": 2.0628,
3832
- "step": 545
3833
- },
3834
- {
3835
- "epoch": 1.346070878274268,
3836
- "grad_norm": 0.5265542268753052,
3837
- "learning_rate": 2.8986135181975733e-07,
3838
- "loss": 2.098,
3839
- "step": 546
3840
- },
3841
- {
3842
- "epoch": 1.3485362095531588,
3843
- "grad_norm": 0.5243731141090393,
3844
- "learning_rate": 2.894280762564991e-07,
3845
- "loss": 2.0943,
3846
- "step": 547
3847
- },
3848
- {
3849
- "epoch": 1.3510015408320493,
3850
- "grad_norm": 0.5294694304466248,
3851
- "learning_rate": 2.8899480069324087e-07,
3852
- "loss": 2.0928,
3853
- "step": 548
3854
- },
3855
- {
3856
- "epoch": 1.35346687211094,
3857
- "grad_norm": 0.5630142688751221,
3858
- "learning_rate": 2.8856152512998267e-07,
3859
- "loss": 2.0451,
3860
- "step": 549
3861
- },
3862
- {
3863
- "epoch": 1.3559322033898304,
3864
- "grad_norm": 0.517575740814209,
3865
- "learning_rate": 2.881282495667244e-07,
3866
- "loss": 2.0556,
3867
- "step": 550
3868
- },
3869
- {
3870
- "epoch": 1.3583975346687212,
3871
- "grad_norm": 0.5253962874412537,
3872
- "learning_rate": 2.876949740034662e-07,
3873
- "loss": 2.1088,
3874
- "step": 551
3875
- },
3876
- {
3877
- "epoch": 1.3608628659476116,
3878
- "grad_norm": 0.5190132260322571,
3879
- "learning_rate": 2.8726169844020796e-07,
3880
- "loss": 2.0459,
3881
- "step": 552
3882
- },
3883
- {
3884
- "epoch": 1.3633281972265023,
3885
- "grad_norm": 0.5542247891426086,
3886
- "learning_rate": 2.8682842287694976e-07,
3887
- "loss": 2.0736,
3888
- "step": 553
3889
- },
3890
- {
3891
- "epoch": 1.3657935285053928,
3892
- "grad_norm": 0.5446171164512634,
3893
- "learning_rate": 2.863951473136915e-07,
3894
- "loss": 2.0366,
3895
- "step": 554
3896
- },
3897
- {
3898
- "epoch": 1.3682588597842835,
3899
- "grad_norm": 0.5620147585868835,
3900
- "learning_rate": 2.8596187175043325e-07,
3901
- "loss": 2.0593,
3902
- "step": 555
3903
- },
3904
- {
3905
- "epoch": 1.3707241910631742,
3906
- "grad_norm": 0.5434351563453674,
3907
- "learning_rate": 2.85528596187175e-07,
3908
- "loss": 2.0697,
3909
- "step": 556
3910
- },
3911
- {
3912
- "epoch": 1.3731895223420647,
3913
- "grad_norm": 0.5291680693626404,
3914
- "learning_rate": 2.850953206239168e-07,
3915
- "loss": 2.117,
3916
- "step": 557
3917
- },
3918
- {
3919
- "epoch": 1.3756548536209552,
3920
- "grad_norm": 0.5382573008537292,
3921
- "learning_rate": 2.8466204506065853e-07,
3922
- "loss": 2.0755,
3923
- "step": 558
3924
- },
3925
- {
3926
- "epoch": 1.378120184899846,
3927
- "grad_norm": 0.5260967016220093,
3928
- "learning_rate": 2.8422876949740033e-07,
3929
- "loss": 2.0305,
3930
- "step": 559
3931
- },
3932
- {
3933
- "epoch": 1.3805855161787366,
3934
- "grad_norm": 0.5255964994430542,
3935
- "learning_rate": 2.837954939341421e-07,
3936
- "loss": 2.0419,
3937
- "step": 560
3938
- },
3939
- {
3940
- "epoch": 1.383050847457627,
3941
- "grad_norm": 0.541347086429596,
3942
- "learning_rate": 2.833622183708839e-07,
3943
- "loss": 2.0988,
3944
- "step": 561
3945
- },
3946
- {
3947
- "epoch": 1.3855161787365178,
3948
- "grad_norm": 0.5098375082015991,
3949
- "learning_rate": 2.829289428076256e-07,
3950
- "loss": 2.0677,
3951
- "step": 562
3952
- },
3953
- {
3954
- "epoch": 1.3879815100154083,
3955
- "grad_norm": 0.5496364235877991,
3956
- "learning_rate": 2.824956672443674e-07,
3957
- "loss": 2.0748,
3958
- "step": 563
3959
- },
3960
- {
3961
- "epoch": 1.390446841294299,
3962
- "grad_norm": 0.5202908515930176,
3963
- "learning_rate": 2.8206239168110916e-07,
3964
- "loss": 2.1031,
3965
- "step": 564
3966
- },
3967
- {
3968
- "epoch": 1.3929121725731894,
3969
- "grad_norm": 0.5658861398696899,
3970
- "learning_rate": 2.8162911611785096e-07,
3971
- "loss": 2.1063,
3972
- "step": 565
3973
- },
3974
- {
3975
- "epoch": 1.3953775038520801,
3976
- "grad_norm": 0.5813626646995544,
3977
- "learning_rate": 2.811958405545927e-07,
3978
- "loss": 2.0727,
3979
- "step": 566
3980
- },
3981
- {
3982
- "epoch": 1.3978428351309709,
3983
- "grad_norm": 0.5407326817512512,
3984
- "learning_rate": 2.807625649913345e-07,
3985
- "loss": 2.1281,
3986
- "step": 567
3987
- },
3988
- {
3989
- "epoch": 1.4003081664098613,
3990
- "grad_norm": 0.5366054177284241,
3991
- "learning_rate": 2.8032928942807625e-07,
3992
- "loss": 2.0793,
3993
- "step": 568
3994
- },
3995
- {
3996
- "epoch": 1.4027734976887518,
3997
- "grad_norm": 0.5454252362251282,
3998
- "learning_rate": 2.7989601386481805e-07,
3999
- "loss": 2.0573,
4000
- "step": 569
4001
- },
4002
- {
4003
- "epoch": 1.4052388289676425,
4004
- "grad_norm": 0.5705124735832214,
4005
- "learning_rate": 2.7946273830155974e-07,
4006
- "loss": 2.0594,
4007
- "step": 570
4008
- },
4009
- {
4010
- "epoch": 1.4077041602465332,
4011
- "grad_norm": 0.5513876676559448,
4012
- "learning_rate": 2.7902946273830154e-07,
4013
- "loss": 2.0789,
4014
- "step": 571
4015
- },
4016
- {
4017
- "epoch": 1.4101694915254237,
4018
- "grad_norm": 0.5202826261520386,
4019
- "learning_rate": 2.785961871750433e-07,
4020
- "loss": 2.0809,
4021
- "step": 572
4022
- },
4023
- {
4024
- "epoch": 1.4126348228043144,
4025
- "grad_norm": 0.5438618659973145,
4026
- "learning_rate": 2.781629116117851e-07,
4027
- "loss": 2.1247,
4028
- "step": 573
4029
- },
4030
- {
4031
- "epoch": 1.4151001540832049,
4032
- "grad_norm": 0.5352728962898254,
4033
- "learning_rate": 2.777296360485268e-07,
4034
- "loss": 2.0723,
4035
- "step": 574
4036
- },
4037
- {
4038
- "epoch": 1.4175654853620956,
4039
- "grad_norm": 0.5342223048210144,
4040
- "learning_rate": 2.772963604852686e-07,
4041
- "loss": 2.0088,
4042
- "step": 575
4043
- },
4044
- {
4045
- "epoch": 1.420030816640986,
4046
- "grad_norm": 0.556289553642273,
4047
- "learning_rate": 2.7686308492201037e-07,
4048
- "loss": 2.088,
4049
- "step": 576
4050
- },
4051
- {
4052
- "epoch": 1.4224961479198768,
4053
- "grad_norm": 0.5240088105201721,
4054
- "learning_rate": 2.7642980935875217e-07,
4055
- "loss": 2.05,
4056
- "step": 577
4057
- },
4058
- {
4059
- "epoch": 1.4249614791987673,
4060
- "grad_norm": 0.5449855327606201,
4061
- "learning_rate": 2.759965337954939e-07,
4062
- "loss": 2.0436,
4063
- "step": 578
4064
- },
4065
- {
4066
- "epoch": 1.427426810477658,
4067
- "grad_norm": 0.5392140746116638,
4068
- "learning_rate": 2.755632582322357e-07,
4069
- "loss": 2.0478,
4070
- "step": 579
4071
- },
4072
- {
4073
- "epoch": 1.4298921417565484,
4074
- "grad_norm": 0.5651275515556335,
4075
- "learning_rate": 2.7512998266897745e-07,
4076
- "loss": 2.0421,
4077
- "step": 580
4078
- },
4079
- {
4080
- "epoch": 1.4323574730354391,
4081
- "grad_norm": 0.5078974366188049,
4082
- "learning_rate": 2.7469670710571925e-07,
4083
- "loss": 2.0267,
4084
- "step": 581
4085
- },
4086
- {
4087
- "epoch": 1.4348228043143298,
4088
- "grad_norm": 0.5486698150634766,
4089
- "learning_rate": 2.74263431542461e-07,
4090
- "loss": 2.0553,
4091
- "step": 582
4092
- },
4093
- {
4094
- "epoch": 1.4372881355932203,
4095
- "grad_norm": 0.5565054416656494,
4096
- "learning_rate": 2.738301559792028e-07,
4097
- "loss": 2.1163,
4098
- "step": 583
4099
- },
4100
- {
4101
- "epoch": 1.4397534668721108,
4102
- "grad_norm": 0.5276550650596619,
4103
- "learning_rate": 2.7339688041594454e-07,
4104
- "loss": 2.0427,
4105
- "step": 584
4106
- },
4107
- {
4108
- "epoch": 1.4422187981510015,
4109
- "grad_norm": 0.5744592547416687,
4110
- "learning_rate": 2.729636048526863e-07,
4111
- "loss": 2.1292,
4112
- "step": 585
4113
- },
4114
- {
4115
- "epoch": 1.4446841294298922,
4116
- "grad_norm": 0.5316488146781921,
4117
- "learning_rate": 2.7253032928942803e-07,
4118
- "loss": 2.0663,
4119
- "step": 586
4120
- },
4121
- {
4122
- "epoch": 1.4471494607087827,
4123
- "grad_norm": 0.562899649143219,
4124
- "learning_rate": 2.7209705372616983e-07,
4125
- "loss": 2.1515,
4126
- "step": 587
4127
- },
4128
- {
4129
- "epoch": 1.4496147919876734,
4130
- "grad_norm": 0.5735856294631958,
4131
- "learning_rate": 2.7166377816291157e-07,
4132
- "loss": 2.0883,
4133
- "step": 588
4134
- },
4135
- {
4136
- "epoch": 1.4520801232665639,
4137
- "grad_norm": 0.5496561527252197,
4138
- "learning_rate": 2.7123050259965337e-07,
4139
- "loss": 2.069,
4140
- "step": 589
4141
- },
4142
- {
4143
- "epoch": 1.4545454545454546,
4144
- "grad_norm": 0.5434293150901794,
4145
- "learning_rate": 2.707972270363951e-07,
4146
- "loss": 2.0468,
4147
- "step": 590
4148
- },
4149
- {
4150
- "epoch": 1.457010785824345,
4151
- "grad_norm": 0.5591951012611389,
4152
- "learning_rate": 2.703639514731369e-07,
4153
- "loss": 2.0979,
4154
- "step": 591
4155
- },
4156
- {
4157
- "epoch": 1.4594761171032358,
4158
- "grad_norm": 0.556535542011261,
4159
- "learning_rate": 2.6993067590987866e-07,
4160
- "loss": 2.104,
4161
- "step": 592
4162
- },
4163
- {
4164
- "epoch": 1.4619414483821265,
4165
- "grad_norm": 0.5265719890594482,
4166
- "learning_rate": 2.6949740034662046e-07,
4167
- "loss": 2.0565,
4168
- "step": 593
4169
- },
4170
- {
4171
- "epoch": 1.464406779661017,
4172
- "grad_norm": 0.5520715117454529,
4173
- "learning_rate": 2.690641247833622e-07,
4174
- "loss": 2.1049,
4175
- "step": 594
4176
- },
4177
- {
4178
- "epoch": 1.4668721109399074,
4179
- "grad_norm": 0.5084496736526489,
4180
- "learning_rate": 2.68630849220104e-07,
4181
- "loss": 2.0027,
4182
- "step": 595
4183
- },
4184
- {
4185
- "epoch": 1.4693374422187981,
4186
- "grad_norm": 0.5238492488861084,
4187
- "learning_rate": 2.6819757365684574e-07,
4188
- "loss": 2.019,
4189
- "step": 596
4190
- },
4191
- {
4192
- "epoch": 1.4718027734976888,
4193
- "grad_norm": 0.5607398748397827,
4194
- "learning_rate": 2.6776429809358754e-07,
4195
- "loss": 2.097,
4196
- "step": 597
4197
- },
4198
- {
4199
- "epoch": 1.4742681047765793,
4200
- "grad_norm": 0.5568619966506958,
4201
- "learning_rate": 2.673310225303293e-07,
4202
- "loss": 2.0772,
4203
- "step": 598
4204
- },
4205
- {
4206
- "epoch": 1.47673343605547,
4207
- "grad_norm": 0.553952157497406,
4208
- "learning_rate": 2.668977469670711e-07,
4209
- "loss": 2.0929,
4210
- "step": 599
4211
- },
4212
- {
4213
- "epoch": 1.4791987673343605,
4214
- "grad_norm": 0.5402290225028992,
4215
- "learning_rate": 2.6646447140381283e-07,
4216
- "loss": 2.062,
4217
- "step": 600
4218
- },
4219
- {
4220
- "epoch": 1.4816640986132512,
4221
- "grad_norm": 0.5399553179740906,
4222
- "learning_rate": 2.660311958405546e-07,
4223
- "loss": 2.0673,
4224
- "step": 601
4225
- },
4226
- {
4227
- "epoch": 1.4841294298921417,
4228
- "grad_norm": 0.5668056011199951,
4229
- "learning_rate": 2.655979202772963e-07,
4230
- "loss": 2.0596,
4231
- "step": 602
4232
- },
4233
- {
4234
- "epoch": 1.4865947611710324,
4235
- "grad_norm": 0.5426621437072754,
4236
- "learning_rate": 2.651646447140381e-07,
4237
- "loss": 2.0718,
4238
- "step": 603
4239
- },
4240
- {
4241
- "epoch": 1.4890600924499229,
4242
- "grad_norm": 0.5283502340316772,
4243
- "learning_rate": 2.6473136915077986e-07,
4244
- "loss": 2.0544,
4245
- "step": 604
4246
- },
4247
- {
4248
- "epoch": 1.4915254237288136,
4249
- "grad_norm": 0.5753063559532166,
4250
- "learning_rate": 2.6429809358752166e-07,
4251
- "loss": 2.1082,
4252
- "step": 605
4253
- },
4254
- {
4255
- "epoch": 1.493990755007704,
4256
- "grad_norm": 0.5240095257759094,
4257
- "learning_rate": 2.638648180242634e-07,
4258
- "loss": 2.054,
4259
- "step": 606
4260
- },
4261
- {
4262
- "epoch": 1.4964560862865948,
4263
- "grad_norm": 0.5336340069770813,
4264
- "learning_rate": 2.634315424610052e-07,
4265
- "loss": 2.0585,
4266
- "step": 607
4267
- },
4268
- {
4269
- "epoch": 1.4989214175654855,
4270
- "grad_norm": 0.5449371337890625,
4271
- "learning_rate": 2.6299826689774695e-07,
4272
- "loss": 2.0645,
4273
- "step": 608
4274
- },
4275
- {
4276
- "epoch": 1.501386748844376,
4277
- "grad_norm": 0.553382396697998,
4278
- "learning_rate": 2.6256499133448875e-07,
4279
- "loss": 2.0974,
4280
- "step": 609
4281
- },
4282
- {
4283
- "epoch": 1.5038520801232664,
4284
- "grad_norm": 0.5372682809829712,
4285
- "learning_rate": 2.621317157712305e-07,
4286
- "loss": 2.1287,
4287
- "step": 610
4288
- },
4289
- {
4290
- "epoch": 1.5063174114021571,
4291
- "grad_norm": 0.5454490780830383,
4292
- "learning_rate": 2.616984402079723e-07,
4293
- "loss": 2.0699,
4294
- "step": 611
4295
- },
4296
- {
4297
- "epoch": 1.5087827426810478,
4298
- "grad_norm": 0.5598608255386353,
4299
- "learning_rate": 2.6126516464471404e-07,
4300
- "loss": 2.1242,
4301
- "step": 612
4302
- },
4303
- {
4304
- "epoch": 1.5112480739599383,
4305
- "grad_norm": 0.5377222895622253,
4306
- "learning_rate": 2.6083188908145583e-07,
4307
- "loss": 2.078,
4308
- "step": 613
4309
- },
4310
- {
4311
- "epoch": 1.5137134052388288,
4312
- "grad_norm": 0.549659013748169,
4313
- "learning_rate": 2.603986135181976e-07,
4314
- "loss": 2.077,
4315
- "step": 614
4316
- },
4317
- {
4318
- "epoch": 1.5161787365177197,
4319
- "grad_norm": 0.5235285758972168,
4320
- "learning_rate": 2.599653379549394e-07,
4321
- "loss": 2.0384,
4322
- "step": 615
4323
- },
4324
- {
4325
- "epoch": 1.5186440677966102,
4326
- "grad_norm": 0.5353394746780396,
4327
- "learning_rate": 2.5953206239168107e-07,
4328
- "loss": 2.084,
4329
- "step": 616
4330
- },
4331
- {
4332
- "epoch": 1.5211093990755007,
4333
- "grad_norm": 0.5501331686973572,
4334
- "learning_rate": 2.5909878682842287e-07,
4335
- "loss": 2.0653,
4336
- "step": 617
4337
- },
4338
- {
4339
- "epoch": 1.5235747303543914,
4340
- "grad_norm": 0.5552558898925781,
4341
- "learning_rate": 2.586655112651646e-07,
4342
- "loss": 2.0984,
4343
- "step": 618
4344
- },
4345
- {
4346
- "epoch": 1.526040061633282,
4347
- "grad_norm": 0.5290137529373169,
4348
- "learning_rate": 2.582322357019064e-07,
4349
- "loss": 2.101,
4350
- "step": 619
4351
- },
4352
- {
4353
- "epoch": 1.5285053929121726,
4354
- "grad_norm": 0.5513224005699158,
4355
- "learning_rate": 2.5779896013864815e-07,
4356
- "loss": 2.0511,
4357
- "step": 620
4358
- },
4359
- {
4360
- "epoch": 1.530970724191063,
4361
- "grad_norm": 0.525702714920044,
4362
- "learning_rate": 2.5736568457538995e-07,
4363
- "loss": 2.1566,
4364
- "step": 621
4365
- },
4366
- {
4367
- "epoch": 1.5334360554699538,
4368
- "grad_norm": 0.5237429738044739,
4369
- "learning_rate": 2.569324090121317e-07,
4370
- "loss": 2.0632,
4371
- "step": 622
4372
- },
4373
- {
4374
- "epoch": 1.5359013867488445,
4375
- "grad_norm": 0.5647119879722595,
4376
- "learning_rate": 2.564991334488735e-07,
4377
- "loss": 2.0885,
4378
- "step": 623
4379
- },
4380
- {
4381
- "epoch": 1.538366718027735,
4382
- "grad_norm": 0.5435053706169128,
4383
- "learning_rate": 2.5606585788561524e-07,
4384
- "loss": 2.063,
4385
- "step": 624
4386
- },
4387
- {
4388
- "epoch": 1.5408320493066254,
4389
- "grad_norm": 0.5810684561729431,
4390
- "learning_rate": 2.5563258232235704e-07,
4391
- "loss": 2.1319,
4392
- "step": 625
4393
- },
4394
- {
4395
- "epoch": 1.5432973805855161,
4396
- "grad_norm": 0.5526551008224487,
4397
- "learning_rate": 2.551993067590988e-07,
4398
- "loss": 2.1514,
4399
- "step": 626
4400
- },
4401
- {
4402
- "epoch": 1.5457627118644068,
4403
- "grad_norm": 0.5331485271453857,
4404
- "learning_rate": 2.547660311958406e-07,
4405
- "loss": 1.982,
4406
- "step": 627
4407
- },
4408
- {
4409
- "epoch": 1.5482280431432973,
4410
- "grad_norm": 0.5447163581848145,
4411
- "learning_rate": 2.5433275563258233e-07,
4412
- "loss": 2.0662,
4413
- "step": 628
4414
- },
4415
- {
4416
- "epoch": 1.550693374422188,
4417
- "grad_norm": 0.5696806311607361,
4418
- "learning_rate": 2.538994800693241e-07,
4419
- "loss": 2.0573,
4420
- "step": 629
4421
- },
4422
- {
4423
- "epoch": 1.5531587057010787,
4424
- "grad_norm": 0.5400473475456238,
4425
- "learning_rate": 2.5346620450606587e-07,
4426
- "loss": 2.0772,
4427
- "step": 630
4428
- },
4429
- {
4430
- "epoch": 1.5556240369799692,
4431
- "grad_norm": 0.543637752532959,
4432
- "learning_rate": 2.530329289428076e-07,
4433
- "loss": 2.0869,
4434
- "step": 631
4435
- },
4436
- {
4437
- "epoch": 1.5580893682588597,
4438
- "grad_norm": 0.5363517999649048,
4439
- "learning_rate": 2.5259965337954936e-07,
4440
- "loss": 2.106,
4441
- "step": 632
4442
- },
4443
- {
4444
- "epoch": 1.5605546995377504,
4445
- "grad_norm": 0.5055550932884216,
4446
- "learning_rate": 2.5216637781629116e-07,
4447
- "loss": 2.0274,
4448
- "step": 633
4449
- },
4450
- {
4451
- "epoch": 1.563020030816641,
4452
- "grad_norm": 0.5467242002487183,
4453
- "learning_rate": 2.517331022530329e-07,
4454
- "loss": 2.0927,
4455
- "step": 634
4456
- },
4457
- {
4458
- "epoch": 1.5654853620955316,
4459
- "grad_norm": 0.5361837148666382,
4460
- "learning_rate": 2.512998266897747e-07,
4461
- "loss": 2.1079,
4462
- "step": 635
4463
- },
4464
- {
4465
- "epoch": 1.567950693374422,
4466
- "grad_norm": 0.5386531949043274,
4467
- "learning_rate": 2.5086655112651645e-07,
4468
- "loss": 2.0499,
4469
- "step": 636
4470
- },
4471
- {
4472
- "epoch": 1.5704160246533128,
4473
- "grad_norm": 0.5682772994041443,
4474
- "learning_rate": 2.5043327556325824e-07,
4475
- "loss": 2.1246,
4476
- "step": 637
4477
- },
4478
- {
4479
- "epoch": 1.5728813559322035,
4480
- "grad_norm": 0.5458372831344604,
4481
- "learning_rate": 2.5e-07,
4482
- "loss": 2.1237,
4483
- "step": 638
4484
- },
4485
- {
4486
- "epoch": 1.575346687211094,
4487
- "grad_norm": 0.5346107482910156,
4488
- "learning_rate": 2.4956672443674173e-07,
4489
- "loss": 2.0898,
4490
- "step": 639
4491
- },
4492
- {
4493
- "epoch": 1.5778120184899846,
4494
- "grad_norm": 0.5748855471611023,
4495
- "learning_rate": 2.4913344887348353e-07,
4496
- "loss": 2.1254,
4497
- "step": 640
4498
- },
4499
- {
4500
- "epoch": 1.5802773497688754,
4501
- "grad_norm": 0.5122947692871094,
4502
- "learning_rate": 2.487001733102253e-07,
4503
- "loss": 2.057,
4504
- "step": 641
4505
- },
4506
- {
4507
- "epoch": 1.5827426810477658,
4508
- "grad_norm": 0.5241718888282776,
4509
- "learning_rate": 2.48266897746967e-07,
4510
- "loss": 2.0911,
4511
- "step": 642
4512
- },
4513
- {
4514
- "epoch": 1.5852080123266563,
4515
- "grad_norm": 0.5585358142852783,
4516
- "learning_rate": 2.478336221837088e-07,
4517
- "loss": 2.0615,
4518
- "step": 643
4519
- },
4520
- {
4521
- "epoch": 1.587673343605547,
4522
- "grad_norm": 0.6165497899055481,
4523
- "learning_rate": 2.4740034662045056e-07,
4524
- "loss": 2.179,
4525
- "step": 644
4526
- },
4527
- {
4528
- "epoch": 1.5901386748844377,
4529
- "grad_norm": 0.5242385864257812,
4530
- "learning_rate": 2.4696707105719236e-07,
4531
- "loss": 2.0643,
4532
- "step": 645
4533
- },
4534
- {
4535
- "epoch": 1.5926040061633282,
4536
- "grad_norm": 0.5357149243354797,
4537
- "learning_rate": 2.465337954939341e-07,
4538
- "loss": 2.0407,
4539
- "step": 646
4540
- },
4541
- {
4542
- "epoch": 1.5950693374422187,
4543
- "grad_norm": 0.525974452495575,
4544
- "learning_rate": 2.461005199306759e-07,
4545
- "loss": 2.0345,
4546
- "step": 647
4547
- },
4548
- {
4549
- "epoch": 1.5975346687211094,
4550
- "grad_norm": 0.5614073872566223,
4551
- "learning_rate": 2.4566724436741765e-07,
4552
- "loss": 2.0699,
4553
- "step": 648
4554
- },
4555
- {
4556
- "epoch": 1.6,
4557
- "grad_norm": 0.526637613773346,
4558
- "learning_rate": 2.4523396880415945e-07,
4559
- "loss": 2.0783,
4560
- "step": 649
4561
- },
4562
- {
4563
- "epoch": 1.6024653312788906,
4564
- "grad_norm": 0.5309383869171143,
4565
- "learning_rate": 2.448006932409012e-07,
4566
- "loss": 2.0257,
4567
- "step": 650
4568
- },
4569
- {
4570
- "epoch": 1.604930662557781,
4571
- "grad_norm": 0.5437653660774231,
4572
- "learning_rate": 2.4436741767764294e-07,
4573
- "loss": 2.073,
4574
- "step": 651
4575
- },
4576
- {
4577
- "epoch": 1.6073959938366718,
4578
- "grad_norm": 0.5697052478790283,
4579
- "learning_rate": 2.4393414211438474e-07,
4580
- "loss": 2.0774,
4581
- "step": 652
4582
- },
4583
- {
4584
- "epoch": 1.6098613251155625,
4585
- "grad_norm": 0.5350382328033447,
4586
- "learning_rate": 2.435008665511265e-07,
4587
- "loss": 2.0817,
4588
- "step": 653
4589
- },
4590
- {
4591
- "epoch": 1.612326656394453,
4592
- "grad_norm": 0.5554140210151672,
4593
- "learning_rate": 2.430675909878683e-07,
4594
- "loss": 2.0842,
4595
- "step": 654
4596
- },
4597
- {
4598
- "epoch": 1.6147919876733436,
4599
- "grad_norm": 0.5539381504058838,
4600
- "learning_rate": 2.4263431542461e-07,
4601
- "loss": 2.0684,
4602
- "step": 655
4603
- },
4604
- {
4605
- "epoch": 1.6172573189522343,
4606
- "grad_norm": 0.5573647618293762,
4607
- "learning_rate": 2.422010398613518e-07,
4608
- "loss": 2.0848,
4609
- "step": 656
4610
- },
4611
- {
4612
- "epoch": 1.6197226502311248,
4613
- "grad_norm": 0.5368602871894836,
4614
- "learning_rate": 2.4176776429809357e-07,
4615
- "loss": 2.0463,
4616
- "step": 657
4617
- },
4618
- {
4619
- "epoch": 1.6221879815100153,
4620
- "grad_norm": 0.5693660974502563,
4621
- "learning_rate": 2.413344887348353e-07,
4622
- "loss": 2.1469,
4623
- "step": 658
4624
- },
4625
- {
4626
- "epoch": 1.624653312788906,
4627
- "grad_norm": 0.5438942909240723,
4628
- "learning_rate": 2.409012131715771e-07,
4629
- "loss": 2.0725,
4630
- "step": 659
4631
- },
4632
- {
4633
- "epoch": 1.6271186440677967,
4634
- "grad_norm": 0.5400587916374207,
4635
- "learning_rate": 2.4046793760831886e-07,
4636
- "loss": 2.1452,
4637
- "step": 660
4638
- },
4639
- {
4640
- "epoch": 1.6295839753466872,
4641
- "grad_norm": 0.5040330290794373,
4642
- "learning_rate": 2.4003466204506065e-07,
4643
- "loss": 2.0522,
4644
- "step": 661
4645
- },
4646
- {
4647
- "epoch": 1.6320493066255777,
4648
- "grad_norm": 0.5188309550285339,
4649
- "learning_rate": 2.396013864818024e-07,
4650
- "loss": 2.0538,
4651
- "step": 662
4652
- },
4653
- {
4654
- "epoch": 1.6345146379044684,
4655
- "grad_norm": 0.5574179887771606,
4656
- "learning_rate": 2.391681109185442e-07,
4657
- "loss": 2.1391,
4658
- "step": 663
4659
- },
4660
- {
4661
- "epoch": 1.636979969183359,
4662
- "grad_norm": 0.5268549919128418,
4663
- "learning_rate": 2.3873483535528594e-07,
4664
- "loss": 2.0742,
4665
- "step": 664
4666
- },
4667
- {
4668
- "epoch": 1.6394453004622496,
4669
- "grad_norm": 0.5314316749572754,
4670
- "learning_rate": 2.3830155979202771e-07,
4671
- "loss": 2.0721,
4672
- "step": 665
4673
- },
4674
- {
4675
- "epoch": 1.6419106317411403,
4676
- "grad_norm": 0.5841025710105896,
4677
- "learning_rate": 2.3786828422876948e-07,
4678
- "loss": 2.124,
4679
- "step": 666
4680
- },
4681
- {
4682
- "epoch": 1.644375963020031,
4683
- "grad_norm": 0.5572120547294617,
4684
- "learning_rate": 2.3743500866551126e-07,
4685
- "loss": 2.0926,
4686
- "step": 667
4687
- },
4688
- {
4689
- "epoch": 1.6468412942989215,
4690
- "grad_norm": 0.5612335801124573,
4691
- "learning_rate": 2.3700173310225303e-07,
4692
- "loss": 2.07,
4693
- "step": 668
4694
- },
4695
- {
4696
- "epoch": 1.649306625577812,
4697
- "grad_norm": 0.545322835445404,
4698
- "learning_rate": 2.365684575389948e-07,
4699
- "loss": 2.0675,
4700
- "step": 669
4701
- },
4702
- {
4703
- "epoch": 1.6517719568567026,
4704
- "grad_norm": 0.5538606643676758,
4705
- "learning_rate": 2.3613518197573657e-07,
4706
- "loss": 2.0667,
4707
- "step": 670
4708
- },
4709
- {
4710
- "epoch": 1.6542372881355933,
4711
- "grad_norm": 0.5166656374931335,
4712
- "learning_rate": 2.3570190641247834e-07,
4713
- "loss": 2.0332,
4714
- "step": 671
4715
- },
4716
- {
4717
- "epoch": 1.6567026194144838,
4718
- "grad_norm": 0.5593602061271667,
4719
- "learning_rate": 2.3526863084922011e-07,
4720
- "loss": 2.1419,
4721
- "step": 672
4722
- },
4723
- {
4724
- "epoch": 1.6591679506933743,
4725
- "grad_norm": 0.5563687086105347,
4726
- "learning_rate": 2.3483535528596186e-07,
4727
- "loss": 2.0222,
4728
- "step": 673
4729
- },
4730
- {
4731
- "epoch": 1.661633281972265,
4732
- "grad_norm": 0.5505043864250183,
4733
- "learning_rate": 2.3440207972270363e-07,
4734
- "loss": 2.0949,
4735
- "step": 674
4736
- },
4737
- {
4738
- "epoch": 1.6640986132511557,
4739
- "grad_norm": 0.49905431270599365,
4740
- "learning_rate": 2.339688041594454e-07,
4741
- "loss": 2.0702,
4742
- "step": 675
4743
- },
4744
- {
4745
- "epoch": 1.6665639445300462,
4746
- "grad_norm": 0.5658527612686157,
4747
- "learning_rate": 2.3353552859618717e-07,
4748
- "loss": 2.0925,
4749
- "step": 676
4750
- },
4751
- {
4752
- "epoch": 1.6690292758089367,
4753
- "grad_norm": 0.5240568518638611,
4754
- "learning_rate": 2.3310225303292894e-07,
4755
- "loss": 2.0719,
4756
- "step": 677
4757
- },
4758
- {
4759
- "epoch": 1.6714946070878274,
4760
- "grad_norm": 0.5115277767181396,
4761
- "learning_rate": 2.3266897746967072e-07,
4762
- "loss": 2.0976,
4763
- "step": 678
4764
- },
4765
- {
4766
- "epoch": 1.673959938366718,
4767
- "grad_norm": 0.5566647052764893,
4768
- "learning_rate": 2.322357019064125e-07,
4769
- "loss": 2.0161,
4770
- "step": 679
4771
- },
4772
- {
4773
- "epoch": 1.6764252696456086,
4774
- "grad_norm": 0.5448424816131592,
4775
- "learning_rate": 2.3180242634315423e-07,
4776
- "loss": 2.1174,
4777
- "step": 680
4778
- },
4779
- {
4780
- "epoch": 1.6788906009244993,
4781
- "grad_norm": 0.5539455413818359,
4782
- "learning_rate": 2.31369150779896e-07,
4783
- "loss": 2.0338,
4784
- "step": 681
4785
- },
4786
- {
4787
- "epoch": 1.68135593220339,
4788
- "grad_norm": 0.5492231845855713,
4789
- "learning_rate": 2.3093587521663778e-07,
4790
- "loss": 2.1217,
4791
- "step": 682
4792
- },
4793
- {
4794
- "epoch": 1.6838212634822805,
4795
- "grad_norm": 0.5186160206794739,
4796
- "learning_rate": 2.3050259965337955e-07,
4797
- "loss": 2.0538,
4798
- "step": 683
4799
- },
4800
- {
4801
- "epoch": 1.686286594761171,
4802
- "grad_norm": 0.5424460768699646,
4803
- "learning_rate": 2.3006932409012132e-07,
4804
- "loss": 2.0764,
4805
- "step": 684
4806
- },
4807
- {
4808
- "epoch": 1.6887519260400616,
4809
- "grad_norm": 0.5673890709877014,
4810
- "learning_rate": 2.296360485268631e-07,
4811
- "loss": 2.1029,
4812
- "step": 685
4813
- },
4814
- {
4815
- "epoch": 1.6912172573189523,
4816
- "grad_norm": 0.5408136248588562,
4817
- "learning_rate": 2.2920277296360486e-07,
4818
- "loss": 2.1321,
4819
- "step": 686
4820
- },
4821
- {
4822
- "epoch": 1.6936825885978428,
4823
- "grad_norm": 0.5323325991630554,
4824
- "learning_rate": 2.2876949740034663e-07,
4825
- "loss": 2.1003,
4826
- "step": 687
4827
- },
4828
- {
4829
- "epoch": 1.6961479198767333,
4830
- "grad_norm": 0.5330997109413147,
4831
- "learning_rate": 2.2833622183708838e-07,
4832
- "loss": 2.0747,
4833
- "step": 688
4834
- },
4835
- {
4836
- "epoch": 1.698613251155624,
4837
- "grad_norm": 0.5504052042961121,
4838
- "learning_rate": 2.2790294627383015e-07,
4839
- "loss": 2.1067,
4840
- "step": 689
4841
- },
4842
- {
4843
- "epoch": 1.7010785824345147,
4844
- "grad_norm": 0.5399577617645264,
4845
- "learning_rate": 2.2746967071057192e-07,
4846
- "loss": 2.0032,
4847
- "step": 690
4848
- },
4849
- {
4850
- "epoch": 1.7035439137134052,
4851
- "grad_norm": 0.5435107350349426,
4852
- "learning_rate": 2.270363951473137e-07,
4853
- "loss": 2.0389,
4854
- "step": 691
4855
- },
4856
- {
4857
- "epoch": 1.706009244992296,
4858
- "grad_norm": 0.5398039221763611,
4859
- "learning_rate": 2.2660311958405546e-07,
4860
- "loss": 2.0745,
4861
- "step": 692
4862
- },
4863
- {
4864
- "epoch": 1.7084745762711866,
4865
- "grad_norm": 0.5262869000434875,
4866
- "learning_rate": 2.2616984402079724e-07,
4867
- "loss": 2.0557,
4868
- "step": 693
4869
- },
4870
- {
4871
- "epoch": 1.710939907550077,
4872
- "grad_norm": 0.5872994661331177,
4873
- "learning_rate": 2.25736568457539e-07,
4874
- "loss": 2.1679,
4875
- "step": 694
4876
- },
4877
- {
4878
- "epoch": 1.7134052388289676,
4879
- "grad_norm": 0.526508092880249,
4880
- "learning_rate": 2.2530329289428073e-07,
4881
- "loss": 2.088,
4882
- "step": 695
4883
- },
4884
- {
4885
- "epoch": 1.7158705701078583,
4886
- "grad_norm": 0.5324400663375854,
4887
- "learning_rate": 2.248700173310225e-07,
4888
- "loss": 2.0343,
4889
- "step": 696
4890
- },
4891
- {
4892
- "epoch": 1.718335901386749,
4893
- "grad_norm": 0.5571790933609009,
4894
- "learning_rate": 2.2443674176776427e-07,
4895
- "loss": 2.1619,
4896
- "step": 697
4897
- },
4898
- {
4899
- "epoch": 1.7208012326656394,
4900
- "grad_norm": 0.5518919825553894,
4901
- "learning_rate": 2.2400346620450604e-07,
4902
- "loss": 2.1053,
4903
- "step": 698
4904
- },
4905
- {
4906
- "epoch": 1.72326656394453,
4907
- "grad_norm": 0.5263517498970032,
4908
- "learning_rate": 2.235701906412478e-07,
4909
- "loss": 2.1048,
4910
- "step": 699
4911
- },
4912
- {
4913
- "epoch": 1.7257318952234206,
4914
- "grad_norm": 0.5402522087097168,
4915
- "learning_rate": 2.2313691507798958e-07,
4916
- "loss": 2.1042,
4917
- "step": 700
4918
- },
4919
- {
4920
- "epoch": 1.7281972265023113,
4921
- "grad_norm": 0.527553141117096,
4922
- "learning_rate": 2.2270363951473135e-07,
4923
- "loss": 2.0133,
4924
- "step": 701
4925
- },
4926
- {
4927
- "epoch": 1.7306625577812018,
4928
- "grad_norm": 0.5525938868522644,
4929
- "learning_rate": 2.2227036395147313e-07,
4930
- "loss": 2.0717,
4931
- "step": 702
4932
- },
4933
- {
4934
- "epoch": 1.7331278890600923,
4935
- "grad_norm": 0.49240729212760925,
4936
- "learning_rate": 2.2183708838821487e-07,
4937
- "loss": 2.0334,
4938
- "step": 703
4939
- },
4940
- {
4941
- "epoch": 1.735593220338983,
4942
- "grad_norm": 0.5105845928192139,
4943
- "learning_rate": 2.2140381282495664e-07,
4944
- "loss": 2.0768,
4945
- "step": 704
4946
- },
4947
- {
4948
- "epoch": 1.7380585516178737,
4949
- "grad_norm": 0.5725539326667786,
4950
- "learning_rate": 2.2097053726169841e-07,
4951
- "loss": 2.0955,
4952
- "step": 705
4953
- },
4954
- {
4955
- "epoch": 1.7405238828967642,
4956
- "grad_norm": 0.5486234426498413,
4957
- "learning_rate": 2.2053726169844019e-07,
4958
- "loss": 2.0214,
4959
- "step": 706
4960
- },
4961
- {
4962
- "epoch": 1.7429892141756549,
4963
- "grad_norm": 0.5501362681388855,
4964
- "learning_rate": 2.2010398613518196e-07,
4965
- "loss": 2.1141,
4966
- "step": 707
4967
- },
4968
- {
4969
- "epoch": 1.7454545454545456,
4970
- "grad_norm": 0.543329656124115,
4971
- "learning_rate": 2.1967071057192373e-07,
4972
- "loss": 2.0873,
4973
- "step": 708
4974
- },
4975
- {
4976
- "epoch": 1.747919876733436,
4977
- "grad_norm": 0.5453404188156128,
4978
- "learning_rate": 2.192374350086655e-07,
4979
- "loss": 2.1165,
4980
- "step": 709
4981
- },
4982
- {
4983
- "epoch": 1.7503852080123266,
4984
- "grad_norm": 0.5702621936798096,
4985
- "learning_rate": 2.1880415944540727e-07,
4986
- "loss": 2.1072,
4987
- "step": 710
4988
- },
4989
- {
4990
- "epoch": 1.7528505392912173,
4991
- "grad_norm": 0.5359793305397034,
4992
- "learning_rate": 2.1837088388214902e-07,
4993
- "loss": 2.0051,
4994
- "step": 711
4995
- },
4996
- {
4997
- "epoch": 1.755315870570108,
4998
- "grad_norm": 0.5496872663497925,
4999
- "learning_rate": 2.179376083188908e-07,
5000
- "loss": 2.0871,
5001
- "step": 712
5002
- },
5003
- {
5004
- "epoch": 1.7577812018489984,
5005
- "grad_norm": 0.5678308606147766,
5006
- "learning_rate": 2.1750433275563256e-07,
5007
- "loss": 2.0548,
5008
- "step": 713
5009
- },
5010
- {
5011
- "epoch": 1.760246533127889,
5012
- "grad_norm": 0.546625554561615,
5013
- "learning_rate": 2.1707105719237433e-07,
5014
- "loss": 2.1175,
5015
- "step": 714
5016
- },
5017
- {
5018
- "epoch": 1.7627118644067796,
5019
- "grad_norm": 0.5344644784927368,
5020
- "learning_rate": 2.166377816291161e-07,
5021
- "loss": 2.0468,
5022
- "step": 715
5023
- },
5024
- {
5025
- "epoch": 1.7651771956856703,
5026
- "grad_norm": 0.5369846820831299,
5027
- "learning_rate": 2.1620450606585787e-07,
5028
- "loss": 2.0596,
5029
- "step": 716
5030
- },
5031
- {
5032
- "epoch": 1.7676425269645608,
5033
- "grad_norm": 0.5267930626869202,
5034
- "learning_rate": 2.1577123050259965e-07,
5035
- "loss": 2.0495,
5036
- "step": 717
5037
- },
5038
- {
5039
- "epoch": 1.7701078582434515,
5040
- "grad_norm": 0.5174152255058289,
5041
- "learning_rate": 2.153379549393414e-07,
5042
- "loss": 2.0451,
5043
- "step": 718
5044
- },
5045
- {
5046
- "epoch": 1.7725731895223422,
5047
- "grad_norm": 0.5533501505851746,
5048
- "learning_rate": 2.1490467937608316e-07,
5049
- "loss": 2.0884,
5050
- "step": 719
5051
- },
5052
- {
5053
- "epoch": 1.7750385208012327,
5054
- "grad_norm": 0.5463818907737732,
5055
- "learning_rate": 2.1447140381282493e-07,
5056
- "loss": 2.1242,
5057
- "step": 720
5058
- },
5059
- {
5060
- "epoch": 1.7775038520801232,
5061
- "grad_norm": 0.5650577545166016,
5062
- "learning_rate": 2.140381282495667e-07,
5063
- "loss": 2.0692,
5064
- "step": 721
5065
- },
5066
- {
5067
- "epoch": 1.7799691833590139,
5068
- "grad_norm": 0.55766361951828,
5069
- "learning_rate": 2.1360485268630848e-07,
5070
- "loss": 2.0454,
5071
- "step": 722
5072
- },
5073
- {
5074
- "epoch": 1.7824345146379046,
5075
- "grad_norm": 0.5192793607711792,
5076
- "learning_rate": 2.1317157712305025e-07,
5077
- "loss": 2.0684,
5078
- "step": 723
5079
- },
5080
- {
5081
- "epoch": 1.784899845916795,
5082
- "grad_norm": 0.5581333041191101,
5083
- "learning_rate": 2.1273830155979202e-07,
5084
- "loss": 2.0862,
5085
- "step": 724
5086
- },
5087
- {
5088
- "epoch": 1.7873651771956856,
5089
- "grad_norm": 0.5046393275260925,
5090
- "learning_rate": 2.123050259965338e-07,
5091
- "loss": 2.0813,
5092
- "step": 725
5093
- },
5094
- {
5095
- "epoch": 1.7898305084745763,
5096
- "grad_norm": 0.537695586681366,
5097
- "learning_rate": 2.1187175043327554e-07,
5098
- "loss": 2.0809,
5099
- "step": 726
5100
- },
5101
- {
5102
- "epoch": 1.792295839753467,
5103
- "grad_norm": 0.5130316615104675,
5104
- "learning_rate": 2.114384748700173e-07,
5105
- "loss": 2.0937,
5106
- "step": 727
5107
- },
5108
- {
5109
- "epoch": 1.7947611710323574,
5110
- "grad_norm": 0.535754382610321,
5111
- "learning_rate": 2.1100519930675908e-07,
5112
- "loss": 2.0515,
5113
- "step": 728
5114
- },
5115
- {
5116
- "epoch": 1.797226502311248,
5117
- "grad_norm": 0.5362329483032227,
5118
- "learning_rate": 2.1057192374350085e-07,
5119
- "loss": 2.0982,
5120
- "step": 729
5121
- },
5122
- {
5123
- "epoch": 1.7996918335901386,
5124
- "grad_norm": 0.4987218379974365,
5125
- "learning_rate": 2.1013864818024262e-07,
5126
- "loss": 1.9768,
5127
- "step": 730
5128
- },
5129
- {
5130
- "epoch": 1.8021571648690293,
5131
- "grad_norm": 0.5529645085334778,
5132
- "learning_rate": 2.097053726169844e-07,
5133
- "loss": 2.0765,
5134
- "step": 731
5135
- },
5136
- {
5137
- "epoch": 1.8046224961479198,
5138
- "grad_norm": 0.5982069373130798,
5139
- "learning_rate": 2.0927209705372617e-07,
5140
- "loss": 2.1271,
5141
- "step": 732
5142
- },
5143
- {
5144
- "epoch": 1.8070878274268105,
5145
- "grad_norm": 0.5242738723754883,
5146
- "learning_rate": 2.088388214904679e-07,
5147
- "loss": 2.0285,
5148
- "step": 733
5149
- },
5150
- {
5151
- "epoch": 1.8095531587057012,
5152
- "grad_norm": 0.522243857383728,
5153
- "learning_rate": 2.0840554592720968e-07,
5154
- "loss": 2.0701,
5155
- "step": 734
5156
- },
5157
- {
5158
- "epoch": 1.8120184899845917,
5159
- "grad_norm": 0.5549618601799011,
5160
- "learning_rate": 2.0797227036395145e-07,
5161
- "loss": 2.0853,
5162
- "step": 735
5163
- },
5164
- {
5165
- "epoch": 1.8144838212634822,
5166
- "grad_norm": 0.5378853678703308,
5167
- "learning_rate": 2.0753899480069323e-07,
5168
- "loss": 2.129,
5169
- "step": 736
5170
- },
5171
- {
5172
- "epoch": 1.8169491525423729,
5173
- "grad_norm": 0.5620313286781311,
5174
- "learning_rate": 2.07105719237435e-07,
5175
- "loss": 2.0955,
5176
- "step": 737
5177
- },
5178
- {
5179
- "epoch": 1.8194144838212636,
5180
- "grad_norm": 0.5175241231918335,
5181
- "learning_rate": 2.0667244367417677e-07,
5182
- "loss": 2.0136,
5183
- "step": 738
5184
- },
5185
- {
5186
- "epoch": 1.821879815100154,
5187
- "grad_norm": 0.5567770600318909,
5188
- "learning_rate": 2.0623916811091854e-07,
5189
- "loss": 2.0785,
5190
- "step": 739
5191
- },
5192
- {
5193
- "epoch": 1.8243451463790445,
5194
- "grad_norm": 0.5436593294143677,
5195
- "learning_rate": 2.058058925476603e-07,
5196
- "loss": 2.0805,
5197
- "step": 740
5198
- },
5199
- {
5200
- "epoch": 1.8268104776579352,
5201
- "grad_norm": 0.5625580549240112,
5202
- "learning_rate": 2.0537261698440206e-07,
5203
- "loss": 2.0666,
5204
- "step": 741
5205
- },
5206
- {
5207
- "epoch": 1.829275808936826,
5208
- "grad_norm": 0.5570465922355652,
5209
- "learning_rate": 2.0493934142114383e-07,
5210
- "loss": 2.0822,
5211
- "step": 742
5212
- },
5213
- {
5214
- "epoch": 1.8317411402157164,
5215
- "grad_norm": 0.5406922698020935,
5216
- "learning_rate": 2.045060658578856e-07,
5217
- "loss": 2.08,
5218
- "step": 743
5219
- },
5220
- {
5221
- "epoch": 1.8342064714946071,
5222
- "grad_norm": 0.5050183534622192,
5223
- "learning_rate": 2.0407279029462737e-07,
5224
- "loss": 2.0316,
5225
- "step": 744
5226
- },
5227
- {
5228
- "epoch": 1.8366718027734978,
5229
- "grad_norm": 0.5195122361183167,
5230
- "learning_rate": 2.0363951473136914e-07,
5231
- "loss": 2.0036,
5232
- "step": 745
5233
- },
5234
- {
5235
- "epoch": 1.8391371340523883,
5236
- "grad_norm": 0.5114386677742004,
5237
- "learning_rate": 2.0320623916811091e-07,
5238
- "loss": 2.0347,
5239
- "step": 746
5240
- },
5241
- {
5242
- "epoch": 1.8416024653312788,
5243
- "grad_norm": 0.556669294834137,
5244
- "learning_rate": 2.0277296360485269e-07,
5245
- "loss": 2.0139,
5246
- "step": 747
5247
- },
5248
- {
5249
- "epoch": 1.8440677966101695,
5250
- "grad_norm": 0.5159513354301453,
5251
- "learning_rate": 2.0233968804159446e-07,
5252
- "loss": 2.0301,
5253
- "step": 748
5254
- },
5255
- {
5256
- "epoch": 1.8465331278890602,
5257
- "grad_norm": 0.5236509442329407,
5258
- "learning_rate": 2.019064124783362e-07,
5259
- "loss": 1.995,
5260
- "step": 749
5261
- },
5262
- {
5263
- "epoch": 1.8489984591679507,
5264
- "grad_norm": 0.5232506990432739,
5265
- "learning_rate": 2.0147313691507797e-07,
5266
- "loss": 1.9718,
5267
- "step": 750
5268
- },
5269
- {
5270
- "epoch": 1.8514637904468412,
5271
- "grad_norm": 0.5529718399047852,
5272
- "learning_rate": 2.0103986135181974e-07,
5273
- "loss": 2.0509,
5274
- "step": 751
5275
- },
5276
- {
5277
- "epoch": 1.8539291217257319,
5278
- "grad_norm": 0.5564405918121338,
5279
- "learning_rate": 2.0060658578856152e-07,
5280
- "loss": 2.1508,
5281
- "step": 752
5282
- },
5283
- {
5284
- "epoch": 1.8563944530046226,
5285
- "grad_norm": 0.5209116339683533,
5286
- "learning_rate": 2.001733102253033e-07,
5287
- "loss": 2.0108,
5288
- "step": 753
5289
- },
5290
- {
5291
- "epoch": 1.858859784283513,
5292
- "grad_norm": 0.5505017638206482,
5293
- "learning_rate": 1.9974003466204506e-07,
5294
- "loss": 2.0589,
5295
- "step": 754
5296
- },
5297
- {
5298
- "epoch": 1.8613251155624035,
5299
- "grad_norm": 0.5664639472961426,
5300
- "learning_rate": 1.9930675909878683e-07,
5301
- "loss": 2.1018,
5302
- "step": 755
5303
- },
5304
- {
5305
- "epoch": 1.8637904468412942,
5306
- "grad_norm": 0.5276853442192078,
5307
- "learning_rate": 1.9887348353552858e-07,
5308
- "loss": 2.095,
5309
- "step": 756
5310
- },
5311
- {
5312
- "epoch": 1.866255778120185,
5313
- "grad_norm": 0.5520254373550415,
5314
- "learning_rate": 1.9844020797227035e-07,
5315
- "loss": 2.1249,
5316
- "step": 757
5317
- },
5318
- {
5319
- "epoch": 1.8687211093990754,
5320
- "grad_norm": 0.5317251086235046,
5321
- "learning_rate": 1.9800693240901212e-07,
5322
- "loss": 2.0812,
5323
- "step": 758
5324
- },
5325
- {
5326
- "epoch": 1.8711864406779661,
5327
- "grad_norm": 0.5425392985343933,
5328
- "learning_rate": 1.975736568457539e-07,
5329
- "loss": 2.0785,
5330
- "step": 759
5331
- },
5332
- {
5333
- "epoch": 1.8736517719568568,
5334
- "grad_norm": 0.5492266416549683,
5335
- "learning_rate": 1.9714038128249566e-07,
5336
- "loss": 2.0745,
5337
- "step": 760
5338
- },
5339
- {
5340
- "epoch": 1.8761171032357473,
5341
- "grad_norm": 0.5728384256362915,
5342
- "learning_rate": 1.9670710571923743e-07,
5343
- "loss": 2.0847,
5344
- "step": 761
5345
- },
5346
- {
5347
- "epoch": 1.8785824345146378,
5348
- "grad_norm": 0.5570703744888306,
5349
- "learning_rate": 1.962738301559792e-07,
5350
- "loss": 2.0432,
5351
- "step": 762
5352
- },
5353
- {
5354
- "epoch": 1.8810477657935285,
5355
- "grad_norm": 0.5224375128746033,
5356
- "learning_rate": 1.9584055459272098e-07,
5357
- "loss": 2.0734,
5358
- "step": 763
5359
- },
5360
- {
5361
- "epoch": 1.8835130970724192,
5362
- "grad_norm": 0.5276843905448914,
5363
- "learning_rate": 1.9540727902946272e-07,
5364
- "loss": 2.0405,
5365
- "step": 764
5366
- },
5367
- {
5368
- "epoch": 1.8859784283513097,
5369
- "grad_norm": 0.521706223487854,
5370
- "learning_rate": 1.949740034662045e-07,
5371
- "loss": 2.0807,
5372
- "step": 765
5373
- },
5374
- {
5375
- "epoch": 1.8884437596302002,
5376
- "grad_norm": 0.5321924090385437,
5377
- "learning_rate": 1.9454072790294626e-07,
5378
- "loss": 1.9975,
5379
- "step": 766
5380
- },
5381
- {
5382
- "epoch": 1.8909090909090909,
5383
- "grad_norm": 0.5504716038703918,
5384
- "learning_rate": 1.9410745233968804e-07,
5385
- "loss": 2.1148,
5386
- "step": 767
5387
- },
5388
- {
5389
- "epoch": 1.8933744221879816,
5390
- "grad_norm": 0.5735941529273987,
5391
- "learning_rate": 1.936741767764298e-07,
5392
- "loss": 2.0855,
5393
- "step": 768
5394
- },
5395
- {
5396
- "epoch": 1.895839753466872,
5397
- "grad_norm": 0.5594768524169922,
5398
- "learning_rate": 1.9324090121317158e-07,
5399
- "loss": 2.0675,
5400
- "step": 769
5401
- },
5402
- {
5403
- "epoch": 1.8983050847457628,
5404
- "grad_norm": 0.5534265041351318,
5405
- "learning_rate": 1.9280762564991335e-07,
5406
- "loss": 2.059,
5407
- "step": 770
5408
- },
5409
- {
5410
- "epoch": 1.9007704160246535,
5411
- "grad_norm": 0.5681132674217224,
5412
- "learning_rate": 1.923743500866551e-07,
5413
- "loss": 2.1262,
5414
- "step": 771
5415
- },
5416
- {
5417
- "epoch": 1.903235747303544,
5418
- "grad_norm": 0.52858567237854,
5419
- "learning_rate": 1.9194107452339687e-07,
5420
- "loss": 2.1065,
5421
- "step": 772
5422
- },
5423
- {
5424
- "epoch": 1.9057010785824344,
5425
- "grad_norm": 0.5450841784477234,
5426
- "learning_rate": 1.9150779896013864e-07,
5427
- "loss": 2.0728,
5428
- "step": 773
5429
- },
5430
- {
5431
- "epoch": 1.9081664098613251,
5432
- "grad_norm": 0.558519184589386,
5433
- "learning_rate": 1.910745233968804e-07,
5434
- "loss": 2.0356,
5435
- "step": 774
5436
- },
5437
- {
5438
- "epoch": 1.9106317411402158,
5439
- "grad_norm": 0.5373818874359131,
5440
- "learning_rate": 1.9064124783362218e-07,
5441
- "loss": 2.1255,
5442
- "step": 775
5443
- },
5444
- {
5445
- "epoch": 1.9130970724191063,
5446
- "grad_norm": 0.5443577170372009,
5447
- "learning_rate": 1.9020797227036395e-07,
5448
- "loss": 2.0232,
5449
- "step": 776
5450
- },
5451
- {
5452
- "epoch": 1.9155624036979968,
5453
- "grad_norm": 0.5216531157493591,
5454
- "learning_rate": 1.8977469670710572e-07,
5455
- "loss": 2.091,
5456
- "step": 777
5457
- },
5458
- {
5459
- "epoch": 1.9180277349768875,
5460
- "grad_norm": 0.5133727788925171,
5461
- "learning_rate": 1.893414211438475e-07,
5462
- "loss": 2.0208,
5463
- "step": 778
5464
- },
5465
- {
5466
- "epoch": 1.9204930662557782,
5467
- "grad_norm": 0.5339122414588928,
5468
- "learning_rate": 1.8890814558058924e-07,
5469
- "loss": 2.085,
5470
- "step": 779
5471
- },
5472
- {
5473
- "epoch": 1.9229583975346687,
5474
- "grad_norm": 0.529255211353302,
5475
- "learning_rate": 1.88474870017331e-07,
5476
- "loss": 2.0794,
5477
- "step": 780
5478
- },
5479
- {
5480
- "epoch": 1.9254237288135592,
5481
- "grad_norm": 0.54831463098526,
5482
- "learning_rate": 1.8804159445407278e-07,
5483
- "loss": 2.1017,
5484
- "step": 781
5485
- },
5486
- {
5487
- "epoch": 1.9278890600924499,
5488
- "grad_norm": 0.5309552550315857,
5489
- "learning_rate": 1.8760831889081456e-07,
5490
- "loss": 2.1126,
5491
- "step": 782
5492
- },
5493
- {
5494
- "epoch": 1.9303543913713406,
5495
- "grad_norm": 0.5368801355361938,
5496
- "learning_rate": 1.8717504332755633e-07,
5497
- "loss": 2.0153,
5498
- "step": 783
5499
- },
5500
- {
5501
- "epoch": 1.932819722650231,
5502
- "grad_norm": 0.5303763747215271,
5503
- "learning_rate": 1.867417677642981e-07,
5504
- "loss": 2.0798,
5505
- "step": 784
5506
- },
5507
- {
5508
- "epoch": 1.9352850539291218,
5509
- "grad_norm": 0.5320292115211487,
5510
- "learning_rate": 1.8630849220103987e-07,
5511
- "loss": 2.0749,
5512
- "step": 785
5513
- },
5514
- {
5515
- "epoch": 1.9377503852080125,
5516
- "grad_norm": 0.5434033274650574,
5517
- "learning_rate": 1.8587521663778164e-07,
5518
- "loss": 1.9961,
5519
- "step": 786
5520
- },
5521
- {
5522
- "epoch": 1.940215716486903,
5523
- "grad_norm": 0.5612481236457825,
5524
- "learning_rate": 1.8544194107452339e-07,
5525
- "loss": 2.0702,
5526
- "step": 787
5527
- },
5528
- {
5529
- "epoch": 1.9426810477657934,
5530
- "grad_norm": 0.5725658535957336,
5531
- "learning_rate": 1.8500866551126516e-07,
5532
- "loss": 2.106,
5533
- "step": 788
5534
- },
5535
- {
5536
- "epoch": 1.9451463790446841,
5537
- "grad_norm": 0.5459932088851929,
5538
- "learning_rate": 1.8457538994800693e-07,
5539
- "loss": 2.1029,
5540
- "step": 789
5541
- },
5542
- {
5543
- "epoch": 1.9476117103235748,
5544
- "grad_norm": 0.5336212515830994,
5545
- "learning_rate": 1.841421143847487e-07,
5546
- "loss": 2.1044,
5547
- "step": 790
5548
- },
5549
- {
5550
- "epoch": 1.9500770416024653,
5551
- "grad_norm": 0.5421571135520935,
5552
- "learning_rate": 1.8370883882149047e-07,
5553
- "loss": 2.111,
5554
- "step": 791
5555
- },
5556
- {
5557
- "epoch": 1.9525423728813558,
5558
- "grad_norm": 0.5272592306137085,
5559
- "learning_rate": 1.8327556325823224e-07,
5560
- "loss": 2.0597,
5561
- "step": 792
5562
- },
5563
- {
5564
- "epoch": 1.9550077041602465,
5565
- "grad_norm": 0.5724270939826965,
5566
- "learning_rate": 1.8284228769497402e-07,
5567
- "loss": 2.1377,
5568
- "step": 793
5569
- },
5570
- {
5571
- "epoch": 1.9574730354391372,
5572
- "grad_norm": 0.544668972492218,
5573
- "learning_rate": 1.8240901213171576e-07,
5574
- "loss": 2.0321,
5575
- "step": 794
5576
- },
5577
- {
5578
- "epoch": 1.9599383667180277,
5579
- "grad_norm": 0.5739486217498779,
5580
- "learning_rate": 1.8197573656845753e-07,
5581
- "loss": 2.1109,
5582
- "step": 795
5583
- },
5584
- {
5585
- "epoch": 1.9624036979969184,
5586
- "grad_norm": 0.5566874742507935,
5587
- "learning_rate": 1.815424610051993e-07,
5588
- "loss": 2.0582,
5589
- "step": 796
5590
- },
5591
- {
5592
- "epoch": 1.964869029275809,
5593
- "grad_norm": 0.5381664633750916,
5594
- "learning_rate": 1.8110918544194107e-07,
5595
- "loss": 2.0734,
5596
- "step": 797
5597
- },
5598
- {
5599
- "epoch": 1.9673343605546996,
5600
- "grad_norm": 0.5530892014503479,
5601
- "learning_rate": 1.8067590987868285e-07,
5602
- "loss": 2.0903,
5603
- "step": 798
5604
- },
5605
- {
5606
- "epoch": 1.96979969183359,
5607
- "grad_norm": 0.5485751628875732,
5608
- "learning_rate": 1.8024263431542462e-07,
5609
- "loss": 2.0376,
5610
- "step": 799
5611
- },
5612
- {
5613
- "epoch": 1.9722650231124808,
5614
- "grad_norm": 0.5459527373313904,
5615
- "learning_rate": 1.798093587521664e-07,
5616
- "loss": 2.0328,
5617
- "step": 800
5618
- },
5619
- {
5620
- "epoch": 1.9747303543913715,
5621
- "grad_norm": 0.5666062235832214,
5622
- "learning_rate": 1.7937608318890816e-07,
5623
- "loss": 2.0966,
5624
- "step": 801
5625
- },
5626
- {
5627
- "epoch": 1.977195685670262,
5628
- "grad_norm": 0.5256234407424927,
5629
- "learning_rate": 1.789428076256499e-07,
5630
- "loss": 2.0458,
5631
- "step": 802
5632
- },
5633
- {
5634
- "epoch": 1.9796610169491524,
5635
- "grad_norm": 0.567815899848938,
5636
- "learning_rate": 1.7850953206239168e-07,
5637
- "loss": 2.0793,
5638
- "step": 803
5639
- },
5640
- {
5641
- "epoch": 1.9821263482280431,
5642
- "grad_norm": 0.5290260910987854,
5643
- "learning_rate": 1.7807625649913345e-07,
5644
- "loss": 2.0759,
5645
- "step": 804
5646
- },
5647
- {
5648
- "epoch": 1.9845916795069338,
5649
- "grad_norm": 0.5459809303283691,
5650
- "learning_rate": 1.7764298093587522e-07,
5651
- "loss": 2.0634,
5652
- "step": 805
5653
- },
5654
- {
5655
- "epoch": 1.9870570107858243,
5656
- "grad_norm": 0.5483869910240173,
5657
- "learning_rate": 1.77209705372617e-07,
5658
- "loss": 2.1077,
5659
- "step": 806
5660
- },
5661
- {
5662
- "epoch": 1.9895223420647148,
5663
- "grad_norm": 0.5340948700904846,
5664
- "learning_rate": 1.7677642980935876e-07,
5665
- "loss": 2.0584,
5666
- "step": 807
5667
- },
5668
- {
5669
- "epoch": 1.9919876733436055,
5670
- "grad_norm": 0.5514506101608276,
5671
- "learning_rate": 1.7634315424610053e-07,
5672
- "loss": 2.1299,
5673
- "step": 808
5674
- },
5675
- {
5676
- "epoch": 1.9944530046224962,
5677
- "grad_norm": 0.5255659222602844,
5678
- "learning_rate": 1.7590987868284225e-07,
5679
- "loss": 2.0081,
5680
- "step": 809
5681
- },
5682
- {
5683
- "epoch": 1.9969183359013867,
5684
- "grad_norm": 0.5237001776695251,
5685
- "learning_rate": 1.7547660311958402e-07,
5686
- "loss": 1.9692,
5687
- "step": 810
5688
- },
5689
- {
5690
- "epoch": 1.9993836671802774,
5691
- "grad_norm": 0.51847243309021,
5692
- "learning_rate": 1.750433275563258e-07,
5693
- "loss": 2.0306,
5694
- "step": 811
5695
- },
5696
- {
5697
- "epoch": 1.9993836671802774,
5698
- "eval_loss": 2.072087287902832,
5699
- "eval_runtime": 399.1878,
5700
- "eval_samples_per_second": 1.233,
5701
- "eval_steps_per_second": 0.308,
5702
- "step": 811
5703
  }
5704
  ],
5705
  "logging_steps": 1,
@@ -5719,7 +2869,7 @@
5719
  "attributes": {}
5720
  }
5721
  },
5722
- "total_flos": 2.3947626026442424e+18,
5723
  "train_batch_size": 2,
5724
  "trial_name": null,
5725
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.093395709991455,
3
+ "best_model_checkpoint": "/home/sunggeunan/data/ICL/outputs/lora/SKIML-ICL_mrqa_nq_v3/Meta-Llama-3-8B-Instruct-unanswerable-3Q-0U-0C-qa_first/checkpoint-405",
4
+ "epoch": 0.9984591679506933,
5
  "eval_steps": 500,
6
+ "global_step": 405,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2850
  "eval_samples_per_second": 1.234,
2851
  "eval_steps_per_second": 0.308,
2852
  "step": 405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2853
  }
2854
  ],
2855
  "logging_steps": 1,
 
2869
  "attributes": {}
2870
  }
2871
  },
2872
+ "total_flos": 1.1973813013221212e+18,
2873
  "train_batch_size": 2,
2874
  "trial_name": null,
2875
  "trial_params": null