Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

data/model.safetensors +1 -1
data/optimizer.pt +1 -1
data/rng_state.pth +1 -1
data/scheduler.pt +1 -1
data/trainer_state.json +703 -3
data/training_args.bin +1 -1

data/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63b42d44a888b25f5d24a43face1625057aff889c9dc9de32c96325199c72b34
 size 576008736

 version https://git-lfs.github.com/spec/v1
+oid sha256:502786fd8a726e2156aa16f8cdb0a508eaeff2e6b7935f2a126e5c7fb3fe4875
 size 576008736

data/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50251e5d562ef4b406f69bd0d31673eac817ca46891615696895b53477f2aff7
 size 1152256984

 version https://git-lfs.github.com/spec/v1
+oid sha256:31920983e209ab4ab448d37797fc73ec6f5a2c677beb1998c0c67cde4d40d85a
 size 1152256984

data/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:239b223316f60f2db39edea650469bb96cfcc95853eb15452bbda5602d3d72c9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c31ec632e18c6039ab923941c5b9a34579988b2eae98f203c7491579abcdc560
 size 14244

data/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45a89c93ff6af951697635fce257c751fcc77d62d48d97d624de2afefbf7f1db
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc76b89758f0a3c24f4601172ddc998440db62e30dfdfe43a93a78b9b974a013
 size 1064

data/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.026289243999236638,
   "eval_steps": 500,
-  "global_step": 13500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6307,6 +6307,706 @@
       "learning_rate": 2.9948870643768915e-05,
       "loss": 1.7891,
       "step": 13500
     }
   ],
   "logging_steps": 15,
@@ -6326,7 +7026,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.14551425751552e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.02921027111026293,
   "eval_steps": 500,
+  "global_step": 15000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.9948870643768915e-05,
       "loss": 1.7891,
       "step": 13500
+    },
+    {
+      "epoch": 0.026318454270346903,
+      "grad_norm": 5.705179214477539,
+      "learning_rate": 2.9948757024626645e-05,
+      "loss": 1.8502,
+      "step": 13515
+    },
+    {
+      "epoch": 0.026347664541457164,
+      "grad_norm": 4.427610397338867,
+      "learning_rate": 2.994864327959879e-05,
+      "loss": 1.746,
+      "step": 13530
+    },
+    {
+      "epoch": 0.026376874812567428,
+      "grad_norm": 2.9682793617248535,
+      "learning_rate": 2.994852940868631e-05,
+      "loss": 1.8766,
+      "step": 13545
+    },
+    {
+      "epoch": 0.02640608508367769,
+      "grad_norm": 2.8406543731689453,
+      "learning_rate": 2.9948415411890164e-05,
+      "loss": 1.8637,
+      "step": 13560
+    },
+    {
+      "epoch": 0.026435295354787954,
+      "grad_norm": 2.9661149978637695,
+      "learning_rate": 2.9948301289211308e-05,
+      "loss": 1.7703,
+      "step": 13575
+    },
+    {
+      "epoch": 0.026464505625898214,
+      "grad_norm": 2.961155652999878,
+      "learning_rate": 2.99481870406507e-05,
+      "loss": 1.8123,
+      "step": 13590
+    },
+    {
+      "epoch": 0.02649371589700848,
+      "grad_norm": 3.7241668701171875,
+      "learning_rate": 2.9948072666209308e-05,
+      "loss": 2.018,
+      "step": 13605
+    },
+    {
+      "epoch": 0.026522926168118743,
+      "grad_norm": 2.8102498054504395,
+      "learning_rate": 2.9947958165888096e-05,
+      "loss": 1.8577,
+      "step": 13620
+    },
+    {
+      "epoch": 0.026552136439229004,
+      "grad_norm": 2.061007022857666,
+      "learning_rate": 2.9947843539688027e-05,
+      "loss": 1.9684,
+      "step": 13635
+    },
+    {
+      "epoch": 0.02658134671033927,
+      "grad_norm": 4.699859619140625,
+      "learning_rate": 2.994772878761006e-05,
+      "loss": 1.9438,
+      "step": 13650
+    },
+    {
+      "epoch": 0.02661055698144953,
+      "grad_norm": 5.8805952072143555,
+      "learning_rate": 2.994761390965517e-05,
+      "loss": 1.8862,
+      "step": 13665
+    },
+    {
+      "epoch": 0.026639767252559794,
+      "grad_norm": 3.6178531646728516,
+      "learning_rate": 2.994749890582432e-05,
+      "loss": 1.9754,
+      "step": 13680
+    },
+    {
+      "epoch": 0.026668977523670055,
+      "grad_norm": 2.891448497772217,
+      "learning_rate": 2.9947383776118482e-05,
+      "loss": 1.8838,
+      "step": 13695
+    },
+    {
+      "epoch": 0.02669818779478032,
+      "grad_norm": 2.5380797386169434,
+      "learning_rate": 2.994726852053862e-05,
+      "loss": 2.0006,
+      "step": 13710
+    },
+    {
+      "epoch": 0.02672739806589058,
+      "grad_norm": 3.083801031112671,
+      "learning_rate": 2.994715313908571e-05,
+      "loss": 1.9287,
+      "step": 13725
+    },
+    {
+      "epoch": 0.026756608337000845,
+      "grad_norm": 3.9220306873321533,
+      "learning_rate": 2.9947037631760717e-05,
+      "loss": 2.0063,
+      "step": 13740
+    },
+    {
+      "epoch": 0.02678581860811111,
+      "grad_norm": 2.41329288482666,
+      "learning_rate": 2.994692199856462e-05,
+      "loss": 1.7779,
+      "step": 13755
+    },
+    {
+      "epoch": 0.02681502887922137,
+      "grad_norm": 3.137281656265259,
+      "learning_rate": 2.9946806239498392e-05,
+      "loss": 1.7686,
+      "step": 13770
+    },
+    {
+      "epoch": 0.026844239150331635,
+      "grad_norm": 3.8897507190704346,
+      "learning_rate": 2.994669035456301e-05,
+      "loss": 1.9879,
+      "step": 13785
+    },
+    {
+      "epoch": 0.026873449421441896,
+      "grad_norm": 2.888145685195923,
+      "learning_rate": 2.994657434375944e-05,
+      "loss": 2.0012,
+      "step": 13800
+    },
+    {
+      "epoch": 0.02690265969255216,
+      "grad_norm": 2.683145523071289,
+      "learning_rate": 2.9946458207088667e-05,
+      "loss": 1.8579,
+      "step": 13815
+    },
+    {
+      "epoch": 0.02693186996366242,
+      "grad_norm": 2.5023186206817627,
+      "learning_rate": 2.9946341944551668e-05,
+      "loss": 1.8899,
+      "step": 13830
+    },
+    {
+      "epoch": 0.026961080234772686,
+      "grad_norm": 4.522122383117676,
+      "learning_rate": 2.994622555614942e-05,
+      "loss": 1.8373,
+      "step": 13845
+    },
+    {
+      "epoch": 0.02699029050588295,
+      "grad_norm": 1.9197810888290405,
+      "learning_rate": 2.9946109041882902e-05,
+      "loss": 1.874,
+      "step": 13860
+    },
+    {
+      "epoch": 0.02701950077699321,
+      "grad_norm": 2.0907135009765625,
+      "learning_rate": 2.9945992401753103e-05,
+      "loss": 1.9878,
+      "step": 13875
+    },
+    {
+      "epoch": 0.027048711048103476,
+      "grad_norm": 3.0691592693328857,
+      "learning_rate": 2.9945875635761e-05,
+      "loss": 1.8859,
+      "step": 13890
+    },
+    {
+      "epoch": 0.027077921319213737,
+      "grad_norm": 2.0707552433013916,
+      "learning_rate": 2.9945758743907573e-05,
+      "loss": 1.7612,
+      "step": 13905
+    },
+    {
+      "epoch": 0.027107131590324,
+      "grad_norm": 2.2770462036132812,
+      "learning_rate": 2.994564172619381e-05,
+      "loss": 1.8028,
+      "step": 13920
+    },
+    {
+      "epoch": 0.027136341861434262,
+      "grad_norm": 2.681814193725586,
+      "learning_rate": 2.9945524582620695e-05,
+      "loss": 1.7967,
+      "step": 13935
+    },
+    {
+      "epoch": 0.027165552132544526,
+      "grad_norm": 3.0529186725616455,
+      "learning_rate": 2.994540731318922e-05,
+      "loss": 1.7972,
+      "step": 13950
+    },
+    {
+      "epoch": 0.027194762403654787,
+      "grad_norm": 3.369091033935547,
+      "learning_rate": 2.9945289917900368e-05,
+      "loss": 1.8092,
+      "step": 13965
+    },
+    {
+      "epoch": 0.027223972674765052,
+      "grad_norm": 2.190134048461914,
+      "learning_rate": 2.9945172396755124e-05,
+      "loss": 2.0228,
+      "step": 13980
+    },
+    {
+      "epoch": 0.027253182945875316,
+      "grad_norm": 2.805100202560425,
+      "learning_rate": 2.9945054749754483e-05,
+      "loss": 1.9312,
+      "step": 13995
+    },
+    {
+      "epoch": 0.027282393216985577,
+      "grad_norm": 2.195697546005249,
+      "learning_rate": 2.9944936976899433e-05,
+      "loss": 1.9791,
+      "step": 14010
+    },
+    {
+      "epoch": 0.027311603488095842,
+      "grad_norm": 1.723713755607605,
+      "learning_rate": 2.9944819078190967e-05,
+      "loss": 1.8542,
+      "step": 14025
+    },
+    {
+      "epoch": 0.027340813759206103,
+      "grad_norm": 2.633101463317871,
+      "learning_rate": 2.9944701053630075e-05,
+      "loss": 1.8127,
+      "step": 14040
+    },
+    {
+      "epoch": 0.027370024030316367,
+      "grad_norm": 1.9390171766281128,
+      "learning_rate": 2.9944582903217756e-05,
+      "loss": 1.9183,
+      "step": 14055
+    },
+    {
+      "epoch": 0.027399234301426628,
+      "grad_norm": 3.9491968154907227,
+      "learning_rate": 2.9944464626955003e-05,
+      "loss": 2.0849,
+      "step": 14070
+    },
+    {
+      "epoch": 0.027428444572536893,
+      "grad_norm": 2.4679179191589355,
+      "learning_rate": 2.9944346224842812e-05,
+      "loss": 1.9285,
+      "step": 14085
+    },
+    {
+      "epoch": 0.027457654843647157,
+      "grad_norm": 2.999509334564209,
+      "learning_rate": 2.994422769688218e-05,
+      "loss": 1.9523,
+      "step": 14100
+    },
+    {
+      "epoch": 0.027486865114757418,
+      "grad_norm": 3.8798091411590576,
+      "learning_rate": 2.9944109043074104e-05,
+      "loss": 1.9014,
+      "step": 14115
+    },
+    {
+      "epoch": 0.027516075385867683,
+      "grad_norm": 2.5288240909576416,
+      "learning_rate": 2.9943990263419582e-05,
+      "loss": 2.1135,
+      "step": 14130
+    },
+    {
+      "epoch": 0.027545285656977943,
+      "grad_norm": 2.2120304107666016,
+      "learning_rate": 2.994387135791962e-05,
+      "loss": 1.7418,
+      "step": 14145
+    },
+    {
+      "epoch": 0.027574495928088208,
+      "grad_norm": 2.805328607559204,
+      "learning_rate": 2.994375232657521e-05,
+      "loss": 1.8776,
+      "step": 14160
+    },
+    {
+      "epoch": 0.02760370619919847,
+      "grad_norm": 2.8841097354888916,
+      "learning_rate": 2.9943633169387365e-05,
+      "loss": 1.9106,
+      "step": 14175
+    },
+    {
+      "epoch": 0.027632916470308733,
+      "grad_norm": 1.8887025117874146,
+      "learning_rate": 2.994351388635708e-05,
+      "loss": 1.8916,
+      "step": 14190
+    },
+    {
+      "epoch": 0.027662126741418994,
+      "grad_norm": 2.8623757362365723,
+      "learning_rate": 2.9943394477485363e-05,
+      "loss": 1.8735,
+      "step": 14205
+    },
+    {
+      "epoch": 0.02769133701252926,
+      "grad_norm": 3.1046249866485596,
+      "learning_rate": 2.994327494277322e-05,
+      "loss": 1.9124,
+      "step": 14220
+    },
+    {
+      "epoch": 0.027720547283639523,
+      "grad_norm": 2.653933525085449,
+      "learning_rate": 2.9943155282221663e-05,
+      "loss": 1.9387,
+      "step": 14235
+    },
+    {
+      "epoch": 0.027749757554749784,
+      "grad_norm": 2.975820779800415,
+      "learning_rate": 2.9943035495831688e-05,
+      "loss": 1.8232,
+      "step": 14250
+    },
+    {
+      "epoch": 0.02777896782586005,
+      "grad_norm": 5.906015396118164,
+      "learning_rate": 2.9942915583604307e-05,
+      "loss": 1.9167,
+      "step": 14265
+    },
+    {
+      "epoch": 0.02780817809697031,
+      "grad_norm": 2.592456102371216,
+      "learning_rate": 2.994279554554054e-05,
+      "loss": 1.7433,
+      "step": 14280
+    },
+    {
+      "epoch": 0.027837388368080574,
+      "grad_norm": 5.042680263519287,
+      "learning_rate": 2.994267538164138e-05,
+      "loss": 1.7878,
+      "step": 14295
+    },
+    {
+      "epoch": 0.027866598639190835,
+      "grad_norm": 4.092184066772461,
+      "learning_rate": 2.9942555091907853e-05,
+      "loss": 1.6955,
+      "step": 14310
+    },
+    {
+      "epoch": 0.0278958089103011,
+      "grad_norm": 4.623755931854248,
+      "learning_rate": 2.994243467634097e-05,
+      "loss": 1.7866,
+      "step": 14325
+    },
+    {
+      "epoch": 0.027925019181411364,
+      "grad_norm": 3.042306661605835,
+      "learning_rate": 2.994231413494174e-05,
+      "loss": 1.8891,
+      "step": 14340
+    },
+    {
+      "epoch": 0.027954229452521625,
+      "grad_norm": 2.784275531768799,
+      "learning_rate": 2.9942193467711184e-05,
+      "loss": 2.0112,
+      "step": 14355
+    },
+    {
+      "epoch": 0.02798343972363189,
+      "grad_norm": 1.9308695793151855,
+      "learning_rate": 2.9942072674650317e-05,
+      "loss": 1.7964,
+      "step": 14370
+    },
+    {
+      "epoch": 0.02801264999474215,
+      "grad_norm": 3.7377004623413086,
+      "learning_rate": 2.994195175576015e-05,
+      "loss": 1.8661,
+      "step": 14385
+    },
+    {
+      "epoch": 0.028041860265852415,
+      "grad_norm": 2.484870195388794,
+      "learning_rate": 2.994183071104171e-05,
+      "loss": 1.7358,
+      "step": 14400
+    },
+    {
+      "epoch": 0.028071070536962676,
+      "grad_norm": 2.6344974040985107,
+      "learning_rate": 2.9941709540496013e-05,
+      "loss": 1.9183,
+      "step": 14415
+    },
+    {
+      "epoch": 0.02810028080807294,
+      "grad_norm": 2.168701410293579,
+      "learning_rate": 2.9941588244124072e-05,
+      "loss": 1.999,
+      "step": 14430
+    },
+    {
+      "epoch": 0.028129491079183205,
+      "grad_norm": 2.986727476119995,
+      "learning_rate": 2.994146682192692e-05,
+      "loss": 1.8344,
+      "step": 14445
+    },
+    {
+      "epoch": 0.028158701350293466,
+      "grad_norm": 3.3715713024139404,
+      "learning_rate": 2.9941345273905573e-05,
+      "loss": 2.0468,
+      "step": 14460
+    },
+    {
+      "epoch": 0.02818791162140373,
+      "grad_norm": 2.2077038288116455,
+      "learning_rate": 2.9941223600061054e-05,
+      "loss": 2.0255,
+      "step": 14475
+    },
+    {
+      "epoch": 0.02821712189251399,
+      "grad_norm": 3.4651224613189697,
+      "learning_rate": 2.994110180039439e-05,
+      "loss": 1.8604,
+      "step": 14490
+    },
+    {
+      "epoch": 0.028246332163624256,
+      "grad_norm": 2.0584287643432617,
+      "learning_rate": 2.994097987490661e-05,
+      "loss": 2.0636,
+      "step": 14505
+    },
+    {
+      "epoch": 0.028275542434734516,
+      "grad_norm": 3.1285014152526855,
+      "learning_rate": 2.9940857823598736e-05,
+      "loss": 1.7656,
+      "step": 14520
+    },
+    {
+      "epoch": 0.02830475270584478,
+      "grad_norm": 2.541280746459961,
+      "learning_rate": 2.9940735646471793e-05,
+      "loss": 1.8682,
+      "step": 14535
+    },
+    {
+      "epoch": 0.028333962976955042,
+      "grad_norm": 2.4748847484588623,
+      "learning_rate": 2.9940613343526817e-05,
+      "loss": 2.0047,
+      "step": 14550
+    },
+    {
+      "epoch": 0.028363173248065306,
+      "grad_norm": 3.2105560302734375,
+      "learning_rate": 2.9940490914764834e-05,
+      "loss": 1.7709,
+      "step": 14565
+    },
+    {
+      "epoch": 0.02839238351917557,
+      "grad_norm": 3.491591215133667,
+      "learning_rate": 2.9940368360186878e-05,
+      "loss": 1.786,
+      "step": 14580
+    },
+    {
+      "epoch": 0.028421593790285832,
+      "grad_norm": 3.315342903137207,
+      "learning_rate": 2.9940245679793978e-05,
+      "loss": 1.917,
+      "step": 14595
+    },
+    {
+      "epoch": 0.028450804061396096,
+      "grad_norm": 1.7594997882843018,
+      "learning_rate": 2.9940122873587164e-05,
+      "loss": 1.877,
+      "step": 14610
+    },
+    {
+      "epoch": 0.028480014332506357,
+      "grad_norm": 2.442725896835327,
+      "learning_rate": 2.9939999941567474e-05,
+      "loss": 1.9577,
+      "step": 14625
+    },
+    {
+      "epoch": 0.02850922460361662,
+      "grad_norm": 3.146977663040161,
+      "learning_rate": 2.993987688373595e-05,
+      "loss": 1.8722,
+      "step": 14640
+    },
+    {
+      "epoch": 0.028538434874726883,
+      "grad_norm": 2.6678929328918457,
+      "learning_rate": 2.9939753700093618e-05,
+      "loss": 1.659,
+      "step": 14655
+    },
+    {
+      "epoch": 0.028567645145837147,
+      "grad_norm": 2.4769906997680664,
+      "learning_rate": 2.9939630390641518e-05,
+      "loss": 1.8257,
+      "step": 14670
+    },
+    {
+      "epoch": 0.02859685541694741,
+      "grad_norm": 2.9314770698547363,
+      "learning_rate": 2.993950695538069e-05,
+      "loss": 1.8004,
+      "step": 14685
+    },
+    {
+      "epoch": 0.028626065688057672,
+      "grad_norm": 3.2279980182647705,
+      "learning_rate": 2.993938339431217e-05,
+      "loss": 1.9438,
+      "step": 14700
+    },
+    {
+      "epoch": 0.028655275959167937,
+      "grad_norm": 2.8929495811462402,
+      "learning_rate": 2.9939259707437002e-05,
+      "loss": 1.7995,
+      "step": 14715
+    },
+    {
+      "epoch": 0.028684486230278198,
+      "grad_norm": 4.861998558044434,
+      "learning_rate": 2.9939135894756232e-05,
+      "loss": 1.8188,
+      "step": 14730
+    },
+    {
+      "epoch": 0.028713696501388462,
+      "grad_norm": 5.37394905090332,
+      "learning_rate": 2.9939011956270893e-05,
+      "loss": 2.061,
+      "step": 14745
+    },
+    {
+      "epoch": 0.028742906772498723,
+      "grad_norm": 2.2253520488739014,
+      "learning_rate": 2.9938887891982035e-05,
+      "loss": 1.963,
+      "step": 14760
+    },
+    {
+      "epoch": 0.028772117043608988,
+      "grad_norm": 3.424954414367676,
+      "learning_rate": 2.99387637018907e-05,
+      "loss": 2.0077,
+      "step": 14775
+    },
+    {
+      "epoch": 0.02880132731471925,
+      "grad_norm": 2.8398706912994385,
+      "learning_rate": 2.9938639385997934e-05,
+      "loss": 2.0516,
+      "step": 14790
+    },
+    {
+      "epoch": 0.028830537585829513,
+      "grad_norm": 2.371492385864258,
+      "learning_rate": 2.9938514944304788e-05,
+      "loss": 1.8057,
+      "step": 14805
+    },
+    {
+      "epoch": 0.028859747856939778,
+      "grad_norm": 1.870301365852356,
+      "learning_rate": 2.9938390376812304e-05,
+      "loss": 1.8335,
+      "step": 14820
+    },
+    {
+      "epoch": 0.02888895812805004,
+      "grad_norm": 3.1508800983428955,
+      "learning_rate": 2.9938265683521533e-05,
+      "loss": 2.0272,
+      "step": 14835
+    },
+    {
+      "epoch": 0.028918168399160303,
+      "grad_norm": 2.8456640243530273,
+      "learning_rate": 2.9938140864433528e-05,
+      "loss": 1.889,
+      "step": 14850
+    },
+    {
+      "epoch": 0.028947378670270564,
+      "grad_norm": 2.3040804862976074,
+      "learning_rate": 2.9938015919549337e-05,
+      "loss": 1.9274,
+      "step": 14865
+    },
+    {
+      "epoch": 0.02897658894138083,
+      "grad_norm": 3.075559139251709,
+      "learning_rate": 2.9937890848870012e-05,
+      "loss": 1.9239,
+      "step": 14880
+    },
+    {
+      "epoch": 0.02900579921249109,
+      "grad_norm": 2.4840190410614014,
+      "learning_rate": 2.9937765652396608e-05,
+      "loss": 1.9836,
+      "step": 14895
+    },
+    {
+      "epoch": 0.029035009483601354,
+      "grad_norm": 2.915515422821045,
+      "learning_rate": 2.9937640330130182e-05,
+      "loss": 2.0196,
+      "step": 14910
+    },
+    {
+      "epoch": 0.02906421975471162,
+      "grad_norm": 2.684401035308838,
+      "learning_rate": 2.993751488207178e-05,
+      "loss": 1.9699,
+      "step": 14925
+    },
+    {
+      "epoch": 0.02909343002582188,
+      "grad_norm": 1.7906841039657593,
+      "learning_rate": 2.9937389308222468e-05,
+      "loss": 1.9435,
+      "step": 14940
+    },
+    {
+      "epoch": 0.029122640296932144,
+      "grad_norm": 2.7629384994506836,
+      "learning_rate": 2.9937263608583297e-05,
+      "loss": 1.9266,
+      "step": 14955
+    },
+    {
+      "epoch": 0.029151850568042405,
+      "grad_norm": 3.65447735786438,
+      "learning_rate": 2.9937137783155326e-05,
+      "loss": 1.8818,
+      "step": 14970
+    },
+    {
+      "epoch": 0.02918106083915267,
+      "grad_norm": 2.684885025024414,
+      "learning_rate": 2.993701183193962e-05,
+      "loss": 1.744,
+      "step": 14985
+    },
+    {
+      "epoch": 0.02921027111026293,
+      "grad_norm": 2.995678424835205,
+      "learning_rate": 2.9936885754937237e-05,
+      "loss": 1.868,
+      "step": 15000
     }
   ],
   "logging_steps": 15,
       "attributes": {}
     }
   },
+  "total_flos": 2.384291701225267e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

data/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8dc1798a011f05627e6b1e98cb375157a979238b0369add36e82134e4a35ff2b
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ec2ab14e2ce9ef4d500efa223a38f8e5ca01386342399ff2338f06b1fa66a7a
 size 5304