|
{ |
|
"best_metric": 1.2795084714889526, |
|
"best_model_checkpoint": "saved_model/c2s_jun2024/checkpoint-9692", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 9692, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0, |
|
"loss": 72.6113, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 18.23219108581543, |
|
"learning_rate": 2.5e-06, |
|
"loss": 74.5495, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.582162857055664, |
|
"learning_rate": 7.5e-06, |
|
"loss": 73.7367, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.804972648620605, |
|
"learning_rate": 1.2e-05, |
|
"loss": 72.8853, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.634269714355469, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 70.9592, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.762855529785156, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 66.9603, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.27646827697754, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 61.4318, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 27.16312026977539, |
|
"learning_rate": 3.15e-05, |
|
"loss": 53.3651, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 28.43309783935547, |
|
"learning_rate": 3.65e-05, |
|
"loss": 33.9745, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.292057991027832, |
|
"learning_rate": 4.15e-05, |
|
"loss": 13.4627, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.148832321166992, |
|
"learning_rate": 4.6500000000000005e-05, |
|
"loss": 6.8387, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.579999923706055, |
|
"learning_rate": 5.1500000000000005e-05, |
|
"loss": 4.7847, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.650771141052246, |
|
"learning_rate": 5.65e-05, |
|
"loss": 4.1684, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.5379791259765625, |
|
"learning_rate": 6.15e-05, |
|
"loss": 3.8221, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.095062732696533, |
|
"learning_rate": 6.65e-05, |
|
"loss": 3.5635, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.0580973625183105, |
|
"learning_rate": 7.15e-05, |
|
"loss": 3.4446, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.517209053039551, |
|
"learning_rate": 7.65e-05, |
|
"loss": 3.2972, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.954787731170654, |
|
"learning_rate": 8.15e-05, |
|
"loss": 3.2621, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.085761547088623, |
|
"learning_rate": 8.65e-05, |
|
"loss": 3.2072, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.4346442222595215, |
|
"learning_rate": 9.15e-05, |
|
"loss": 3.0868, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.535578727722168, |
|
"learning_rate": 9.65e-05, |
|
"loss": 3.0201, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.239222526550293, |
|
"learning_rate": 9.999378367177788e-05, |
|
"loss": 2.9792, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.576033592224121, |
|
"learning_rate": 9.997306257770411e-05, |
|
"loss": 3.0079, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.455887794494629, |
|
"learning_rate": 9.995234148363033e-05, |
|
"loss": 2.8296, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.566660404205322, |
|
"learning_rate": 9.993162038955657e-05, |
|
"loss": 2.7655, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.954742908477783, |
|
"learning_rate": 9.99108992954828e-05, |
|
"loss": 2.5655, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.5510752201080322, |
|
"learning_rate": 9.989017820140904e-05, |
|
"loss": 2.4527, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.358351230621338, |
|
"learning_rate": 9.986945710733528e-05, |
|
"loss": 2.2679, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.9349524974823, |
|
"learning_rate": 9.98487360132615e-05, |
|
"loss": 2.1456, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.3249402046203613, |
|
"learning_rate": 9.982801491918775e-05, |
|
"loss": 2.0943, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.92372989654541, |
|
"learning_rate": 9.980729382511397e-05, |
|
"loss": 2.0194, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.124359130859375, |
|
"learning_rate": 9.97865727310402e-05, |
|
"loss": 1.9523, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.372561454772949, |
|
"learning_rate": 9.976585163696644e-05, |
|
"loss": 1.905, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.5799174308776855, |
|
"learning_rate": 9.974513054289267e-05, |
|
"loss": 1.9159, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1826956272125244, |
|
"learning_rate": 9.97244094488189e-05, |
|
"loss": 1.8362, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1002371311187744, |
|
"learning_rate": 9.970368835474514e-05, |
|
"loss": 1.844, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.1345527172088623, |
|
"learning_rate": 9.968296726067136e-05, |
|
"loss": 1.8084, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9457321166992188, |
|
"learning_rate": 9.96622461665976e-05, |
|
"loss": 1.7775, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9511795043945312, |
|
"learning_rate": 9.964152507252383e-05, |
|
"loss": 1.7872, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9775121212005615, |
|
"learning_rate": 9.962080397845007e-05, |
|
"loss": 1.7665, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.9561394453048706, |
|
"learning_rate": 9.96000828843763e-05, |
|
"loss": 1.7664, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.7436013221740723, |
|
"learning_rate": 9.957936179030253e-05, |
|
"loss": 1.7016, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.6739649772644043, |
|
"learning_rate": 9.955864069622876e-05, |
|
"loss": 1.7219, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.940246343612671, |
|
"learning_rate": 9.9537919602155e-05, |
|
"loss": 1.7174, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.8286395072937012, |
|
"learning_rate": 9.951719850808123e-05, |
|
"loss": 1.6698, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.0042293071746826, |
|
"learning_rate": 9.949647741400747e-05, |
|
"loss": 1.6908, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.6445887088775635, |
|
"learning_rate": 9.94757563199337e-05, |
|
"loss": 1.6796, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.068713903427124, |
|
"learning_rate": 9.945503522585992e-05, |
|
"loss": 1.6685, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.8053257465362549, |
|
"learning_rate": 9.943431413178617e-05, |
|
"loss": 1.6522, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.580461859703064, |
|
"learning_rate": 9.94135930377124e-05, |
|
"loss": 1.6425, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.607007384300232, |
|
"learning_rate": 9.939287194363863e-05, |
|
"loss": 1.632, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.649885654449463, |
|
"learning_rate": 9.937215084956486e-05, |
|
"loss": 1.5966, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.7667235136032104, |
|
"learning_rate": 9.93514297554911e-05, |
|
"loss": 1.5942, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.595691442489624, |
|
"learning_rate": 9.933070866141732e-05, |
|
"loss": 1.609, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.5232254266738892, |
|
"learning_rate": 9.930998756734357e-05, |
|
"loss": 1.5614, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.4872910976409912, |
|
"learning_rate": 9.928926647326979e-05, |
|
"loss": 1.5657, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.609491229057312, |
|
"learning_rate": 9.926854537919603e-05, |
|
"loss": 1.5935, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.6403166055679321, |
|
"learning_rate": 9.924782428512226e-05, |
|
"loss": 1.6159, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.6648396253585815, |
|
"learning_rate": 9.922710319104848e-05, |
|
"loss": 1.6012, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.6322458982467651, |
|
"learning_rate": 9.920638209697473e-05, |
|
"loss": 1.5541, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.5503164529800415, |
|
"learning_rate": 9.918566100290095e-05, |
|
"loss": 1.5733, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.6093209981918335, |
|
"learning_rate": 9.916493990882719e-05, |
|
"loss": 1.5144, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.6871626377105713, |
|
"learning_rate": 9.914421881475342e-05, |
|
"loss": 1.573, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.7600977420806885, |
|
"learning_rate": 9.912349772067966e-05, |
|
"loss": 1.5577, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.4892425537109375, |
|
"learning_rate": 9.910277662660588e-05, |
|
"loss": 1.5751, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.5667476654052734, |
|
"learning_rate": 9.908205553253213e-05, |
|
"loss": 1.5298, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.3411659002304077, |
|
"learning_rate": 9.906133443845835e-05, |
|
"loss": 1.5409, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5329233407974243, |
|
"learning_rate": 9.904061334438459e-05, |
|
"loss": 1.5165, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5168925523757935, |
|
"learning_rate": 9.901989225031082e-05, |
|
"loss": 1.5222, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.6860578060150146, |
|
"learning_rate": 9.899917115623706e-05, |
|
"loss": 1.5179, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.4629698991775513, |
|
"learning_rate": 9.897845006216329e-05, |
|
"loss": 1.5593, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.3701924085617065, |
|
"learning_rate": 9.895772896808953e-05, |
|
"loss": 1.52, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.4276106357574463, |
|
"learning_rate": 9.893700787401575e-05, |
|
"loss": 1.5546, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.5609627962112427, |
|
"learning_rate": 9.8916286779942e-05, |
|
"loss": 1.5071, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.9602493047714233, |
|
"learning_rate": 9.889556568586822e-05, |
|
"loss": 1.5192, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.4681726694107056, |
|
"learning_rate": 9.887484459179444e-05, |
|
"loss": 1.5065, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.547143816947937, |
|
"learning_rate": 9.885412349772069e-05, |
|
"loss": 1.5303, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.7585084438323975, |
|
"learning_rate": 9.883340240364691e-05, |
|
"loss": 1.5412, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.4589301347732544, |
|
"learning_rate": 9.881268130957315e-05, |
|
"loss": 1.5008, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.5748664140701294, |
|
"learning_rate": 9.879196021549938e-05, |
|
"loss": 1.4856, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.4392333030700684, |
|
"learning_rate": 9.877123912142562e-05, |
|
"loss": 1.4593, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.439276933670044, |
|
"learning_rate": 9.875051802735185e-05, |
|
"loss": 1.4565, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.5028575658798218, |
|
"learning_rate": 9.872979693327809e-05, |
|
"loss": 1.5106, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.5902388095855713, |
|
"learning_rate": 9.870907583920431e-05, |
|
"loss": 1.459, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.5270620584487915, |
|
"learning_rate": 9.868835474513056e-05, |
|
"loss": 1.4705, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.354683518409729, |
|
"learning_rate": 9.866763365105678e-05, |
|
"loss": 1.4468, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3697203397750854, |
|
"learning_rate": 9.864691255698301e-05, |
|
"loss": 1.4669, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.5006585121154785, |
|
"learning_rate": 9.862619146290925e-05, |
|
"loss": 1.4641, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3566001653671265, |
|
"learning_rate": 9.860547036883548e-05, |
|
"loss": 1.4545, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3500274419784546, |
|
"learning_rate": 9.85847492747617e-05, |
|
"loss": 1.477, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3306142091751099, |
|
"learning_rate": 9.856402818068794e-05, |
|
"loss": 1.4469, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.2983628511428833, |
|
"learning_rate": 9.854330708661418e-05, |
|
"loss": 1.4603, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.3828344345092773, |
|
"learning_rate": 9.852258599254041e-05, |
|
"loss": 1.4686, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.427741527557373, |
|
"learning_rate": 9.850186489846665e-05, |
|
"loss": 1.4756, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.267404556274414, |
|
"learning_rate": 9.848114380439287e-05, |
|
"loss": 1.4777, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.3213374614715576, |
|
"learning_rate": 9.846042271031912e-05, |
|
"loss": 1.4526, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.6813840866088867, |
|
"learning_rate": 9.843970161624534e-05, |
|
"loss": 1.49, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2110322713851929, |
|
"learning_rate": 9.841898052217157e-05, |
|
"loss": 1.4796, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.3316526412963867, |
|
"learning_rate": 9.839825942809781e-05, |
|
"loss": 1.4523, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.313766598701477, |
|
"learning_rate": 9.837753833402404e-05, |
|
"loss": 1.4195, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.4528905153274536, |
|
"learning_rate": 9.835681723995028e-05, |
|
"loss": 1.4433, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.3782751560211182, |
|
"learning_rate": 9.833609614587651e-05, |
|
"loss": 1.4673, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.5674275159835815, |
|
"learning_rate": 9.831537505180273e-05, |
|
"loss": 1.4296, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.3901402950286865, |
|
"learning_rate": 9.829465395772898e-05, |
|
"loss": 1.4516, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.1594748497009277, |
|
"learning_rate": 9.82739328636552e-05, |
|
"loss": 1.4225, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.2297048568725586, |
|
"learning_rate": 9.825321176958144e-05, |
|
"loss": 1.4416, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1866023540496826, |
|
"learning_rate": 9.823249067550768e-05, |
|
"loss": 1.444, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.407461404800415, |
|
"learning_rate": 9.82117695814339e-05, |
|
"loss": 1.4415, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.302164912223816, |
|
"learning_rate": 9.819104848736013e-05, |
|
"loss": 1.4405, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.3704490661621094, |
|
"learning_rate": 9.817032739328637e-05, |
|
"loss": 1.4408, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2673710584640503, |
|
"learning_rate": 9.81496062992126e-05, |
|
"loss": 1.4221, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3337206840515137, |
|
"learning_rate": 9.812888520513884e-05, |
|
"loss": 1.4193, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.3280502557754517, |
|
"learning_rate": 9.810816411106507e-05, |
|
"loss": 1.4736, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.2532864809036255, |
|
"learning_rate": 9.80874430169913e-05, |
|
"loss": 1.4665, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2475242614746094, |
|
"learning_rate": 9.806672192291754e-05, |
|
"loss": 1.426, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.7034567594528198, |
|
"learning_rate": 9.804600082884376e-05, |
|
"loss": 1.4473, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.3586080074310303, |
|
"learning_rate": 9.802527973477e-05, |
|
"loss": 1.3959, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.2611415386199951, |
|
"learning_rate": 9.800455864069623e-05, |
|
"loss": 1.4401, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.3101681470870972, |
|
"learning_rate": 9.798383754662247e-05, |
|
"loss": 1.4431, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1770988702774048, |
|
"learning_rate": 9.796311645254869e-05, |
|
"loss": 1.4108, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.2325702905654907, |
|
"learning_rate": 9.794239535847494e-05, |
|
"loss": 1.4141, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.2543164491653442, |
|
"learning_rate": 9.792167426440116e-05, |
|
"loss": 1.4133, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.1258199214935303, |
|
"learning_rate": 9.79009531703274e-05, |
|
"loss": 1.4041, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.3423455953598022, |
|
"learning_rate": 9.788023207625363e-05, |
|
"loss": 1.4144, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.248947024345398, |
|
"learning_rate": 9.785951098217985e-05, |
|
"loss": 1.4043, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.129650354385376, |
|
"learning_rate": 9.78387898881061e-05, |
|
"loss": 1.4216, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2218910455703735, |
|
"learning_rate": 9.781806879403232e-05, |
|
"loss": 1.3976, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.153981328010559, |
|
"learning_rate": 9.779734769995856e-05, |
|
"loss": 1.4304, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.1724766492843628, |
|
"learning_rate": 9.77766266058848e-05, |
|
"loss": 1.43, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.2830730676651, |
|
"learning_rate": 9.775590551181103e-05, |
|
"loss": 1.4429, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.2320913076400757, |
|
"learning_rate": 9.773518441773725e-05, |
|
"loss": 1.3898, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.2313491106033325, |
|
"learning_rate": 9.77144633236635e-05, |
|
"loss": 1.4273, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.1946086883544922, |
|
"learning_rate": 9.769374222958972e-05, |
|
"loss": 1.4234, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.127300500869751, |
|
"learning_rate": 9.767302113551596e-05, |
|
"loss": 1.4144, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.4888228178024292, |
|
"learning_rate": 9.765230004144219e-05, |
|
"loss": 1.4092, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.3795928955078125, |
|
"learning_rate": 9.763157894736843e-05, |
|
"loss": 1.3647, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.1433610916137695, |
|
"learning_rate": 9.761085785329466e-05, |
|
"loss": 1.415, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.040281891822815, |
|
"learning_rate": 9.75901367592209e-05, |
|
"loss": 1.4244, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.1311726570129395, |
|
"learning_rate": 9.756941566514712e-05, |
|
"loss": 1.3852, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2847346067428589, |
|
"learning_rate": 9.754869457107337e-05, |
|
"loss": 1.4225, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.2235894203186035, |
|
"learning_rate": 9.752797347699959e-05, |
|
"loss": 1.3973, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.1802481412887573, |
|
"learning_rate": 9.750725238292582e-05, |
|
"loss": 1.3923, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.141739010810852, |
|
"learning_rate": 9.748653128885206e-05, |
|
"loss": 1.4049, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.2155243158340454, |
|
"learning_rate": 9.746581019477828e-05, |
|
"loss": 1.3866, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.4717819690704346, |
|
"learning_rate": 9.744508910070453e-05, |
|
"loss": 1.4264, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.1440094709396362, |
|
"learning_rate": 9.742436800663075e-05, |
|
"loss": 1.4291, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.3254936933517456, |
|
"learning_rate": 9.740364691255699e-05, |
|
"loss": 1.3973, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.2041431665420532, |
|
"learning_rate": 9.738292581848322e-05, |
|
"loss": 1.3779, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.1422394514083862, |
|
"learning_rate": 9.736220472440946e-05, |
|
"loss": 1.3918, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2341557741165161, |
|
"learning_rate": 9.734148363033568e-05, |
|
"loss": 1.4065, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.1723967790603638, |
|
"learning_rate": 9.732076253626193e-05, |
|
"loss": 1.4003, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.279010534286499, |
|
"learning_rate": 9.730004144218815e-05, |
|
"loss": 1.3762, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.2639541625976562, |
|
"learning_rate": 9.727932034811438e-05, |
|
"loss": 1.3932, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.1406500339508057, |
|
"learning_rate": 9.725859925404062e-05, |
|
"loss": 1.4318, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.1991297006607056, |
|
"learning_rate": 9.723787815996685e-05, |
|
"loss": 1.3742, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.1058017015457153, |
|
"learning_rate": 9.721715706589309e-05, |
|
"loss": 1.3975, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.3658838272094727, |
|
"learning_rate": 9.719643597181932e-05, |
|
"loss": 1.4245, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.0663561820983887, |
|
"learning_rate": 9.717571487774555e-05, |
|
"loss": 1.3779, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.1523654460906982, |
|
"learning_rate": 9.715499378367178e-05, |
|
"loss": 1.4306, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.223913311958313, |
|
"learning_rate": 9.713427268959802e-05, |
|
"loss": 1.3748, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.0876872539520264, |
|
"learning_rate": 9.711355159552424e-05, |
|
"loss": 1.3806, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.3317033052444458, |
|
"learning_rate": 9.709283050145049e-05, |
|
"loss": 1.3586, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.2402222156524658, |
|
"learning_rate": 9.707210940737671e-05, |
|
"loss": 1.3886, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.1467841863632202, |
|
"learning_rate": 9.705138831330294e-05, |
|
"loss": 1.3634, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.1589218378067017, |
|
"learning_rate": 9.703066721922918e-05, |
|
"loss": 1.3466, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.9369345307350159, |
|
"learning_rate": 9.700994612515541e-05, |
|
"loss": 1.3819, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.0450528860092163, |
|
"learning_rate": 9.698922503108165e-05, |
|
"loss": 1.3482, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.0236886739730835, |
|
"learning_rate": 9.696850393700788e-05, |
|
"loss": 1.3468, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.0324066877365112, |
|
"learning_rate": 9.69477828429341e-05, |
|
"loss": 1.3926, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1705087423324585, |
|
"learning_rate": 9.692706174886035e-05, |
|
"loss": 1.3547, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1479854583740234, |
|
"learning_rate": 9.690634065478658e-05, |
|
"loss": 1.3517, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.1700282096862793, |
|
"learning_rate": 9.688561956071281e-05, |
|
"loss": 1.3635, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.079822301864624, |
|
"learning_rate": 9.686489846663905e-05, |
|
"loss": 1.3878, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.188466191291809, |
|
"learning_rate": 9.684417737256528e-05, |
|
"loss": 1.36, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1050995588302612, |
|
"learning_rate": 9.68234562784915e-05, |
|
"loss": 1.3513, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.2480050325393677, |
|
"learning_rate": 9.680273518441774e-05, |
|
"loss": 1.362, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1782851219177246, |
|
"learning_rate": 9.678201409034397e-05, |
|
"loss": 1.378, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.1327308416366577, |
|
"learning_rate": 9.676129299627021e-05, |
|
"loss": 1.3836, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.0974417924880981, |
|
"learning_rate": 9.674057190219644e-05, |
|
"loss": 1.3589, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.1006550788879395, |
|
"learning_rate": 9.671985080812266e-05, |
|
"loss": 1.3734, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.0745372772216797, |
|
"learning_rate": 9.669912971404891e-05, |
|
"loss": 1.4078, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.2572031021118164, |
|
"learning_rate": 9.667840861997513e-05, |
|
"loss": 1.3535, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.065767526626587, |
|
"learning_rate": 9.665768752590137e-05, |
|
"loss": 1.3657, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2773245573043823, |
|
"learning_rate": 9.66369664318276e-05, |
|
"loss": 1.3813, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.0642096996307373, |
|
"learning_rate": 9.661624533775384e-05, |
|
"loss": 1.3829, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.1348739862442017, |
|
"learning_rate": 9.659552424368008e-05, |
|
"loss": 1.3864, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.136107087135315, |
|
"learning_rate": 9.657480314960631e-05, |
|
"loss": 1.3523, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.1533474922180176, |
|
"learning_rate": 9.655408205553253e-05, |
|
"loss": 1.3669, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.1027289628982544, |
|
"learning_rate": 9.653336096145878e-05, |
|
"loss": 1.3256, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.9988449811935425, |
|
"learning_rate": 9.6512639867385e-05, |
|
"loss": 1.4024, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.2975176572799683, |
|
"learning_rate": 9.649191877331124e-05, |
|
"loss": 1.3751, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.2186543941497803, |
|
"learning_rate": 9.647119767923747e-05, |
|
"loss": 1.3444, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.1342490911483765, |
|
"learning_rate": 9.64504765851637e-05, |
|
"loss": 1.3449, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.148695707321167, |
|
"learning_rate": 9.642975549108993e-05, |
|
"loss": 1.3325, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.9545331001281738, |
|
"learning_rate": 9.640903439701616e-05, |
|
"loss": 1.3375, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.0941437482833862, |
|
"learning_rate": 9.63883133029424e-05, |
|
"loss": 1.3671, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.0803030729293823, |
|
"learning_rate": 9.636759220886863e-05, |
|
"loss": 1.3648, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.0937373638153076, |
|
"learning_rate": 9.634687111479487e-05, |
|
"loss": 1.3518, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1884483098983765, |
|
"learning_rate": 9.632615002072109e-05, |
|
"loss": 1.3461, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.1179327964782715, |
|
"learning_rate": 9.630542892664734e-05, |
|
"loss": 1.3765, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.9843894839286804, |
|
"learning_rate": 9.628470783257356e-05, |
|
"loss": 1.3379, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.0279515981674194, |
|
"learning_rate": 9.62639867384998e-05, |
|
"loss": 1.3389, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.0797231197357178, |
|
"learning_rate": 9.624326564442603e-05, |
|
"loss": 1.346, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.1976298093795776, |
|
"learning_rate": 9.622254455035227e-05, |
|
"loss": 1.3366, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.057880163192749, |
|
"learning_rate": 9.620182345627849e-05, |
|
"loss": 1.3264, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.1059492826461792, |
|
"learning_rate": 9.618110236220474e-05, |
|
"loss": 1.3446, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0970298051834106, |
|
"learning_rate": 9.616038126813096e-05, |
|
"loss": 1.3521, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0951462984085083, |
|
"learning_rate": 9.61396601740572e-05, |
|
"loss": 1.3669, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0926049947738647, |
|
"learning_rate": 9.611893907998343e-05, |
|
"loss": 1.354, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0136979818344116, |
|
"learning_rate": 9.609821798590965e-05, |
|
"loss": 1.3321, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.129214882850647, |
|
"learning_rate": 9.60774968918359e-05, |
|
"loss": 1.382, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.1166954040527344, |
|
"learning_rate": 9.605677579776212e-05, |
|
"loss": 1.3337, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.204231858253479, |
|
"learning_rate": 9.603605470368836e-05, |
|
"loss": 1.3642, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.0265048742294312, |
|
"learning_rate": 9.601533360961459e-05, |
|
"loss": 1.3662, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.0513389110565186, |
|
"learning_rate": 9.599461251554083e-05, |
|
"loss": 1.3395, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.17727792263031, |
|
"learning_rate": 9.597389142146705e-05, |
|
"loss": 1.3738, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.0676214694976807, |
|
"learning_rate": 9.59531703273933e-05, |
|
"loss": 1.3383, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.9273681640625, |
|
"learning_rate": 9.593244923331952e-05, |
|
"loss": 1.367, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.0774747133255005, |
|
"learning_rate": 9.591172813924575e-05, |
|
"loss": 1.3369, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.131264090538025, |
|
"learning_rate": 9.589100704517199e-05, |
|
"loss": 1.3457, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.106242060661316, |
|
"learning_rate": 9.587028595109822e-05, |
|
"loss": 1.321, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.054598331451416, |
|
"learning_rate": 9.584956485702446e-05, |
|
"loss": 1.3424, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.0380080938339233, |
|
"learning_rate": 9.58288437629507e-05, |
|
"loss": 1.3425, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1068315505981445, |
|
"learning_rate": 9.580812266887692e-05, |
|
"loss": 1.321, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.1228212118148804, |
|
"learning_rate": 9.578740157480316e-05, |
|
"loss": 1.3301, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.9643247127532959, |
|
"learning_rate": 9.576668048072939e-05, |
|
"loss": 1.3403, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.9587458372116089, |
|
"learning_rate": 9.574595938665562e-05, |
|
"loss": 1.3402, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.0192015171051025, |
|
"learning_rate": 9.572523829258186e-05, |
|
"loss": 1.3595, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.1033486127853394, |
|
"learning_rate": 9.570451719850808e-05, |
|
"loss": 1.3515, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.244828462600708, |
|
"learning_rate": 9.568379610443431e-05, |
|
"loss": 1.3148, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.031778335571289, |
|
"learning_rate": 9.566307501036055e-05, |
|
"loss": 1.3343, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.0581692457199097, |
|
"learning_rate": 9.564235391628678e-05, |
|
"loss": 1.3352, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.9989519119262695, |
|
"learning_rate": 9.562163282221302e-05, |
|
"loss": 1.3206, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1149669885635376, |
|
"learning_rate": 9.560091172813925e-05, |
|
"loss": 1.3355, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1359626054763794, |
|
"learning_rate": 9.558019063406548e-05, |
|
"loss": 1.3233, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.1091575622558594, |
|
"learning_rate": 9.555946953999172e-05, |
|
"loss": 1.3678, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.0405771732330322, |
|
"learning_rate": 9.553874844591795e-05, |
|
"loss": 1.3555, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.068385124206543, |
|
"learning_rate": 9.551802735184418e-05, |
|
"loss": 1.346, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.0115128755569458, |
|
"learning_rate": 9.549730625777042e-05, |
|
"loss": 1.3448, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.026138424873352, |
|
"learning_rate": 9.547658516369665e-05, |
|
"loss": 1.3286, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.375127911567688, |
|
"learning_rate": 9.545586406962289e-05, |
|
"loss": 1.3931, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2297391891479492, |
|
"learning_rate": 9.543514297554912e-05, |
|
"loss": 1.3223, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.143249750137329, |
|
"learning_rate": 9.541442188147534e-05, |
|
"loss": 1.3142, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1182348728179932, |
|
"learning_rate": 9.539370078740158e-05, |
|
"loss": 1.3414, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.0450687408447266, |
|
"learning_rate": 9.537297969332781e-05, |
|
"loss": 1.3119, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.105624794960022, |
|
"learning_rate": 9.535225859925403e-05, |
|
"loss": 1.3275, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1117305755615234, |
|
"learning_rate": 9.533153750518028e-05, |
|
"loss": 1.3384, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.122660756111145, |
|
"learning_rate": 9.53108164111065e-05, |
|
"loss": 1.3509, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.994361937046051, |
|
"learning_rate": 9.529009531703274e-05, |
|
"loss": 1.3638, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.1339287757873535, |
|
"learning_rate": 9.526937422295898e-05, |
|
"loss": 1.3282, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0273313522338867, |
|
"learning_rate": 9.524865312888521e-05, |
|
"loss": 1.3261, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.067122220993042, |
|
"learning_rate": 9.522793203481145e-05, |
|
"loss": 1.3502, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.9780186414718628, |
|
"learning_rate": 9.520721094073768e-05, |
|
"loss": 1.3209, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0634074211120605, |
|
"learning_rate": 9.51864898466639e-05, |
|
"loss": 1.3508, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0088226795196533, |
|
"learning_rate": 9.516576875259015e-05, |
|
"loss": 1.2848, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.156569242477417, |
|
"learning_rate": 9.514504765851637e-05, |
|
"loss": 1.3336, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.9981438517570496, |
|
"learning_rate": 9.512432656444261e-05, |
|
"loss": 1.3237, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0465401411056519, |
|
"learning_rate": 9.510360547036884e-05, |
|
"loss": 1.3347, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.282174825668335, |
|
"learning_rate": 9.508288437629508e-05, |
|
"loss": 1.3234, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0925480127334595, |
|
"learning_rate": 9.50621632822213e-05, |
|
"loss": 1.3604, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0559757947921753, |
|
"learning_rate": 9.504144218814753e-05, |
|
"loss": 1.3411, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0160987377166748, |
|
"learning_rate": 9.502072109407377e-05, |
|
"loss": 1.3299, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0814076662063599, |
|
"learning_rate": 9.5e-05, |
|
"loss": 1.3053, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.1541906595230103, |
|
"learning_rate": 9.497927890592624e-05, |
|
"loss": 1.3368, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.0476430654525757, |
|
"learning_rate": 9.495855781185246e-05, |
|
"loss": 1.3266, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.0859614610671997, |
|
"learning_rate": 9.493783671777871e-05, |
|
"loss": 1.3077, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.047561526298523, |
|
"learning_rate": 9.491711562370493e-05, |
|
"loss": 1.301, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1071749925613403, |
|
"learning_rate": 9.489639452963117e-05, |
|
"loss": 1.3069, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.0207133293151855, |
|
"learning_rate": 9.48756734355574e-05, |
|
"loss": 1.3128, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.1883114576339722, |
|
"learning_rate": 9.485495234148364e-05, |
|
"loss": 1.2987, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.1708128452301025, |
|
"learning_rate": 9.483423124740986e-05, |
|
"loss": 1.3386, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.0731940269470215, |
|
"learning_rate": 9.481351015333611e-05, |
|
"loss": 1.3165, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.02231764793396, |
|
"learning_rate": 9.479278905926233e-05, |
|
"loss": 1.3364, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.9825921654701233, |
|
"learning_rate": 9.477206796518856e-05, |
|
"loss": 1.3078, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.1280665397644043, |
|
"learning_rate": 9.47513468711148e-05, |
|
"loss": 1.3359, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.9910861253738403, |
|
"learning_rate": 9.473062577704103e-05, |
|
"loss": 1.3361, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.0153850317001343, |
|
"learning_rate": 9.470990468296727e-05, |
|
"loss": 1.3059, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.01111900806427, |
|
"learning_rate": 9.468918358889349e-05, |
|
"loss": 1.3226, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.0714573860168457, |
|
"learning_rate": 9.466846249481973e-05, |
|
"loss": 1.3195, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.0012733936309814, |
|
"learning_rate": 9.464774140074596e-05, |
|
"loss": 1.2948, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.9637882709503174, |
|
"learning_rate": 9.46270203066722e-05, |
|
"loss": 1.3208, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.0453296899795532, |
|
"learning_rate": 9.460629921259843e-05, |
|
"loss": 1.3095, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.0107698440551758, |
|
"learning_rate": 9.458557811852467e-05, |
|
"loss": 1.3164, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.1132638454437256, |
|
"learning_rate": 9.456485702445089e-05, |
|
"loss": 1.3162, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0389189720153809, |
|
"learning_rate": 9.454413593037714e-05, |
|
"loss": 1.317, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0654906034469604, |
|
"learning_rate": 9.452341483630336e-05, |
|
"loss": 1.305, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.2564867734909058, |
|
"learning_rate": 9.45026937422296e-05, |
|
"loss": 1.3301, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0308964252471924, |
|
"learning_rate": 9.448197264815583e-05, |
|
"loss": 1.334, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.0542854070663452, |
|
"learning_rate": 9.446125155408206e-05, |
|
"loss": 1.3001, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.2161365747451782, |
|
"learning_rate": 9.444053046000829e-05, |
|
"loss": 1.2985, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.206581473350525, |
|
"learning_rate": 9.441980936593454e-05, |
|
"loss": 1.3177, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.1631922721862793, |
|
"learning_rate": 9.439908827186076e-05, |
|
"loss": 1.3269, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.9827607274055481, |
|
"learning_rate": 9.437836717778699e-05, |
|
"loss": 1.3228, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.0078628063201904, |
|
"learning_rate": 9.435764608371323e-05, |
|
"loss": 1.3047, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.1704260110855103, |
|
"learning_rate": 9.433692498963945e-05, |
|
"loss": 1.3074, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.075964093208313, |
|
"learning_rate": 9.43162038955657e-05, |
|
"loss": 1.3252, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9463378190994263, |
|
"learning_rate": 9.429548280149192e-05, |
|
"loss": 1.3201, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.01523756980896, |
|
"learning_rate": 9.427476170741815e-05, |
|
"loss": 1.3127, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9392449259757996, |
|
"learning_rate": 9.425404061334439e-05, |
|
"loss": 1.3254, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.01919424533844, |
|
"learning_rate": 9.423331951927062e-05, |
|
"loss": 1.3021, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.1243764162063599, |
|
"learning_rate": 9.421259842519685e-05, |
|
"loss": 1.3112, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.0084974765777588, |
|
"learning_rate": 9.41918773311231e-05, |
|
"loss": 1.3173, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.9945486783981323, |
|
"learning_rate": 9.417115623704932e-05, |
|
"loss": 1.3114, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.1148301362991333, |
|
"learning_rate": 9.415043514297555e-05, |
|
"loss": 1.3275, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.2701823711395264, |
|
"learning_rate": 9.412971404890179e-05, |
|
"loss": 1.3094, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.1923747062683105, |
|
"learning_rate": 9.410899295482802e-05, |
|
"loss": 1.2812, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.2106274366378784, |
|
"learning_rate": 9.408827186075426e-05, |
|
"loss": 1.3011, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.0127681493759155, |
|
"learning_rate": 9.406755076668049e-05, |
|
"loss": 1.3059, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.042222499847412, |
|
"learning_rate": 9.404682967260671e-05, |
|
"loss": 1.2961, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.9650092124938965, |
|
"learning_rate": 9.402610857853296e-05, |
|
"loss": 1.3264, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0504155158996582, |
|
"learning_rate": 9.400538748445918e-05, |
|
"loss": 1.2853, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.0501419305801392, |
|
"learning_rate": 9.39846663903854e-05, |
|
"loss": 1.3179, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.056299090385437, |
|
"learning_rate": 9.396394529631165e-05, |
|
"loss": 1.2962, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0278836488723755, |
|
"learning_rate": 9.394322420223788e-05, |
|
"loss": 1.2828, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.9813990592956543, |
|
"learning_rate": 9.392250310816411e-05, |
|
"loss": 1.2986, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0665332078933716, |
|
"learning_rate": 9.390178201409035e-05, |
|
"loss": 1.2891, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0281347036361694, |
|
"learning_rate": 9.388106092001658e-05, |
|
"loss": 1.299, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.0530226230621338, |
|
"learning_rate": 9.386033982594282e-05, |
|
"loss": 1.2887, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0053261518478394, |
|
"learning_rate": 9.383961873186905e-05, |
|
"loss": 1.327, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.1362097263336182, |
|
"learning_rate": 9.381889763779527e-05, |
|
"loss": 1.3001, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0610814094543457, |
|
"learning_rate": 9.379817654372152e-05, |
|
"loss": 1.2535, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.9906120300292969, |
|
"learning_rate": 9.377745544964774e-05, |
|
"loss": 1.291, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.0676803588867188, |
|
"learning_rate": 9.375673435557398e-05, |
|
"loss": 1.3032, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.056851863861084, |
|
"learning_rate": 9.373601326150021e-05, |
|
"loss": 1.2879, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.048841118812561, |
|
"learning_rate": 9.371529216742645e-05, |
|
"loss": 1.2798, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.047361969947815, |
|
"learning_rate": 9.369457107335268e-05, |
|
"loss": 1.3195, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.076904296875, |
|
"learning_rate": 9.367384997927892e-05, |
|
"loss": 1.3013, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.0863533020019531, |
|
"learning_rate": 9.365312888520514e-05, |
|
"loss": 1.2971, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.0460786819458008, |
|
"learning_rate": 9.363240779113138e-05, |
|
"loss": 1.3023, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.906493604183197, |
|
"learning_rate": 9.361168669705761e-05, |
|
"loss": 1.3053, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.1181541681289673, |
|
"learning_rate": 9.359096560298383e-05, |
|
"loss": 1.3142, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.0198432207107544, |
|
"learning_rate": 9.357024450891008e-05, |
|
"loss": 1.293, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.0075292587280273, |
|
"learning_rate": 9.35495234148363e-05, |
|
"loss": 1.299, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.024592399597168, |
|
"learning_rate": 9.352880232076254e-05, |
|
"loss": 1.2983, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.9931455254554749, |
|
"learning_rate": 9.350808122668877e-05, |
|
"loss": 1.279, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.0673152208328247, |
|
"learning_rate": 9.348736013261501e-05, |
|
"loss": 1.2816, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.068587303161621, |
|
"learning_rate": 9.346663903854124e-05, |
|
"loss": 1.2934, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.9838789701461792, |
|
"learning_rate": 9.344591794446748e-05, |
|
"loss": 1.2917, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.0613404512405396, |
|
"learning_rate": 9.34251968503937e-05, |
|
"loss": 1.2879, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.0173070430755615, |
|
"learning_rate": 9.340447575631995e-05, |
|
"loss": 1.2966, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.1227622032165527, |
|
"learning_rate": 9.338375466224617e-05, |
|
"loss": 1.2554, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.007338523864746, |
|
"learning_rate": 9.33630335681724e-05, |
|
"loss": 1.3115, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0479813814163208, |
|
"learning_rate": 9.334231247409864e-05, |
|
"loss": 1.3048, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0560479164123535, |
|
"learning_rate": 9.332159138002486e-05, |
|
"loss": 1.2919, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.1081204414367676, |
|
"learning_rate": 9.33008702859511e-05, |
|
"loss": 1.2967, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.0260145664215088, |
|
"learning_rate": 9.328014919187733e-05, |
|
"loss": 1.3195, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.057966947555542, |
|
"learning_rate": 9.325942809780357e-05, |
|
"loss": 1.2896, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.0711556673049927, |
|
"learning_rate": 9.32387070037298e-05, |
|
"loss": 1.2817, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.0118924379348755, |
|
"learning_rate": 9.321798590965604e-05, |
|
"loss": 1.3052, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.0227614641189575, |
|
"learning_rate": 9.319726481558226e-05, |
|
"loss": 1.3186, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.0655134916305542, |
|
"learning_rate": 9.317654372150851e-05, |
|
"loss": 1.3087, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.1255359649658203, |
|
"learning_rate": 9.315582262743473e-05, |
|
"loss": 1.2749, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.0832923650741577, |
|
"learning_rate": 9.313510153336096e-05, |
|
"loss": 1.2892, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.067236304283142, |
|
"learning_rate": 9.31143804392872e-05, |
|
"loss": 1.284, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.1556322574615479, |
|
"learning_rate": 9.309365934521344e-05, |
|
"loss": 1.2604, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.151723861694336, |
|
"learning_rate": 9.307293825113966e-05, |
|
"loss": 1.3045, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.0258938074111938, |
|
"learning_rate": 9.30522171570659e-05, |
|
"loss": 1.2859, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0165237188339233, |
|
"learning_rate": 9.303149606299213e-05, |
|
"loss": 1.3212, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.9969586133956909, |
|
"learning_rate": 9.301077496891836e-05, |
|
"loss": 1.3038, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.1335457563400269, |
|
"learning_rate": 9.29900538748446e-05, |
|
"loss": 1.2747, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.0744903087615967, |
|
"learning_rate": 9.296933278077082e-05, |
|
"loss": 1.3078, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.2294646501541138, |
|
"learning_rate": 9.294861168669707e-05, |
|
"loss": 1.2631, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.0542582273483276, |
|
"learning_rate": 9.292789059262329e-05, |
|
"loss": 1.2778, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.0787122249603271, |
|
"learning_rate": 9.290716949854952e-05, |
|
"loss": 1.2952, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.182387351989746, |
|
"learning_rate": 9.288644840447576e-05, |
|
"loss": 1.2955, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.0466411113739014, |
|
"learning_rate": 9.2865727310402e-05, |
|
"loss": 1.3085, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.0271363258361816, |
|
"learning_rate": 9.284500621632823e-05, |
|
"loss": 1.2881, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.1320871114730835, |
|
"learning_rate": 9.282428512225446e-05, |
|
"loss": 1.2671, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.1176432371139526, |
|
"learning_rate": 9.280356402818069e-05, |
|
"loss": 1.299, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9895033240318298, |
|
"learning_rate": 9.278284293410694e-05, |
|
"loss": 1.2984, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.191007137298584, |
|
"learning_rate": 9.276212184003316e-05, |
|
"loss": 1.2808, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.0878729820251465, |
|
"learning_rate": 9.274140074595939e-05, |
|
"loss": 1.2864, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.1144053936004639, |
|
"learning_rate": 9.272067965188563e-05, |
|
"loss": 1.3175, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.128405213356018, |
|
"learning_rate": 9.269995855781186e-05, |
|
"loss": 1.3147, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.0539438724517822, |
|
"learning_rate": 9.267923746373808e-05, |
|
"loss": 1.2927, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.0515836477279663, |
|
"learning_rate": 9.265851636966433e-05, |
|
"loss": 1.2743, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1526955366134644, |
|
"learning_rate": 9.263779527559055e-05, |
|
"loss": 1.2911, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.010903000831604, |
|
"learning_rate": 9.261707418151679e-05, |
|
"loss": 1.2735, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.1373246908187866, |
|
"learning_rate": 9.259635308744302e-05, |
|
"loss": 1.2952, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.9458179473876953, |
|
"learning_rate": 9.257563199336925e-05, |
|
"loss": 1.2936, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.949252724647522, |
|
"learning_rate": 9.25549108992955e-05, |
|
"loss": 1.287, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.1074215173721313, |
|
"learning_rate": 9.253418980522172e-05, |
|
"loss": 1.2937, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.995959460735321, |
|
"learning_rate": 9.251346871114795e-05, |
|
"loss": 1.265, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0461138486862183, |
|
"learning_rate": 9.249274761707419e-05, |
|
"loss": 1.2822, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0449700355529785, |
|
"learning_rate": 9.247202652300042e-05, |
|
"loss": 1.2896, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 1.0590384006500244, |
|
"learning_rate": 9.245130542892664e-05, |
|
"loss": 1.2923, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.178272008895874, |
|
"learning_rate": 9.243058433485289e-05, |
|
"loss": 1.2797, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.0651668310165405, |
|
"learning_rate": 9.240986324077911e-05, |
|
"loss": 1.2632, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.0944633483886719, |
|
"learning_rate": 9.238914214670535e-05, |
|
"loss": 1.2853, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.042576551437378, |
|
"learning_rate": 9.236842105263158e-05, |
|
"loss": 1.2884, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.1282780170440674, |
|
"learning_rate": 9.234769995855782e-05, |
|
"loss": 1.2937, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.9996076822280884, |
|
"learning_rate": 9.232697886448405e-05, |
|
"loss": 1.2657, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.9630957245826721, |
|
"learning_rate": 9.230625777041029e-05, |
|
"loss": 1.2679, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.0428036451339722, |
|
"learning_rate": 9.228553667633651e-05, |
|
"loss": 1.2921, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.1940115690231323, |
|
"learning_rate": 9.226481558226275e-05, |
|
"loss": 1.2759, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.1081199645996094, |
|
"learning_rate": 9.224409448818898e-05, |
|
"loss": 1.2636, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.989032506942749, |
|
"learning_rate": 9.22233733941152e-05, |
|
"loss": 1.2489, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.0575727224349976, |
|
"learning_rate": 9.220265230004145e-05, |
|
"loss": 1.2777, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.0007938146591187, |
|
"learning_rate": 9.218193120596767e-05, |
|
"loss": 1.286, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.0560977458953857, |
|
"learning_rate": 9.216121011189391e-05, |
|
"loss": 1.2939, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.308013916015625, |
|
"learning_rate": 9.214048901782014e-05, |
|
"loss": 1.2728, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.2011935710906982, |
|
"learning_rate": 9.211976792374638e-05, |
|
"loss": 1.2853, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.0514850616455078, |
|
"learning_rate": 9.209904682967261e-05, |
|
"loss": 1.3102, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.0865683555603027, |
|
"learning_rate": 9.207832573559885e-05, |
|
"loss": 1.2835, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.2012451887130737, |
|
"learning_rate": 9.205760464152507e-05, |
|
"loss": 1.2801, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0710102319717407, |
|
"learning_rate": 9.203688354745132e-05, |
|
"loss": 1.2745, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0107301473617554, |
|
"learning_rate": 9.201616245337754e-05, |
|
"loss": 1.2928, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.0026335716247559, |
|
"learning_rate": 9.199544135930378e-05, |
|
"loss": 1.2916, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.9443692564964294, |
|
"learning_rate": 9.197472026523001e-05, |
|
"loss": 1.2956, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.9472268223762512, |
|
"learning_rate": 9.195399917115625e-05, |
|
"loss": 1.2875, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0817506313323975, |
|
"learning_rate": 9.193327807708247e-05, |
|
"loss": 1.2779, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0539813041687012, |
|
"learning_rate": 9.19125569830087e-05, |
|
"loss": 1.2661, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.9975004196166992, |
|
"learning_rate": 9.189183588893494e-05, |
|
"loss": 1.2499, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0313421487808228, |
|
"learning_rate": 9.187111479486117e-05, |
|
"loss": 1.2907, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.0273147821426392, |
|
"learning_rate": 9.185039370078741e-05, |
|
"loss": 1.2929, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.9810879230499268, |
|
"learning_rate": 9.182967260671363e-05, |
|
"loss": 1.2974, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0243279933929443, |
|
"learning_rate": 9.180895151263988e-05, |
|
"loss": 1.2406, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0115349292755127, |
|
"learning_rate": 9.17882304185661e-05, |
|
"loss": 1.2765, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.1206727027893066, |
|
"learning_rate": 9.176750932449234e-05, |
|
"loss": 1.2956, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.99837327003479, |
|
"learning_rate": 9.174678823041857e-05, |
|
"loss": 1.2614, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0117827653884888, |
|
"learning_rate": 9.17260671363448e-05, |
|
"loss": 1.2611, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.1119413375854492, |
|
"learning_rate": 9.170534604227104e-05, |
|
"loss": 1.3006, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0567435026168823, |
|
"learning_rate": 9.168462494819728e-05, |
|
"loss": 1.2544, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.1326267719268799, |
|
"learning_rate": 9.16639038541235e-05, |
|
"loss": 1.2895, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 1.0858250856399536, |
|
"learning_rate": 9.164318276004975e-05, |
|
"loss": 1.2672, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.1111207008361816, |
|
"learning_rate": 9.162246166597597e-05, |
|
"loss": 1.255, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.0791577100753784, |
|
"learning_rate": 9.16017405719022e-05, |
|
"loss": 1.2865, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.1741734743118286, |
|
"learning_rate": 9.158101947782844e-05, |
|
"loss": 1.2898, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.9900088310241699, |
|
"learning_rate": 9.156029838375466e-05, |
|
"loss": 1.26, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.2227307558059692, |
|
"learning_rate": 9.15395772896809e-05, |
|
"loss": 1.2818, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0696605443954468, |
|
"learning_rate": 9.151885619560713e-05, |
|
"loss": 1.2739, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.145063042640686, |
|
"learning_rate": 9.149813510153336e-05, |
|
"loss": 1.2886, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 1.0415133237838745, |
|
"learning_rate": 9.14774140074596e-05, |
|
"loss": 1.2999, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.9151254296302795, |
|
"learning_rate": 9.145669291338584e-05, |
|
"loss": 1.2883, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.1220918893814087, |
|
"learning_rate": 9.143597181931206e-05, |
|
"loss": 1.2751, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.0417348146438599, |
|
"learning_rate": 9.14152507252383e-05, |
|
"loss": 1.2675, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.1090322732925415, |
|
"learning_rate": 9.139452963116453e-05, |
|
"loss": 1.2521, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.003110408782959, |
|
"learning_rate": 9.137380853709076e-05, |
|
"loss": 1.2461, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.0214240550994873, |
|
"learning_rate": 9.1353087443017e-05, |
|
"loss": 1.3003, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.0389635562896729, |
|
"learning_rate": 9.133236634894323e-05, |
|
"loss": 1.2642, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.1644842624664307, |
|
"learning_rate": 9.131164525486945e-05, |
|
"loss": 1.247, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.0494420528411865, |
|
"learning_rate": 9.12909241607957e-05, |
|
"loss": 1.2727, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.1759871244430542, |
|
"learning_rate": 9.127020306672192e-05, |
|
"loss": 1.2569, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.0006252527236938, |
|
"learning_rate": 9.124948197264816e-05, |
|
"loss": 1.2761, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.0942422151565552, |
|
"learning_rate": 9.12287608785744e-05, |
|
"loss": 1.2778, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1273877620697021, |
|
"learning_rate": 9.120803978450062e-05, |
|
"loss": 1.2738, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.0103455781936646, |
|
"learning_rate": 9.118731869042686e-05, |
|
"loss": 1.2559, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.0412319898605347, |
|
"learning_rate": 9.116659759635309e-05, |
|
"loss": 1.2839, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.1623831987380981, |
|
"learning_rate": 9.114587650227932e-05, |
|
"loss": 1.2684, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.213977336883545, |
|
"learning_rate": 9.112515540820556e-05, |
|
"loss": 1.2587, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.1630234718322754, |
|
"learning_rate": 9.110443431413179e-05, |
|
"loss": 1.2557, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0047425031661987, |
|
"learning_rate": 9.108371322005801e-05, |
|
"loss": 1.2785, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0434467792510986, |
|
"learning_rate": 9.106299212598426e-05, |
|
"loss": 1.2916, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.0278736352920532, |
|
"learning_rate": 9.104227103191048e-05, |
|
"loss": 1.2494, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.9865803122520447, |
|
"learning_rate": 9.102154993783672e-05, |
|
"loss": 1.2487, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.0419522523880005, |
|
"learning_rate": 9.100082884376295e-05, |
|
"loss": 1.2618, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.1537505388259888, |
|
"learning_rate": 9.098010774968919e-05, |
|
"loss": 1.2613, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.0565922260284424, |
|
"learning_rate": 9.095938665561542e-05, |
|
"loss": 1.2592, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.106313705444336, |
|
"learning_rate": 9.093866556154166e-05, |
|
"loss": 1.2585, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.0392132997512817, |
|
"learning_rate": 9.091794446746788e-05, |
|
"loss": 1.2626, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.1017506122589111, |
|
"learning_rate": 9.089722337339413e-05, |
|
"loss": 1.2696, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.134838581085205, |
|
"learning_rate": 9.087650227932035e-05, |
|
"loss": 1.2626, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.3448967933654785, |
|
"learning_rate": 9.085578118524659e-05, |
|
"loss": 1.2728, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.9399920701980591, |
|
"learning_rate": 9.083506009117282e-05, |
|
"loss": 1.253, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0530354976654053, |
|
"learning_rate": 9.081433899709904e-05, |
|
"loss": 1.2501, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.0609047412872314, |
|
"learning_rate": 9.079361790302529e-05, |
|
"loss": 1.2599, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.120044469833374, |
|
"learning_rate": 9.077289680895151e-05, |
|
"loss": 1.2511, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.068034291267395, |
|
"learning_rate": 9.075217571487775e-05, |
|
"loss": 1.2623, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.9914749264717102, |
|
"learning_rate": 9.073145462080398e-05, |
|
"loss": 1.2754, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.084227204322815, |
|
"learning_rate": 9.071073352673022e-05, |
|
"loss": 1.2861, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.9811500906944275, |
|
"learning_rate": 9.069001243265644e-05, |
|
"loss": 1.2361, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.0811760425567627, |
|
"learning_rate": 9.066929133858269e-05, |
|
"loss": 1.247, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.0787228345870972, |
|
"learning_rate": 9.064857024450891e-05, |
|
"loss": 1.2609, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.0164875984191895, |
|
"learning_rate": 9.062784915043515e-05, |
|
"loss": 1.289, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.1449209451675415, |
|
"learning_rate": 9.060712805636138e-05, |
|
"loss": 1.269, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.2805284261703491, |
|
"learning_rate": 9.058640696228762e-05, |
|
"loss": 1.2803, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.03864586353302, |
|
"learning_rate": 9.056568586821385e-05, |
|
"loss": 1.2635, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.9715671539306641, |
|
"learning_rate": 9.054496477414009e-05, |
|
"loss": 1.2232, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1221860647201538, |
|
"learning_rate": 9.052424368006631e-05, |
|
"loss": 1.2536, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.9467464089393616, |
|
"learning_rate": 9.050352258599254e-05, |
|
"loss": 1.2529, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1306428909301758, |
|
"learning_rate": 9.048280149191878e-05, |
|
"loss": 1.2621, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.038275122642517, |
|
"learning_rate": 9.0462080397845e-05, |
|
"loss": 1.2785, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.1947646141052246, |
|
"learning_rate": 9.044135930377125e-05, |
|
"loss": 1.2643, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0585174560546875, |
|
"learning_rate": 9.042063820969747e-05, |
|
"loss": 1.248, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1931108236312866, |
|
"learning_rate": 9.03999171156237e-05, |
|
"loss": 1.2884, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.280229926109314, |
|
"eval_runtime": 1606.712, |
|
"eval_samples_per_second": 262.53, |
|
"eval_steps_per_second": 4.102, |
|
"step": 4846 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0185681581497192, |
|
"learning_rate": 9.037919602154994e-05, |
|
"loss": 1.2701, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.2571942806243896, |
|
"learning_rate": 9.035847492747618e-05, |
|
"loss": 1.2309, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1584192514419556, |
|
"learning_rate": 9.033775383340241e-05, |
|
"loss": 1.2859, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.05573570728302, |
|
"learning_rate": 9.031703273932865e-05, |
|
"loss": 1.2502, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.0500950813293457, |
|
"learning_rate": 9.029631164525487e-05, |
|
"loss": 1.2288, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.0603646039962769, |
|
"learning_rate": 9.027559055118112e-05, |
|
"loss": 1.2609, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.085644245147705, |
|
"learning_rate": 9.025486945710734e-05, |
|
"loss": 1.2392, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.0088363885879517, |
|
"learning_rate": 9.023414836303357e-05, |
|
"loss": 1.2361, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.0100585222244263, |
|
"learning_rate": 9.021342726895981e-05, |
|
"loss": 1.2382, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.067159652709961, |
|
"learning_rate": 9.019270617488604e-05, |
|
"loss": 1.2433, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.0431448221206665, |
|
"learning_rate": 9.017198508081226e-05, |
|
"loss": 1.2461, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.3945410251617432, |
|
"learning_rate": 9.01512639867385e-05, |
|
"loss": 1.2406, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.0631392002105713, |
|
"learning_rate": 9.013054289266474e-05, |
|
"loss": 1.2265, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.0823907852172852, |
|
"learning_rate": 9.010982179859097e-05, |
|
"loss": 1.2388, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.1125047206878662, |
|
"learning_rate": 9.00891007045172e-05, |
|
"loss": 1.2553, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.0436159372329712, |
|
"learning_rate": 9.006837961044343e-05, |
|
"loss": 1.2362, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.2972134351730347, |
|
"learning_rate": 9.004765851636968e-05, |
|
"loss": 1.2387, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.0587375164031982, |
|
"learning_rate": 9.00269374222959e-05, |
|
"loss": 1.2348, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.0204670429229736, |
|
"learning_rate": 9.000621632822213e-05, |
|
"loss": 1.2391, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.0182541608810425, |
|
"learning_rate": 8.998549523414837e-05, |
|
"loss": 1.2337, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.9534347057342529, |
|
"learning_rate": 8.99647741400746e-05, |
|
"loss": 1.2403, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.1534489393234253, |
|
"learning_rate": 8.994405304600084e-05, |
|
"loss": 1.251, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.0630913972854614, |
|
"learning_rate": 8.992333195192707e-05, |
|
"loss": 1.2741, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.0464857816696167, |
|
"learning_rate": 8.99026108578533e-05, |
|
"loss": 1.27, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.1541072130203247, |
|
"learning_rate": 8.988188976377954e-05, |
|
"loss": 1.2183, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 1.007450819015503, |
|
"learning_rate": 8.986116866970576e-05, |
|
"loss": 1.2345, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.984767496585846, |
|
"learning_rate": 8.9840447575632e-05, |
|
"loss": 1.2661, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.2972489595413208, |
|
"learning_rate": 8.981972648155824e-05, |
|
"loss": 1.2577, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.1882805824279785, |
|
"learning_rate": 8.979900538748446e-05, |
|
"loss": 1.2685, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.1580913066864014, |
|
"learning_rate": 8.977828429341069e-05, |
|
"loss": 1.2269, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.0898735523223877, |
|
"learning_rate": 8.975756319933693e-05, |
|
"loss": 1.2315, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.1261813640594482, |
|
"learning_rate": 8.973684210526316e-05, |
|
"loss": 1.2617, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.0266163349151611, |
|
"learning_rate": 8.97161210111894e-05, |
|
"loss": 1.2379, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.0973056554794312, |
|
"learning_rate": 8.969539991711563e-05, |
|
"loss": 1.2527, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.0927435159683228, |
|
"learning_rate": 8.967467882304185e-05, |
|
"loss": 1.2433, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.1209070682525635, |
|
"learning_rate": 8.96539577289681e-05, |
|
"loss": 1.2512, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.222163200378418, |
|
"learning_rate": 8.963323663489432e-05, |
|
"loss": 1.2377, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.1234538555145264, |
|
"learning_rate": 8.961251554082056e-05, |
|
"loss": 1.2409, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.1121318340301514, |
|
"learning_rate": 8.95917944467468e-05, |
|
"loss": 1.2402, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.0124129056930542, |
|
"learning_rate": 8.957107335267303e-05, |
|
"loss": 1.2609, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.0647163391113281, |
|
"learning_rate": 8.955035225859925e-05, |
|
"loss": 1.2144, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.0653977394104004, |
|
"learning_rate": 8.95296311645255e-05, |
|
"loss": 1.2337, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.0555377006530762, |
|
"learning_rate": 8.950891007045172e-05, |
|
"loss": 1.2121, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.0859911441802979, |
|
"learning_rate": 8.948818897637796e-05, |
|
"loss": 1.232, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.0167226791381836, |
|
"learning_rate": 8.946746788230419e-05, |
|
"loss": 1.2252, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 1.0780484676361084, |
|
"learning_rate": 8.944674678823041e-05, |
|
"loss": 1.2537, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.0674623250961304, |
|
"learning_rate": 8.942602569415666e-05, |
|
"loss": 1.2511, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.1867153644561768, |
|
"learning_rate": 8.940530460008288e-05, |
|
"loss": 1.2406, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.0617753267288208, |
|
"learning_rate": 8.938458350600912e-05, |
|
"loss": 1.2648, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.1268622875213623, |
|
"learning_rate": 8.936386241193535e-05, |
|
"loss": 1.2461, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.06646728515625, |
|
"learning_rate": 8.934314131786159e-05, |
|
"loss": 1.235, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.0061641931533813, |
|
"learning_rate": 8.932242022378781e-05, |
|
"loss": 1.2256, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.1114619970321655, |
|
"learning_rate": 8.930169912971406e-05, |
|
"loss": 1.2481, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.121626615524292, |
|
"learning_rate": 8.928097803564028e-05, |
|
"loss": 1.2592, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.0986332893371582, |
|
"learning_rate": 8.926025694156652e-05, |
|
"loss": 1.2397, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 1.055759072303772, |
|
"learning_rate": 8.923953584749275e-05, |
|
"loss": 1.2468, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.035019874572754, |
|
"learning_rate": 8.921881475341899e-05, |
|
"loss": 1.2455, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.0355908870697021, |
|
"learning_rate": 8.919809365934522e-05, |
|
"loss": 1.2584, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.0616044998168945, |
|
"learning_rate": 8.917737256527146e-05, |
|
"loss": 1.2472, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.0461037158966064, |
|
"learning_rate": 8.915665147119768e-05, |
|
"loss": 1.2531, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.2218214273452759, |
|
"learning_rate": 8.913593037712393e-05, |
|
"loss": 1.2486, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.0007187128067017, |
|
"learning_rate": 8.911520928305015e-05, |
|
"loss": 1.2464, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.056276798248291, |
|
"learning_rate": 8.909448818897638e-05, |
|
"loss": 1.2375, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.0595409870147705, |
|
"learning_rate": 8.907376709490262e-05, |
|
"loss": 1.2645, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.1598786115646362, |
|
"learning_rate": 8.905304600082884e-05, |
|
"loss": 1.2502, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.0664838552474976, |
|
"learning_rate": 8.903232490675508e-05, |
|
"loss": 1.2712, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.0016567707061768, |
|
"learning_rate": 8.901160381268131e-05, |
|
"loss": 1.2386, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.0359711647033691, |
|
"learning_rate": 8.899088271860755e-05, |
|
"loss": 1.238, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.1633154153823853, |
|
"learning_rate": 8.897016162453378e-05, |
|
"loss": 1.2139, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.085390567779541, |
|
"learning_rate": 8.894944053046002e-05, |
|
"loss": 1.2564, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.041222333908081, |
|
"learning_rate": 8.892871943638624e-05, |
|
"loss": 1.2361, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.0352637767791748, |
|
"learning_rate": 8.890799834231249e-05, |
|
"loss": 1.2104, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.0879154205322266, |
|
"learning_rate": 8.888727724823871e-05, |
|
"loss": 1.2408, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.177937388420105, |
|
"learning_rate": 8.886655615416494e-05, |
|
"loss": 1.221, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.0638147592544556, |
|
"learning_rate": 8.884583506009118e-05, |
|
"loss": 1.2309, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.0461276769638062, |
|
"learning_rate": 8.882511396601741e-05, |
|
"loss": 1.2498, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.9356592297554016, |
|
"learning_rate": 8.880439287194365e-05, |
|
"loss": 1.2584, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.9423561096191406, |
|
"learning_rate": 8.878367177786988e-05, |
|
"loss": 1.2094, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.1970158815383911, |
|
"learning_rate": 8.87629506837961e-05, |
|
"loss": 1.2134, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.1081819534301758, |
|
"learning_rate": 8.874222958972234e-05, |
|
"loss": 1.2134, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.1248120069503784, |
|
"learning_rate": 8.872150849564858e-05, |
|
"loss": 1.2288, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.0750600099563599, |
|
"learning_rate": 8.87007874015748e-05, |
|
"loss": 1.2191, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.058366060256958, |
|
"learning_rate": 8.868006630750105e-05, |
|
"loss": 1.2274, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.9436173439025879, |
|
"learning_rate": 8.865934521342727e-05, |
|
"loss": 1.2556, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.0624874830245972, |
|
"learning_rate": 8.86386241193535e-05, |
|
"loss": 1.2383, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.0870168209075928, |
|
"learning_rate": 8.861790302527974e-05, |
|
"loss": 1.2432, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.0561186075210571, |
|
"learning_rate": 8.859718193120597e-05, |
|
"loss": 1.2329, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.2139157056808472, |
|
"learning_rate": 8.857646083713221e-05, |
|
"loss": 1.2183, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.0662713050842285, |
|
"learning_rate": 8.855573974305844e-05, |
|
"loss": 1.2371, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.1198335886001587, |
|
"learning_rate": 8.853501864898466e-05, |
|
"loss": 1.2396, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.2191115617752075, |
|
"learning_rate": 8.851429755491091e-05, |
|
"loss": 1.2355, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.039201259613037, |
|
"learning_rate": 8.849357646083714e-05, |
|
"loss": 1.2251, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.9540196061134338, |
|
"learning_rate": 8.847285536676337e-05, |
|
"loss": 1.2424, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.0640240907669067, |
|
"learning_rate": 8.84521342726896e-05, |
|
"loss": 1.2414, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.0465424060821533, |
|
"learning_rate": 8.843141317861584e-05, |
|
"loss": 1.2241, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.1881465911865234, |
|
"learning_rate": 8.841069208454206e-05, |
|
"loss": 1.2182, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.0851510763168335, |
|
"learning_rate": 8.83899709904683e-05, |
|
"loss": 1.2115, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.0743972063064575, |
|
"learning_rate": 8.836924989639453e-05, |
|
"loss": 1.2211, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.049249529838562, |
|
"learning_rate": 8.834852880232077e-05, |
|
"loss": 1.2452, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.0910190343856812, |
|
"learning_rate": 8.8327807708247e-05, |
|
"loss": 1.249, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.0976841449737549, |
|
"learning_rate": 8.830708661417322e-05, |
|
"loss": 1.2123, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.3569914102554321, |
|
"learning_rate": 8.828636552009947e-05, |
|
"loss": 1.2338, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.068427324295044, |
|
"learning_rate": 8.82656444260257e-05, |
|
"loss": 1.2351, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.1309690475463867, |
|
"learning_rate": 8.824492333195193e-05, |
|
"loss": 1.2367, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.1933242082595825, |
|
"learning_rate": 8.822420223787816e-05, |
|
"loss": 1.2261, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.029557466506958, |
|
"learning_rate": 8.82034811438044e-05, |
|
"loss": 1.2499, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.106086015701294, |
|
"learning_rate": 8.818276004973062e-05, |
|
"loss": 1.2236, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.0114145278930664, |
|
"learning_rate": 8.816203895565687e-05, |
|
"loss": 1.2445, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.1829569339752197, |
|
"learning_rate": 8.814131786158309e-05, |
|
"loss": 1.2334, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.1952738761901855, |
|
"learning_rate": 8.812059676750933e-05, |
|
"loss": 1.2402, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.0816442966461182, |
|
"learning_rate": 8.809987567343556e-05, |
|
"loss": 1.252, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.1453193426132202, |
|
"learning_rate": 8.80791545793618e-05, |
|
"loss": 1.233, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.045602798461914, |
|
"learning_rate": 8.805843348528803e-05, |
|
"loss": 1.2195, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 1.0934393405914307, |
|
"learning_rate": 8.803771239121425e-05, |
|
"loss": 1.2177, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.0120600461959839, |
|
"learning_rate": 8.801699129714049e-05, |
|
"loss": 1.2329, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.103251576423645, |
|
"learning_rate": 8.799627020306672e-05, |
|
"loss": 1.2379, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.0402165651321411, |
|
"learning_rate": 8.797554910899296e-05, |
|
"loss": 1.2256, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.0983202457427979, |
|
"learning_rate": 8.79548280149192e-05, |
|
"loss": 1.2296, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.0412105321884155, |
|
"learning_rate": 8.793410692084543e-05, |
|
"loss": 1.2228, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.0473746061325073, |
|
"learning_rate": 8.791338582677165e-05, |
|
"loss": 1.2228, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.004840612411499, |
|
"learning_rate": 8.78926647326979e-05, |
|
"loss": 1.2108, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.1168406009674072, |
|
"learning_rate": 8.787194363862412e-05, |
|
"loss": 1.2104, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.1124876737594604, |
|
"learning_rate": 8.785122254455036e-05, |
|
"loss": 1.2336, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.059415340423584, |
|
"learning_rate": 8.783050145047659e-05, |
|
"loss": 1.2438, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.063344955444336, |
|
"learning_rate": 8.780978035640283e-05, |
|
"loss": 1.2352, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.0552620887756348, |
|
"learning_rate": 8.778905926232905e-05, |
|
"loss": 1.2131, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.1673498153686523, |
|
"learning_rate": 8.77683381682553e-05, |
|
"loss": 1.213, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.1740206480026245, |
|
"learning_rate": 8.774761707418152e-05, |
|
"loss": 1.2162, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.0944995880126953, |
|
"learning_rate": 8.772689598010775e-05, |
|
"loss": 1.2005, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.0152305364608765, |
|
"learning_rate": 8.770617488603399e-05, |
|
"loss": 1.2198, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.1265654563903809, |
|
"learning_rate": 8.768545379196021e-05, |
|
"loss": 1.2362, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.151207447052002, |
|
"learning_rate": 8.766473269788646e-05, |
|
"loss": 1.2561, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.035855770111084, |
|
"learning_rate": 8.764401160381268e-05, |
|
"loss": 1.2391, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.091009497642517, |
|
"learning_rate": 8.762329050973892e-05, |
|
"loss": 1.2342, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.057400107383728, |
|
"learning_rate": 8.760256941566515e-05, |
|
"loss": 1.2216, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.9303261041641235, |
|
"learning_rate": 8.758184832159139e-05, |
|
"loss": 1.2352, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.2061206102371216, |
|
"learning_rate": 8.756112722751761e-05, |
|
"loss": 1.215, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.0886818170547485, |
|
"learning_rate": 8.754040613344386e-05, |
|
"loss": 1.2266, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.279563546180725, |
|
"learning_rate": 8.751968503937008e-05, |
|
"loss": 1.237, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.0063716173171997, |
|
"learning_rate": 8.749896394529631e-05, |
|
"loss": 1.2119, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.0414812564849854, |
|
"learning_rate": 8.747824285122255e-05, |
|
"loss": 1.2177, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.0915932655334473, |
|
"learning_rate": 8.745752175714878e-05, |
|
"loss": 1.2422, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.0544465780258179, |
|
"learning_rate": 8.743680066307502e-05, |
|
"loss": 1.234, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.1502125263214111, |
|
"learning_rate": 8.741607956900125e-05, |
|
"loss": 1.2183, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.3113386631011963, |
|
"learning_rate": 8.739535847492748e-05, |
|
"loss": 1.202, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.087583303451538, |
|
"learning_rate": 8.737463738085371e-05, |
|
"loss": 1.241, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.135565161705017, |
|
"learning_rate": 8.735391628677995e-05, |
|
"loss": 1.2411, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 1.0986896753311157, |
|
"learning_rate": 8.733319519270617e-05, |
|
"loss": 1.2178, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 1.6357513666152954, |
|
"learning_rate": 8.731247409863242e-05, |
|
"loss": 1.2102, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 1.0731899738311768, |
|
"learning_rate": 8.729175300455864e-05, |
|
"loss": 1.213, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 1.1432324647903442, |
|
"learning_rate": 8.727103191048487e-05, |
|
"loss": 1.2395, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 1.0547071695327759, |
|
"learning_rate": 8.725031081641111e-05, |
|
"loss": 1.2121, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 1.1022225618362427, |
|
"learning_rate": 8.722958972233734e-05, |
|
"loss": 1.2274, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.0772980451583862, |
|
"learning_rate": 8.720886862826358e-05, |
|
"loss": 1.2244, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.073470115661621, |
|
"learning_rate": 8.718814753418981e-05, |
|
"loss": 1.2243, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.1750410795211792, |
|
"learning_rate": 8.716742644011604e-05, |
|
"loss": 1.2191, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.206298828125, |
|
"learning_rate": 8.714670534604228e-05, |
|
"loss": 1.2284, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.1253398656845093, |
|
"learning_rate": 8.71259842519685e-05, |
|
"loss": 1.2222, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.1828629970550537, |
|
"learning_rate": 8.710526315789474e-05, |
|
"loss": 1.225, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.0462387800216675, |
|
"learning_rate": 8.708454206382098e-05, |
|
"loss": 1.2156, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.3468637466430664, |
|
"learning_rate": 8.706382096974721e-05, |
|
"loss": 1.2111, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.1359398365020752, |
|
"learning_rate": 8.704309987567345e-05, |
|
"loss": 1.2022, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.0748237371444702, |
|
"learning_rate": 8.702237878159967e-05, |
|
"loss": 1.2227, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.0240176916122437, |
|
"learning_rate": 8.70016576875259e-05, |
|
"loss": 1.2042, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.0575100183486938, |
|
"learning_rate": 8.698093659345214e-05, |
|
"loss": 1.2166, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.0095828771591187, |
|
"learning_rate": 8.696021549937837e-05, |
|
"loss": 1.2352, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.140643835067749, |
|
"learning_rate": 8.69394944053046e-05, |
|
"loss": 1.2143, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.0403310060501099, |
|
"learning_rate": 8.691877331123084e-05, |
|
"loss": 1.2268, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.1176542043685913, |
|
"learning_rate": 8.689805221715706e-05, |
|
"loss": 1.2123, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.041100025177002, |
|
"learning_rate": 8.68773311230833e-05, |
|
"loss": 1.2356, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.9852685928344727, |
|
"learning_rate": 8.685661002900954e-05, |
|
"loss": 1.2043, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.206588864326477, |
|
"learning_rate": 8.683588893493577e-05, |
|
"loss": 1.2132, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.1841477155685425, |
|
"learning_rate": 8.6815167840862e-05, |
|
"loss": 1.241, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.1257410049438477, |
|
"learning_rate": 8.679444674678824e-05, |
|
"loss": 1.2241, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.053404688835144, |
|
"learning_rate": 8.677372565271446e-05, |
|
"loss": 1.2409, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.109271764755249, |
|
"learning_rate": 8.675300455864071e-05, |
|
"loss": 1.2036, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 1.0103741884231567, |
|
"learning_rate": 8.673228346456693e-05, |
|
"loss": 1.2128, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.149715542793274, |
|
"learning_rate": 8.671156237049317e-05, |
|
"loss": 1.208, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.0523351430892944, |
|
"learning_rate": 8.66908412764194e-05, |
|
"loss": 1.2146, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.2250432968139648, |
|
"learning_rate": 8.667012018234562e-05, |
|
"loss": 1.2454, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.9975298643112183, |
|
"learning_rate": 8.664939908827186e-05, |
|
"loss": 1.228, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.0323069095611572, |
|
"learning_rate": 8.66286779941981e-05, |
|
"loss": 1.2223, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.0941072702407837, |
|
"learning_rate": 8.660795690012433e-05, |
|
"loss": 1.2214, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.2442706823349, |
|
"learning_rate": 8.658723580605056e-05, |
|
"loss": 1.2316, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.1723049879074097, |
|
"learning_rate": 8.65665147119768e-05, |
|
"loss": 1.2184, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.1901644468307495, |
|
"learning_rate": 8.654579361790302e-05, |
|
"loss": 1.2382, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.1003718376159668, |
|
"learning_rate": 8.652507252382927e-05, |
|
"loss": 1.2272, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.1174200773239136, |
|
"learning_rate": 8.650435142975549e-05, |
|
"loss": 1.2248, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.2489265203475952, |
|
"learning_rate": 8.648363033568173e-05, |
|
"loss": 1.2252, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.2472732067108154, |
|
"learning_rate": 8.646290924160796e-05, |
|
"loss": 1.227, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.0011487007141113, |
|
"learning_rate": 8.64421881475342e-05, |
|
"loss": 1.2267, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 1.060117244720459, |
|
"learning_rate": 8.642146705346042e-05, |
|
"loss": 1.2263, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.102454423904419, |
|
"learning_rate": 8.640074595938667e-05, |
|
"loss": 1.2205, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.0496042966842651, |
|
"learning_rate": 8.638002486531289e-05, |
|
"loss": 1.2207, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.005552053451538, |
|
"learning_rate": 8.635930377123912e-05, |
|
"loss": 1.1992, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.0828580856323242, |
|
"learning_rate": 8.633858267716536e-05, |
|
"loss": 1.2164, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.0597617626190186, |
|
"learning_rate": 8.631786158309158e-05, |
|
"loss": 1.2081, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.1591441631317139, |
|
"learning_rate": 8.629714048901783e-05, |
|
"loss": 1.2315, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.1969908475875854, |
|
"learning_rate": 8.627641939494405e-05, |
|
"loss": 1.2307, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.1163939237594604, |
|
"learning_rate": 8.625569830087029e-05, |
|
"loss": 1.2272, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 1.047241449356079, |
|
"learning_rate": 8.623497720679652e-05, |
|
"loss": 1.1991, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.0410964488983154, |
|
"learning_rate": 8.621425611272276e-05, |
|
"loss": 1.211, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.0674628019332886, |
|
"learning_rate": 8.619353501864899e-05, |
|
"loss": 1.2195, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.9892363548278809, |
|
"learning_rate": 8.617281392457523e-05, |
|
"loss": 1.2344, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.08130943775177, |
|
"learning_rate": 8.615209283050145e-05, |
|
"loss": 1.2299, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.0285786390304565, |
|
"learning_rate": 8.613137173642768e-05, |
|
"loss": 1.2166, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.0203038454055786, |
|
"learning_rate": 8.611065064235392e-05, |
|
"loss": 1.2043, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.17252516746521, |
|
"learning_rate": 8.608992954828015e-05, |
|
"loss": 1.2278, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.233001708984375, |
|
"learning_rate": 8.606920845420639e-05, |
|
"loss": 1.233, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.0858713388442993, |
|
"learning_rate": 8.604848736013262e-05, |
|
"loss": 1.2174, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.069405198097229, |
|
"learning_rate": 8.602776626605885e-05, |
|
"loss": 1.2176, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.0860111713409424, |
|
"learning_rate": 8.60070451719851e-05, |
|
"loss": 1.2179, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.111624836921692, |
|
"learning_rate": 8.598632407791132e-05, |
|
"loss": 1.2167, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.092925786972046, |
|
"learning_rate": 8.596560298383755e-05, |
|
"loss": 1.2154, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.115460753440857, |
|
"learning_rate": 8.594488188976379e-05, |
|
"loss": 1.2052, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.229777455329895, |
|
"learning_rate": 8.592416079569001e-05, |
|
"loss": 1.1769, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.1182063817977905, |
|
"learning_rate": 8.590343970161626e-05, |
|
"loss": 1.2035, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.0315207242965698, |
|
"learning_rate": 8.588271860754248e-05, |
|
"loss": 1.2158, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.1051239967346191, |
|
"learning_rate": 8.586199751346871e-05, |
|
"loss": 1.1874, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.0437036752700806, |
|
"learning_rate": 8.584127641939495e-05, |
|
"loss": 1.2421, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.1060231924057007, |
|
"learning_rate": 8.582055532532118e-05, |
|
"loss": 1.2392, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.0951759815216064, |
|
"learning_rate": 8.57998342312474e-05, |
|
"loss": 1.2348, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.7351630926132202, |
|
"learning_rate": 8.577911313717365e-05, |
|
"loss": 1.2198, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.3410096168518066, |
|
"learning_rate": 8.575839204309988e-05, |
|
"loss": 1.2069, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.1723905801773071, |
|
"learning_rate": 8.573767094902611e-05, |
|
"loss": 1.2264, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.0400230884552002, |
|
"learning_rate": 8.571694985495235e-05, |
|
"loss": 1.2285, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.1555556058883667, |
|
"learning_rate": 8.569622876087858e-05, |
|
"loss": 1.1982, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.0779943466186523, |
|
"learning_rate": 8.567550766680482e-05, |
|
"loss": 1.2241, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.011435627937317, |
|
"learning_rate": 8.565478657273105e-05, |
|
"loss": 1.1919, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.1413905620574951, |
|
"learning_rate": 8.563406547865727e-05, |
|
"loss": 1.2026, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.305766224861145, |
|
"learning_rate": 8.561334438458351e-05, |
|
"loss": 1.2243, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.1241309642791748, |
|
"learning_rate": 8.559262329050974e-05, |
|
"loss": 1.2305, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.1354045867919922, |
|
"learning_rate": 8.557190219643596e-05, |
|
"loss": 1.2203, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.0294325351715088, |
|
"learning_rate": 8.555118110236221e-05, |
|
"loss": 1.2356, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.1068978309631348, |
|
"learning_rate": 8.553046000828844e-05, |
|
"loss": 1.2191, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.0707268714904785, |
|
"learning_rate": 8.550973891421467e-05, |
|
"loss": 1.2168, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.158420443534851, |
|
"learning_rate": 8.54890178201409e-05, |
|
"loss": 1.1924, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.018847107887268, |
|
"learning_rate": 8.546829672606714e-05, |
|
"loss": 1.2326, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.1226823329925537, |
|
"learning_rate": 8.544757563199338e-05, |
|
"loss": 1.2029, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.1094051599502563, |
|
"learning_rate": 8.542685453791961e-05, |
|
"loss": 1.2005, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.1425297260284424, |
|
"learning_rate": 8.540613344384583e-05, |
|
"loss": 1.2109, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0846836566925049, |
|
"learning_rate": 8.538541234977208e-05, |
|
"loss": 1.2098, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0861196517944336, |
|
"learning_rate": 8.53646912556983e-05, |
|
"loss": 1.2201, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0006380081176758, |
|
"learning_rate": 8.534397016162454e-05, |
|
"loss": 1.2457, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0799134969711304, |
|
"learning_rate": 8.532324906755077e-05, |
|
"loss": 1.2117, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.1802451610565186, |
|
"learning_rate": 8.530252797347701e-05, |
|
"loss": 1.2277, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.0235154628753662, |
|
"learning_rate": 8.528180687940323e-05, |
|
"loss": 1.204, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.1273279190063477, |
|
"learning_rate": 8.526108578532946e-05, |
|
"loss": 1.2179, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.9829218983650208, |
|
"learning_rate": 8.52403646912557e-05, |
|
"loss": 1.1913, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.1805267333984375, |
|
"learning_rate": 8.521964359718194e-05, |
|
"loss": 1.1822, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.026557207107544, |
|
"learning_rate": 8.519892250310817e-05, |
|
"loss": 1.1912, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.04379141330719, |
|
"learning_rate": 8.517820140903439e-05, |
|
"loss": 1.2093, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.0553088188171387, |
|
"learning_rate": 8.515748031496064e-05, |
|
"loss": 1.2213, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.8822417855262756, |
|
"learning_rate": 8.513675922088686e-05, |
|
"loss": 1.2223, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.9738419055938721, |
|
"learning_rate": 8.51160381268131e-05, |
|
"loss": 1.2011, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.0899471044540405, |
|
"learning_rate": 8.509738914214672e-05, |
|
"loss": 1.2406, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.1138182878494263, |
|
"learning_rate": 8.507666804807294e-05, |
|
"loss": 1.1963, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 1.1311627626419067, |
|
"learning_rate": 8.505594695399917e-05, |
|
"loss": 1.1977, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 2.138723134994507, |
|
"learning_rate": 8.503522585992541e-05, |
|
"loss": 1.2104, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 1.0873496532440186, |
|
"learning_rate": 8.501450476585164e-05, |
|
"loss": 1.2113, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.9628106355667114, |
|
"learning_rate": 8.499378367177787e-05, |
|
"loss": 1.2258, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 1.1409735679626465, |
|
"learning_rate": 8.497306257770411e-05, |
|
"loss": 1.1964, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 1.111512541770935, |
|
"learning_rate": 8.495234148363034e-05, |
|
"loss": 1.2061, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 1.0905687808990479, |
|
"learning_rate": 8.493162038955657e-05, |
|
"loss": 1.2025, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.1417665481567383, |
|
"learning_rate": 8.49108992954828e-05, |
|
"loss": 1.1861, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.024594783782959, |
|
"learning_rate": 8.489017820140904e-05, |
|
"loss": 1.1893, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.112746238708496, |
|
"learning_rate": 8.486945710733528e-05, |
|
"loss": 1.2196, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.1404857635498047, |
|
"learning_rate": 8.484873601326151e-05, |
|
"loss": 1.2131, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.1401077508926392, |
|
"learning_rate": 8.482801491918773e-05, |
|
"loss": 1.2107, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.2235480546951294, |
|
"learning_rate": 8.480729382511397e-05, |
|
"loss": 1.2363, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.0837748050689697, |
|
"learning_rate": 8.47865727310402e-05, |
|
"loss": 1.2139, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.22895348072052, |
|
"learning_rate": 8.476585163696643e-05, |
|
"loss": 1.1998, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.105093240737915, |
|
"learning_rate": 8.474513054289267e-05, |
|
"loss": 1.2212, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.222209095954895, |
|
"learning_rate": 8.47244094488189e-05, |
|
"loss": 1.2453, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.1390637159347534, |
|
"learning_rate": 8.470368835474513e-05, |
|
"loss": 1.2071, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.0356426239013672, |
|
"learning_rate": 8.468296726067137e-05, |
|
"loss": 1.2157, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.0771561861038208, |
|
"learning_rate": 8.46622461665976e-05, |
|
"loss": 1.212, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.3607767820358276, |
|
"learning_rate": 8.464152507252384e-05, |
|
"loss": 1.2253, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 1.0732100009918213, |
|
"learning_rate": 8.462080397845007e-05, |
|
"loss": 1.1938, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.0551025867462158, |
|
"learning_rate": 8.460008288437629e-05, |
|
"loss": 1.2163, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.127244472503662, |
|
"learning_rate": 8.457936179030254e-05, |
|
"loss": 1.1956, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.0608160495758057, |
|
"learning_rate": 8.455864069622876e-05, |
|
"loss": 1.191, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.0632834434509277, |
|
"learning_rate": 8.4537919602155e-05, |
|
"loss": 1.1971, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.1187163591384888, |
|
"learning_rate": 8.451719850808123e-05, |
|
"loss": 1.2156, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.135420799255371, |
|
"learning_rate": 8.449647741400747e-05, |
|
"loss": 1.1953, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.1564319133758545, |
|
"learning_rate": 8.447575631993369e-05, |
|
"loss": 1.2169, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.1321407556533813, |
|
"learning_rate": 8.445503522585993e-05, |
|
"loss": 1.1869, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 1.097676157951355, |
|
"learning_rate": 8.443431413178616e-05, |
|
"loss": 1.2039, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.9873398542404175, |
|
"learning_rate": 8.44135930377124e-05, |
|
"loss": 1.2115, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.0774983167648315, |
|
"learning_rate": 8.439287194363863e-05, |
|
"loss": 1.2089, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.199475884437561, |
|
"learning_rate": 8.437215084956485e-05, |
|
"loss": 1.1913, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.2517530918121338, |
|
"learning_rate": 8.43514297554911e-05, |
|
"loss": 1.24, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.1117113828659058, |
|
"learning_rate": 8.433070866141732e-05, |
|
"loss": 1.1907, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.1518152952194214, |
|
"learning_rate": 8.430998756734356e-05, |
|
"loss": 1.2194, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.0633752346038818, |
|
"learning_rate": 8.428926647326979e-05, |
|
"loss": 1.2173, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.1065930128097534, |
|
"learning_rate": 8.426854537919603e-05, |
|
"loss": 1.1803, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.03267240524292, |
|
"learning_rate": 8.424782428512226e-05, |
|
"loss": 1.2178, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.9949610233306885, |
|
"learning_rate": 8.42271031910485e-05, |
|
"loss": 1.1999, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.0859277248382568, |
|
"learning_rate": 8.420638209697472e-05, |
|
"loss": 1.2056, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.0535070896148682, |
|
"learning_rate": 8.418566100290096e-05, |
|
"loss": 1.2004, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.1210662126541138, |
|
"learning_rate": 8.416493990882719e-05, |
|
"loss": 1.2223, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.0601907968521118, |
|
"learning_rate": 8.414421881475343e-05, |
|
"loss": 1.1985, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.0329095125198364, |
|
"learning_rate": 8.412349772067966e-05, |
|
"loss": 1.202, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.1331712007522583, |
|
"learning_rate": 8.410277662660588e-05, |
|
"loss": 1.1923, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.1752616167068481, |
|
"learning_rate": 8.408205553253212e-05, |
|
"loss": 1.195, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.1065553426742554, |
|
"learning_rate": 8.406133443845835e-05, |
|
"loss": 1.2173, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.1917232275009155, |
|
"learning_rate": 8.404061334438459e-05, |
|
"loss": 1.1926, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.1937179565429688, |
|
"learning_rate": 8.401989225031082e-05, |
|
"loss": 1.18, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.1231236457824707, |
|
"learning_rate": 8.399917115623706e-05, |
|
"loss": 1.2006, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.2102551460266113, |
|
"learning_rate": 8.397845006216328e-05, |
|
"loss": 1.209, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.0514934062957764, |
|
"learning_rate": 8.395772896808953e-05, |
|
"loss": 1.1959, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.0771079063415527, |
|
"learning_rate": 8.393700787401575e-05, |
|
"loss": 1.2303, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.1473889350891113, |
|
"learning_rate": 8.391628677994198e-05, |
|
"loss": 1.189, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.101943850517273, |
|
"learning_rate": 8.389556568586822e-05, |
|
"loss": 1.1792, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.217653751373291, |
|
"learning_rate": 8.387484459179446e-05, |
|
"loss": 1.2032, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.1558287143707275, |
|
"learning_rate": 8.385412349772068e-05, |
|
"loss": 1.2048, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.1772674322128296, |
|
"learning_rate": 8.383340240364693e-05, |
|
"loss": 1.208, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.1206742525100708, |
|
"learning_rate": 8.381268130957315e-05, |
|
"loss": 1.2158, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.0629864931106567, |
|
"learning_rate": 8.379196021549938e-05, |
|
"loss": 1.1989, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.0601340532302856, |
|
"learning_rate": 8.377123912142562e-05, |
|
"loss": 1.2149, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.2142128944396973, |
|
"learning_rate": 8.375051802735184e-05, |
|
"loss": 1.2045, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 1.015764832496643, |
|
"learning_rate": 8.372979693327809e-05, |
|
"loss": 1.2177, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.1525570154190063, |
|
"learning_rate": 8.370907583920431e-05, |
|
"loss": 1.2203, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.0959409475326538, |
|
"learning_rate": 8.368835474513054e-05, |
|
"loss": 1.2022, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.073423981666565, |
|
"learning_rate": 8.366763365105678e-05, |
|
"loss": 1.2225, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.1058803796768188, |
|
"learning_rate": 8.364691255698301e-05, |
|
"loss": 1.202, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.067132592201233, |
|
"learning_rate": 8.362619146290924e-05, |
|
"loss": 1.2215, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.1303818225860596, |
|
"learning_rate": 8.360547036883548e-05, |
|
"loss": 1.2049, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.1500287055969238, |
|
"learning_rate": 8.35847492747617e-05, |
|
"loss": 1.2241, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.9954307675361633, |
|
"learning_rate": 8.356402818068794e-05, |
|
"loss": 1.1806, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.0905861854553223, |
|
"learning_rate": 8.354330708661418e-05, |
|
"loss": 1.2108, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.1111913919448853, |
|
"learning_rate": 8.352258599254041e-05, |
|
"loss": 1.2026, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.9808979034423828, |
|
"learning_rate": 8.350186489846665e-05, |
|
"loss": 1.2101, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.127181053161621, |
|
"learning_rate": 8.348114380439288e-05, |
|
"loss": 1.1955, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.0933669805526733, |
|
"learning_rate": 8.34604227103191e-05, |
|
"loss": 1.1904, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.2010207176208496, |
|
"learning_rate": 8.343970161624535e-05, |
|
"loss": 1.2018, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.019642949104309, |
|
"learning_rate": 8.341898052217157e-05, |
|
"loss": 1.221, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.115064024925232, |
|
"learning_rate": 8.339825942809781e-05, |
|
"loss": 1.193, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.008520245552063, |
|
"learning_rate": 8.337753833402404e-05, |
|
"loss": 1.222, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.0627065896987915, |
|
"learning_rate": 8.335681723995027e-05, |
|
"loss": 1.1968, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.2253535985946655, |
|
"learning_rate": 8.33360961458765e-05, |
|
"loss": 1.2135, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.0848592519760132, |
|
"learning_rate": 8.331537505180274e-05, |
|
"loss": 1.2087, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.1441541910171509, |
|
"learning_rate": 8.329465395772897e-05, |
|
"loss": 1.1789, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.1108758449554443, |
|
"learning_rate": 8.32739328636552e-05, |
|
"loss": 1.1778, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.1133983135223389, |
|
"learning_rate": 8.325321176958144e-05, |
|
"loss": 1.2062, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.1412779092788696, |
|
"learning_rate": 8.323249067550766e-05, |
|
"loss": 1.2129, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 1.0338503122329712, |
|
"learning_rate": 8.321176958143391e-05, |
|
"loss": 1.1727, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.1191462278366089, |
|
"learning_rate": 8.319104848736013e-05, |
|
"loss": 1.1997, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.0909239053726196, |
|
"learning_rate": 8.317032739328637e-05, |
|
"loss": 1.2144, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.1151865720748901, |
|
"learning_rate": 8.31496062992126e-05, |
|
"loss": 1.2033, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.143604040145874, |
|
"learning_rate": 8.312888520513884e-05, |
|
"loss": 1.2088, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.2317973375320435, |
|
"learning_rate": 8.310816411106507e-05, |
|
"loss": 1.2179, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.1043517589569092, |
|
"learning_rate": 8.308744301699131e-05, |
|
"loss": 1.1913, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.149396300315857, |
|
"learning_rate": 8.306672192291753e-05, |
|
"loss": 1.2066, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.0468456745147705, |
|
"learning_rate": 8.304600082884377e-05, |
|
"loss": 1.1823, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.0730814933776855, |
|
"learning_rate": 8.302527973477e-05, |
|
"loss": 1.2053, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.2069722414016724, |
|
"learning_rate": 8.300455864069622e-05, |
|
"loss": 1.1999, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.0964728593826294, |
|
"learning_rate": 8.298383754662247e-05, |
|
"loss": 1.1927, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.0547986030578613, |
|
"learning_rate": 8.296311645254869e-05, |
|
"loss": 1.2198, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.9480800628662109, |
|
"learning_rate": 8.294239535847493e-05, |
|
"loss": 1.1673, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.042935848236084, |
|
"learning_rate": 8.292167426440116e-05, |
|
"loss": 1.211, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.1812864542007446, |
|
"learning_rate": 8.29009531703274e-05, |
|
"loss": 1.1775, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.0850603580474854, |
|
"learning_rate": 8.288023207625363e-05, |
|
"loss": 1.2023, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.0494657754898071, |
|
"learning_rate": 8.285951098217987e-05, |
|
"loss": 1.1897, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.2138115167617798, |
|
"learning_rate": 8.283878988810609e-05, |
|
"loss": 1.1691, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.0943918228149414, |
|
"learning_rate": 8.281806879403234e-05, |
|
"loss": 1.1972, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0463635921478271, |
|
"learning_rate": 8.279734769995856e-05, |
|
"loss": 1.2312, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.1013418436050415, |
|
"learning_rate": 8.27766266058848e-05, |
|
"loss": 1.2311, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.9996619820594788, |
|
"learning_rate": 8.275590551181103e-05, |
|
"loss": 1.2129, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0416114330291748, |
|
"learning_rate": 8.273518441773727e-05, |
|
"loss": 1.1822, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0862529277801514, |
|
"learning_rate": 8.271446332366349e-05, |
|
"loss": 1.2145, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.1965521574020386, |
|
"learning_rate": 8.269374222958972e-05, |
|
"loss": 1.1969, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.1601965427398682, |
|
"learning_rate": 8.267302113551596e-05, |
|
"loss": 1.2112, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.0845617055892944, |
|
"learning_rate": 8.265230004144219e-05, |
|
"loss": 1.1885, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.0907461643218994, |
|
"learning_rate": 8.263157894736843e-05, |
|
"loss": 1.2091, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.027777075767517, |
|
"learning_rate": 8.261085785329465e-05, |
|
"loss": 1.1919, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.2468430995941162, |
|
"learning_rate": 8.25901367592209e-05, |
|
"loss": 1.1948, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.1889891624450684, |
|
"learning_rate": 8.256941566514712e-05, |
|
"loss": 1.2143, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.2794381380081177, |
|
"learning_rate": 8.254869457107336e-05, |
|
"loss": 1.2132, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.0493546724319458, |
|
"learning_rate": 8.252797347699959e-05, |
|
"loss": 1.1766, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.001658320426941, |
|
"learning_rate": 8.250725238292583e-05, |
|
"loss": 1.1715, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.06058669090271, |
|
"learning_rate": 8.248653128885205e-05, |
|
"loss": 1.1793, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.09765625, |
|
"learning_rate": 8.24658101947783e-05, |
|
"loss": 1.1954, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.0230962038040161, |
|
"learning_rate": 8.244508910070452e-05, |
|
"loss": 1.2158, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.028173804283142, |
|
"learning_rate": 8.242436800663075e-05, |
|
"loss": 1.1874, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.0620988607406616, |
|
"learning_rate": 8.240364691255699e-05, |
|
"loss": 1.2101, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.0968023538589478, |
|
"learning_rate": 8.238292581848322e-05, |
|
"loss": 1.2131, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.1402392387390137, |
|
"learning_rate": 8.236220472440946e-05, |
|
"loss": 1.2173, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.1263937950134277, |
|
"learning_rate": 8.234148363033568e-05, |
|
"loss": 1.1796, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.1383157968521118, |
|
"learning_rate": 8.232076253626191e-05, |
|
"loss": 1.1724, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.255072832107544, |
|
"learning_rate": 8.230004144218815e-05, |
|
"loss": 1.2019, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.4536107778549194, |
|
"learning_rate": 8.227932034811438e-05, |
|
"loss": 1.189, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.0804840326309204, |
|
"learning_rate": 8.225859925404062e-05, |
|
"loss": 1.2009, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.1545298099517822, |
|
"learning_rate": 8.223787815996686e-05, |
|
"loss": 1.2047, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 1.2308061122894287, |
|
"learning_rate": 8.221715706589308e-05, |
|
"loss": 1.1958, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.2053345441818237, |
|
"learning_rate": 8.219643597181933e-05, |
|
"loss": 1.183, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.2016477584838867, |
|
"learning_rate": 8.217571487774555e-05, |
|
"loss": 1.1831, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.2991312742233276, |
|
"learning_rate": 8.215499378367178e-05, |
|
"loss": 1.1893, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.0688191652297974, |
|
"learning_rate": 8.213427268959802e-05, |
|
"loss": 1.1962, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 1.0042345523834229, |
|
"learning_rate": 8.211355159552425e-05, |
|
"loss": 1.1935, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.1283092498779297, |
|
"learning_rate": 8.209283050145047e-05, |
|
"loss": 1.1983, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.1149276494979858, |
|
"learning_rate": 8.207210940737672e-05, |
|
"loss": 1.1967, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.0818403959274292, |
|
"learning_rate": 8.205138831330294e-05, |
|
"loss": 1.187, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.1263095140457153, |
|
"learning_rate": 8.203066721922918e-05, |
|
"loss": 1.1866, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.0879017114639282, |
|
"learning_rate": 8.200994612515541e-05, |
|
"loss": 1.2018, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.1448615789413452, |
|
"learning_rate": 8.198922503108164e-05, |
|
"loss": 1.1879, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.9900506138801575, |
|
"learning_rate": 8.196850393700788e-05, |
|
"loss": 1.1985, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.0438898801803589, |
|
"learning_rate": 8.19477828429341e-05, |
|
"loss": 1.1881, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.1896075010299683, |
|
"learning_rate": 8.192706174886034e-05, |
|
"loss": 1.2022, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.152300238609314, |
|
"learning_rate": 8.190634065478658e-05, |
|
"loss": 1.1828, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.0616978406906128, |
|
"learning_rate": 8.188561956071281e-05, |
|
"loss": 1.1788, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.0310215950012207, |
|
"learning_rate": 8.186489846663903e-05, |
|
"loss": 1.1898, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.1227622032165527, |
|
"learning_rate": 8.184417737256528e-05, |
|
"loss": 1.1936, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 1.0663117170333862, |
|
"learning_rate": 8.18234562784915e-05, |
|
"loss": 1.1857, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.202330470085144, |
|
"learning_rate": 8.180273518441774e-05, |
|
"loss": 1.1811, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.036596417427063, |
|
"learning_rate": 8.178201409034397e-05, |
|
"loss": 1.2013, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.0503324270248413, |
|
"learning_rate": 8.176129299627021e-05, |
|
"loss": 1.1755, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.0510581731796265, |
|
"learning_rate": 8.174057190219644e-05, |
|
"loss": 1.1936, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 1.0580915212631226, |
|
"learning_rate": 8.171985080812268e-05, |
|
"loss": 1.2006, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.0723899602890015, |
|
"learning_rate": 8.16991297140489e-05, |
|
"loss": 1.1841, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.0289912223815918, |
|
"learning_rate": 8.167840861997515e-05, |
|
"loss": 1.1762, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.2664040327072144, |
|
"learning_rate": 8.165768752590137e-05, |
|
"loss": 1.1695, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.0689359903335571, |
|
"learning_rate": 8.163696643182759e-05, |
|
"loss": 1.1888, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 1.131007194519043, |
|
"learning_rate": 8.161624533775384e-05, |
|
"loss": 1.1856, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.06068754196167, |
|
"learning_rate": 8.159552424368006e-05, |
|
"loss": 1.2055, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.0843102931976318, |
|
"learning_rate": 8.15748031496063e-05, |
|
"loss": 1.1898, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.141878366470337, |
|
"learning_rate": 8.155408205553253e-05, |
|
"loss": 1.1885, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.1082584857940674, |
|
"learning_rate": 8.153336096145877e-05, |
|
"loss": 1.1843, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.1285297870635986, |
|
"learning_rate": 8.1512639867385e-05, |
|
"loss": 1.2126, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.2701120376586914, |
|
"learning_rate": 8.149191877331124e-05, |
|
"loss": 1.1816, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.1763367652893066, |
|
"learning_rate": 8.147119767923746e-05, |
|
"loss": 1.2031, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.0942009687423706, |
|
"learning_rate": 8.145047658516371e-05, |
|
"loss": 1.1922, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.1373592615127563, |
|
"learning_rate": 8.142975549108993e-05, |
|
"loss": 1.1891, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.0547840595245361, |
|
"learning_rate": 8.140903439701617e-05, |
|
"loss": 1.2119, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.973156213760376, |
|
"learning_rate": 8.13883133029424e-05, |
|
"loss": 1.1929, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.07832670211792, |
|
"learning_rate": 8.136759220886864e-05, |
|
"loss": 1.1693, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.173492670059204, |
|
"learning_rate": 8.134687111479487e-05, |
|
"loss": 1.1998, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.161414623260498, |
|
"learning_rate": 8.132615002072109e-05, |
|
"loss": 1.1769, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.1352944374084473, |
|
"learning_rate": 8.130542892664733e-05, |
|
"loss": 1.1837, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.102401852607727, |
|
"learning_rate": 8.128470783257356e-05, |
|
"loss": 1.175, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.2963926792144775, |
|
"learning_rate": 8.12639867384998e-05, |
|
"loss": 1.1782, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.0536015033721924, |
|
"learning_rate": 8.124326564442602e-05, |
|
"loss": 1.1799, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.1193984746932983, |
|
"learning_rate": 8.122254455035227e-05, |
|
"loss": 1.1729, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.1112126111984253, |
|
"learning_rate": 8.120182345627849e-05, |
|
"loss": 1.168, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.0647683143615723, |
|
"learning_rate": 8.118110236220473e-05, |
|
"loss": 1.2057, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.1709637641906738, |
|
"learning_rate": 8.116038126813096e-05, |
|
"loss": 1.1757, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.0774716138839722, |
|
"learning_rate": 8.11396601740572e-05, |
|
"loss": 1.1784, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.088477373123169, |
|
"learning_rate": 8.111893907998343e-05, |
|
"loss": 1.1726, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.0596317052841187, |
|
"learning_rate": 8.109821798590967e-05, |
|
"loss": 1.2029, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.1406421661376953, |
|
"learning_rate": 8.107749689183589e-05, |
|
"loss": 1.2154, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.1345916986465454, |
|
"learning_rate": 8.105677579776214e-05, |
|
"loss": 1.1684, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.2776840925216675, |
|
"learning_rate": 8.103605470368836e-05, |
|
"loss": 1.1915, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.1104062795639038, |
|
"learning_rate": 8.101533360961459e-05, |
|
"loss": 1.1935, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.0262198448181152, |
|
"learning_rate": 8.099461251554083e-05, |
|
"loss": 1.2047, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.1350001096725464, |
|
"learning_rate": 8.097389142146705e-05, |
|
"loss": 1.1798, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.0631601810455322, |
|
"learning_rate": 8.095317032739328e-05, |
|
"loss": 1.1744, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.1678036451339722, |
|
"learning_rate": 8.093244923331952e-05, |
|
"loss": 1.208, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.0665799379348755, |
|
"learning_rate": 8.091172813924576e-05, |
|
"loss": 1.1792, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.1364054679870605, |
|
"learning_rate": 8.089100704517199e-05, |
|
"loss": 1.2036, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.0586035251617432, |
|
"learning_rate": 8.087028595109823e-05, |
|
"loss": 1.201, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.9820613861083984, |
|
"learning_rate": 8.084956485702445e-05, |
|
"loss": 1.1912, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.0832290649414062, |
|
"learning_rate": 8.08288437629507e-05, |
|
"loss": 1.2044, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.2247551679611206, |
|
"learning_rate": 8.080812266887692e-05, |
|
"loss": 1.1613, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.0764803886413574, |
|
"learning_rate": 8.078740157480315e-05, |
|
"loss": 1.1885, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.1871037483215332, |
|
"learning_rate": 8.076668048072939e-05, |
|
"loss": 1.2089, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.0709806680679321, |
|
"learning_rate": 8.074595938665562e-05, |
|
"loss": 1.1743, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.2077934741973877, |
|
"learning_rate": 8.072523829258184e-05, |
|
"loss": 1.192, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.111323356628418, |
|
"learning_rate": 8.070451719850809e-05, |
|
"loss": 1.178, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.1351996660232544, |
|
"learning_rate": 8.06858682138417e-05, |
|
"loss": 1.1921, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.1319098472595215, |
|
"learning_rate": 8.066514711976792e-05, |
|
"loss": 1.2069, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.080480694770813, |
|
"learning_rate": 8.064442602569417e-05, |
|
"loss": 1.1949, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.126483678817749, |
|
"learning_rate": 8.062370493162039e-05, |
|
"loss": 1.1523, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 1.0840978622436523, |
|
"learning_rate": 8.060298383754663e-05, |
|
"loss": 1.2074, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.1075447797775269, |
|
"learning_rate": 8.058226274347286e-05, |
|
"loss": 1.1937, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.0781664848327637, |
|
"learning_rate": 8.05615416493991e-05, |
|
"loss": 1.1837, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.0375909805297852, |
|
"learning_rate": 8.054082055532532e-05, |
|
"loss": 1.1928, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.101149320602417, |
|
"learning_rate": 8.052009946125155e-05, |
|
"loss": 1.2006, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.9537743926048279, |
|
"learning_rate": 8.049937836717779e-05, |
|
"loss": 1.1939, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.1106035709381104, |
|
"learning_rate": 8.047865727310402e-05, |
|
"loss": 1.1874, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.1406303644180298, |
|
"learning_rate": 8.045793617903026e-05, |
|
"loss": 1.1885, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.0510179996490479, |
|
"learning_rate": 8.043721508495648e-05, |
|
"loss": 1.1831, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.1432597637176514, |
|
"learning_rate": 8.041649399088273e-05, |
|
"loss": 1.1746, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0710557699203491, |
|
"learning_rate": 8.039577289680895e-05, |
|
"loss": 1.1668, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.067130446434021, |
|
"learning_rate": 8.037505180273519e-05, |
|
"loss": 1.1895, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1639565229415894, |
|
"learning_rate": 8.035433070866142e-05, |
|
"loss": 1.1786, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.2795084714889526, |
|
"eval_runtime": 1604.6697, |
|
"eval_samples_per_second": 262.864, |
|
"eval_steps_per_second": 4.107, |
|
"step": 9692 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 48460, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 4.1335523730815713e+18, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|