|
{ |
|
"best_metric": 1.578300952911377, |
|
"best_model_checkpoint": "en-to-lg-ufal/checkpoint-19400", |
|
"epoch": 4.999355753124597, |
|
"eval_steps": 500, |
|
"global_step": 19400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012884937508053087, |
|
"grad_norm": 0.9295567870140076, |
|
"learning_rate": 1.9949484536082476e-05, |
|
"loss": 3.2843, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.025769875016106173, |
|
"grad_norm": 1.0073925256729126, |
|
"learning_rate": 1.9897938144329896e-05, |
|
"loss": 3.1584, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.038654812524159254, |
|
"grad_norm": 1.3255535364151, |
|
"learning_rate": 1.984639175257732e-05, |
|
"loss": 3.0963, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.051539750032212346, |
|
"grad_norm": 1.086804747581482, |
|
"learning_rate": 1.9794845360824745e-05, |
|
"loss": 2.9216, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06442468754026542, |
|
"grad_norm": 0.6704310178756714, |
|
"learning_rate": 1.9743298969072166e-05, |
|
"loss": 2.8337, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07730962504831851, |
|
"grad_norm": 0.837518572807312, |
|
"learning_rate": 1.969278350515464e-05, |
|
"loss": 2.7777, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09019456255637161, |
|
"grad_norm": 0.7694929242134094, |
|
"learning_rate": 1.9641237113402064e-05, |
|
"loss": 2.7939, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10307950006442469, |
|
"grad_norm": 1.0657893419265747, |
|
"learning_rate": 1.9589690721649485e-05, |
|
"loss": 2.7371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11596443757247778, |
|
"grad_norm": 0.7758269906044006, |
|
"learning_rate": 1.953814432989691e-05, |
|
"loss": 2.6623, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12884937508053085, |
|
"grad_norm": 0.746475100517273, |
|
"learning_rate": 1.948659793814433e-05, |
|
"loss": 2.6365, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14173431258858393, |
|
"grad_norm": 0.8395822048187256, |
|
"learning_rate": 1.9435051546391754e-05, |
|
"loss": 2.6025, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15461925009663702, |
|
"grad_norm": 1.061213493347168, |
|
"learning_rate": 1.938350515463918e-05, |
|
"loss": 2.5525, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16750418760469013, |
|
"grad_norm": 0.8460017442703247, |
|
"learning_rate": 1.93319587628866e-05, |
|
"loss": 2.554, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18038912511274321, |
|
"grad_norm": 0.8066027164459229, |
|
"learning_rate": 1.9280412371134024e-05, |
|
"loss": 2.4868, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1932740626207963, |
|
"grad_norm": 0.8909623622894287, |
|
"learning_rate": 1.9228865979381445e-05, |
|
"loss": 2.5062, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.20615900012884938, |
|
"grad_norm": 0.9352700114250183, |
|
"learning_rate": 1.9177319587628865e-05, |
|
"loss": 2.4822, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21904393763690247, |
|
"grad_norm": 1.2132598161697388, |
|
"learning_rate": 1.912577319587629e-05, |
|
"loss": 2.408, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23192887514495555, |
|
"grad_norm": 1.0032589435577393, |
|
"learning_rate": 1.907422680412371e-05, |
|
"loss": 2.436, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24481381265300864, |
|
"grad_norm": 1.0249050855636597, |
|
"learning_rate": 1.9022680412371135e-05, |
|
"loss": 2.4474, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2576987501610617, |
|
"grad_norm": 0.8265942335128784, |
|
"learning_rate": 1.897113402061856e-05, |
|
"loss": 2.4005, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2705836876691148, |
|
"grad_norm": 0.9384586215019226, |
|
"learning_rate": 1.891958762886598e-05, |
|
"loss": 2.2971, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28346862517716787, |
|
"grad_norm": 0.9439546465873718, |
|
"learning_rate": 1.8868041237113404e-05, |
|
"loss": 2.3563, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29635356268522095, |
|
"grad_norm": 0.9652894139289856, |
|
"learning_rate": 1.8816494845360825e-05, |
|
"loss": 2.2672, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.30923850019327404, |
|
"grad_norm": 0.8074690103530884, |
|
"learning_rate": 1.876494845360825e-05, |
|
"loss": 2.319, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3221234377013272, |
|
"grad_norm": 0.8441233038902283, |
|
"learning_rate": 1.8713402061855674e-05, |
|
"loss": 2.3201, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33500837520938026, |
|
"grad_norm": 1.0416673421859741, |
|
"learning_rate": 1.8661855670103094e-05, |
|
"loss": 2.2416, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34789331271743335, |
|
"grad_norm": 1.100706696510315, |
|
"learning_rate": 1.861030927835052e-05, |
|
"loss": 2.2427, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36077825022548643, |
|
"grad_norm": 0.9824424386024475, |
|
"learning_rate": 1.855876288659794e-05, |
|
"loss": 2.196, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3736631877335395, |
|
"grad_norm": 0.9210222363471985, |
|
"learning_rate": 1.850721649484536e-05, |
|
"loss": 2.2371, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3865481252415926, |
|
"grad_norm": 1.0536917448043823, |
|
"learning_rate": 1.8455670103092785e-05, |
|
"loss": 2.2008, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3994330627496457, |
|
"grad_norm": 1.0008552074432373, |
|
"learning_rate": 1.8404123711340206e-05, |
|
"loss": 2.1586, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.41231800025769877, |
|
"grad_norm": 0.8722209334373474, |
|
"learning_rate": 1.835257731958763e-05, |
|
"loss": 2.1753, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42520293776575185, |
|
"grad_norm": 0.7571916580200195, |
|
"learning_rate": 1.8301030927835054e-05, |
|
"loss": 2.1598, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43808787527380494, |
|
"grad_norm": 1.054757833480835, |
|
"learning_rate": 1.8249484536082475e-05, |
|
"loss": 2.18, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.450972812781858, |
|
"grad_norm": 0.8249649405479431, |
|
"learning_rate": 1.81979381443299e-05, |
|
"loss": 2.1078, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.4638577502899111, |
|
"grad_norm": 1.7045085430145264, |
|
"learning_rate": 1.814639175257732e-05, |
|
"loss": 2.211, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4767426877979642, |
|
"grad_norm": 1.0341459512710571, |
|
"learning_rate": 1.8094845360824744e-05, |
|
"loss": 2.1393, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4896276253060173, |
|
"grad_norm": 0.9365245699882507, |
|
"learning_rate": 1.804329896907217e-05, |
|
"loss": 2.1602, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5025125628140703, |
|
"grad_norm": 1.2039780616760254, |
|
"learning_rate": 1.799175257731959e-05, |
|
"loss": 2.1431, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5153975003221234, |
|
"grad_norm": 0.7472810745239258, |
|
"learning_rate": 1.7940206185567014e-05, |
|
"loss": 2.1242, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5282824378301765, |
|
"grad_norm": 0.8963159918785095, |
|
"learning_rate": 1.7888659793814435e-05, |
|
"loss": 2.1757, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5411673753382296, |
|
"grad_norm": 1.0002330541610718, |
|
"learning_rate": 1.7837113402061855e-05, |
|
"loss": 2.1329, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5540523128462826, |
|
"grad_norm": 0.944322943687439, |
|
"learning_rate": 1.778556701030928e-05, |
|
"loss": 2.1254, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5669372503543357, |
|
"grad_norm": 1.0756226778030396, |
|
"learning_rate": 1.77340206185567e-05, |
|
"loss": 2.1479, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5798221878623888, |
|
"grad_norm": 0.9357224106788635, |
|
"learning_rate": 1.7682474226804125e-05, |
|
"loss": 2.142, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5927071253704419, |
|
"grad_norm": 0.9683809876441956, |
|
"learning_rate": 1.763092783505155e-05, |
|
"loss": 2.1744, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.605592062878495, |
|
"grad_norm": 0.9993259310722351, |
|
"learning_rate": 1.757938144329897e-05, |
|
"loss": 2.0672, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6184770003865481, |
|
"grad_norm": 1.168818473815918, |
|
"learning_rate": 1.7527835051546394e-05, |
|
"loss": 2.1412, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6313619378946013, |
|
"grad_norm": 1.0189549922943115, |
|
"learning_rate": 1.7476288659793815e-05, |
|
"loss": 2.079, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6442468754026544, |
|
"grad_norm": 0.935614824295044, |
|
"learning_rate": 1.742474226804124e-05, |
|
"loss": 2.0944, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6571318129107074, |
|
"grad_norm": 0.9308194518089294, |
|
"learning_rate": 1.7373195876288664e-05, |
|
"loss": 2.0767, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6700167504187605, |
|
"grad_norm": 0.9042763113975525, |
|
"learning_rate": 1.7321649484536084e-05, |
|
"loss": 2.0909, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6829016879268136, |
|
"grad_norm": 0.9609789252281189, |
|
"learning_rate": 1.7270103092783505e-05, |
|
"loss": 2.1105, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6957866254348667, |
|
"grad_norm": 1.844524621963501, |
|
"learning_rate": 1.721855670103093e-05, |
|
"loss": 2.1664, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7086715629429198, |
|
"grad_norm": 1.3245840072631836, |
|
"learning_rate": 1.716701030927835e-05, |
|
"loss": 2.0887, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7215565004509729, |
|
"grad_norm": 0.9674375057220459, |
|
"learning_rate": 1.7115463917525775e-05, |
|
"loss": 2.0579, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.734441437959026, |
|
"grad_norm": 1.1117796897888184, |
|
"learning_rate": 1.7063917525773196e-05, |
|
"loss": 2.0793, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.747326375467079, |
|
"grad_norm": 1.099692702293396, |
|
"learning_rate": 1.701237113402062e-05, |
|
"loss": 2.0568, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7602113129751321, |
|
"grad_norm": 1.1181882619857788, |
|
"learning_rate": 1.6961855670103094e-05, |
|
"loss": 2.1385, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7730962504831852, |
|
"grad_norm": 1.0414690971374512, |
|
"learning_rate": 1.6911340206185568e-05, |
|
"loss": 2.0686, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7859811879912383, |
|
"grad_norm": 0.9875026345252991, |
|
"learning_rate": 1.6859793814432992e-05, |
|
"loss": 2.0503, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7988661254992914, |
|
"grad_norm": 1.0894653797149658, |
|
"learning_rate": 1.6808247422680413e-05, |
|
"loss": 2.0283, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8117510630073445, |
|
"grad_norm": 0.9688855409622192, |
|
"learning_rate": 1.6756701030927837e-05, |
|
"loss": 2.0442, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8246360005153975, |
|
"grad_norm": 0.8581517338752747, |
|
"learning_rate": 1.6705154639175258e-05, |
|
"loss": 2.0938, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8375209380234506, |
|
"grad_norm": 1.0975722074508667, |
|
"learning_rate": 1.6653608247422682e-05, |
|
"loss": 2.0493, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8504058755315037, |
|
"grad_norm": 0.9611416459083557, |
|
"learning_rate": 1.6602061855670103e-05, |
|
"loss": 2.0434, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8632908130395568, |
|
"grad_norm": 0.9956973195075989, |
|
"learning_rate": 1.6550515463917527e-05, |
|
"loss": 2.0556, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8761757505476099, |
|
"grad_norm": 1.0307831764221191, |
|
"learning_rate": 1.6498969072164948e-05, |
|
"loss": 2.0353, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.889060688055663, |
|
"grad_norm": 0.9811009168624878, |
|
"learning_rate": 1.6447422680412372e-05, |
|
"loss": 2.0574, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.901945625563716, |
|
"grad_norm": 1.0394349098205566, |
|
"learning_rate": 1.6395876288659797e-05, |
|
"loss": 2.0544, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9148305630717691, |
|
"grad_norm": 0.9915798902511597, |
|
"learning_rate": 1.6344329896907218e-05, |
|
"loss": 2.0685, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9277155005798222, |
|
"grad_norm": 0.8833404183387756, |
|
"learning_rate": 1.6292783505154642e-05, |
|
"loss": 2.0734, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.9406004380878753, |
|
"grad_norm": 0.9089716672897339, |
|
"learning_rate": 1.6241237113402063e-05, |
|
"loss": 2.0531, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9534853755959284, |
|
"grad_norm": 0.9168672561645508, |
|
"learning_rate": 1.6189690721649487e-05, |
|
"loss": 2.008, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9663703131039815, |
|
"grad_norm": 0.9824495911598206, |
|
"learning_rate": 1.6138144329896908e-05, |
|
"loss": 2.0488, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9792552506120346, |
|
"grad_norm": 1.2295233011245728, |
|
"learning_rate": 1.6086597938144332e-05, |
|
"loss": 2.0184, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9921401881200876, |
|
"grad_norm": 0.9734981656074524, |
|
"learning_rate": 1.6035051546391753e-05, |
|
"loss": 2.0659, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9998711506249195, |
|
"eval_bleu": 18.1461, |
|
"eval_gen_len": 45.4751, |
|
"eval_loss": 1.7681266069412231, |
|
"eval_runtime": 2364.104, |
|
"eval_samples_per_second": 6.565, |
|
"eval_steps_per_second": 0.41, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.0050251256281406, |
|
"grad_norm": 1.2103891372680664, |
|
"learning_rate": 1.5983505154639177e-05, |
|
"loss": 2.0579, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.0179100631361937, |
|
"grad_norm": 0.983440637588501, |
|
"learning_rate": 1.5931958762886598e-05, |
|
"loss": 1.9984, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.0307950006442468, |
|
"grad_norm": 1.0848294496536255, |
|
"learning_rate": 1.5880412371134022e-05, |
|
"loss": 2.0428, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0436799381522999, |
|
"grad_norm": 1.143467903137207, |
|
"learning_rate": 1.5828865979381443e-05, |
|
"loss": 2.0236, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.056564875660353, |
|
"grad_norm": 0.9652382731437683, |
|
"learning_rate": 1.5777319587628867e-05, |
|
"loss": 2.0219, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.069449813168406, |
|
"grad_norm": 1.0381783246994019, |
|
"learning_rate": 1.5725773195876292e-05, |
|
"loss": 2.0466, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.0823347506764591, |
|
"grad_norm": 1.0150736570358276, |
|
"learning_rate": 1.5674226804123713e-05, |
|
"loss": 1.9878, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.0952196881845122, |
|
"grad_norm": 1.0291893482208252, |
|
"learning_rate": 1.5622680412371137e-05, |
|
"loss": 2.0637, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.1081046256925653, |
|
"grad_norm": 1.5909788608551025, |
|
"learning_rate": 1.5571134020618558e-05, |
|
"loss": 1.9356, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.1209895632006184, |
|
"grad_norm": 1.1417341232299805, |
|
"learning_rate": 1.551958762886598e-05, |
|
"loss": 2.0089, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.1338745007086715, |
|
"grad_norm": 1.0295405387878418, |
|
"learning_rate": 1.5468041237113403e-05, |
|
"loss": 2.0486, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.1467594382167245, |
|
"grad_norm": 1.1314431428909302, |
|
"learning_rate": 1.5416494845360827e-05, |
|
"loss": 1.9785, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.1596443757247776, |
|
"grad_norm": 0.9702105522155762, |
|
"learning_rate": 1.5364948453608248e-05, |
|
"loss": 1.9474, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.1725293132328307, |
|
"grad_norm": 1.9038368463516235, |
|
"learning_rate": 1.5313402061855672e-05, |
|
"loss": 2.0264, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.1854142507408838, |
|
"grad_norm": 1.0110225677490234, |
|
"learning_rate": 1.5261855670103093e-05, |
|
"loss": 2.0689, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.1982991882489369, |
|
"grad_norm": 0.9490695595741272, |
|
"learning_rate": 1.5210309278350517e-05, |
|
"loss": 1.9998, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.21118412575699, |
|
"grad_norm": 1.0398602485656738, |
|
"learning_rate": 1.515876288659794e-05, |
|
"loss": 1.9622, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.224069063265043, |
|
"grad_norm": 1.088680624961853, |
|
"learning_rate": 1.5107216494845362e-05, |
|
"loss": 2.0096, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.2369540007730961, |
|
"grad_norm": 1.0555858612060547, |
|
"learning_rate": 1.5055670103092785e-05, |
|
"loss": 1.9494, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.2498389382811492, |
|
"grad_norm": 1.1039170026779175, |
|
"learning_rate": 1.5004123711340208e-05, |
|
"loss": 2.0132, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.2627238757892023, |
|
"grad_norm": 1.069201946258545, |
|
"learning_rate": 1.4952577319587632e-05, |
|
"loss": 2.043, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.2756088132972554, |
|
"grad_norm": 17.700525283813477, |
|
"learning_rate": 1.4901030927835051e-05, |
|
"loss": 1.9629, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.2884937508053085, |
|
"grad_norm": 0.9558641314506531, |
|
"learning_rate": 1.4849484536082475e-05, |
|
"loss": 1.9488, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.3013786883133616, |
|
"grad_norm": 1.0301564931869507, |
|
"learning_rate": 1.4797938144329898e-05, |
|
"loss": 2.0172, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.3142636258214146, |
|
"grad_norm": 1.017805814743042, |
|
"learning_rate": 1.474639175257732e-05, |
|
"loss": 2.04, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.3271485633294677, |
|
"grad_norm": 0.8880634903907776, |
|
"learning_rate": 1.4694845360824743e-05, |
|
"loss": 2.0109, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.3400335008375208, |
|
"grad_norm": 0.9009851813316345, |
|
"learning_rate": 1.4643298969072166e-05, |
|
"loss": 1.9195, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.352918438345574, |
|
"grad_norm": 0.9800803661346436, |
|
"learning_rate": 1.459175257731959e-05, |
|
"loss": 2.006, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.365803375853627, |
|
"grad_norm": 0.9194086194038391, |
|
"learning_rate": 1.4540206185567012e-05, |
|
"loss": 1.927, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.37868831336168, |
|
"grad_norm": 1.1191316843032837, |
|
"learning_rate": 1.4488659793814435e-05, |
|
"loss": 2.0157, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.3915732508697332, |
|
"grad_norm": 1.159419298171997, |
|
"learning_rate": 1.4437113402061857e-05, |
|
"loss": 1.9806, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.4044581883777862, |
|
"grad_norm": 1.0512197017669678, |
|
"learning_rate": 1.438556701030928e-05, |
|
"loss": 1.9496, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.4173431258858393, |
|
"grad_norm": 1.125361442565918, |
|
"learning_rate": 1.4334020618556703e-05, |
|
"loss": 1.9632, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.4302280633938924, |
|
"grad_norm": 1.0887775421142578, |
|
"learning_rate": 1.4282474226804123e-05, |
|
"loss": 1.928, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.4431130009019455, |
|
"grad_norm": 1.0122510194778442, |
|
"learning_rate": 1.4230927835051546e-05, |
|
"loss": 1.9677, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.4559979384099986, |
|
"grad_norm": 0.9938000440597534, |
|
"learning_rate": 1.417938144329897e-05, |
|
"loss": 1.9492, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.4688828759180517, |
|
"grad_norm": 1.1632938385009766, |
|
"learning_rate": 1.4127835051546393e-05, |
|
"loss": 1.9629, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.4817678134261048, |
|
"grad_norm": 0.9199478626251221, |
|
"learning_rate": 1.4076288659793815e-05, |
|
"loss": 1.999, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.4946527509341578, |
|
"grad_norm": 1.1851013898849487, |
|
"learning_rate": 1.4024742268041238e-05, |
|
"loss": 1.964, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.507537688442211, |
|
"grad_norm": 1.541742205619812, |
|
"learning_rate": 1.397319587628866e-05, |
|
"loss": 2.0018, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.520422625950264, |
|
"grad_norm": 1.3189605474472046, |
|
"learning_rate": 1.3921649484536083e-05, |
|
"loss": 1.9245, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.533307563458317, |
|
"grad_norm": 0.995586097240448, |
|
"learning_rate": 1.3870103092783507e-05, |
|
"loss": 2.0303, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.5461925009663702, |
|
"grad_norm": 1.0431631803512573, |
|
"learning_rate": 1.381855670103093e-05, |
|
"loss": 1.9044, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.5590774384744233, |
|
"grad_norm": 1.0870169401168823, |
|
"learning_rate": 1.3767010309278352e-05, |
|
"loss": 2.0023, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.5719623759824763, |
|
"grad_norm": 0.921909511089325, |
|
"learning_rate": 1.3715463917525775e-05, |
|
"loss": 1.9186, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.5848473134905294, |
|
"grad_norm": 1.1961994171142578, |
|
"learning_rate": 1.3663917525773196e-05, |
|
"loss": 1.9109, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.5977322509985825, |
|
"grad_norm": 1.0308939218521118, |
|
"learning_rate": 1.3612371134020618e-05, |
|
"loss": 1.9146, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.6106171885066356, |
|
"grad_norm": 2.139348030090332, |
|
"learning_rate": 1.3560824742268041e-05, |
|
"loss": 1.9541, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.6235021260146887, |
|
"grad_norm": 1.0335361957550049, |
|
"learning_rate": 1.3509278350515465e-05, |
|
"loss": 2.0021, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.6363870635227418, |
|
"grad_norm": 1.0377309322357178, |
|
"learning_rate": 1.3457731958762888e-05, |
|
"loss": 1.9546, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.6492720010307949, |
|
"grad_norm": 0.9605665802955627, |
|
"learning_rate": 1.340618556701031e-05, |
|
"loss": 1.9477, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.662156938538848, |
|
"grad_norm": 1.1977301836013794, |
|
"learning_rate": 1.3354639175257733e-05, |
|
"loss": 1.928, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.675041876046901, |
|
"grad_norm": 0.9851065874099731, |
|
"learning_rate": 1.3303092783505156e-05, |
|
"loss": 1.9224, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.687926813554954, |
|
"grad_norm": 0.8746098875999451, |
|
"learning_rate": 1.3251546391752578e-05, |
|
"loss": 1.9798, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.7008117510630072, |
|
"grad_norm": 0.9900497794151306, |
|
"learning_rate": 1.3200000000000002e-05, |
|
"loss": 1.9244, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.7136966885710603, |
|
"grad_norm": 0.9657949805259705, |
|
"learning_rate": 1.3148453608247425e-05, |
|
"loss": 1.9565, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.7265816260791134, |
|
"grad_norm": 1.1366913318634033, |
|
"learning_rate": 1.3096907216494847e-05, |
|
"loss": 1.9724, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.7394665635871664, |
|
"grad_norm": 1.15602445602417, |
|
"learning_rate": 1.3045360824742268e-05, |
|
"loss": 1.927, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.7523515010952195, |
|
"grad_norm": 0.9413411021232605, |
|
"learning_rate": 1.2993814432989691e-05, |
|
"loss": 1.9347, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.7652364386032726, |
|
"grad_norm": 0.9607951641082764, |
|
"learning_rate": 1.2942268041237113e-05, |
|
"loss": 1.9495, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.7781213761113257, |
|
"grad_norm": 1.155685305595398, |
|
"learning_rate": 1.2890721649484536e-05, |
|
"loss": 1.9498, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.7910063136193788, |
|
"grad_norm": 1.8821039199829102, |
|
"learning_rate": 1.283917525773196e-05, |
|
"loss": 1.9411, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.8038912511274319, |
|
"grad_norm": 1.2264201641082764, |
|
"learning_rate": 1.2787628865979383e-05, |
|
"loss": 1.9488, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.816776188635485, |
|
"grad_norm": 0.9997029304504395, |
|
"learning_rate": 1.2736082474226805e-05, |
|
"loss": 1.9162, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.829661126143538, |
|
"grad_norm": 1.1943738460540771, |
|
"learning_rate": 1.2684536082474228e-05, |
|
"loss": 1.961, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.8425460636515911, |
|
"grad_norm": 1.1875113248825073, |
|
"learning_rate": 1.263298969072165e-05, |
|
"loss": 1.9256, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.8554310011596442, |
|
"grad_norm": 1.0550329685211182, |
|
"learning_rate": 1.2581443298969073e-05, |
|
"loss": 1.9519, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.8683159386676973, |
|
"grad_norm": 1.3292375802993774, |
|
"learning_rate": 1.2529896907216497e-05, |
|
"loss": 1.9341, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.8812008761757506, |
|
"grad_norm": 1.0914188623428345, |
|
"learning_rate": 1.247835051546392e-05, |
|
"loss": 1.894, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.8940858136838037, |
|
"grad_norm": 1.1687994003295898, |
|
"learning_rate": 1.242680412371134e-05, |
|
"loss": 1.9044, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.9069707511918568, |
|
"grad_norm": 1.0040736198425293, |
|
"learning_rate": 1.2376288659793816e-05, |
|
"loss": 1.919, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.9198556886999099, |
|
"grad_norm": 1.0108208656311035, |
|
"learning_rate": 1.2324742268041239e-05, |
|
"loss": 1.8785, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.932740626207963, |
|
"grad_norm": 1.0039801597595215, |
|
"learning_rate": 1.2273195876288662e-05, |
|
"loss": 1.8761, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.945625563716016, |
|
"grad_norm": 1.0580838918685913, |
|
"learning_rate": 1.2221649484536084e-05, |
|
"loss": 1.8699, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.9585105012240691, |
|
"grad_norm": 1.1629561185836792, |
|
"learning_rate": 1.2170103092783505e-05, |
|
"loss": 1.927, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.9713954387321222, |
|
"grad_norm": 0.908470094203949, |
|
"learning_rate": 1.2118556701030928e-05, |
|
"loss": 1.9778, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.9842803762401753, |
|
"grad_norm": 1.1411256790161133, |
|
"learning_rate": 1.206701030927835e-05, |
|
"loss": 1.9035, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.9971653137482284, |
|
"grad_norm": 0.9729508757591248, |
|
"learning_rate": 1.2015463917525774e-05, |
|
"loss": 1.9071, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 19.4026, |
|
"eval_gen_len": 45.2194, |
|
"eval_loss": 1.662958025932312, |
|
"eval_runtime": 2310.4182, |
|
"eval_samples_per_second": 6.717, |
|
"eval_steps_per_second": 0.42, |
|
"step": 7761 |
|
}, |
|
{ |
|
"epoch": 2.0100502512562812, |
|
"grad_norm": 1.2045557498931885, |
|
"learning_rate": 1.1963917525773197e-05, |
|
"loss": 1.8937, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.0229351887643343, |
|
"grad_norm": 0.9809572696685791, |
|
"learning_rate": 1.191237113402062e-05, |
|
"loss": 1.9757, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.0358201262723874, |
|
"grad_norm": 1.0871580839157104, |
|
"learning_rate": 1.1860824742268042e-05, |
|
"loss": 1.9366, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.0487050637804405, |
|
"grad_norm": 1.1801034212112427, |
|
"learning_rate": 1.1809278350515465e-05, |
|
"loss": 1.92, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.0615900012884936, |
|
"grad_norm": 1.118897557258606, |
|
"learning_rate": 1.1757731958762887e-05, |
|
"loss": 1.9593, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.0744749387965467, |
|
"grad_norm": 1.1403993368148804, |
|
"learning_rate": 1.1706185567010311e-05, |
|
"loss": 1.8699, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.0873598763045997, |
|
"grad_norm": 3.7084872722625732, |
|
"learning_rate": 1.1654639175257734e-05, |
|
"loss": 1.8686, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.100244813812653, |
|
"grad_norm": 1.135362982749939, |
|
"learning_rate": 1.1603092783505157e-05, |
|
"loss": 1.9068, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.113129751320706, |
|
"grad_norm": 1.5067939758300781, |
|
"learning_rate": 1.1551546391752577e-05, |
|
"loss": 1.9196, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.126014688828759, |
|
"grad_norm": 1.0155831575393677, |
|
"learning_rate": 1.15e-05, |
|
"loss": 1.8711, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.138899626336812, |
|
"grad_norm": 1.2201752662658691, |
|
"learning_rate": 1.1448453608247423e-05, |
|
"loss": 1.8985, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.151784563844865, |
|
"grad_norm": 1.0999071598052979, |
|
"learning_rate": 1.1396907216494845e-05, |
|
"loss": 1.8978, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.1646695013529182, |
|
"grad_norm": 1.1638729572296143, |
|
"learning_rate": 1.134536082474227e-05, |
|
"loss": 1.8593, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.1775544388609713, |
|
"grad_norm": 1.1784203052520752, |
|
"learning_rate": 1.1293814432989692e-05, |
|
"loss": 1.8663, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.1904393763690244, |
|
"grad_norm": 1.026315450668335, |
|
"learning_rate": 1.1242268041237115e-05, |
|
"loss": 1.8285, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.2033243138770775, |
|
"grad_norm": 0.9852938652038574, |
|
"learning_rate": 1.1190721649484537e-05, |
|
"loss": 1.9048, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.2162092513851306, |
|
"grad_norm": 1.052303671836853, |
|
"learning_rate": 1.113917525773196e-05, |
|
"loss": 1.8808, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.2290941888931837, |
|
"grad_norm": 5.098678112030029, |
|
"learning_rate": 1.1087628865979382e-05, |
|
"loss": 1.8679, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.2419791264012368, |
|
"grad_norm": 1.1427991390228271, |
|
"learning_rate": 1.1036082474226806e-05, |
|
"loss": 1.9065, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.25486406390929, |
|
"grad_norm": 3.338353395462036, |
|
"learning_rate": 1.0984536082474229e-05, |
|
"loss": 1.8915, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.267749001417343, |
|
"grad_norm": 1.2049264907836914, |
|
"learning_rate": 1.093298969072165e-05, |
|
"loss": 1.9018, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.280633938925396, |
|
"grad_norm": 1.0751088857650757, |
|
"learning_rate": 1.0881443298969072e-05, |
|
"loss": 1.8914, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.293518876433449, |
|
"grad_norm": 0.9149059653282166, |
|
"learning_rate": 1.0829896907216495e-05, |
|
"loss": 1.8717, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.306403813941502, |
|
"grad_norm": 1.0154598951339722, |
|
"learning_rate": 1.0778350515463918e-05, |
|
"loss": 1.8979, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.3192887514495553, |
|
"grad_norm": 1.198451042175293, |
|
"learning_rate": 1.072680412371134e-05, |
|
"loss": 1.8987, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.3321736889576083, |
|
"grad_norm": 1.0957529544830322, |
|
"learning_rate": 1.0675257731958764e-05, |
|
"loss": 1.892, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.3450586264656614, |
|
"grad_norm": 1.055180311203003, |
|
"learning_rate": 1.0623711340206187e-05, |
|
"loss": 1.8694, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.3579435639737145, |
|
"grad_norm": 1.094000220298767, |
|
"learning_rate": 1.057216494845361e-05, |
|
"loss": 1.9105, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.3708285014817676, |
|
"grad_norm": 1.0135473012924194, |
|
"learning_rate": 1.0520618556701032e-05, |
|
"loss": 1.892, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.3837134389898207, |
|
"grad_norm": 1.2554734945297241, |
|
"learning_rate": 1.0469072164948455e-05, |
|
"loss": 1.9458, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.3965983764978738, |
|
"grad_norm": 1.312153697013855, |
|
"learning_rate": 1.0417525773195877e-05, |
|
"loss": 1.9138, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.409483314005927, |
|
"grad_norm": 1.0656461715698242, |
|
"learning_rate": 1.0367010309278351e-05, |
|
"loss": 1.931, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.42236825151398, |
|
"grad_norm": 1.4363470077514648, |
|
"learning_rate": 1.0315463917525774e-05, |
|
"loss": 1.8815, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.435253189022033, |
|
"grad_norm": 1.2165566682815552, |
|
"learning_rate": 1.0263917525773196e-05, |
|
"loss": 1.8774, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.448138126530086, |
|
"grad_norm": 1.2879478931427002, |
|
"learning_rate": 1.021237113402062e-05, |
|
"loss": 1.9324, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.461023064038139, |
|
"grad_norm": 1.1694004535675049, |
|
"learning_rate": 1.0160824742268043e-05, |
|
"loss": 1.8595, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.4739080015461923, |
|
"grad_norm": 1.1013145446777344, |
|
"learning_rate": 1.0109278350515466e-05, |
|
"loss": 1.8523, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.4867929390542454, |
|
"grad_norm": 1.0102790594100952, |
|
"learning_rate": 1.0057731958762887e-05, |
|
"loss": 1.8239, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.4996778765622985, |
|
"grad_norm": 2.130802869796753, |
|
"learning_rate": 1.0006185567010309e-05, |
|
"loss": 1.9528, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.5125628140703515, |
|
"grad_norm": 1.3339593410491943, |
|
"learning_rate": 9.954639175257733e-06, |
|
"loss": 1.8553, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.5254477515784046, |
|
"grad_norm": 1.0675934553146362, |
|
"learning_rate": 9.903092783505154e-06, |
|
"loss": 1.8547, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.5383326890864577, |
|
"grad_norm": 1.111695408821106, |
|
"learning_rate": 9.851546391752578e-06, |
|
"loss": 1.81, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.551217626594511, |
|
"grad_norm": 1.3534823656082153, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 1.8789, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"grad_norm": 1.138906717300415, |
|
"learning_rate": 9.748453608247424e-06, |
|
"loss": 1.8355, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.576987501610617, |
|
"grad_norm": 1.183947205543518, |
|
"learning_rate": 9.696907216494846e-06, |
|
"loss": 1.8563, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.58987243911867, |
|
"grad_norm": 0.9370452165603638, |
|
"learning_rate": 9.645360824742269e-06, |
|
"loss": 1.8845, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.602757376626723, |
|
"grad_norm": 1.1878234148025513, |
|
"learning_rate": 9.593814432989691e-06, |
|
"loss": 1.9203, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.615642314134776, |
|
"grad_norm": 1.0847914218902588, |
|
"learning_rate": 9.542268041237114e-06, |
|
"loss": 1.8816, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.6285272516428293, |
|
"grad_norm": 1.2127825021743774, |
|
"learning_rate": 9.490721649484536e-06, |
|
"loss": 1.899, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.6414121891508824, |
|
"grad_norm": 1.0567888021469116, |
|
"learning_rate": 9.439175257731959e-06, |
|
"loss": 1.8433, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.6542971266589355, |
|
"grad_norm": 1.1838716268539429, |
|
"learning_rate": 9.387628865979383e-06, |
|
"loss": 1.8931, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.6671820641669886, |
|
"grad_norm": 1.1113821268081665, |
|
"learning_rate": 9.336082474226806e-06, |
|
"loss": 1.9041, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.6800670016750416, |
|
"grad_norm": 1.538613200187683, |
|
"learning_rate": 9.284536082474228e-06, |
|
"loss": 1.9085, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.6929519391830947, |
|
"grad_norm": 1.1364761590957642, |
|
"learning_rate": 9.23298969072165e-06, |
|
"loss": 1.8925, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.705836876691148, |
|
"grad_norm": 1.1954172849655151, |
|
"learning_rate": 9.181443298969073e-06, |
|
"loss": 1.8608, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.718721814199201, |
|
"grad_norm": 0.8984624147415161, |
|
"learning_rate": 9.129896907216496e-06, |
|
"loss": 1.823, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.731606751707254, |
|
"grad_norm": 1.0665663480758667, |
|
"learning_rate": 9.078350515463919e-06, |
|
"loss": 1.9052, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.744491689215307, |
|
"grad_norm": 1.2751344442367554, |
|
"learning_rate": 9.026804123711341e-06, |
|
"loss": 1.8573, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.75737662672336, |
|
"grad_norm": 0.964619517326355, |
|
"learning_rate": 8.975257731958764e-06, |
|
"loss": 1.9581, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.7702615642314132, |
|
"grad_norm": 1.1248286962509155, |
|
"learning_rate": 8.923711340206186e-06, |
|
"loss": 1.9004, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.7831465017394663, |
|
"grad_norm": 1.2471715211868286, |
|
"learning_rate": 8.872164948453609e-06, |
|
"loss": 1.8969, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.7960314392475194, |
|
"grad_norm": 1.3639956712722778, |
|
"learning_rate": 8.820618556701031e-06, |
|
"loss": 1.8493, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.8089163767555725, |
|
"grad_norm": 1.1183199882507324, |
|
"learning_rate": 8.769072164948454e-06, |
|
"loss": 1.8888, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.8218013142636256, |
|
"grad_norm": 1.3162132501602173, |
|
"learning_rate": 8.717525773195877e-06, |
|
"loss": 1.8567, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.8346862517716787, |
|
"grad_norm": 1.2056224346160889, |
|
"learning_rate": 8.6659793814433e-06, |
|
"loss": 1.8687, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.8475711892797317, |
|
"grad_norm": 1.285947322845459, |
|
"learning_rate": 8.614432989690722e-06, |
|
"loss": 1.8864, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.860456126787785, |
|
"grad_norm": 1.292939305305481, |
|
"learning_rate": 8.562886597938144e-06, |
|
"loss": 1.9056, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.873341064295838, |
|
"grad_norm": 1.2155085802078247, |
|
"learning_rate": 8.511340206185568e-06, |
|
"loss": 1.8699, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.886226001803891, |
|
"grad_norm": 1.4173967838287354, |
|
"learning_rate": 8.459793814432991e-06, |
|
"loss": 1.8581, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.899110939311944, |
|
"grad_norm": 1.0226136445999146, |
|
"learning_rate": 8.408247422680414e-06, |
|
"loss": 1.8491, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.911995876819997, |
|
"grad_norm": 1.2074532508850098, |
|
"learning_rate": 8.356701030927836e-06, |
|
"loss": 1.8493, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.9248808143280502, |
|
"grad_norm": 1.0812984704971313, |
|
"learning_rate": 8.305154639175259e-06, |
|
"loss": 1.8839, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.9377657518361033, |
|
"grad_norm": 1.3052395582199097, |
|
"learning_rate": 8.253608247422681e-06, |
|
"loss": 1.869, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.9506506893441564, |
|
"grad_norm": 1.0708857774734497, |
|
"learning_rate": 8.202061855670104e-06, |
|
"loss": 1.855, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.9635356268522095, |
|
"grad_norm": 0.9860512614250183, |
|
"learning_rate": 8.150515463917526e-06, |
|
"loss": 1.8535, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.9764205643602626, |
|
"grad_norm": 0.9245162010192871, |
|
"learning_rate": 8.098969072164949e-06, |
|
"loss": 1.8647, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.9893055018683157, |
|
"grad_norm": 1.1266101598739624, |
|
"learning_rate": 8.047422680412372e-06, |
|
"loss": 1.8646, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.9998711506249194, |
|
"eval_bleu": 19.9649, |
|
"eval_gen_len": 45.274, |
|
"eval_loss": 1.6131339073181152, |
|
"eval_runtime": 2307.2556, |
|
"eval_samples_per_second": 6.727, |
|
"eval_steps_per_second": 0.42, |
|
"step": 11641 |
|
}, |
|
{ |
|
"epoch": 3.002190439376369, |
|
"grad_norm": 0.924880862236023, |
|
"learning_rate": 7.995876288659794e-06, |
|
"loss": 1.8485, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 3.0150753768844223, |
|
"grad_norm": 1.0113394260406494, |
|
"learning_rate": 7.944329896907217e-06, |
|
"loss": 1.8349, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.0279603143924754, |
|
"grad_norm": 1.1226181983947754, |
|
"learning_rate": 7.89278350515464e-06, |
|
"loss": 1.8707, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 3.0408452519005285, |
|
"grad_norm": 1.0973234176635742, |
|
"learning_rate": 7.841237113402062e-06, |
|
"loss": 1.8104, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.0537301894085815, |
|
"grad_norm": 1.2233961820602417, |
|
"learning_rate": 7.789690721649486e-06, |
|
"loss": 1.8668, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 3.0666151269166346, |
|
"grad_norm": 1.1300643682479858, |
|
"learning_rate": 7.738144329896909e-06, |
|
"loss": 1.8661, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.0795000644246877, |
|
"grad_norm": 1.1732138395309448, |
|
"learning_rate": 7.68659793814433e-06, |
|
"loss": 1.8213, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 3.092385001932741, |
|
"grad_norm": 1.459231972694397, |
|
"learning_rate": 7.635051546391754e-06, |
|
"loss": 1.8533, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.105269939440794, |
|
"grad_norm": 1.353126049041748, |
|
"learning_rate": 7.5835051546391755e-06, |
|
"loss": 1.8838, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 3.118154876948847, |
|
"grad_norm": 0.9796210527420044, |
|
"learning_rate": 7.531958762886599e-06, |
|
"loss": 1.8079, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.1310398144569, |
|
"grad_norm": 1.1230041980743408, |
|
"learning_rate": 7.4804123711340214e-06, |
|
"loss": 1.9103, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 3.143924751964953, |
|
"grad_norm": 1.3261069059371948, |
|
"learning_rate": 7.428865979381444e-06, |
|
"loss": 1.845, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.156809689473006, |
|
"grad_norm": 1.0127289295196533, |
|
"learning_rate": 7.377319587628866e-06, |
|
"loss": 1.8408, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 3.1696946269810593, |
|
"grad_norm": 1.1761748790740967, |
|
"learning_rate": 7.325773195876289e-06, |
|
"loss": 1.855, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.1825795644891124, |
|
"grad_norm": 1.1443302631378174, |
|
"learning_rate": 7.274226804123712e-06, |
|
"loss": 1.8234, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 3.1954645019971655, |
|
"grad_norm": 1.1420938968658447, |
|
"learning_rate": 7.222680412371135e-06, |
|
"loss": 1.9063, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.2083494395052186, |
|
"grad_norm": 0.9729594588279724, |
|
"learning_rate": 7.171134020618558e-06, |
|
"loss": 1.9181, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 3.2212343770132716, |
|
"grad_norm": 1.2192091941833496, |
|
"learning_rate": 7.11958762886598e-06, |
|
"loss": 1.9125, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.2341193145213247, |
|
"grad_norm": 1.335284948348999, |
|
"learning_rate": 7.068041237113402e-06, |
|
"loss": 1.8348, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 3.247004252029378, |
|
"grad_norm": 1.5923230648040771, |
|
"learning_rate": 7.016494845360825e-06, |
|
"loss": 1.8523, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.259889189537431, |
|
"grad_norm": 1.3718382120132446, |
|
"learning_rate": 6.964948453608248e-06, |
|
"loss": 1.8404, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 3.272774127045484, |
|
"grad_norm": 1.3347010612487793, |
|
"learning_rate": 6.9134020618556705e-06, |
|
"loss": 1.852, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.285659064553537, |
|
"grad_norm": 1.3411351442337036, |
|
"learning_rate": 6.861855670103094e-06, |
|
"loss": 1.8505, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 3.29854400206159, |
|
"grad_norm": 1.2156487703323364, |
|
"learning_rate": 6.8103092783505165e-06, |
|
"loss": 1.8589, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.3114289395696432, |
|
"grad_norm": 1.1836252212524414, |
|
"learning_rate": 6.758762886597938e-06, |
|
"loss": 1.8518, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 3.3243138770776963, |
|
"grad_norm": 1.3949558734893799, |
|
"learning_rate": 6.707216494845361e-06, |
|
"loss": 1.8459, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.3371988145857494, |
|
"grad_norm": 1.0333205461502075, |
|
"learning_rate": 6.655670103092784e-06, |
|
"loss": 1.86, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 3.3500837520938025, |
|
"grad_norm": 1.0828937292099, |
|
"learning_rate": 6.604123711340207e-06, |
|
"loss": 1.8463, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.3629686896018556, |
|
"grad_norm": 1.1059962511062622, |
|
"learning_rate": 6.552577319587629e-06, |
|
"loss": 1.8608, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 3.3758536271099087, |
|
"grad_norm": 1.025884747505188, |
|
"learning_rate": 6.501030927835053e-06, |
|
"loss": 1.8371, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.3887385646179617, |
|
"grad_norm": 1.0845879316329956, |
|
"learning_rate": 6.449484536082474e-06, |
|
"loss": 1.8205, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 3.401623502126015, |
|
"grad_norm": 0.9505090713500977, |
|
"learning_rate": 6.397938144329897e-06, |
|
"loss": 1.8467, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.414508439634068, |
|
"grad_norm": 1.1278256177902222, |
|
"learning_rate": 6.34639175257732e-06, |
|
"loss": 1.8406, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 3.427393377142121, |
|
"grad_norm": 1.0838017463684082, |
|
"learning_rate": 6.294845360824743e-06, |
|
"loss": 1.8284, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.440278314650174, |
|
"grad_norm": 1.212337851524353, |
|
"learning_rate": 6.2432989690721655e-06, |
|
"loss": 1.8202, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 3.453163252158227, |
|
"grad_norm": 1.0882956981658936, |
|
"learning_rate": 6.191752577319589e-06, |
|
"loss": 1.8493, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.4660481896662803, |
|
"grad_norm": 1.213768482208252, |
|
"learning_rate": 6.140206185567011e-06, |
|
"loss": 1.8518, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 3.4789331271743333, |
|
"grad_norm": 1.2855191230773926, |
|
"learning_rate": 6.088659793814433e-06, |
|
"loss": 1.8574, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.4918180646823864, |
|
"grad_norm": 1.0810940265655518, |
|
"learning_rate": 6.037113402061856e-06, |
|
"loss": 1.8766, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 3.5047030021904395, |
|
"grad_norm": 1.15432608127594, |
|
"learning_rate": 5.985567010309279e-06, |
|
"loss": 1.836, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.5175879396984926, |
|
"grad_norm": 1.468928337097168, |
|
"learning_rate": 5.934020618556702e-06, |
|
"loss": 1.8165, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 3.5304728772065457, |
|
"grad_norm": 1.6314187049865723, |
|
"learning_rate": 5.882474226804124e-06, |
|
"loss": 1.8725, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.5433578147145988, |
|
"grad_norm": 1.1987876892089844, |
|
"learning_rate": 5.830927835051546e-06, |
|
"loss": 1.8486, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 3.556242752222652, |
|
"grad_norm": 1.1263744831085205, |
|
"learning_rate": 5.779381443298969e-06, |
|
"loss": 1.8739, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.569127689730705, |
|
"grad_norm": 1.2357795238494873, |
|
"learning_rate": 5.727835051546392e-06, |
|
"loss": 1.848, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 3.582012627238758, |
|
"grad_norm": 1.228352427482605, |
|
"learning_rate": 5.6762886597938145e-06, |
|
"loss": 1.8785, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 3.594897564746811, |
|
"grad_norm": 1.0710021257400513, |
|
"learning_rate": 5.624742268041238e-06, |
|
"loss": 1.8108, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 3.607782502254864, |
|
"grad_norm": 0.9839572906494141, |
|
"learning_rate": 5.5731958762886605e-06, |
|
"loss": 1.8779, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.6206674397629173, |
|
"grad_norm": 1.1732807159423828, |
|
"learning_rate": 5.521649484536082e-06, |
|
"loss": 1.8692, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 3.6335523772709704, |
|
"grad_norm": 1.0930730104446411, |
|
"learning_rate": 5.470103092783506e-06, |
|
"loss": 1.7912, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 3.6464373147790234, |
|
"grad_norm": 1.1306408643722534, |
|
"learning_rate": 5.418556701030928e-06, |
|
"loss": 1.7992, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 3.6593222522870765, |
|
"grad_norm": 1.171573281288147, |
|
"learning_rate": 5.367010309278351e-06, |
|
"loss": 1.8286, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.6722071897951296, |
|
"grad_norm": 1.033572793006897, |
|
"learning_rate": 5.315463917525774e-06, |
|
"loss": 1.8168, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 3.6850921273031827, |
|
"grad_norm": 1.109149694442749, |
|
"learning_rate": 5.263917525773197e-06, |
|
"loss": 1.8229, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 3.697977064811236, |
|
"grad_norm": 1.085472822189331, |
|
"learning_rate": 5.2123711340206184e-06, |
|
"loss": 1.7883, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 3.710862002319289, |
|
"grad_norm": 1.0914117097854614, |
|
"learning_rate": 5.160824742268041e-06, |
|
"loss": 1.8375, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.723746939827342, |
|
"grad_norm": 1.3772042989730835, |
|
"learning_rate": 5.110309278350516e-06, |
|
"loss": 1.8259, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 3.736631877335395, |
|
"grad_norm": 0.9119631052017212, |
|
"learning_rate": 5.058762886597939e-06, |
|
"loss": 1.8679, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.749516814843448, |
|
"grad_norm": 1.1717164516448975, |
|
"learning_rate": 5.007216494845362e-06, |
|
"loss": 1.8524, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 3.762401752351501, |
|
"grad_norm": 1.131783127784729, |
|
"learning_rate": 4.955670103092784e-06, |
|
"loss": 1.8081, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.7752866898595543, |
|
"grad_norm": 1.1898800134658813, |
|
"learning_rate": 4.904123711340207e-06, |
|
"loss": 1.8172, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 3.7881716273676074, |
|
"grad_norm": 1.0781954526901245, |
|
"learning_rate": 4.8525773195876294e-06, |
|
"loss": 1.8365, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 3.8010565648756605, |
|
"grad_norm": 1.1128448247909546, |
|
"learning_rate": 4.801030927835052e-06, |
|
"loss": 1.8904, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 3.8139415023837135, |
|
"grad_norm": 1.0720164775848389, |
|
"learning_rate": 4.7494845360824746e-06, |
|
"loss": 1.8521, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.8268264398917666, |
|
"grad_norm": 1.0853550434112549, |
|
"learning_rate": 4.697938144329897e-06, |
|
"loss": 1.7686, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 3.8397113773998197, |
|
"grad_norm": 0.9527387619018555, |
|
"learning_rate": 4.64639175257732e-06, |
|
"loss": 1.8529, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 3.852596314907873, |
|
"grad_norm": 1.3065271377563477, |
|
"learning_rate": 4.594845360824743e-06, |
|
"loss": 1.8569, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 3.865481252415926, |
|
"grad_norm": 1.2607804536819458, |
|
"learning_rate": 4.543298969072165e-06, |
|
"loss": 1.8417, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.878366189923979, |
|
"grad_norm": 1.089626669883728, |
|
"learning_rate": 4.491752577319588e-06, |
|
"loss": 1.8491, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 3.891251127432032, |
|
"grad_norm": 1.2275793552398682, |
|
"learning_rate": 4.440206185567011e-06, |
|
"loss": 1.8475, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 3.904136064940085, |
|
"grad_norm": 1.2494066953659058, |
|
"learning_rate": 4.388659793814433e-06, |
|
"loss": 1.8095, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 3.9170210024481382, |
|
"grad_norm": 1.0344122648239136, |
|
"learning_rate": 4.337113402061856e-06, |
|
"loss": 1.8177, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 3.9299059399561913, |
|
"grad_norm": 1.20706307888031, |
|
"learning_rate": 4.285567010309279e-06, |
|
"loss": 1.7988, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 3.9427908774642444, |
|
"grad_norm": 1.0796010494232178, |
|
"learning_rate": 4.234020618556701e-06, |
|
"loss": 1.8232, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 3.9556758149722975, |
|
"grad_norm": 1.2336502075195312, |
|
"learning_rate": 4.1824742268041245e-06, |
|
"loss": 1.8759, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 3.9685607524803506, |
|
"grad_norm": 1.0533246994018555, |
|
"learning_rate": 4.130927835051547e-06, |
|
"loss": 1.8084, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.9814456899884036, |
|
"grad_norm": 1.1837642192840576, |
|
"learning_rate": 4.07938144329897e-06, |
|
"loss": 1.8939, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 3.9943306274964567, |
|
"grad_norm": 1.1679872274398804, |
|
"learning_rate": 4.027835051546392e-06, |
|
"loss": 1.8759, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 20.3549, |
|
"eval_gen_len": 45.3627, |
|
"eval_loss": 1.5858944654464722, |
|
"eval_runtime": 2286.6294, |
|
"eval_samples_per_second": 6.787, |
|
"eval_steps_per_second": 0.424, |
|
"step": 15522 |
|
}, |
|
{ |
|
"epoch": 4.007215565004509, |
|
"grad_norm": 1.6103421449661255, |
|
"learning_rate": 3.976288659793815e-06, |
|
"loss": 1.8672, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 4.0201005025125625, |
|
"grad_norm": 1.074686050415039, |
|
"learning_rate": 3.924742268041237e-06, |
|
"loss": 1.8157, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 4.0329854400206155, |
|
"grad_norm": 1.11213219165802, |
|
"learning_rate": 3.87319587628866e-06, |
|
"loss": 1.8017, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 4.045870377528669, |
|
"grad_norm": 1.1408663988113403, |
|
"learning_rate": 3.821649484536083e-06, |
|
"loss": 1.7926, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 4.058755315036722, |
|
"grad_norm": 1.0430666208267212, |
|
"learning_rate": 3.7701030927835054e-06, |
|
"loss": 1.7889, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 4.071640252544775, |
|
"grad_norm": 1.1866077184677124, |
|
"learning_rate": 3.718556701030928e-06, |
|
"loss": 1.7969, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 4.084525190052828, |
|
"grad_norm": 1.362838625907898, |
|
"learning_rate": 3.667010309278351e-06, |
|
"loss": 1.8444, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 4.097410127560881, |
|
"grad_norm": 1.0319502353668213, |
|
"learning_rate": 3.6154639175257735e-06, |
|
"loss": 1.8415, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 4.110295065068934, |
|
"grad_norm": 1.0608545541763306, |
|
"learning_rate": 3.563917525773196e-06, |
|
"loss": 1.8614, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 4.123180002576987, |
|
"grad_norm": 1.0609222650527954, |
|
"learning_rate": 3.512371134020619e-06, |
|
"loss": 1.8395, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.13606494008504, |
|
"grad_norm": 1.0247282981872559, |
|
"learning_rate": 3.460824742268041e-06, |
|
"loss": 1.8515, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 4.148949877593093, |
|
"grad_norm": 1.1988474130630493, |
|
"learning_rate": 3.409278350515464e-06, |
|
"loss": 1.8241, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 4.161834815101146, |
|
"grad_norm": 1.164461374282837, |
|
"learning_rate": 3.357731958762887e-06, |
|
"loss": 1.8444, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 4.1747197526091995, |
|
"grad_norm": 1.090458631515503, |
|
"learning_rate": 3.3061855670103093e-06, |
|
"loss": 1.844, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.187604690117253, |
|
"grad_norm": 1.4075111150741577, |
|
"learning_rate": 3.2546391752577323e-06, |
|
"loss": 1.8042, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 4.200489627625306, |
|
"grad_norm": 1.1348927021026611, |
|
"learning_rate": 3.2030927835051553e-06, |
|
"loss": 1.7964, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 4.213374565133359, |
|
"grad_norm": 1.3190233707427979, |
|
"learning_rate": 3.1515463917525774e-06, |
|
"loss": 1.8265, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 4.226259502641412, |
|
"grad_norm": 1.0522139072418213, |
|
"learning_rate": 3.1000000000000004e-06, |
|
"loss": 1.8129, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 4.239144440149465, |
|
"grad_norm": 1.0048468112945557, |
|
"learning_rate": 3.048453608247423e-06, |
|
"loss": 1.8452, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 4.252029377657518, |
|
"grad_norm": 1.180903434753418, |
|
"learning_rate": 2.9969072164948455e-06, |
|
"loss": 1.8357, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.264914315165571, |
|
"grad_norm": 1.297938585281372, |
|
"learning_rate": 2.945360824742268e-06, |
|
"loss": 1.8436, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 4.277799252673624, |
|
"grad_norm": 1.0574970245361328, |
|
"learning_rate": 2.893814432989691e-06, |
|
"loss": 1.8493, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 4.290684190181677, |
|
"grad_norm": 1.082375168800354, |
|
"learning_rate": 2.8422680412371136e-06, |
|
"loss": 1.8424, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 4.30356912768973, |
|
"grad_norm": 0.9978700280189514, |
|
"learning_rate": 2.790721649484536e-06, |
|
"loss": 1.865, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 4.316454065197783, |
|
"grad_norm": 1.1757255792617798, |
|
"learning_rate": 2.739175257731959e-06, |
|
"loss": 1.8743, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 4.3293390027058365, |
|
"grad_norm": 1.1812617778778076, |
|
"learning_rate": 2.6876288659793813e-06, |
|
"loss": 1.8218, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 4.34222394021389, |
|
"grad_norm": 1.126605749130249, |
|
"learning_rate": 2.6360824742268043e-06, |
|
"loss": 1.8278, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 4.355108877721943, |
|
"grad_norm": 1.1985175609588623, |
|
"learning_rate": 2.5845360824742273e-06, |
|
"loss": 1.8033, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 4.367993815229996, |
|
"grad_norm": 1.1603890657424927, |
|
"learning_rate": 2.5329896907216494e-06, |
|
"loss": 1.8361, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 4.380878752738049, |
|
"grad_norm": 1.209686517715454, |
|
"learning_rate": 2.4814432989690724e-06, |
|
"loss": 1.8762, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.393763690246102, |
|
"grad_norm": 1.105088233947754, |
|
"learning_rate": 2.429896907216495e-06, |
|
"loss": 1.8506, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 4.406648627754155, |
|
"grad_norm": 1.1008309125900269, |
|
"learning_rate": 2.378350515463918e-06, |
|
"loss": 1.8256, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 4.419533565262208, |
|
"grad_norm": 1.3243298530578613, |
|
"learning_rate": 2.3268041237113405e-06, |
|
"loss": 1.8009, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 4.432418502770261, |
|
"grad_norm": 1.0257518291473389, |
|
"learning_rate": 2.275257731958763e-06, |
|
"loss": 1.8524, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 4.445303440278314, |
|
"grad_norm": 0.9655742645263672, |
|
"learning_rate": 2.2237113402061856e-06, |
|
"loss": 1.8044, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 4.458188377786367, |
|
"grad_norm": 1.0721204280853271, |
|
"learning_rate": 2.172164948453608e-06, |
|
"loss": 1.8242, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 4.47107331529442, |
|
"grad_norm": 1.227541208267212, |
|
"learning_rate": 2.120618556701031e-06, |
|
"loss": 1.8186, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 4.4839582528024735, |
|
"grad_norm": 1.0894291400909424, |
|
"learning_rate": 2.070103092783505e-06, |
|
"loss": 1.7768, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 4.496843190310527, |
|
"grad_norm": 1.004269003868103, |
|
"learning_rate": 2.0185567010309277e-06, |
|
"loss": 1.8019, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 4.50972812781858, |
|
"grad_norm": 1.1534968614578247, |
|
"learning_rate": 1.9670103092783507e-06, |
|
"loss": 1.8244, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.522613065326633, |
|
"grad_norm": 1.3757740259170532, |
|
"learning_rate": 1.9154639175257733e-06, |
|
"loss": 1.8458, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 4.535498002834686, |
|
"grad_norm": 1.184401035308838, |
|
"learning_rate": 1.8639175257731958e-06, |
|
"loss": 1.829, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 4.548382940342739, |
|
"grad_norm": 1.2132309675216675, |
|
"learning_rate": 1.8123711340206188e-06, |
|
"loss": 1.8529, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 4.561267877850792, |
|
"grad_norm": 1.0804411172866821, |
|
"learning_rate": 1.7608247422680414e-06, |
|
"loss": 1.8534, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 4.574152815358845, |
|
"grad_norm": 1.5346250534057617, |
|
"learning_rate": 1.709278350515464e-06, |
|
"loss": 1.8277, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 4.587037752866898, |
|
"grad_norm": 1.1482338905334473, |
|
"learning_rate": 1.6577319587628867e-06, |
|
"loss": 1.803, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 4.599922690374951, |
|
"grad_norm": 1.171758770942688, |
|
"learning_rate": 1.6061855670103093e-06, |
|
"loss": 1.83, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 4.612807627883004, |
|
"grad_norm": 1.3336864709854126, |
|
"learning_rate": 1.554639175257732e-06, |
|
"loss": 1.8386, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 4.625692565391057, |
|
"grad_norm": 1.0265740156173706, |
|
"learning_rate": 1.5030927835051548e-06, |
|
"loss": 1.8072, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 4.6385775028991105, |
|
"grad_norm": 0.9137164950370789, |
|
"learning_rate": 1.4515463917525774e-06, |
|
"loss": 1.8189, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.651462440407164, |
|
"grad_norm": 1.2193052768707275, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 1.8489, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 4.664347377915217, |
|
"grad_norm": 1.1527855396270752, |
|
"learning_rate": 1.348453608247423e-06, |
|
"loss": 1.7922, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 4.67723231542327, |
|
"grad_norm": 0.9622436761856079, |
|
"learning_rate": 1.2969072164948455e-06, |
|
"loss": 1.81, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 4.690117252931323, |
|
"grad_norm": 1.173771858215332, |
|
"learning_rate": 1.245360824742268e-06, |
|
"loss": 1.8375, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 4.703002190439376, |
|
"grad_norm": 1.1019172668457031, |
|
"learning_rate": 1.1938144329896908e-06, |
|
"loss": 1.836, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 4.715887127947429, |
|
"grad_norm": 1.419956088066101, |
|
"learning_rate": 1.1422680412371134e-06, |
|
"loss": 1.7774, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 4.728772065455482, |
|
"grad_norm": 0.9891812205314636, |
|
"learning_rate": 1.0907216494845362e-06, |
|
"loss": 1.7989, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 4.741657002963535, |
|
"grad_norm": 1.0100419521331787, |
|
"learning_rate": 1.039175257731959e-06, |
|
"loss": 1.8439, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 4.754541940471588, |
|
"grad_norm": 1.1432263851165771, |
|
"learning_rate": 9.876288659793815e-07, |
|
"loss": 1.8154, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 4.767426877979641, |
|
"grad_norm": 1.1132447719573975, |
|
"learning_rate": 9.360824742268042e-07, |
|
"loss": 1.8817, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.7803118154876945, |
|
"grad_norm": 1.086591362953186, |
|
"learning_rate": 8.845360824742269e-07, |
|
"loss": 1.8116, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 4.7931967529957475, |
|
"grad_norm": 1.0462530851364136, |
|
"learning_rate": 8.329896907216496e-07, |
|
"loss": 1.8432, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 4.806081690503801, |
|
"grad_norm": 1.0310077667236328, |
|
"learning_rate": 7.814432989690722e-07, |
|
"loss": 1.825, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 4.818966628011854, |
|
"grad_norm": 1.0306345224380493, |
|
"learning_rate": 7.298969072164949e-07, |
|
"loss": 1.8085, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 4.831851565519907, |
|
"grad_norm": 1.4541102647781372, |
|
"learning_rate": 6.783505154639176e-07, |
|
"loss": 1.8276, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 4.84473650302796, |
|
"grad_norm": 1.2799372673034668, |
|
"learning_rate": 6.268041237113402e-07, |
|
"loss": 1.8876, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 4.857621440536013, |
|
"grad_norm": 1.0106414556503296, |
|
"learning_rate": 5.75257731958763e-07, |
|
"loss": 1.7739, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 4.870506378044066, |
|
"grad_norm": 1.11018967628479, |
|
"learning_rate": 5.237113402061856e-07, |
|
"loss": 1.8272, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 4.883391315552119, |
|
"grad_norm": 1.587908387184143, |
|
"learning_rate": 4.7216494845360834e-07, |
|
"loss": 1.8069, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 4.896276253060172, |
|
"grad_norm": 1.1059330701828003, |
|
"learning_rate": 4.2061855670103096e-07, |
|
"loss": 1.85, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.909161190568225, |
|
"grad_norm": 1.2126922607421875, |
|
"learning_rate": 3.690721649484536e-07, |
|
"loss": 1.8446, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 4.922046128076278, |
|
"grad_norm": 1.0558154582977295, |
|
"learning_rate": 3.1752577319587635e-07, |
|
"loss": 1.8169, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 4.9349310655843315, |
|
"grad_norm": 1.0007387399673462, |
|
"learning_rate": 2.65979381443299e-07, |
|
"loss": 1.813, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 4.947816003092385, |
|
"grad_norm": 1.1500272750854492, |
|
"learning_rate": 2.1443298969072168e-07, |
|
"loss": 1.8094, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 4.960700940600438, |
|
"grad_norm": 1.0796419382095337, |
|
"learning_rate": 1.6288659793814433e-07, |
|
"loss": 1.7628, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 4.973585878108491, |
|
"grad_norm": 1.1715208292007446, |
|
"learning_rate": 1.1134020618556701e-07, |
|
"loss": 1.8339, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 4.986470815616544, |
|
"grad_norm": 1.0896481275558472, |
|
"learning_rate": 5.97938144329897e-08, |
|
"loss": 1.8318, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 4.999355753124597, |
|
"grad_norm": 1.2462085485458374, |
|
"learning_rate": 8.247422680412371e-09, |
|
"loss": 1.8287, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 4.999355753124597, |
|
"eval_bleu": 20.4616, |
|
"eval_gen_len": 45.2528, |
|
"eval_loss": 1.578300952911377, |
|
"eval_runtime": 2280.7257, |
|
"eval_samples_per_second": 6.805, |
|
"eval_steps_per_second": 0.425, |
|
"step": 19400 |
|
} |
|
  ],
  "logging_steps": 50,
  "max_steps": 19400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.497673924411392e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}