|
{ |
|
"best_metric": 1.4905033111572266, |
|
"best_model_checkpoint": "./checkpoints/mbarthez-davide_articles-copy_enhanced/checkpoint-100656", |
|
"epoch": 3.0, |
|
"global_step": 100656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9971685741535528e-05, |
|
"loss": 2.9916, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9941881258941343e-05, |
|
"loss": 2.4185, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9912374821173107e-05, |
|
"loss": 2.336, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9882570338578922e-05, |
|
"loss": 2.2868, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.985276585598474e-05, |
|
"loss": 2.2529, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.98232594182165e-05, |
|
"loss": 2.2467, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9793454935622317e-05, |
|
"loss": 2.1735, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9763650453028136e-05, |
|
"loss": 2.2049, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9734442060085837e-05, |
|
"loss": 2.1168, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9704637577491656e-05, |
|
"loss": 2.1353, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9674833094897475e-05, |
|
"loss": 2.0458, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.964502861230329e-05, |
|
"loss": 2.1209, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.961552217453505e-05, |
|
"loss": 2.0867, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.958571769194087e-05, |
|
"loss": 2.028, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9555913209346685e-05, |
|
"loss": 2.0236, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9526108726752504e-05, |
|
"loss": 2.1211, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9496602288984264e-05, |
|
"loss": 2.0742, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9466797806390083e-05, |
|
"loss": 2.0753, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.94369933237959e-05, |
|
"loss": 2.0278, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9407188841201717e-05, |
|
"loss": 2.0408, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9377384358607533e-05, |
|
"loss": 2.0124, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9347579876013352e-05, |
|
"loss": 1.9951, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.931777539341917e-05, |
|
"loss": 1.9751, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.928826895565093e-05, |
|
"loss": 2.0204, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.925846447305675e-05, |
|
"loss": 1.9919, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9228659990462566e-05, |
|
"loss": 2.0126, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.919885550786838e-05, |
|
"loss": 2.0276, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.91690510252742e-05, |
|
"loss": 2.0272, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.913924654268002e-05, |
|
"loss": 1.9608, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.910944206008584e-05, |
|
"loss": 2.0019, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9079637577491654e-05, |
|
"loss": 1.9454, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9049833094897473e-05, |
|
"loss": 1.9716, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9020028612303292e-05, |
|
"loss": 1.9543, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8990224129709108e-05, |
|
"loss": 1.9659, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8960419647114927e-05, |
|
"loss": 1.9729, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8930615164520746e-05, |
|
"loss": 1.992, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8900810681926565e-05, |
|
"loss": 1.9147, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.887100619933238e-05, |
|
"loss": 1.8892, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.88412017167382e-05, |
|
"loss": 1.941, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.881139723414402e-05, |
|
"loss": 1.9463, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.878159275154983e-05, |
|
"loss": 1.9645, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.875178826895565e-05, |
|
"loss": 1.9414, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.872198378636147e-05, |
|
"loss": 1.9317, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8692179303767288e-05, |
|
"loss": 1.8677, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8662374821173103e-05, |
|
"loss": 1.9439, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8632570338578922e-05, |
|
"loss": 1.8576, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.860276585598474e-05, |
|
"loss": 1.9448, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8572961373390557e-05, |
|
"loss": 1.8824, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8543156890796376e-05, |
|
"loss": 1.9521, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8513352408202195e-05, |
|
"loss": 1.9182, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8483547925608014e-05, |
|
"loss": 1.941, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.845374344301383e-05, |
|
"loss": 1.9335, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.842393896041965e-05, |
|
"loss": 1.9182, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8394134477825468e-05, |
|
"loss": 1.9045, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8364329995231287e-05, |
|
"loss": 1.8742, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.83345255126371e-05, |
|
"loss": 1.8348, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8304721030042918e-05, |
|
"loss": 1.8538, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8274916547448737e-05, |
|
"loss": 1.8849, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8245112064854553e-05, |
|
"loss": 1.9078, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.821530758226037e-05, |
|
"loss": 1.8603, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.818550309966619e-05, |
|
"loss": 1.8936, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.815569861707201e-05, |
|
"loss": 1.8688, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8125894134477825e-05, |
|
"loss": 1.8588, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8096089651883644e-05, |
|
"loss": 1.8781, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8066285169289463e-05, |
|
"loss": 1.8708, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.803648068669528e-05, |
|
"loss": 1.8755, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.8006676204101098e-05, |
|
"loss": 1.9063, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7976871721506917e-05, |
|
"loss": 1.8432, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7947067238912736e-05, |
|
"loss": 1.8905, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.791726275631855e-05, |
|
"loss": 1.8297, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7887458273724367e-05, |
|
"loss": 1.8125, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7857653791130186e-05, |
|
"loss": 1.8585, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7827849308536002e-05, |
|
"loss": 1.8668, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.779804482594182e-05, |
|
"loss": 1.8574, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.776824034334764e-05, |
|
"loss": 1.8183, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.773843586075346e-05, |
|
"loss": 1.8725, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7708631378159275e-05, |
|
"loss": 1.8529, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7678826895565094e-05, |
|
"loss": 1.8526, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.7649022412970913e-05, |
|
"loss": 1.8428, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.7619217930376728e-05, |
|
"loss": 1.845, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.7589711492608488e-05, |
|
"loss": 1.8325, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.7559907010014307e-05, |
|
"loss": 1.821, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7530400572246067e-05, |
|
"loss": 1.9171, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7500596089651886e-05, |
|
"loss": 1.868, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7470791607057705e-05, |
|
"loss": 1.8178, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7440987124463517e-05, |
|
"loss": 1.8377, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7411182641869336e-05, |
|
"loss": 1.8537, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7381378159275155e-05, |
|
"loss": 1.845, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.735157367668097e-05, |
|
"loss": 1.8144, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.732176919408679e-05, |
|
"loss": 1.8261, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.729196471149261e-05, |
|
"loss": 1.8569, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.7262160228898428e-05, |
|
"loss": 1.8529, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.7232355746304244e-05, |
|
"loss": 1.7626, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.7202551263710063e-05, |
|
"loss": 1.8715, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.7172746781115882e-05, |
|
"loss": 1.7882, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.7142942298521697e-05, |
|
"loss": 1.8134, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.7113137815927516e-05, |
|
"loss": 1.7804, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.7083333333333335e-05, |
|
"loss": 1.8603, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.7053826895565092e-05, |
|
"loss": 1.7646, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.702402241297091e-05, |
|
"loss": 1.8108, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.699421793037673e-05, |
|
"loss": 1.7624, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.696441344778255e-05, |
|
"loss": 1.746, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.6934608965188365e-05, |
|
"loss": 1.7821, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.6904804482594184e-05, |
|
"loss": 1.8216, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.6875000000000003e-05, |
|
"loss": 1.8033, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.684519551740582e-05, |
|
"loss": 1.7954, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6815391034811637e-05, |
|
"loss": 1.8692, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6785586552217456e-05, |
|
"loss": 1.789, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6755782069623272e-05, |
|
"loss": 1.7989, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6726275631855032e-05, |
|
"loss": 1.7977, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.669647114926085e-05, |
|
"loss": 1.7815, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.7803, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.6636862184072486e-05, |
|
"loss": 1.7956, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.6607057701478305e-05, |
|
"loss": 1.8058, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.6577253218884124e-05, |
|
"loss": 1.8262, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.654744873628994e-05, |
|
"loss": 1.8004, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.6517644253695755e-05, |
|
"loss": 1.7565, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.6487839771101574e-05, |
|
"loss": 1.8006, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.645803528850739e-05, |
|
"loss": 1.8004, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.642823080591321e-05, |
|
"loss": 1.7985, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.6398426323319028e-05, |
|
"loss": 1.7983, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.6368621840724847e-05, |
|
"loss": 1.7755, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.6338817358130662e-05, |
|
"loss": 1.8261, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.630901287553648e-05, |
|
"loss": 1.8139, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.62792083929423e-05, |
|
"loss": 1.7813, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6249403910348116e-05, |
|
"loss": 1.7667, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6219599427753935e-05, |
|
"loss": 1.7622, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6189794945159754e-05, |
|
"loss": 1.7805, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6159990462565573e-05, |
|
"loss": 1.7998, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.613018597997139e-05, |
|
"loss": 1.7419, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.6100381497377204e-05, |
|
"loss": 1.7531, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.6070577014783023e-05, |
|
"loss": 1.7852, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6040772532188842e-05, |
|
"loss": 1.7967, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6010968049594658e-05, |
|
"loss": 1.7887, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.5981163567000477e-05, |
|
"loss": 1.7465, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.5951359084406296e-05, |
|
"loss": 1.7776, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.592155460181211e-05, |
|
"loss": 1.7584, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.589175011921793e-05, |
|
"loss": 1.7761, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.586194563662375e-05, |
|
"loss": 1.7238, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5832439198855506e-05, |
|
"loss": 1.7991, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5802634716261325e-05, |
|
"loss": 1.7551, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5772830233667144e-05, |
|
"loss": 1.8406, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.574302575107296e-05, |
|
"loss": 1.7478, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.571322126847878e-05, |
|
"loss": 1.8065, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5683416785884598e-05, |
|
"loss": 1.8075, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5653612303290417e-05, |
|
"loss": 1.7356, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5623807820696232e-05, |
|
"loss": 1.7674, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.559400333810205e-05, |
|
"loss": 1.7419, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.556419885550787e-05, |
|
"loss": 1.7297, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.5534394372913686e-05, |
|
"loss": 1.7576, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.5504589890319505e-05, |
|
"loss": 1.8, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.5474785407725324e-05, |
|
"loss": 1.7792, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5444980925131143e-05, |
|
"loss": 1.739, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5415176442536955e-05, |
|
"loss": 1.7421, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5385371959942774e-05, |
|
"loss": 1.7634, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5355567477348593e-05, |
|
"loss": 1.8138, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.5325762994754413e-05, |
|
"loss": 1.6962, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.5295958512160228e-05, |
|
"loss": 1.7461, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.5266154029566047e-05, |
|
"loss": 1.7672, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5236647591797807e-05, |
|
"loss": 1.7314, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5206843109203626e-05, |
|
"loss": 1.7327, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5177038626609442e-05, |
|
"loss": 1.702, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.514723414401526e-05, |
|
"loss": 1.7768, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5117429661421076e-05, |
|
"loss": 1.7364, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5087625178826895e-05, |
|
"loss": 1.7584, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5057820696232714e-05, |
|
"loss": 1.7383, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.502801621363853e-05, |
|
"loss": 1.7418, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.499821173104435e-05, |
|
"loss": 1.6813, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4968407248450168e-05, |
|
"loss": 1.7484, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4938900810681925e-05, |
|
"loss": 1.7323, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4909096328087744e-05, |
|
"loss": 1.7436, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4879291845493563e-05, |
|
"loss": 1.7571, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4849487362899378e-05, |
|
"loss": 1.7422, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4819682880305197e-05, |
|
"loss": 1.783, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4789878397711016e-05, |
|
"loss": 1.7675, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4760073915116835e-05, |
|
"loss": 1.7611, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.473026943252265e-05, |
|
"loss": 1.7964, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.470046494992847e-05, |
|
"loss": 1.681, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.467066046733429e-05, |
|
"loss": 1.6863, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.4640855984740108e-05, |
|
"loss": 1.725, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.4611051502145924e-05, |
|
"loss": 1.7158, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.4581247019551743e-05, |
|
"loss": 1.7756, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.455144253695756e-05, |
|
"loss": 1.7627, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4521638054363374e-05, |
|
"loss": 1.7381, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4491833571769193e-05, |
|
"loss": 1.7606, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4462029089175012e-05, |
|
"loss": 1.7298, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.443222460658083e-05, |
|
"loss": 1.7116, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4402420123986647e-05, |
|
"loss": 1.775, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4372615641392466e-05, |
|
"loss": 1.7324, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4342811158798285e-05, |
|
"loss": 1.7449, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.43130066762041e-05, |
|
"loss": 1.7271, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.428320219360992e-05, |
|
"loss": 1.7374, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.425339771101574e-05, |
|
"loss": 1.7579, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.4223593228421557e-05, |
|
"loss": 1.7903, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.4193788745827373e-05, |
|
"loss": 1.7057, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.4163984263233192e-05, |
|
"loss": 1.7083, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.413417978063901e-05, |
|
"loss": 1.7556, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.4104375298044827e-05, |
|
"loss": 1.6617, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.4074570815450642e-05, |
|
"loss": 1.7244, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.404476633285646e-05, |
|
"loss": 1.7108, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.401496185026228e-05, |
|
"loss": 1.7411, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.3985157367668096e-05, |
|
"loss": 1.735, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.3955352885073915e-05, |
|
"loss": 1.7065, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.3925548402479734e-05, |
|
"loss": 1.7175, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.389574391988555e-05, |
|
"loss": 1.6863, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.386593943729137e-05, |
|
"loss": 1.6851, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.3836134954697188e-05, |
|
"loss": 1.6974, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.3806330472103007e-05, |
|
"loss": 1.7211, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.3776525989508822e-05, |
|
"loss": 1.7042, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.374672150691464e-05, |
|
"loss": 1.7337, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.371691702432046e-05, |
|
"loss": 1.7224, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.3687112541726276e-05, |
|
"loss": 1.6624, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.3657308059132095e-05, |
|
"loss": 1.6704, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.362750357653791e-05, |
|
"loss": 1.6547, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.359769909394373e-05, |
|
"loss": 1.6913, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3567894611349545e-05, |
|
"loss": 1.7347, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.3538090128755364e-05, |
|
"loss": 1.6806, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.3508285646161183e-05, |
|
"loss": 1.6834, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.3478481163567002e-05, |
|
"loss": 1.7367, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3448676680972818e-05, |
|
"loss": 1.6755, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3418872198378637e-05, |
|
"loss": 1.7169, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3389067715784456e-05, |
|
"loss": 1.7314, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.335926323319027e-05, |
|
"loss": 1.7077, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.332945875059609e-05, |
|
"loss": 1.7225, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.329965426800191e-05, |
|
"loss": 1.6787, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.326984978540773e-05, |
|
"loss": 1.6933, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.3240045302813544e-05, |
|
"loss": 1.7114, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.3210240820219363e-05, |
|
"loss": 1.7037, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.318043633762518e-05, |
|
"loss": 1.7217, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3150631855030995e-05, |
|
"loss": 1.7062, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3120827372436814e-05, |
|
"loss": 1.6969, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3091022889842633e-05, |
|
"loss": 1.693, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.306121840724845e-05, |
|
"loss": 1.6718, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.3031413924654267e-05, |
|
"loss": 1.6782, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.3001609442060086e-05, |
|
"loss": 1.6719, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.2971804959465905e-05, |
|
"loss": 1.6695, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.294200047687172e-05, |
|
"loss": 1.6336, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.291219599427754e-05, |
|
"loss": 1.6949, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.288239151168336e-05, |
|
"loss": 1.6912, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.2852587029089178e-05, |
|
"loss": 1.6374, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.2822782546494994e-05, |
|
"loss": 1.6663, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.2792978063900813e-05, |
|
"loss": 1.7008, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.276317358130663e-05, |
|
"loss": 1.6883, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.2733369098712444e-05, |
|
"loss": 1.6593, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.2703564616118263e-05, |
|
"loss": 1.6847, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.2673760133524082e-05, |
|
"loss": 1.6684, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.26439556509299e-05, |
|
"loss": 1.6365, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2614151168335717e-05, |
|
"loss": 1.6649, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2584346685741536e-05, |
|
"loss": 1.6906, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.2554542203147355e-05, |
|
"loss": 1.6493, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.252473772055317e-05, |
|
"loss": 1.6709, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.249493323795899e-05, |
|
"loss": 1.6905, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.2465128755364808e-05, |
|
"loss": 1.6655, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.2435324272770627e-05, |
|
"loss": 1.6957, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.2405519790176443e-05, |
|
"loss": 1.6703, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.2376013352408203e-05, |
|
"loss": 1.7062, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2346208869814022e-05, |
|
"loss": 1.6806, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2316404387219837e-05, |
|
"loss": 1.6701, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2286599904625657e-05, |
|
"loss": 1.6538, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2256795422031476e-05, |
|
"loss": 1.6635, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.222699093943729e-05, |
|
"loss": 1.6629, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.219718645684311e-05, |
|
"loss": 1.7256, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.216738197424893e-05, |
|
"loss": 1.6586, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2137577491654748e-05, |
|
"loss": 1.6814, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2107773009060564e-05, |
|
"loss": 1.6326, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.207796852646638e-05, |
|
"loss": 1.6973, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.20481640438722e-05, |
|
"loss": 1.6486, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.2018359561278014e-05, |
|
"loss": 1.6419, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1988555078683833e-05, |
|
"loss": 1.665, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1958750596089652e-05, |
|
"loss": 1.6391, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.192894611349547e-05, |
|
"loss": 1.6599, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.1899141630901287e-05, |
|
"loss": 1.6657, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.1869337148307106e-05, |
|
"loss": 1.651, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.1839532665712925e-05, |
|
"loss": 1.6894, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.180972818311874e-05, |
|
"loss": 1.6259, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.177992370052456e-05, |
|
"loss": 1.6694, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.175011921793038e-05, |
|
"loss": 1.7037, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.1720314735336198e-05, |
|
"loss": 1.6759, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.1690808297567954e-05, |
|
"loss": 1.6561, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.1661003814973773e-05, |
|
"loss": 1.6251, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.1631199332379592e-05, |
|
"loss": 1.6564, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.1601394849785408e-05, |
|
"loss": 1.5827, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.1571590367191227e-05, |
|
"loss": 1.6458, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.1541785884597046e-05, |
|
"loss": 1.6513, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.151198140200286e-05, |
|
"loss": 1.6328, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.148217691940868e-05, |
|
"loss": 1.6608, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.14523724368145e-05, |
|
"loss": 1.6601, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.142256795422032e-05, |
|
"loss": 1.6835, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.139276347162613e-05, |
|
"loss": 1.644, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.136295898903195e-05, |
|
"loss": 1.647, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.133315450643777e-05, |
|
"loss": 1.6461, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.1303350023843584e-05, |
|
"loss": 1.6605, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.1273545541249403e-05, |
|
"loss": 1.693, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1243741058655222e-05, |
|
"loss": 1.6356, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.121393657606104e-05, |
|
"loss": 1.6449, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1184132093466857e-05, |
|
"loss": 1.628, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.1154327610872676e-05, |
|
"loss": 1.6563, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.1124523128278495e-05, |
|
"loss": 1.6084, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.109471864568431e-05, |
|
"loss": 1.627, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.106491416309013e-05, |
|
"loss": 1.6689, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.103510968049595e-05, |
|
"loss": 1.6247, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.1005305197901768e-05, |
|
"loss": 1.6514, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0975500715307583e-05, |
|
"loss": 1.6428, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0945994277539343e-05, |
|
"loss": 1.6138, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0916189794945162e-05, |
|
"loss": 1.6405, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0886385312350978e-05, |
|
"loss": 1.6526, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0856580829756797e-05, |
|
"loss": 1.6827, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.0826776347162616e-05, |
|
"loss": 1.664, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.079697186456843e-05, |
|
"loss": 1.635, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.076716738197425e-05, |
|
"loss": 1.6359, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0737362899380066e-05, |
|
"loss": 1.6109, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0707558416785885e-05, |
|
"loss": 1.5975, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.06777539341917e-05, |
|
"loss": 1.6295, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.064794945159752e-05, |
|
"loss": 1.659, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.061814496900334e-05, |
|
"loss": 1.6064, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.0588340486409155e-05, |
|
"loss": 1.646, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.0558536003814974e-05, |
|
"loss": 1.6404, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.0528731521220793e-05, |
|
"loss": 1.6281, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.049892703862661e-05, |
|
"loss": 1.6216, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.0469122556032427e-05, |
|
"loss": 1.6398, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0439318073438246e-05, |
|
"loss": 1.67, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0409513590844065e-05, |
|
"loss": 1.619, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.037970910824988e-05, |
|
"loss": 1.6067, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.03499046256557e-05, |
|
"loss": 1.6079, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.032010014306152e-05, |
|
"loss": 1.6647, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.0290295660467335e-05, |
|
"loss": 1.6308, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.026049117787315e-05, |
|
"loss": 1.6525, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.023068669527897e-05, |
|
"loss": 1.6212, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.020118025751073e-05, |
|
"loss": 1.6845, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.0171375774916548e-05, |
|
"loss": 1.6324, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0141571292322367e-05, |
|
"loss": 1.6024, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0111766809728186e-05, |
|
"loss": 1.6687, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0081962327134002e-05, |
|
"loss": 1.6338, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.0052157844539818e-05, |
|
"loss": 1.6172, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.0022353361945637e-05, |
|
"loss": 1.6706, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 18.6217, |
|
"eval_loss": 1.5689729452133179, |
|
"eval_rouge1": 31.2477, |
|
"eval_rouge2": 16.5455, |
|
"eval_rougeL": 26.9855, |
|
"eval_rougeLsum": 26.9754, |
|
"eval_runtime": 291.6493, |
|
"eval_samples_per_second": 9.299, |
|
"eval_steps_per_second": 1.162, |
|
"step": 33552 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.9992548879351456e-05, |
|
"loss": 1.5373, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.996274439675727e-05, |
|
"loss": 1.5053, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.993293991416309e-05, |
|
"loss": 1.4423, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.990313543156891e-05, |
|
"loss": 1.4924, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.9873330948974725e-05, |
|
"loss": 1.5381, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.9843526466380544e-05, |
|
"loss": 1.4693, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.9813721983786363e-05, |
|
"loss": 1.4748, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.9783917501192182e-05, |
|
"loss": 1.5042, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.9754113018597998e-05, |
|
"loss": 1.4755, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.9724308536003817e-05, |
|
"loss": 1.4685, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.9694504053409636e-05, |
|
"loss": 1.5157, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.966469957081545e-05, |
|
"loss": 1.4362, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.963489508822127e-05, |
|
"loss": 1.4859, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.9605090605627086e-05, |
|
"loss": 1.4786, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.9575286123032905e-05, |
|
"loss": 1.4813, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.954548164043872e-05, |
|
"loss": 1.4775, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.951567715784454e-05, |
|
"loss": 1.5173, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.948587267525036e-05, |
|
"loss": 1.5366, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.9456068192656174e-05, |
|
"loss": 1.4664, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.9426263710061993e-05, |
|
"loss": 1.5118, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.9396459227467812e-05, |
|
"loss": 1.4968, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.936665474487363e-05, |
|
"loss": 1.4721, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9336850262279447e-05, |
|
"loss": 1.5235, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9307045779685266e-05, |
|
"loss": 1.4908, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9277241297091085e-05, |
|
"loss": 1.4354, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.92474368144969e-05, |
|
"loss": 1.517, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.921763233190272e-05, |
|
"loss": 1.4848, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.9187827849308535e-05, |
|
"loss": 1.5129, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.9158023366714354e-05, |
|
"loss": 1.4799, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.912821888412017e-05, |
|
"loss": 1.4383, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.909841440152599e-05, |
|
"loss": 1.4719, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.9068609918931808e-05, |
|
"loss": 1.4657, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.9038805436337623e-05, |
|
"loss": 1.5345, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.9009000953743443e-05, |
|
"loss": 1.4622, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.897919647114926e-05, |
|
"loss": 1.4856, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.894939198855508e-05, |
|
"loss": 1.4691, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.8919587505960896e-05, |
|
"loss": 1.4761, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.8889783023366715e-05, |
|
"loss": 1.5007, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.8859978540772534e-05, |
|
"loss": 1.4755, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.8830174058178353e-05, |
|
"loss": 1.5022, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.880036957558417e-05, |
|
"loss": 1.4647, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.8770565092989988e-05, |
|
"loss": 1.4652, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.8740760610395804e-05, |
|
"loss": 1.4751, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.871095612780162e-05, |
|
"loss": 1.4767, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.8681151645207438e-05, |
|
"loss": 1.5161, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.8651347162613257e-05, |
|
"loss": 1.4369, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.8621542680019076e-05, |
|
"loss": 1.4612, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.8591738197424892e-05, |
|
"loss": 1.4817, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.856193371483071e-05, |
|
"loss": 1.4727, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.853212923223653e-05, |
|
"loss": 1.4477, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.8502324749642345e-05, |
|
"loss": 1.4775, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.8472520267048165e-05, |
|
"loss": 1.484, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8442715784453984e-05, |
|
"loss": 1.4885, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8412911301859803e-05, |
|
"loss": 1.5263, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8383106819265618e-05, |
|
"loss": 1.4864, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8353302336671437e-05, |
|
"loss": 1.5276, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8323497854077256e-05, |
|
"loss": 1.5265, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.829369337148307e-05, |
|
"loss": 1.4731, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8263888888888887e-05, |
|
"loss": 1.4715, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.8234084406294706e-05, |
|
"loss": 1.5026, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.8204279923700526e-05, |
|
"loss": 1.4841, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.817447544110634e-05, |
|
"loss": 1.4688, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.814467095851216e-05, |
|
"loss": 1.4782, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.811486647591798e-05, |
|
"loss": 1.4734, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.8085061993323795e-05, |
|
"loss": 1.4915, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.8055257510729614e-05, |
|
"loss": 1.4493, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.8025453028135433e-05, |
|
"loss": 1.5045, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.7995648545541252e-05, |
|
"loss": 1.4681, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.7965844062947067e-05, |
|
"loss": 1.5133, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.7936039580352887e-05, |
|
"loss": 1.4748, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.7906235097758706e-05, |
|
"loss": 1.5346, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.787643061516452e-05, |
|
"loss": 1.4709, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.7846626132570337e-05, |
|
"loss": 1.4431, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.7816821649976156e-05, |
|
"loss": 1.5095, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.7787017167381975e-05, |
|
"loss": 1.4872, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.775721268478779e-05, |
|
"loss": 1.529, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.772740820219361e-05, |
|
"loss": 1.4687, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.769760371959943e-05, |
|
"loss": 1.4696, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.7667799237005248e-05, |
|
"loss": 1.457, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.7637994754411063e-05, |
|
"loss": 1.5051, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.7608190271816882e-05, |
|
"loss": 1.4694, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.75783857892227e-05, |
|
"loss": 1.4556, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.7548581306628517e-05, |
|
"loss": 1.4872, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.7518776824034336e-05, |
|
"loss": 1.4489, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.7488972341440155e-05, |
|
"loss": 1.4892, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.7459167858845974e-05, |
|
"loss": 1.4869, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.742936337625179e-05, |
|
"loss": 1.4624, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.7399558893657605e-05, |
|
"loss": 1.4492, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.7369754411063424e-05, |
|
"loss": 1.4306, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.733994992846924e-05, |
|
"loss": 1.4595, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.731014544587506e-05, |
|
"loss": 1.4703, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.7280340963280878e-05, |
|
"loss": 1.4962, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7250536480686697e-05, |
|
"loss": 1.4476, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7220731998092512e-05, |
|
"loss": 1.4674, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.719092751549833e-05, |
|
"loss": 1.4704, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.716112303290415e-05, |
|
"loss": 1.5435, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.7131318550309966e-05, |
|
"loss": 1.5339, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.7101514067715785e-05, |
|
"loss": 1.4905, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.7071709585121604e-05, |
|
"loss": 1.4978, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7041905102527423e-05, |
|
"loss": 1.4478, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.701239866475918e-05, |
|
"loss": 1.5059, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.6982594182165e-05, |
|
"loss": 1.4643, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.6953087744396755e-05, |
|
"loss": 1.5011, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.6923283261802574e-05, |
|
"loss": 1.4895, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.6893478779208393e-05, |
|
"loss": 1.4306, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.686367429661421e-05, |
|
"loss": 1.5001, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.6834167858845972e-05, |
|
"loss": 1.4506, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.680436337625179e-05, |
|
"loss": 1.4456, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.6774558893657607e-05, |
|
"loss": 1.5038, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.6744754411063426e-05, |
|
"loss": 1.5082, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.671494992846924e-05, |
|
"loss": 1.5055, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.6685145445875057e-05, |
|
"loss": 1.4697, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6655340963280876e-05, |
|
"loss": 1.501, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6625536480686695e-05, |
|
"loss": 1.4994, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.6595731998092514e-05, |
|
"loss": 1.4899, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.656592751549833e-05, |
|
"loss": 1.4819, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.653612303290415e-05, |
|
"loss": 1.4529, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6506318550309968e-05, |
|
"loss": 1.4849, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6476514067715784e-05, |
|
"loss": 1.4587, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.6446709585121603e-05, |
|
"loss": 1.4595, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.641690510252742e-05, |
|
"loss": 1.4292, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.638710061993324e-05, |
|
"loss": 1.4624, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.6357296137339056e-05, |
|
"loss": 1.5235, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.6327491654744875e-05, |
|
"loss": 1.5378, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.6297687172150694e-05, |
|
"loss": 1.4633, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.626788268955651e-05, |
|
"loss": 1.4387, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.6238078206962326e-05, |
|
"loss": 1.4335, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.6208273724368145e-05, |
|
"loss": 1.4708, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.6178469241773964e-05, |
|
"loss": 1.4633, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.6148962804005723e-05, |
|
"loss": 1.5035, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.6119158321411542e-05, |
|
"loss": 1.4553, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.608935383881736e-05, |
|
"loss": 1.4737, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.6059549356223177e-05, |
|
"loss": 1.4676, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.6029744873628993e-05, |
|
"loss": 1.4482, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.5999940391034812e-05, |
|
"loss": 1.4381, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.5970135908440627e-05, |
|
"loss": 1.4099, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.5940331425846446e-05, |
|
"loss": 1.4595, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.5910526943252265e-05, |
|
"loss": 1.4551, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.5880722460658084e-05, |
|
"loss": 1.4629, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.58509179780639e-05, |
|
"loss": 1.4616, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.582111349546972e-05, |
|
"loss": 1.4742, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.5791309012875538e-05, |
|
"loss": 1.4735, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.5761504530281354e-05, |
|
"loss": 1.4289, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5731700047687173e-05, |
|
"loss": 1.4924, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5701895565092992e-05, |
|
"loss": 1.4643, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.567209108249881e-05, |
|
"loss": 1.4499, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5642286599904626e-05, |
|
"loss": 1.5153, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5612482117310442e-05, |
|
"loss": 1.4627, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.558267763471626e-05, |
|
"loss": 1.426, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.555317119694802e-05, |
|
"loss": 1.4213, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.552336671435384e-05, |
|
"loss": 1.4519, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.549356223175966e-05, |
|
"loss": 1.4877, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.5463757749165475e-05, |
|
"loss": 1.488, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.5433953266571294e-05, |
|
"loss": 1.4738, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.5404148783977113e-05, |
|
"loss": 1.4912, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.537434430138293e-05, |
|
"loss": 1.4391, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.5344539818788744e-05, |
|
"loss": 1.4983, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.5314735336194563e-05, |
|
"loss": 1.411, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.5284930853600382e-05, |
|
"loss": 1.5083, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.5255126371006198e-05, |
|
"loss": 1.4399, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.5225321888412017e-05, |
|
"loss": 1.4691, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.5195517405817836e-05, |
|
"loss": 1.4732, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.5165712923223655e-05, |
|
"loss": 1.5002, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.513590844062947e-05, |
|
"loss": 1.4984, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.510610395803529e-05, |
|
"loss": 1.4538, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.5076299475441108e-05, |
|
"loss": 1.4828, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.5046494992846922e-05, |
|
"loss": 1.5021, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.5016690510252741e-05, |
|
"loss": 1.4735, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.498688602765856e-05, |
|
"loss": 1.4508, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4957081545064378e-05, |
|
"loss": 1.5091, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4927277062470197e-05, |
|
"loss": 1.4725, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4897472579876014e-05, |
|
"loss": 1.4495, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4867668097281831e-05, |
|
"loss": 1.4596, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.483786361468765e-05, |
|
"loss": 1.4447, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.4808059132093466e-05, |
|
"loss": 1.4665, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.4778254649499285e-05, |
|
"loss": 1.4793, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4748450166905102e-05, |
|
"loss": 1.4722, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4718645684310921e-05, |
|
"loss": 1.4474, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4688841201716739e-05, |
|
"loss": 1.4295, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4659036719122558e-05, |
|
"loss": 1.4441, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.4629232236528375e-05, |
|
"loss": 1.4148, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.459942775393419e-05, |
|
"loss": 1.4457, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.456962327134001e-05, |
|
"loss": 1.4851, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4539818788745827e-05, |
|
"loss": 1.4864, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4510014306151646e-05, |
|
"loss": 1.4926, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4480209823557463e-05, |
|
"loss": 1.4553, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.4450405340963282e-05, |
|
"loss": 1.4744, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.44206008583691e-05, |
|
"loss": 1.4624, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.4390796375774915e-05, |
|
"loss": 1.4483, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.4360991893180734e-05, |
|
"loss": 1.4569, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.4331187410586552e-05, |
|
"loss": 1.4262, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.430138292799237e-05, |
|
"loss": 1.4517, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.4271578445398188e-05, |
|
"loss": 1.4496, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.4241773962804007e-05, |
|
"loss": 1.4519, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.4211969480209824e-05, |
|
"loss": 1.4407, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.4182164997615643e-05, |
|
"loss": 1.4715, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.4152360515021459e-05, |
|
"loss": 1.4313, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.4122556032427276e-05, |
|
"loss": 1.4798, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.4092751549833095e-05, |
|
"loss": 1.4386, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.4062947067238913e-05, |
|
"loss": 1.4592, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.4033142584644732e-05, |
|
"loss": 1.4857, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.4003338102050549e-05, |
|
"loss": 1.463, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3973533619456368e-05, |
|
"loss": 1.4752, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3943729136862184e-05, |
|
"loss": 1.4524, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3913924654268001e-05, |
|
"loss": 1.4677, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.388412017167382e-05, |
|
"loss": 1.48, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3854315689079637e-05, |
|
"loss": 1.4365, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3824511206485456e-05, |
|
"loss": 1.485, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3794706723891274e-05, |
|
"loss": 1.4921, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3764902241297093e-05, |
|
"loss": 1.4118, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.373509775870291e-05, |
|
"loss": 1.4373, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3705293276108726e-05, |
|
"loss": 1.4557, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3675488793514545e-05, |
|
"loss": 1.4653, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3645684310920362e-05, |
|
"loss": 1.4442, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3615879828326181e-05, |
|
"loss": 1.4891, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3586075345731998e-05, |
|
"loss": 1.4398, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3556270863137817e-05, |
|
"loss": 1.4589, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3526466380543635e-05, |
|
"loss": 1.4547, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3496661897949452e-05, |
|
"loss": 1.4411, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.346685741535527e-05, |
|
"loss": 1.4369, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3437052932761087e-05, |
|
"loss": 1.4799, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3407546494992847e-05, |
|
"loss": 1.4088, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3377742012398666e-05, |
|
"loss": 1.5022, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3347937529804483e-05, |
|
"loss": 1.454, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3318133047210302e-05, |
|
"loss": 1.4688, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.328832856461612e-05, |
|
"loss": 1.4207, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3258524082021937e-05, |
|
"loss": 1.4623, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3228719599427754e-05, |
|
"loss": 1.4111, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3198915116833571e-05, |
|
"loss": 1.4048, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.316911063423939e-05, |
|
"loss": 1.5198, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.3139306151645208e-05, |
|
"loss": 1.4427, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.3109501669051027e-05, |
|
"loss": 1.451, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.3079697186456844e-05, |
|
"loss": 1.4568, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3049892703862661e-05, |
|
"loss": 1.4529, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3020088221268479e-05, |
|
"loss": 1.4692, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2990283738674296e-05, |
|
"loss": 1.4401, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2960479256080115e-05, |
|
"loss": 1.4887, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2930674773485932e-05, |
|
"loss": 1.4467, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2900870290891751e-05, |
|
"loss": 1.451, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2871065808297569e-05, |
|
"loss": 1.4036, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2841261325703388e-05, |
|
"loss": 1.4618, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2811456843109203e-05, |
|
"loss": 1.4312, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2781950405340963e-05, |
|
"loss": 1.4498, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.275214592274678e-05, |
|
"loss": 1.4157, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.27223414401526e-05, |
|
"loss": 1.5118, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2692536957558417e-05, |
|
"loss": 1.3883, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2662732474964236e-05, |
|
"loss": 1.4585, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.2632927992370053e-05, |
|
"loss": 1.4856, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.260312350977587e-05, |
|
"loss": 1.4275, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.2573319027181688e-05, |
|
"loss": 1.4411, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2543514544587505e-05, |
|
"loss": 1.415, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2513710061993324e-05, |
|
"loss": 1.4709, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2483905579399141e-05, |
|
"loss": 1.4638, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.245410109680496e-05, |
|
"loss": 1.4267, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2424296614210778e-05, |
|
"loss": 1.4549, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2394492131616597e-05, |
|
"loss": 1.4498, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2364687649022412e-05, |
|
"loss": 1.4167, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2334883166428231e-05, |
|
"loss": 1.4539, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2305078683834049e-05, |
|
"loss": 1.4552, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2275274201239866e-05, |
|
"loss": 1.4319, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2245469718645685e-05, |
|
"loss": 1.4416, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2215665236051502e-05, |
|
"loss": 1.4767, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2185860753457321e-05, |
|
"loss": 1.4194, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2156056270863137e-05, |
|
"loss": 1.4524, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2126251788268956e-05, |
|
"loss": 1.3962, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2096745350500716e-05, |
|
"loss": 1.4556, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2067238912732474e-05, |
|
"loss": 1.453, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2037434430138293e-05, |
|
"loss": 1.4252, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2007927992370053e-05, |
|
"loss": 1.4319, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.197812350977587e-05, |
|
"loss": 1.4448, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.194831902718169e-05, |
|
"loss": 1.4311, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.1918514544587505e-05, |
|
"loss": 1.4623, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.1888710061993324e-05, |
|
"loss": 1.408, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1858905579399142e-05, |
|
"loss": 1.4497, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1829101096804959e-05, |
|
"loss": 1.4413, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1799296614210778e-05, |
|
"loss": 1.4201, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1769492131616595e-05, |
|
"loss": 1.452, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1739687649022414e-05, |
|
"loss": 1.4851, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1709883166428232e-05, |
|
"loss": 1.4997, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1680078683834049e-05, |
|
"loss": 1.461, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1650274201239866e-05, |
|
"loss": 1.3893, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1620767763471626e-05, |
|
"loss": 1.48, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1590963280877443e-05, |
|
"loss": 1.451, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1561158798283262e-05, |
|
"loss": 1.472, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.153135431568908e-05, |
|
"loss": 1.446, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.1501549833094899e-05, |
|
"loss": 1.4504, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.1471745350500716e-05, |
|
"loss": 1.4677, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.1441940867906533e-05, |
|
"loss": 1.4301, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.141213638531235e-05, |
|
"loss": 1.4207, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.138233190271817e-05, |
|
"loss": 1.4144, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1352527420123987e-05, |
|
"loss": 1.4468, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1322722937529804e-05, |
|
"loss": 1.4232, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1292918454935623e-05, |
|
"loss": 1.4328, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.126311397234144e-05, |
|
"loss": 1.4364, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1233309489747258e-05, |
|
"loss": 1.4485, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1203505007153075e-05, |
|
"loss": 1.4054, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1173700524558894e-05, |
|
"loss": 1.438, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1143896041964712e-05, |
|
"loss": 1.4417, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1114091559370529e-05, |
|
"loss": 1.5028, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1084287076776348e-05, |
|
"loss": 1.424, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1054482594182165e-05, |
|
"loss": 1.3832, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1024678111587983e-05, |
|
"loss": 1.413, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.09948736289938e-05, |
|
"loss": 1.4512, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.0965069146399619e-05, |
|
"loss": 1.4262, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0935264663805436e-05, |
|
"loss": 1.4745, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0905460181211254e-05, |
|
"loss": 1.4105, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0875655698617073e-05, |
|
"loss": 1.4641, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.084585121602289e-05, |
|
"loss": 1.4694, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.0816046733428709e-05, |
|
"loss": 1.4276, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.0786242250834525e-05, |
|
"loss": 1.4694, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.0756437768240344e-05, |
|
"loss": 1.4874, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.0726633285646161e-05, |
|
"loss": 1.4515, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.069682880305198e-05, |
|
"loss": 1.4539, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.0667024320457797e-05, |
|
"loss": 1.3871, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0637219837863615e-05, |
|
"loss": 1.4109, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0607415355269434e-05, |
|
"loss": 1.4384, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.057761087267525e-05, |
|
"loss": 1.4199, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0547806390081068e-05, |
|
"loss": 1.46, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0518001907486886e-05, |
|
"loss": 1.4389, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0488197424892705e-05, |
|
"loss": 1.425, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0458392942298522e-05, |
|
"loss": 1.4028, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.042858845970434e-05, |
|
"loss": 1.4536, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0398783977110158e-05, |
|
"loss": 1.4986, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0368979494515976e-05, |
|
"loss": 1.4005, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0339175011921793e-05, |
|
"loss": 1.4286, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.030937052932761e-05, |
|
"loss": 1.4028, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.027956604673343e-05, |
|
"loss": 1.4589, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.025005960896519e-05, |
|
"loss": 1.4169, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0220255126371007e-05, |
|
"loss": 1.4127, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0190450643776824e-05, |
|
"loss": 1.4243, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0160646161182643e-05, |
|
"loss": 1.3892, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0130841678588459e-05, |
|
"loss": 1.4301, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0101037195994278e-05, |
|
"loss": 1.4429, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0071232713400095e-05, |
|
"loss": 1.4028, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.0041428230805914e-05, |
|
"loss": 1.4174, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.0011623748211731e-05, |
|
"loss": 1.3446, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 18.9115, |
|
"eval_loss": 1.5060008764266968, |
|
"eval_rouge1": 32.1108, |
|
"eval_rouge2": 17.1408, |
|
"eval_rougeL": 27.7833, |
|
"eval_rougeLsum": 27.7703, |
|
"eval_runtime": 292.3222, |
|
"eval_samples_per_second": 9.277, |
|
"eval_steps_per_second": 1.16, |
|
"step": 67104 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.981819265617549e-06, |
|
"loss": 1.3076, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.952014783023368e-06, |
|
"loss": 1.3116, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.922210300429185e-06, |
|
"loss": 1.352, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.892405817835002e-06, |
|
"loss": 1.3229, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.86260133524082e-06, |
|
"loss": 1.2791, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.832796852646639e-06, |
|
"loss": 1.3413, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.802992370052456e-06, |
|
"loss": 1.313, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.773187887458275e-06, |
|
"loss": 1.3097, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.743383404864092e-06, |
|
"loss": 1.3202, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.71357892226991e-06, |
|
"loss": 1.3114, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.683774439675727e-06, |
|
"loss": 1.2995, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.653969957081544e-06, |
|
"loss": 1.2941, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.624165474487363e-06, |
|
"loss": 1.3195, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.59436099189318e-06, |
|
"loss": 1.3223, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.564556509299e-06, |
|
"loss": 1.2635, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.534752026704817e-06, |
|
"loss": 1.3237, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.504947544110634e-06, |
|
"loss": 1.3631, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.475143061516453e-06, |
|
"loss": 1.3345, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.445338578922269e-06, |
|
"loss": 1.2867, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.415534096328088e-06, |
|
"loss": 1.2966, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.385729613733905e-06, |
|
"loss": 1.3146, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.355925131139724e-06, |
|
"loss": 1.3152, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.326120648545542e-06, |
|
"loss": 1.2995, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.29631616595136e-06, |
|
"loss": 1.3113, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.266511683357178e-06, |
|
"loss": 1.3072, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.236707200762994e-06, |
|
"loss": 1.2986, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.206902718168813e-06, |
|
"loss": 1.3205, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.17709823557463e-06, |
|
"loss": 1.3499, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.147293752980449e-06, |
|
"loss": 1.3307, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.117489270386266e-06, |
|
"loss": 1.3387, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.087684787792085e-06, |
|
"loss": 1.3261, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.058178350023845e-06, |
|
"loss": 1.2811, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.028373867429663e-06, |
|
"loss": 1.2962, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 8.998569384835478e-06, |
|
"loss": 1.3124, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 8.968764902241297e-06, |
|
"loss": 1.3171, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.938960419647115e-06, |
|
"loss": 1.3073, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.909155937052934e-06, |
|
"loss": 1.3202, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.879351454458751e-06, |
|
"loss": 1.3314, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.84954697186457e-06, |
|
"loss": 1.3105, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.819742489270387e-06, |
|
"loss": 1.3241, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.789938006676203e-06, |
|
"loss": 1.3228, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.760133524082022e-06, |
|
"loss": 1.3377, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.73032904148784e-06, |
|
"loss": 1.263, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.700524558893658e-06, |
|
"loss": 1.2543, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.670720076299476e-06, |
|
"loss": 1.3436, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.640915593705295e-06, |
|
"loss": 1.3391, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.611111111111112e-06, |
|
"loss": 1.3108, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.58130662851693e-06, |
|
"loss": 1.2825, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.551502145922747e-06, |
|
"loss": 1.3004, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.521697663328564e-06, |
|
"loss": 1.312, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.491893180734383e-06, |
|
"loss": 1.3004, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.4620886981402e-06, |
|
"loss": 1.3638, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.43228421554602e-06, |
|
"loss": 1.3283, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.402479732951837e-06, |
|
"loss": 1.2835, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.372675250357656e-06, |
|
"loss": 1.3222, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.342870767763471e-06, |
|
"loss": 1.3031, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.313066285169288e-06, |
|
"loss": 1.3239, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.283261802575108e-06, |
|
"loss": 1.3093, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.253457319980925e-06, |
|
"loss": 1.3053, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.223652837386744e-06, |
|
"loss": 1.3409, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.193848354792561e-06, |
|
"loss": 1.3406, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.16404387219838e-06, |
|
"loss": 1.3051, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.134239389604196e-06, |
|
"loss": 1.2662, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.104434907010013e-06, |
|
"loss": 1.3195, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.074630424415832e-06, |
|
"loss": 1.2922, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.04482594182165e-06, |
|
"loss": 1.3208, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.015021459227469e-06, |
|
"loss": 1.3047, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 7.985216976633286e-06, |
|
"loss": 1.3148, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.955412494039105e-06, |
|
"loss": 1.2982, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.925608011444922e-06, |
|
"loss": 1.323, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.895803528850738e-06, |
|
"loss": 1.2963, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.865999046256557e-06, |
|
"loss": 1.2981, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.836194563662374e-06, |
|
"loss": 1.2817, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.806390081068193e-06, |
|
"loss": 1.3061, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.77658559847401e-06, |
|
"loss": 1.302, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.74678111587983e-06, |
|
"loss": 1.3064, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.716976633285647e-06, |
|
"loss": 1.2704, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.687172150691464e-06, |
|
"loss": 1.2871, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.657367668097281e-06, |
|
"loss": 1.3032, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.627563185503099e-06, |
|
"loss": 1.3276, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.597758702908918e-06, |
|
"loss": 1.2922, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.567954220314735e-06, |
|
"loss": 1.3013, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.538149737720554e-06, |
|
"loss": 1.3174, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.508345255126371e-06, |
|
"loss": 1.3155, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.478540772532189e-06, |
|
"loss": 1.3232, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.448736289938007e-06, |
|
"loss": 1.2449, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.418931807343825e-06, |
|
"loss": 1.297, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.3891273247496425e-06, |
|
"loss": 1.3049, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.359322842155461e-06, |
|
"loss": 1.2848, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.329816404387221e-06, |
|
"loss": 1.2833, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.300011921793038e-06, |
|
"loss": 1.281, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.270207439198855e-06, |
|
"loss": 1.3107, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.240402956604673e-06, |
|
"loss": 1.2745, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.210598474010492e-06, |
|
"loss": 1.3614, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.180793991416309e-06, |
|
"loss": 1.2813, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.150989508822127e-06, |
|
"loss": 1.3131, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.121185026227945e-06, |
|
"loss": 1.2976, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.091380543633763e-06, |
|
"loss": 1.3525, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.06157606103958e-06, |
|
"loss": 1.3217, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.031771578445398e-06, |
|
"loss": 1.2728, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.001967095851216e-06, |
|
"loss": 1.3291, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.972162613257034e-06, |
|
"loss": 1.3162, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.942358130662852e-06, |
|
"loss": 1.273, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.91255364806867e-06, |
|
"loss": 1.2917, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.882749165474488e-06, |
|
"loss": 1.3109, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.8529446828803046e-06, |
|
"loss": 1.3239, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.823140200286123e-06, |
|
"loss": 1.3042, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.793335717691941e-06, |
|
"loss": 1.2799, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.763531235097759e-06, |
|
"loss": 1.3083, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.733726752503576e-06, |
|
"loss": 1.2967, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.7039222699093946e-06, |
|
"loss": 1.3231, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.674117787315213e-06, |
|
"loss": 1.3679, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.644313304721031e-06, |
|
"loss": 1.3178, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.614508822126847e-06, |
|
"loss": 1.305, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.5847043395326656e-06, |
|
"loss": 1.329, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.554899856938484e-06, |
|
"loss": 1.3147, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.525095374344301e-06, |
|
"loss": 1.3129, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.495290891750119e-06, |
|
"loss": 1.295, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.465486409155937e-06, |
|
"loss": 1.3236, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.4356819265617556e-06, |
|
"loss": 1.3012, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.405877443967573e-06, |
|
"loss": 1.2792, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.37607296137339e-06, |
|
"loss": 1.3223, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.346268478779208e-06, |
|
"loss": 1.3346, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.3164639961850266e-06, |
|
"loss": 1.3006, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.286659513590844e-06, |
|
"loss": 1.3093, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.256855030996662e-06, |
|
"loss": 1.33, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.22705054840248e-06, |
|
"loss": 1.3127, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.197246065808298e-06, |
|
"loss": 1.2684, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.167441583214116e-06, |
|
"loss": 1.2938, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.137637100619933e-06, |
|
"loss": 1.3103, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.107832618025751e-06, |
|
"loss": 1.307, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.0780281354315685e-06, |
|
"loss": 1.327, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.048223652837387e-06, |
|
"loss": 1.292, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.018419170243205e-06, |
|
"loss": 1.3067, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.988614687649023e-06, |
|
"loss": 1.2973, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.95881020505484e-06, |
|
"loss": 1.3501, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.9290057224606586e-06, |
|
"loss": 1.2745, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.899201239866476e-06, |
|
"loss": 1.3246, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.869694802098236e-06, |
|
"loss": 1.3207, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.839890319504053e-06, |
|
"loss": 1.2823, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.810085836909871e-06, |
|
"loss": 1.307, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.7802813543156895e-06, |
|
"loss": 1.3289, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.750476871721508e-06, |
|
"loss": 1.271, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.720672389127325e-06, |
|
"loss": 1.3111, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.690867906533142e-06, |
|
"loss": 1.3137, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6610634239389605e-06, |
|
"loss": 1.3168, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.631258941344778e-06, |
|
"loss": 1.3243, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.601454458750596e-06, |
|
"loss": 1.2846, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.571649976156414e-06, |
|
"loss": 1.3359, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.541845493562232e-06, |
|
"loss": 1.312, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.51204101096805e-06, |
|
"loss": 1.304, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.482236528373868e-06, |
|
"loss": 1.3005, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.452432045779685e-06, |
|
"loss": 1.3196, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.422925608011445e-06, |
|
"loss": 1.325, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.393121125417262e-06, |
|
"loss": 1.2993, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.3633166428230805e-06, |
|
"loss": 1.3078, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.333512160228899e-06, |
|
"loss": 1.2723, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.303707677634717e-06, |
|
"loss": 1.3061, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.273903195040534e-06, |
|
"loss": 1.2831, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.244098712446352e-06, |
|
"loss": 1.3367, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.21429422985217e-06, |
|
"loss": 1.3299, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.184489747257988e-06, |
|
"loss": 1.2967, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.154685264663805e-06, |
|
"loss": 1.2826, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.124880782069623e-06, |
|
"loss": 1.2924, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.0950762994754415e-06, |
|
"loss": 1.287, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.065271816881259e-06, |
|
"loss": 1.3353, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.035467334287077e-06, |
|
"loss": 1.2794, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.005960896518837e-06, |
|
"loss": 1.2811, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.976156413924654e-06, |
|
"loss": 1.2638, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.946351931330472e-06, |
|
"loss": 1.34, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.91654744873629e-06, |
|
"loss": 1.2886, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.886742966142108e-06, |
|
"loss": 1.2936, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.856938483547926e-06, |
|
"loss": 1.2905, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.8271340009537434e-06, |
|
"loss": 1.3039, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.797329518359562e-06, |
|
"loss": 1.2702, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.76752503576538e-06, |
|
"loss": 1.3245, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.737720553171197e-06, |
|
"loss": 1.298, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.7079160705770144e-06, |
|
"loss": 1.3183, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.678111587982833e-06, |
|
"loss": 1.2971, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.648307105388651e-06, |
|
"loss": 1.2877, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.618502622794468e-06, |
|
"loss": 1.2796, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.588698140200286e-06, |
|
"loss": 1.325, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.5588936576061044e-06, |
|
"loss": 1.3102, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.529089175011923e-06, |
|
"loss": 1.3211, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.499284692417739e-06, |
|
"loss": 1.2872, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.469480209823557e-06, |
|
"loss": 1.3031, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4396757272293754e-06, |
|
"loss": 1.3126, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.409871244635194e-06, |
|
"loss": 1.3138, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.380066762041011e-06, |
|
"loss": 1.3186, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.350262279446829e-06, |
|
"loss": 1.3243, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.320457796852647e-06, |
|
"loss": 1.2839, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.290653314258465e-06, |
|
"loss": 1.266, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.260848831664282e-06, |
|
"loss": 1.2661, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.2310443490701e-06, |
|
"loss": 1.2955, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.201239866475918e-06, |
|
"loss": 1.2725, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.171435383881736e-06, |
|
"loss": 1.3274, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.141630901287554e-06, |
|
"loss": 1.291, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.111826418693372e-06, |
|
"loss": 1.313, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.08202193609919e-06, |
|
"loss": 1.3116, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.052217453505007e-06, |
|
"loss": 1.2802, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.022412970910825e-06, |
|
"loss": 1.3158, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.992608488316643e-06, |
|
"loss": 1.2753, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.962804005722461e-06, |
|
"loss": 1.2926, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.932999523128278e-06, |
|
"loss": 1.2848, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.903195040534097e-06, |
|
"loss": 1.3019, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.873390557939915e-06, |
|
"loss": 1.3003, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.843586075345732e-06, |
|
"loss": 1.2979, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8137815927515494e-06, |
|
"loss": 1.2929, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.7839771101573676e-06, |
|
"loss": 1.2944, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.7541726275631853e-06, |
|
"loss": 1.2952, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.7243681449690035e-06, |
|
"loss": 1.3206, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.6945636623748213e-06, |
|
"loss": 1.272, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.6647591797806394e-06, |
|
"loss": 1.3305, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.6349546971864567e-06, |
|
"loss": 1.2863, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.605150214592275e-06, |
|
"loss": 1.3274, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.5753457319980927e-06, |
|
"loss": 1.2402, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.545541249403911e-06, |
|
"loss": 1.2966, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.515736766809728e-06, |
|
"loss": 1.2915, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.485932284215546e-06, |
|
"loss": 1.2973, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.456127801621364e-06, |
|
"loss": 1.2655, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.4263233190271814e-06, |
|
"loss": 1.305, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.3965188364329996e-06, |
|
"loss": 1.2942, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.3667143538388173e-06, |
|
"loss": 1.2901, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.3369098712446355e-06, |
|
"loss": 1.2841, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.307105388650453e-06, |
|
"loss": 1.3179, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.277300906056271e-06, |
|
"loss": 1.338, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2474964234620887e-06, |
|
"loss": 1.2943, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.217691940867907e-06, |
|
"loss": 1.3189, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.1878874582737242e-06, |
|
"loss": 1.3266, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.1580829756795424e-06, |
|
"loss": 1.3228, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.12827849308536e-06, |
|
"loss": 1.297, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.098474010491178e-06, |
|
"loss": 1.2824, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0686695278969957e-06, |
|
"loss": 1.3217, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0388650453028134e-06, |
|
"loss": 1.2884, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0090605627086316e-06, |
|
"loss": 1.2947, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.9792560801144493e-06, |
|
"loss": 1.2985, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.949451597520267e-06, |
|
"loss": 1.3063, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.919647114926085e-06, |
|
"loss": 1.2729, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.889842632331903e-06, |
|
"loss": 1.3206, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8600381497377207e-06, |
|
"loss": 1.2862, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8302336671435385e-06, |
|
"loss": 1.3047, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8004291845493562e-06, |
|
"loss": 1.3283, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.7706247019551744e-06, |
|
"loss": 1.3355, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.740820219360992e-06, |
|
"loss": 1.294, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7110157367668095e-06, |
|
"loss": 1.2771, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.6812112541726277e-06, |
|
"loss": 1.2671, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6514067715784454e-06, |
|
"loss": 1.2855, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6216022889842636e-06, |
|
"loss": 1.2664, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5920958512160226e-06, |
|
"loss": 1.3065, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.562291368621841e-06, |
|
"loss": 1.2835, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5324868860276586e-06, |
|
"loss": 1.2752, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5026824034334767e-06, |
|
"loss": 1.3331, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.472877920839294e-06, |
|
"loss": 1.2784, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4430734382451122e-06, |
|
"loss": 1.3154, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.41326895565093e-06, |
|
"loss": 1.2989, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.383464473056748e-06, |
|
"loss": 1.2854, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3536599904625655e-06, |
|
"loss": 1.3367, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3238555078683836e-06, |
|
"loss": 1.2928, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2940510252742014e-06, |
|
"loss": 1.2888, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.264246542680019e-06, |
|
"loss": 1.2786, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.234442060085837e-06, |
|
"loss": 1.2806, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2046375774916546e-06, |
|
"loss": 1.3047, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.174833094897473e-06, |
|
"loss": 1.2596, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.14502861230329e-06, |
|
"loss": 1.3129, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1152241297091083e-06, |
|
"loss": 1.3005, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.085419647114926e-06, |
|
"loss": 1.2638, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0556151645207442e-06, |
|
"loss": 1.305, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0258106819265615e-06, |
|
"loss": 1.2499, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9960061993323797e-06, |
|
"loss": 1.2803, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9662017167381975e-06, |
|
"loss": 1.2982, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9363972341440156e-06, |
|
"loss": 1.2846, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.906592751549833e-06, |
|
"loss": 1.3006, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.876788268955651e-06, |
|
"loss": 1.2996, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8472818311874106e-06, |
|
"loss": 1.2905, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8174773485932284e-06, |
|
"loss": 1.2661, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.7876728659990463e-06, |
|
"loss": 1.2848, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.757868383404864e-06, |
|
"loss": 1.2962, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.728063900810682e-06, |
|
"loss": 1.3271, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6982594182164998e-06, |
|
"loss": 1.2668, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6684549356223177e-06, |
|
"loss": 1.3104, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6386504530281355e-06, |
|
"loss": 1.2795, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6088459704339535e-06, |
|
"loss": 1.2807, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5790414878397712e-06, |
|
"loss": 1.3023, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.549237005245589e-06, |
|
"loss": 1.2996, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5194325226514067e-06, |
|
"loss": 1.3163, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4896280400572247e-06, |
|
"loss": 1.3352, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4598235574630424e-06, |
|
"loss": 1.2638, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4300190748688604e-06, |
|
"loss": 1.3498, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4002145922746781e-06, |
|
"loss": 1.2619, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.370410109680496e-06, |
|
"loss": 1.3227, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3406056270863138e-06, |
|
"loss": 1.2838, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3108011444921318e-06, |
|
"loss": 1.2992, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2809966618979495e-06, |
|
"loss": 1.272, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2511921793037675e-06, |
|
"loss": 1.2672, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2213876967095852e-06, |
|
"loss": 1.307, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.191881258941345e-06, |
|
"loss": 1.2649, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1620767763471627e-06, |
|
"loss": 1.2454, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1322722937529807e-06, |
|
"loss": 1.2826, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1024678111587984e-06, |
|
"loss": 1.3654, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0726633285646161e-06, |
|
"loss": 1.2908, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.042858845970434e-06, |
|
"loss": 1.302, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0130543633762516e-06, |
|
"loss": 1.2849, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.832498807820696e-07, |
|
"loss": 1.2789, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.537434430138293e-07, |
|
"loss": 1.2867, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.239389604196472e-07, |
|
"loss": 1.2662, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.94134477825465e-07, |
|
"loss": 1.2251, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.643299952312829e-07, |
|
"loss": 1.2888, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.345255126371006e-07, |
|
"loss": 1.2734, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.047210300429185e-07, |
|
"loss": 1.2751, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.749165474487363e-07, |
|
"loss": 1.2835, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.451120648545542e-07, |
|
"loss": 1.2643, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.15307582260372e-07, |
|
"loss": 1.3029, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.855030996661899e-07, |
|
"loss": 1.3015, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.556986170720076e-07, |
|
"loss": 1.2745, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.261921793037673e-07, |
|
"loss": 1.298, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.963876967095852e-07, |
|
"loss": 1.2703, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.66583214115403e-07, |
|
"loss": 1.2758, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.367787315212208e-07, |
|
"loss": 1.2944, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.069742489270386e-07, |
|
"loss": 1.3073, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.771697663328565e-07, |
|
"loss": 1.2741, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.473652837386743e-07, |
|
"loss": 1.2829, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.1756080114449216e-07, |
|
"loss": 1.3196, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.8775631855030996e-07, |
|
"loss": 1.3021, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.579518359561278e-07, |
|
"loss": 1.2743, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.2814735336194567e-07, |
|
"loss": 1.2547, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.9834287076776347e-07, |
|
"loss": 1.2993, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.685383881735813e-07, |
|
"loss": 1.2546, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.387339055793992e-07, |
|
"loss": 1.2743, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.0892942298521698e-07, |
|
"loss": 1.2957, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.791249403910348e-07, |
|
"loss": 1.2468, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.4932045779685266e-07, |
|
"loss": 1.2819, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.1951597520267048e-07, |
|
"loss": 1.2974, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.971149260848831e-08, |
|
"loss": 1.2679, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.990701001430615e-08, |
|
"loss": 1.2761, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.010252742012399e-08, |
|
"loss": 1.3245, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 18.9801, |
|
"eval_loss": 1.4905033111572266, |
|
"eval_rouge1": 32.9084, |
|
"eval_rouge2": 17.7027, |
|
"eval_rougeL": 28.2912, |
|
"eval_rougeLsum": 28.2975, |
|
"eval_runtime": 291.9809, |
|
"eval_samples_per_second": 9.288, |
|
"eval_steps_per_second": 1.161, |
|
"step": 100656 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 100656, |
|
"total_flos": 9.506404988551581e+17, |
|
"train_loss": 1.5143268576412658, |
|
"train_runtime": 40494.9096, |
|
"train_samples_per_second": 19.885, |
|
"train_steps_per_second": 2.486 |
|
} |
|
], |
|
"max_steps": 100656, |
|
"num_train_epochs": 3, |
|
"total_flos": 9.506404988551581e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|