|
{ |
|
"best_metric": 0.8285140562248996, |
|
"best_model_checkpoint": "/scratch/camembertv2/runs/results/xnli/camembertv2-base-bf16-p2-17000/max_seq_length-160-gradient_accumulation_steps-4-precision-fp32-learning_rate-1e-05-epochs-10-lr_scheduler-cosine-warmup_steps-0.1/SEED-666/checkpoint-61360", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 122720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008148631029986962, |
|
"grad_norm": 12.58836841583252, |
|
"learning_rate": 8.148631029986963e-08, |
|
"loss": 1.1012, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016297262059973925, |
|
"grad_norm": 1.359410285949707, |
|
"learning_rate": 1.6297262059973925e-07, |
|
"loss": 1.1011, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.024445893089960886, |
|
"grad_norm": 1.128892183303833, |
|
"learning_rate": 2.4445893089960885e-07, |
|
"loss": 1.0978, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03259452411994785, |
|
"grad_norm": 1.3794234991073608, |
|
"learning_rate": 3.259452411994785e-07, |
|
"loss": 1.0999, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04074315514993481, |
|
"grad_norm": 1.3247599601745605, |
|
"learning_rate": 4.0743155149934816e-07, |
|
"loss": 1.0984, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04889178617992177, |
|
"grad_norm": 0.9611015319824219, |
|
"learning_rate": 4.889178617992177e-07, |
|
"loss": 1.1001, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05704041720990873, |
|
"grad_norm": 0.9682479500770569, |
|
"learning_rate": 5.704041720990874e-07, |
|
"loss": 1.0985, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0651890482398957, |
|
"grad_norm": 1.950333833694458, |
|
"learning_rate": 6.51890482398957e-07, |
|
"loss": 1.0989, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07333767926988266, |
|
"grad_norm": 1.4916733503341675, |
|
"learning_rate": 7.333767926988267e-07, |
|
"loss": 1.0964, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.08148631029986962, |
|
"grad_norm": 1.1135200262069702, |
|
"learning_rate": 8.148631029986963e-07, |
|
"loss": 1.096, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08963494132985658, |
|
"grad_norm": 1.773497462272644, |
|
"learning_rate": 8.963494132985659e-07, |
|
"loss": 1.094, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09778357235984354, |
|
"grad_norm": 1.5511926412582397, |
|
"learning_rate": 9.778357235984354e-07, |
|
"loss": 1.093, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1059322033898305, |
|
"grad_norm": 1.389298915863037, |
|
"learning_rate": 1.059322033898305e-06, |
|
"loss": 1.0871, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.11408083441981746, |
|
"grad_norm": 2.486689329147339, |
|
"learning_rate": 1.1408083441981747e-06, |
|
"loss": 1.0751, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.12222946544980444, |
|
"grad_norm": 2.697650194168091, |
|
"learning_rate": 1.2222946544980446e-06, |
|
"loss": 1.0505, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1303780964797914, |
|
"grad_norm": 3.557525157928467, |
|
"learning_rate": 1.303780964797914e-06, |
|
"loss": 1.0393, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13852672750977835, |
|
"grad_norm": 4.691379070281982, |
|
"learning_rate": 1.3852672750977837e-06, |
|
"loss": 1.0147, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14667535853976532, |
|
"grad_norm": 5.234630107879639, |
|
"learning_rate": 1.4667535853976533e-06, |
|
"loss": 0.9971, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.15482398956975227, |
|
"grad_norm": 6.027713298797607, |
|
"learning_rate": 1.5482398956975228e-06, |
|
"loss": 1.0007, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.16297262059973924, |
|
"grad_norm": 13.33498477935791, |
|
"learning_rate": 1.6297262059973926e-06, |
|
"loss": 0.984, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17112125162972622, |
|
"grad_norm": 9.432430267333984, |
|
"learning_rate": 1.7112125162972623e-06, |
|
"loss": 0.9633, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17926988265971316, |
|
"grad_norm": 7.303864479064941, |
|
"learning_rate": 1.7926988265971317e-06, |
|
"loss": 0.9463, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.18741851368970014, |
|
"grad_norm": 6.125274181365967, |
|
"learning_rate": 1.8741851368970016e-06, |
|
"loss": 0.9336, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19556714471968709, |
|
"grad_norm": 6.614850044250488, |
|
"learning_rate": 1.955671447196871e-06, |
|
"loss": 0.9388, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.20371577574967406, |
|
"grad_norm": 7.883510589599609, |
|
"learning_rate": 2.037157757496741e-06, |
|
"loss": 0.9122, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.211864406779661, |
|
"grad_norm": 6.615538597106934, |
|
"learning_rate": 2.11864406779661e-06, |
|
"loss": 0.8907, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.22001303780964798, |
|
"grad_norm": 6.040781021118164, |
|
"learning_rate": 2.20013037809648e-06, |
|
"loss": 0.8725, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.22816166883963493, |
|
"grad_norm": 9.688776016235352, |
|
"learning_rate": 2.2816166883963494e-06, |
|
"loss": 0.8674, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.2363102998696219, |
|
"grad_norm": 15.747467994689941, |
|
"learning_rate": 2.363102998696219e-06, |
|
"loss": 0.8199, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.24445893089960888, |
|
"grad_norm": 9.381732940673828, |
|
"learning_rate": 2.444589308996089e-06, |
|
"loss": 0.831, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2526075619295958, |
|
"grad_norm": 8.603889465332031, |
|
"learning_rate": 2.5260756192959584e-06, |
|
"loss": 0.811, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.2607561929595828, |
|
"grad_norm": 11.614546775817871, |
|
"learning_rate": 2.607561929595828e-06, |
|
"loss": 0.789, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.2689048239895698, |
|
"grad_norm": 7.733945846557617, |
|
"learning_rate": 2.689048239895698e-06, |
|
"loss": 0.7947, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2770534550195567, |
|
"grad_norm": 14.573506355285645, |
|
"learning_rate": 2.7705345501955674e-06, |
|
"loss": 0.7913, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.28520208604954367, |
|
"grad_norm": 11.938140869140625, |
|
"learning_rate": 2.852020860495437e-06, |
|
"loss": 0.793, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.29335071707953064, |
|
"grad_norm": 9.235187530517578, |
|
"learning_rate": 2.9335071707953067e-06, |
|
"loss": 0.7538, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.3014993481095176, |
|
"grad_norm": 9.092159271240234, |
|
"learning_rate": 3.0149934810951763e-06, |
|
"loss": 0.7547, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.30964797913950454, |
|
"grad_norm": 11.72921371459961, |
|
"learning_rate": 3.0964797913950456e-06, |
|
"loss": 0.7461, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.3177966101694915, |
|
"grad_norm": 15.118708610534668, |
|
"learning_rate": 3.1779661016949152e-06, |
|
"loss": 0.7171, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.3259452411994785, |
|
"grad_norm": 17.719839096069336, |
|
"learning_rate": 3.2594524119947853e-06, |
|
"loss": 0.7027, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.33409387222946546, |
|
"grad_norm": 10.063789367675781, |
|
"learning_rate": 3.340938722294655e-06, |
|
"loss": 0.7229, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.34224250325945244, |
|
"grad_norm": 8.052227020263672, |
|
"learning_rate": 3.4224250325945246e-06, |
|
"loss": 0.7218, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.35039113428943935, |
|
"grad_norm": 9.68342399597168, |
|
"learning_rate": 3.503911342894394e-06, |
|
"loss": 0.6873, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.35853976531942633, |
|
"grad_norm": 9.140670776367188, |
|
"learning_rate": 3.5853976531942635e-06, |
|
"loss": 0.702, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.3666883963494133, |
|
"grad_norm": 8.805059432983398, |
|
"learning_rate": 3.666883963494133e-06, |
|
"loss": 0.7245, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3748370273794003, |
|
"grad_norm": 7.228201389312744, |
|
"learning_rate": 3.748370273794003e-06, |
|
"loss": 0.6651, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.3829856584093872, |
|
"grad_norm": 8.284133911132812, |
|
"learning_rate": 3.829856584093872e-06, |
|
"loss": 0.6956, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.39113428943937417, |
|
"grad_norm": 8.938249588012695, |
|
"learning_rate": 3.911342894393742e-06, |
|
"loss": 0.6777, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.39928292046936115, |
|
"grad_norm": 10.810254096984863, |
|
"learning_rate": 3.992829204693612e-06, |
|
"loss": 0.6803, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.4074315514993481, |
|
"grad_norm": 11.629922866821289, |
|
"learning_rate": 4.074315514993482e-06, |
|
"loss": 0.6659, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4155801825293351, |
|
"grad_norm": 7.82265043258667, |
|
"learning_rate": 4.1558018252933515e-06, |
|
"loss": 0.6842, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.423728813559322, |
|
"grad_norm": 9.290712356567383, |
|
"learning_rate": 4.23728813559322e-06, |
|
"loss": 0.6711, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.431877444589309, |
|
"grad_norm": 10.643411636352539, |
|
"learning_rate": 4.31877444589309e-06, |
|
"loss": 0.6521, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.44002607561929596, |
|
"grad_norm": 8.533503532409668, |
|
"learning_rate": 4.40026075619296e-06, |
|
"loss": 0.6613, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.44817470664928294, |
|
"grad_norm": 12.260805130004883, |
|
"learning_rate": 4.48174706649283e-06, |
|
"loss": 0.6512, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.45632333767926986, |
|
"grad_norm": 7.977556228637695, |
|
"learning_rate": 4.563233376792699e-06, |
|
"loss": 0.6499, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.46447196870925683, |
|
"grad_norm": 7.418649673461914, |
|
"learning_rate": 4.6447196870925686e-06, |
|
"loss": 0.6591, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.4726205997392438, |
|
"grad_norm": 10.594202995300293, |
|
"learning_rate": 4.726205997392438e-06, |
|
"loss": 0.6497, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 11.133523941040039, |
|
"learning_rate": 4.807692307692308e-06, |
|
"loss": 0.6538, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.48891786179921776, |
|
"grad_norm": 12.108560562133789, |
|
"learning_rate": 4.889178617992178e-06, |
|
"loss": 0.6195, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4970664928292047, |
|
"grad_norm": 9.70545482635498, |
|
"learning_rate": 4.970664928292047e-06, |
|
"loss": 0.6351, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.5052151238591917, |
|
"grad_norm": 12.699902534484863, |
|
"learning_rate": 5.052151238591917e-06, |
|
"loss": 0.6557, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.5133637548891786, |
|
"grad_norm": 10.324420928955078, |
|
"learning_rate": 5.1336375488917865e-06, |
|
"loss": 0.6415, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.5215123859191656, |
|
"grad_norm": 10.3858642578125, |
|
"learning_rate": 5.215123859191656e-06, |
|
"loss": 0.624, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.5296610169491526, |
|
"grad_norm": 13.573092460632324, |
|
"learning_rate": 5.296610169491526e-06, |
|
"loss": 0.6622, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5378096479791395, |
|
"grad_norm": 8.366503715515137, |
|
"learning_rate": 5.378096479791396e-06, |
|
"loss": 0.6166, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.5459582790091264, |
|
"grad_norm": 6.413454532623291, |
|
"learning_rate": 5.459582790091264e-06, |
|
"loss": 0.6315, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.5541069100391134, |
|
"grad_norm": 7.670026779174805, |
|
"learning_rate": 5.541069100391135e-06, |
|
"loss": 0.612, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5622555410691004, |
|
"grad_norm": 10.53145694732666, |
|
"learning_rate": 5.622555410691004e-06, |
|
"loss": 0.6167, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5704041720990873, |
|
"grad_norm": 6.5404462814331055, |
|
"learning_rate": 5.704041720990874e-06, |
|
"loss": 0.6226, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5785528031290743, |
|
"grad_norm": 9.084834098815918, |
|
"learning_rate": 5.785528031290744e-06, |
|
"loss": 0.6214, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5867014341590613, |
|
"grad_norm": 9.231087684631348, |
|
"learning_rate": 5.867014341590613e-06, |
|
"loss": 0.6245, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5948500651890483, |
|
"grad_norm": 8.526376724243164, |
|
"learning_rate": 5.948500651890483e-06, |
|
"loss": 0.6205, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.6029986962190352, |
|
"grad_norm": 9.337794303894043, |
|
"learning_rate": 6.029986962190353e-06, |
|
"loss": 0.6156, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.6111473272490222, |
|
"grad_norm": 8.846671104431152, |
|
"learning_rate": 6.111473272490222e-06, |
|
"loss": 0.6142, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6192959582790091, |
|
"grad_norm": 8.68179988861084, |
|
"learning_rate": 6.192959582790091e-06, |
|
"loss": 0.6218, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.627444589308996, |
|
"grad_norm": 9.76940631866455, |
|
"learning_rate": 6.274445893089961e-06, |
|
"loss": 0.587, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.635593220338983, |
|
"grad_norm": 7.811220169067383, |
|
"learning_rate": 6.3559322033898304e-06, |
|
"loss": 0.6002, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.64374185136897, |
|
"grad_norm": 8.950928688049316, |
|
"learning_rate": 6.4374185136897e-06, |
|
"loss": 0.6032, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.651890482398957, |
|
"grad_norm": 6.704097270965576, |
|
"learning_rate": 6.518904823989571e-06, |
|
"loss": 0.5993, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.660039113428944, |
|
"grad_norm": 11.18411922454834, |
|
"learning_rate": 6.60039113428944e-06, |
|
"loss": 0.6035, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.6681877444589309, |
|
"grad_norm": 8.417338371276855, |
|
"learning_rate": 6.68187744458931e-06, |
|
"loss": 0.624, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6763363754889179, |
|
"grad_norm": 9.916496276855469, |
|
"learning_rate": 6.7633637548891795e-06, |
|
"loss": 0.6275, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6844850065189049, |
|
"grad_norm": 8.701171875, |
|
"learning_rate": 6.844850065189049e-06, |
|
"loss": 0.5773, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.6926336375488917, |
|
"grad_norm": 10.245955467224121, |
|
"learning_rate": 6.926336375488918e-06, |
|
"loss": 0.6139, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.7007822685788787, |
|
"grad_norm": 6.190640926361084, |
|
"learning_rate": 7.007822685788788e-06, |
|
"loss": 0.5833, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.7089308996088657, |
|
"grad_norm": 10.875850677490234, |
|
"learning_rate": 7.089308996088657e-06, |
|
"loss": 0.6, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.7170795306388527, |
|
"grad_norm": 8.644452095031738, |
|
"learning_rate": 7.170795306388527e-06, |
|
"loss": 0.6097, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.7252281616688396, |
|
"grad_norm": 8.089356422424316, |
|
"learning_rate": 7.252281616688397e-06, |
|
"loss": 0.583, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.7333767926988266, |
|
"grad_norm": 12.513883590698242, |
|
"learning_rate": 7.333767926988266e-06, |
|
"loss": 0.5669, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.7415254237288136, |
|
"grad_norm": 9.404706001281738, |
|
"learning_rate": 7.415254237288137e-06, |
|
"loss": 0.5833, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.7496740547588006, |
|
"grad_norm": 6.789037227630615, |
|
"learning_rate": 7.496740547588006e-06, |
|
"loss": 0.5985, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7578226857887875, |
|
"grad_norm": 7.355409145355225, |
|
"learning_rate": 7.578226857887876e-06, |
|
"loss": 0.5686, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.7659713168187744, |
|
"grad_norm": 7.175694465637207, |
|
"learning_rate": 7.659713168187744e-06, |
|
"loss": 0.5991, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.7741199478487614, |
|
"grad_norm": 6.2600274085998535, |
|
"learning_rate": 7.741199478487615e-06, |
|
"loss": 0.5803, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7822685788787483, |
|
"grad_norm": 11.514883995056152, |
|
"learning_rate": 7.822685788787483e-06, |
|
"loss": 0.5802, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7904172099087353, |
|
"grad_norm": 6.594653129577637, |
|
"learning_rate": 7.904172099087354e-06, |
|
"loss": 0.5772, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.7985658409387223, |
|
"grad_norm": 10.59202766418457, |
|
"learning_rate": 7.985658409387224e-06, |
|
"loss": 0.5848, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.8067144719687093, |
|
"grad_norm": 7.8735151290893555, |
|
"learning_rate": 8.067144719687093e-06, |
|
"loss": 0.5813, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.8148631029986962, |
|
"grad_norm": 9.064979553222656, |
|
"learning_rate": 8.148631029986964e-06, |
|
"loss": 0.5792, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8230117340286832, |
|
"grad_norm": 10.0288667678833, |
|
"learning_rate": 8.230117340286832e-06, |
|
"loss": 0.5622, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.8311603650586702, |
|
"grad_norm": 8.7724609375, |
|
"learning_rate": 8.311603650586703e-06, |
|
"loss": 0.5767, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.8393089960886571, |
|
"grad_norm": 8.127886772155762, |
|
"learning_rate": 8.393089960886572e-06, |
|
"loss": 0.5721, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 7.77069616317749, |
|
"learning_rate": 8.47457627118644e-06, |
|
"loss": 0.5925, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.855606258148631, |
|
"grad_norm": 7.864415645599365, |
|
"learning_rate": 8.556062581486311e-06, |
|
"loss": 0.5805, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.863754889178618, |
|
"grad_norm": 7.0319952964782715, |
|
"learning_rate": 8.63754889178618e-06, |
|
"loss": 0.577, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.871903520208605, |
|
"grad_norm": 7.513912677764893, |
|
"learning_rate": 8.71903520208605e-06, |
|
"loss": 0.5978, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.8800521512385919, |
|
"grad_norm": 8.28197193145752, |
|
"learning_rate": 8.80052151238592e-06, |
|
"loss": 0.5912, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.8882007822685789, |
|
"grad_norm": 7.632150650024414, |
|
"learning_rate": 8.88200782268579e-06, |
|
"loss": 0.5706, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.8963494132985659, |
|
"grad_norm": 7.691524028778076, |
|
"learning_rate": 8.96349413298566e-06, |
|
"loss": 0.5612, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.9044980443285529, |
|
"grad_norm": 8.549062728881836, |
|
"learning_rate": 9.044980443285529e-06, |
|
"loss": 0.5494, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.9126466753585397, |
|
"grad_norm": 10.64492416381836, |
|
"learning_rate": 9.126466753585398e-06, |
|
"loss": 0.5629, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.9207953063885267, |
|
"grad_norm": 7.610856056213379, |
|
"learning_rate": 9.207953063885268e-06, |
|
"loss": 0.5627, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.9289439374185137, |
|
"grad_norm": 10.41044807434082, |
|
"learning_rate": 9.289439374185137e-06, |
|
"loss": 0.5756, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.9370925684485006, |
|
"grad_norm": 6.464520454406738, |
|
"learning_rate": 9.370925684485008e-06, |
|
"loss": 0.5817, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.9452411994784876, |
|
"grad_norm": 12.031845092773438, |
|
"learning_rate": 9.452411994784876e-06, |
|
"loss": 0.5761, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.9533898305084746, |
|
"grad_norm": 8.345417022705078, |
|
"learning_rate": 9.533898305084747e-06, |
|
"loss": 0.5789, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 8.58055305480957, |
|
"learning_rate": 9.615384615384616e-06, |
|
"loss": 0.5745, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.9696870925684485, |
|
"grad_norm": 5.948461532592773, |
|
"learning_rate": 9.696870925684486e-06, |
|
"loss": 0.5695, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.9778357235984355, |
|
"grad_norm": 8.523883819580078, |
|
"learning_rate": 9.778357235984357e-06, |
|
"loss": 0.575, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.9859843546284224, |
|
"grad_norm": 8.530996322631836, |
|
"learning_rate": 9.859843546284224e-06, |
|
"loss": 0.5496, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.9941329856584094, |
|
"grad_norm": 8.197943687438965, |
|
"learning_rate": 9.941329856584094e-06, |
|
"loss": 0.5929, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8028112449799196, |
|
"eval_loss": 0.5100582242012024, |
|
"eval_runtime": 7.5718, |
|
"eval_samples_per_second": 328.853, |
|
"eval_steps_per_second": 41.206, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 1.0022816166883963, |
|
"grad_norm": 8.692102432250977, |
|
"learning_rate": 9.999998414230423e-06, |
|
"loss": 0.5456, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.0104302477183833, |
|
"grad_norm": 6.663279056549072, |
|
"learning_rate": 9.999966860686959e-06, |
|
"loss": 0.546, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.0185788787483703, |
|
"grad_norm": 7.53484582901001, |
|
"learning_rate": 9.999894854131206e-06, |
|
"loss": 0.5182, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.0267275097783573, |
|
"grad_norm": 6.181861877441406, |
|
"learning_rate": 9.999782395145752e-06, |
|
"loss": 0.5093, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.0348761408083442, |
|
"grad_norm": 9.323958396911621, |
|
"learning_rate": 9.999629484640457e-06, |
|
"loss": 0.528, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.0430247718383312, |
|
"grad_norm": 10.739737510681152, |
|
"learning_rate": 9.999436123852473e-06, |
|
"loss": 0.5423, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.0511734028683182, |
|
"grad_norm": 7.653073787689209, |
|
"learning_rate": 9.99920231434621e-06, |
|
"loss": 0.5215, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.0593220338983051, |
|
"grad_norm": 6.83660888671875, |
|
"learning_rate": 9.998928058013346e-06, |
|
"loss": 0.5134, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.0674706649282921, |
|
"grad_norm": 10.44430923461914, |
|
"learning_rate": 9.99861335707279e-06, |
|
"loss": 0.5466, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.075619295958279, |
|
"grad_norm": 7.2710280418396, |
|
"learning_rate": 9.998258214070683e-06, |
|
"loss": 0.5364, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.083767926988266, |
|
"grad_norm": 5.829804420471191, |
|
"learning_rate": 9.997862631880362e-06, |
|
"loss": 0.5146, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.0919165580182528, |
|
"grad_norm": 8.52145767211914, |
|
"learning_rate": 9.997426613702348e-06, |
|
"loss": 0.5105, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.1000651890482398, |
|
"grad_norm": 6.255794525146484, |
|
"learning_rate": 9.996950163064313e-06, |
|
"loss": 0.532, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.1082138200782268, |
|
"grad_norm": 8.463394165039062, |
|
"learning_rate": 9.996433283821057e-06, |
|
"loss": 0.5265, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.1163624511082137, |
|
"grad_norm": 9.939913749694824, |
|
"learning_rate": 9.995875980154468e-06, |
|
"loss": 0.5297, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.1245110821382007, |
|
"grad_norm": 10.322543144226074, |
|
"learning_rate": 9.995278256573504e-06, |
|
"loss": 0.5413, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.1326597131681877, |
|
"grad_norm": 7.6662445068359375, |
|
"learning_rate": 9.994640117914139e-06, |
|
"loss": 0.5197, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.1408083441981747, |
|
"grad_norm": 12.555916786193848, |
|
"learning_rate": 9.99396156933933e-06, |
|
"loss": 0.5472, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.1489569752281616, |
|
"grad_norm": 12.246332168579102, |
|
"learning_rate": 9.993242616338983e-06, |
|
"loss": 0.5296, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.1571056062581486, |
|
"grad_norm": 11.406452178955078, |
|
"learning_rate": 9.992483264729902e-06, |
|
"loss": 0.5266, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.1652542372881356, |
|
"grad_norm": 7.620953559875488, |
|
"learning_rate": 9.991683520655735e-06, |
|
"loss": 0.5267, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.1734028683181226, |
|
"grad_norm": 7.820069789886475, |
|
"learning_rate": 9.990843390586938e-06, |
|
"loss": 0.5384, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.1815514993481095, |
|
"grad_norm": 8.187140464782715, |
|
"learning_rate": 9.989962881320714e-06, |
|
"loss": 0.5071, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.1897001303780965, |
|
"grad_norm": 8.322758674621582, |
|
"learning_rate": 9.989041999980964e-06, |
|
"loss": 0.5342, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.1978487614080835, |
|
"grad_norm": 9.802703857421875, |
|
"learning_rate": 9.988080754018218e-06, |
|
"loss": 0.5205, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.2059973924380705, |
|
"grad_norm": 9.249838829040527, |
|
"learning_rate": 9.987079151209588e-06, |
|
"loss": 0.5069, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.2141460234680574, |
|
"grad_norm": 4.855494022369385, |
|
"learning_rate": 9.986037199658698e-06, |
|
"loss": 0.5107, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.2222946544980444, |
|
"grad_norm": 9.250731468200684, |
|
"learning_rate": 9.984954907795619e-06, |
|
"loss": 0.5093, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.2304432855280312, |
|
"grad_norm": 5.86234712600708, |
|
"learning_rate": 9.983832284376804e-06, |
|
"loss": 0.5539, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.2385919165580184, |
|
"grad_norm": 13.074224472045898, |
|
"learning_rate": 9.982669338485012e-06, |
|
"loss": 0.5248, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.2467405475880051, |
|
"grad_norm": 12.13022518157959, |
|
"learning_rate": 9.981466079529236e-06, |
|
"loss": 0.5415, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.254889178617992, |
|
"grad_norm": 9.259481430053711, |
|
"learning_rate": 9.980222517244633e-06, |
|
"loss": 0.5224, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.263037809647979, |
|
"grad_norm": 7.281178951263428, |
|
"learning_rate": 9.978938661692439e-06, |
|
"loss": 0.5363, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 12.429268836975098, |
|
"learning_rate": 9.977614523259884e-06, |
|
"loss": 0.5257, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.279335071707953, |
|
"grad_norm": 8.357499122619629, |
|
"learning_rate": 9.97625011266012e-06, |
|
"loss": 0.5151, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.28748370273794, |
|
"grad_norm": 7.741194725036621, |
|
"learning_rate": 9.974845440932121e-06, |
|
"loss": 0.4973, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.295632333767927, |
|
"grad_norm": 12.34659481048584, |
|
"learning_rate": 9.973400519440605e-06, |
|
"loss": 0.5275, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.303780964797914, |
|
"grad_norm": 7.972919940948486, |
|
"learning_rate": 9.971915359875935e-06, |
|
"loss": 0.5196, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.311929595827901, |
|
"grad_norm": 6.398066520690918, |
|
"learning_rate": 9.970389974254025e-06, |
|
"loss": 0.5239, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.320078226857888, |
|
"grad_norm": 9.441793441772461, |
|
"learning_rate": 9.968824374916245e-06, |
|
"loss": 0.5141, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.3282268578878749, |
|
"grad_norm": 8.154695510864258, |
|
"learning_rate": 9.967218574529323e-06, |
|
"loss": 0.5179, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.3363754889178618, |
|
"grad_norm": 9.219006538391113, |
|
"learning_rate": 9.965572586085235e-06, |
|
"loss": 0.4859, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.3445241199478488, |
|
"grad_norm": 7.020698070526123, |
|
"learning_rate": 9.96388642290111e-06, |
|
"loss": 0.5128, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.3526727509778358, |
|
"grad_norm": 7.134260654449463, |
|
"learning_rate": 9.96216009861911e-06, |
|
"loss": 0.5067, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.3608213820078228, |
|
"grad_norm": 6.663614273071289, |
|
"learning_rate": 9.96039362720634e-06, |
|
"loss": 0.5352, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.3689700130378095, |
|
"grad_norm": 7.817680358886719, |
|
"learning_rate": 9.958587022954704e-06, |
|
"loss": 0.5143, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.3771186440677967, |
|
"grad_norm": 8.092264175415039, |
|
"learning_rate": 9.956740300480818e-06, |
|
"loss": 0.5111, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.3852672750977835, |
|
"grad_norm": 7.305174350738525, |
|
"learning_rate": 9.954853474725878e-06, |
|
"loss": 0.5432, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.3934159061277707, |
|
"grad_norm": 7.337920188903809, |
|
"learning_rate": 9.952926560955547e-06, |
|
"loss": 0.5279, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.4015645371577574, |
|
"grad_norm": 8.824036598205566, |
|
"learning_rate": 9.950959574759815e-06, |
|
"loss": 0.5073, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.4097131681877444, |
|
"grad_norm": 5.825498580932617, |
|
"learning_rate": 9.948952532052895e-06, |
|
"loss": 0.5208, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.4178617992177314, |
|
"grad_norm": 6.746844291687012, |
|
"learning_rate": 9.946905449073077e-06, |
|
"loss": 0.5245, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.4260104302477183, |
|
"grad_norm": 9.570401191711426, |
|
"learning_rate": 9.944818342382607e-06, |
|
"loss": 0.5056, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.4341590612777053, |
|
"grad_norm": 8.143331527709961, |
|
"learning_rate": 9.942691228867548e-06, |
|
"loss": 0.5066, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"grad_norm": 8.18307113647461, |
|
"learning_rate": 9.940524125737641e-06, |
|
"loss": 0.4933, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.4504563233376793, |
|
"grad_norm": 9.306159019470215, |
|
"learning_rate": 9.938317050526173e-06, |
|
"loss": 0.5092, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.4586049543676662, |
|
"grad_norm": 7.026943206787109, |
|
"learning_rate": 9.936070021089834e-06, |
|
"loss": 0.5071, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.4667535853976532, |
|
"grad_norm": 8.45121955871582, |
|
"learning_rate": 9.933783055608562e-06, |
|
"loss": 0.5193, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.4749022164276402, |
|
"grad_norm": 5.932709217071533, |
|
"learning_rate": 9.93145617258541e-06, |
|
"loss": 0.5311, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.4830508474576272, |
|
"grad_norm": 8.077872276306152, |
|
"learning_rate": 9.929089390846389e-06, |
|
"loss": 0.4887, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.4911994784876141, |
|
"grad_norm": 10.298677444458008, |
|
"learning_rate": 9.926682729540313e-06, |
|
"loss": 0.5006, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.4993481095176011, |
|
"grad_norm": 7.896773815155029, |
|
"learning_rate": 9.924236208138656e-06, |
|
"loss": 0.4828, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.5074967405475879, |
|
"grad_norm": 10.591178894042969, |
|
"learning_rate": 9.921749846435375e-06, |
|
"loss": 0.4936, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.515645371577575, |
|
"grad_norm": 8.356033325195312, |
|
"learning_rate": 9.919223664546774e-06, |
|
"loss": 0.5271, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.5237940026075618, |
|
"grad_norm": 9.826644897460938, |
|
"learning_rate": 9.916657682911317e-06, |
|
"loss": 0.5115, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.531942633637549, |
|
"grad_norm": 7.742495536804199, |
|
"learning_rate": 9.914051922289482e-06, |
|
"loss": 0.5037, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.5400912646675358, |
|
"grad_norm": 6.355010032653809, |
|
"learning_rate": 9.91140640376358e-06, |
|
"loss": 0.5047, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.548239895697523, |
|
"grad_norm": 11.718524932861328, |
|
"learning_rate": 9.908721148737591e-06, |
|
"loss": 0.5074, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.5563885267275097, |
|
"grad_norm": 6.173713207244873, |
|
"learning_rate": 9.905996178936991e-06, |
|
"loss": 0.5367, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.5645371577574967, |
|
"grad_norm": 10.962457656860352, |
|
"learning_rate": 9.903231516408576e-06, |
|
"loss": 0.4991, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.5726857887874837, |
|
"grad_norm": 6.949578285217285, |
|
"learning_rate": 9.900427183520276e-06, |
|
"loss": 0.4935, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.5808344198174706, |
|
"grad_norm": 6.240306854248047, |
|
"learning_rate": 9.897583202960985e-06, |
|
"loss": 0.5136, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.5889830508474576, |
|
"grad_norm": 6.609454154968262, |
|
"learning_rate": 9.89469959774037e-06, |
|
"loss": 0.4972, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.5971316818774446, |
|
"grad_norm": 8.191039085388184, |
|
"learning_rate": 9.891776391188694e-06, |
|
"loss": 0.5202, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.6052803129074316, |
|
"grad_norm": 7.624372959136963, |
|
"learning_rate": 9.888813606956612e-06, |
|
"loss": 0.515, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.6134289439374185, |
|
"grad_norm": 8.45014476776123, |
|
"learning_rate": 9.885811269014992e-06, |
|
"loss": 0.517, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.6215775749674055, |
|
"grad_norm": 6.690873146057129, |
|
"learning_rate": 9.882769401654719e-06, |
|
"loss": 0.5153, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.6297262059973925, |
|
"grad_norm": 6.8720808029174805, |
|
"learning_rate": 9.879688029486496e-06, |
|
"loss": 0.5288, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.6378748370273795, |
|
"grad_norm": 9.76561164855957, |
|
"learning_rate": 9.876567177440645e-06, |
|
"loss": 0.509, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.6460234680573662, |
|
"grad_norm": 12.810523986816406, |
|
"learning_rate": 9.873406870766906e-06, |
|
"loss": 0.5144, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.6541720990873534, |
|
"grad_norm": 6.44625997543335, |
|
"learning_rate": 9.870207135034235e-06, |
|
"loss": 0.5237, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.6623207301173402, |
|
"grad_norm": 9.6302490234375, |
|
"learning_rate": 9.86696799613059e-06, |
|
"loss": 0.5094, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.6704693611473274, |
|
"grad_norm": 10.308381080627441, |
|
"learning_rate": 9.863689480262734e-06, |
|
"loss": 0.498, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.6786179921773141, |
|
"grad_norm": 11.594625473022461, |
|
"learning_rate": 9.860371613956008e-06, |
|
"loss": 0.5224, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.6867666232073013, |
|
"grad_norm": 7.823093414306641, |
|
"learning_rate": 9.85701442405413e-06, |
|
"loss": 0.515, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 6.978199481964111, |
|
"learning_rate": 9.853617937718966e-06, |
|
"loss": 0.5103, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.7030638852672753, |
|
"grad_norm": 9.50684928894043, |
|
"learning_rate": 9.850182182430322e-06, |
|
"loss": 0.4876, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.711212516297262, |
|
"grad_norm": 9.167742729187012, |
|
"learning_rate": 9.84670718598571e-06, |
|
"loss": 0.521, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.719361147327249, |
|
"grad_norm": 9.103960990905762, |
|
"learning_rate": 9.843192976500131e-06, |
|
"loss": 0.4987, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.727509778357236, |
|
"grad_norm": 7.777735233306885, |
|
"learning_rate": 9.83963958240585e-06, |
|
"loss": 0.4838, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.735658409387223, |
|
"grad_norm": 3.7518503665924072, |
|
"learning_rate": 9.83604703245215e-06, |
|
"loss": 0.5019, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.74380704041721, |
|
"grad_norm": 8.239873886108398, |
|
"learning_rate": 9.832415355705118e-06, |
|
"loss": 0.5119, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.7519556714471969, |
|
"grad_norm": 7.265876293182373, |
|
"learning_rate": 9.828744581547407e-06, |
|
"loss": 0.4681, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.7601043024771839, |
|
"grad_norm": 9.064807891845703, |
|
"learning_rate": 9.825034739677984e-06, |
|
"loss": 0.4737, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.7682529335071708, |
|
"grad_norm": 6.92955207824707, |
|
"learning_rate": 9.821285860111903e-06, |
|
"loss": 0.4968, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.7764015645371578, |
|
"grad_norm": 10.282632827758789, |
|
"learning_rate": 9.817497973180062e-06, |
|
"loss": 0.4986, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.7845501955671446, |
|
"grad_norm": 5.6930084228515625, |
|
"learning_rate": 9.813671109528949e-06, |
|
"loss": 0.5135, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.7926988265971318, |
|
"grad_norm": 6.911000728607178, |
|
"learning_rate": 9.809805300120403e-06, |
|
"loss": 0.5046, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.8008474576271185, |
|
"grad_norm": 6.411030292510986, |
|
"learning_rate": 9.805900576231358e-06, |
|
"loss": 0.4926, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.8089960886571057, |
|
"grad_norm": 6.620294570922852, |
|
"learning_rate": 9.801956969453592e-06, |
|
"loss": 0.4788, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.8171447196870925, |
|
"grad_norm": 6.77543830871582, |
|
"learning_rate": 9.797974511693471e-06, |
|
"loss": 0.4896, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.8252933507170797, |
|
"grad_norm": 7.471630573272705, |
|
"learning_rate": 9.793953235171694e-06, |
|
"loss": 0.4979, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.8334419817470664, |
|
"grad_norm": 6.550878524780273, |
|
"learning_rate": 9.789893172423021e-06, |
|
"loss": 0.5081, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.8415906127770536, |
|
"grad_norm": 9.887825965881348, |
|
"learning_rate": 9.78579435629603e-06, |
|
"loss": 0.5089, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.8497392438070404, |
|
"grad_norm": 7.088003158569336, |
|
"learning_rate": 9.781656819952826e-06, |
|
"loss": 0.4811, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.8578878748370273, |
|
"grad_norm": 6.524052619934082, |
|
"learning_rate": 9.777480596868796e-06, |
|
"loss": 0.5018, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.8660365058670143, |
|
"grad_norm": 7.965360164642334, |
|
"learning_rate": 9.773265720832324e-06, |
|
"loss": 0.5144, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.8741851368970013, |
|
"grad_norm": 7.510045051574707, |
|
"learning_rate": 9.769012225944521e-06, |
|
"loss": 0.5002, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.8823337679269883, |
|
"grad_norm": 11.717968940734863, |
|
"learning_rate": 9.764720146618955e-06, |
|
"loss": 0.5003, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.8904823989569752, |
|
"grad_norm": 5.974288463592529, |
|
"learning_rate": 9.760389517581362e-06, |
|
"loss": 0.4912, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.8986310299869622, |
|
"grad_norm": 5.159633159637451, |
|
"learning_rate": 9.75602037386937e-06, |
|
"loss": 0.4861, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.9067796610169492, |
|
"grad_norm": 6.651115417480469, |
|
"learning_rate": 9.75161275083222e-06, |
|
"loss": 0.5153, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.9149282920469362, |
|
"grad_norm": 7.513479709625244, |
|
"learning_rate": 9.747166684130474e-06, |
|
"loss": 0.4931, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 8.77505874633789, |
|
"learning_rate": 9.742682209735727e-06, |
|
"loss": 0.501, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.93122555410691, |
|
"grad_norm": 6.932135581970215, |
|
"learning_rate": 9.738159363930324e-06, |
|
"loss": 0.52, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.9393741851368969, |
|
"grad_norm": 10.359477996826172, |
|
"learning_rate": 9.73359818330705e-06, |
|
"loss": 0.4877, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.947522816166884, |
|
"grad_norm": 8.781031608581543, |
|
"learning_rate": 9.72899870476885e-06, |
|
"loss": 0.4891, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.9556714471968708, |
|
"grad_norm": 8.263874053955078, |
|
"learning_rate": 9.724360965528523e-06, |
|
"loss": 0.5061, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.963820078226858, |
|
"grad_norm": 7.766465663909912, |
|
"learning_rate": 9.719685003108423e-06, |
|
"loss": 0.4902, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.9719687092568448, |
|
"grad_norm": 4.978456974029541, |
|
"learning_rate": 9.714970855340152e-06, |
|
"loss": 0.4873, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.980117340286832, |
|
"grad_norm": 7.918380260467529, |
|
"learning_rate": 9.71021856036426e-06, |
|
"loss": 0.4941, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.9882659713168187, |
|
"grad_norm": 8.015583038330078, |
|
"learning_rate": 9.705428156629933e-06, |
|
"loss": 0.4833, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.996414602346806, |
|
"grad_norm": 7.768013954162598, |
|
"learning_rate": 9.700599682894675e-06, |
|
"loss": 0.4932, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8008032128514057, |
|
"eval_loss": 0.5220404267311096, |
|
"eval_runtime": 6.9482, |
|
"eval_samples_per_second": 358.366, |
|
"eval_steps_per_second": 44.904, |
|
"step": 24544 |
|
}, |
|
{ |
|
"epoch": 2.0045632333767927, |
|
"grad_norm": 7.617489337921143, |
|
"learning_rate": 9.695733178224009e-06, |
|
"loss": 0.4491, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.01271186440678, |
|
"grad_norm": 8.741541862487793, |
|
"learning_rate": 9.690828681991153e-06, |
|
"loss": 0.4068, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 2.0208604954367666, |
|
"grad_norm": 11.999881744384766, |
|
"learning_rate": 9.685886233876696e-06, |
|
"loss": 0.4138, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 2.029009126466754, |
|
"grad_norm": 9.766683578491211, |
|
"learning_rate": 9.680905873868287e-06, |
|
"loss": 0.3986, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 2.0371577574967406, |
|
"grad_norm": 6.533343315124512, |
|
"learning_rate": 9.675887642260306e-06, |
|
"loss": 0.4024, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.0453063885267273, |
|
"grad_norm": 9.137768745422363, |
|
"learning_rate": 9.670831579653539e-06, |
|
"loss": 0.4436, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 2.0534550195567145, |
|
"grad_norm": 9.635496139526367, |
|
"learning_rate": 9.665737726954852e-06, |
|
"loss": 0.4019, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 2.0616036505867013, |
|
"grad_norm": 7.93952751159668, |
|
"learning_rate": 9.66060612537685e-06, |
|
"loss": 0.4221, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 2.0697522816166884, |
|
"grad_norm": 9.508652687072754, |
|
"learning_rate": 9.65543681643756e-06, |
|
"loss": 0.4221, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 2.077900912646675, |
|
"grad_norm": 9.136526107788086, |
|
"learning_rate": 9.650229841960084e-06, |
|
"loss": 0.4239, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.0860495436766624, |
|
"grad_norm": 11.71844482421875, |
|
"learning_rate": 9.644985244072258e-06, |
|
"loss": 0.4047, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 2.094198174706649, |
|
"grad_norm": 4.190426826477051, |
|
"learning_rate": 9.639703065206323e-06, |
|
"loss": 0.4209, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 2.1023468057366363, |
|
"grad_norm": 11.736051559448242, |
|
"learning_rate": 9.63438334809857e-06, |
|
"loss": 0.4086, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 2.110495436766623, |
|
"grad_norm": 7.024579048156738, |
|
"learning_rate": 9.629026135789002e-06, |
|
"loss": 0.4346, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 10.942073822021484, |
|
"learning_rate": 9.62363147162098e-06, |
|
"loss": 0.4242, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.126792698826597, |
|
"grad_norm": 12.155450820922852, |
|
"learning_rate": 9.618199399240876e-06, |
|
"loss": 0.4706, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 2.1349413298565842, |
|
"grad_norm": 6.733283519744873, |
|
"learning_rate": 9.612729962597721e-06, |
|
"loss": 0.4406, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 2.143089960886571, |
|
"grad_norm": 7.309271335601807, |
|
"learning_rate": 9.607223205942845e-06, |
|
"loss": 0.4169, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 2.151238591916558, |
|
"grad_norm": 7.154285907745361, |
|
"learning_rate": 9.601679173829522e-06, |
|
"loss": 0.4406, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.159387222946545, |
|
"grad_norm": 8.043559074401855, |
|
"learning_rate": 9.596097911112609e-06, |
|
"loss": 0.4264, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.167535853976532, |
|
"grad_norm": 9.203978538513184, |
|
"learning_rate": 9.590479462948185e-06, |
|
"loss": 0.4173, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 2.175684485006519, |
|
"grad_norm": 7.716718673706055, |
|
"learning_rate": 9.58482387479318e-06, |
|
"loss": 0.412, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 2.1838331160365057, |
|
"grad_norm": 10.910019874572754, |
|
"learning_rate": 9.57913119240501e-06, |
|
"loss": 0.3844, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 2.191981747066493, |
|
"grad_norm": 7.980166435241699, |
|
"learning_rate": 9.573401461841218e-06, |
|
"loss": 0.4441, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 2.2001303780964796, |
|
"grad_norm": 7.328435897827148, |
|
"learning_rate": 9.567634729459076e-06, |
|
"loss": 0.4118, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.208279009126467, |
|
"grad_norm": 7.026157379150391, |
|
"learning_rate": 9.561831041915238e-06, |
|
"loss": 0.4258, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 2.2164276401564535, |
|
"grad_norm": 10.100348472595215, |
|
"learning_rate": 9.555990446165339e-06, |
|
"loss": 0.4368, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 2.2245762711864407, |
|
"grad_norm": 11.21714973449707, |
|
"learning_rate": 9.550112989463633e-06, |
|
"loss": 0.4253, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 2.2327249022164275, |
|
"grad_norm": 7.6962127685546875, |
|
"learning_rate": 9.5441987193626e-06, |
|
"loss": 0.4273, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 2.2408735332464147, |
|
"grad_norm": 13.219654083251953, |
|
"learning_rate": 9.538247683712567e-06, |
|
"loss": 0.4369, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.2490221642764014, |
|
"grad_norm": 5.536248683929443, |
|
"learning_rate": 9.532259930661315e-06, |
|
"loss": 0.4493, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 2.2571707953063886, |
|
"grad_norm": 7.112065315246582, |
|
"learning_rate": 9.526235508653694e-06, |
|
"loss": 0.4325, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 2.2653194263363754, |
|
"grad_norm": 6.064886093139648, |
|
"learning_rate": 9.520174466431235e-06, |
|
"loss": 0.4353, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 2.2734680573663626, |
|
"grad_norm": 7.9532318115234375, |
|
"learning_rate": 9.51407685303174e-06, |
|
"loss": 0.4358, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 2.2816166883963493, |
|
"grad_norm": 8.64626693725586, |
|
"learning_rate": 9.507942717788907e-06, |
|
"loss": 0.4489, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.2897653194263365, |
|
"grad_norm": 9.648942947387695, |
|
"learning_rate": 9.50177211033191e-06, |
|
"loss": 0.4498, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 2.2979139504563233, |
|
"grad_norm": 7.498199939727783, |
|
"learning_rate": 9.495565080585017e-06, |
|
"loss": 0.4086, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 2.3060625814863105, |
|
"grad_norm": 8.632119178771973, |
|
"learning_rate": 9.489321678767167e-06, |
|
"loss": 0.4207, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 2.3142112125162972, |
|
"grad_norm": 8.807448387145996, |
|
"learning_rate": 9.48304195539158e-06, |
|
"loss": 0.428, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 2.322359843546284, |
|
"grad_norm": 7.809271812438965, |
|
"learning_rate": 9.476725961265332e-06, |
|
"loss": 0.4546, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.330508474576271, |
|
"grad_norm": 8.758193969726562, |
|
"learning_rate": 9.470373747488966e-06, |
|
"loss": 0.432, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 2.3386571056062584, |
|
"grad_norm": 8.046852111816406, |
|
"learning_rate": 9.463985365456057e-06, |
|
"loss": 0.4169, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 2.346805736636245, |
|
"grad_norm": 12.665115356445312, |
|
"learning_rate": 9.457560866852805e-06, |
|
"loss": 0.4242, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 2.354954367666232, |
|
"grad_norm": 10.333826065063477, |
|
"learning_rate": 9.45110030365762e-06, |
|
"loss": 0.4603, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 2.363102998696219, |
|
"grad_norm": 8.857953071594238, |
|
"learning_rate": 9.444603728140698e-06, |
|
"loss": 0.454, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.371251629726206, |
|
"grad_norm": 9.135393142700195, |
|
"learning_rate": 9.438071192863596e-06, |
|
"loss": 0.4574, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 2.379400260756193, |
|
"grad_norm": 6.3214921951293945, |
|
"learning_rate": 9.43150275067881e-06, |
|
"loss": 0.4299, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 2.38754889178618, |
|
"grad_norm": 7.322382926940918, |
|
"learning_rate": 9.42489845472935e-06, |
|
"loss": 0.4265, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 2.395697522816167, |
|
"grad_norm": 11.1491060256958, |
|
"learning_rate": 9.418258358448298e-06, |
|
"loss": 0.4233, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"grad_norm": 7.171163082122803, |
|
"learning_rate": 9.411582515558391e-06, |
|
"loss": 0.4271, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.411994784876141, |
|
"grad_norm": 5.758033275604248, |
|
"learning_rate": 9.404870980071579e-06, |
|
"loss": 0.4463, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 2.4201434159061277, |
|
"grad_norm": 11.001411437988281, |
|
"learning_rate": 9.398123806288588e-06, |
|
"loss": 0.42, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 2.428292046936115, |
|
"grad_norm": 6.28535795211792, |
|
"learning_rate": 9.39134104879848e-06, |
|
"loss": 0.4188, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 2.4364406779661016, |
|
"grad_norm": 6.2432861328125, |
|
"learning_rate": 9.38452276247821e-06, |
|
"loss": 0.4242, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 2.444589308996089, |
|
"grad_norm": 9.474976539611816, |
|
"learning_rate": 9.377669002492193e-06, |
|
"loss": 0.43, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.4527379400260756, |
|
"grad_norm": 7.984436988830566, |
|
"learning_rate": 9.37077982429184e-06, |
|
"loss": 0.4328, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 2.4608865710560623, |
|
"grad_norm": 8.237207412719727, |
|
"learning_rate": 9.363855283615124e-06, |
|
"loss": 0.4166, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 2.4690352020860495, |
|
"grad_norm": 7.6592936515808105, |
|
"learning_rate": 9.356895436486122e-06, |
|
"loss": 0.4253, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 2.4771838331160367, |
|
"grad_norm": 5.206706523895264, |
|
"learning_rate": 9.349900339214564e-06, |
|
"loss": 0.4414, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 2.4853324641460235, |
|
"grad_norm": 10.161866188049316, |
|
"learning_rate": 9.342870048395376e-06, |
|
"loss": 0.415, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.4934810951760102, |
|
"grad_norm": 4.225031852722168, |
|
"learning_rate": 9.335804620908222e-06, |
|
"loss": 0.4243, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 2.5016297262059974, |
|
"grad_norm": 7.489659786224365, |
|
"learning_rate": 9.328704113917046e-06, |
|
"loss": 0.4417, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 2.509778357235984, |
|
"grad_norm": 8.180109977722168, |
|
"learning_rate": 9.32156858486961e-06, |
|
"loss": 0.4217, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 2.5179269882659714, |
|
"grad_norm": 9.16032886505127, |
|
"learning_rate": 9.314398091497024e-06, |
|
"loss": 0.4297, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 2.526075619295958, |
|
"grad_norm": 8.16234302520752, |
|
"learning_rate": 9.307192691813285e-06, |
|
"loss": 0.4319, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.5342242503259453, |
|
"grad_norm": 10.111699104309082, |
|
"learning_rate": 9.299952444114802e-06, |
|
"loss": 0.4186, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 6.305666923522949, |
|
"learning_rate": 9.29267740697993e-06, |
|
"loss": 0.4382, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 2.5505215123859193, |
|
"grad_norm": 9.985565185546875, |
|
"learning_rate": 9.285367639268492e-06, |
|
"loss": 0.4272, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 2.558670143415906, |
|
"grad_norm": 10.670126914978027, |
|
"learning_rate": 9.278023200121305e-06, |
|
"loss": 0.4228, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 2.5668187744458932, |
|
"grad_norm": 7.42661714553833, |
|
"learning_rate": 9.2706441489597e-06, |
|
"loss": 0.4314, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.57496740547588, |
|
"grad_norm": 6.457535266876221, |
|
"learning_rate": 9.263230545485044e-06, |
|
"loss": 0.4401, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 2.583116036505867, |
|
"grad_norm": 11.822875022888184, |
|
"learning_rate": 9.25578244967825e-06, |
|
"loss": 0.3865, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 2.591264667535854, |
|
"grad_norm": 12.4473295211792, |
|
"learning_rate": 9.2482999217993e-06, |
|
"loss": 0.4272, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 2.5994132985658407, |
|
"grad_norm": 5.283376693725586, |
|
"learning_rate": 9.240783022386757e-06, |
|
"loss": 0.4084, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 2.607561929595828, |
|
"grad_norm": 8.190621376037598, |
|
"learning_rate": 9.233231812257266e-06, |
|
"loss": 0.4257, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.615710560625815, |
|
"grad_norm": 6.570192813873291, |
|
"learning_rate": 9.225646352505071e-06, |
|
"loss": 0.4464, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 2.623859191655802, |
|
"grad_norm": 10.470175743103027, |
|
"learning_rate": 9.218026704501519e-06, |
|
"loss": 0.4245, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 2.6320078226857886, |
|
"grad_norm": 7.662964820861816, |
|
"learning_rate": 9.210372929894561e-06, |
|
"loss": 0.4265, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 2.640156453715776, |
|
"grad_norm": 7.74278450012207, |
|
"learning_rate": 9.202685090608256e-06, |
|
"loss": 0.4293, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 2.648305084745763, |
|
"grad_norm": 6.661880970001221, |
|
"learning_rate": 9.194963248842266e-06, |
|
"loss": 0.4592, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.6564537157757497, |
|
"grad_norm": 8.020112991333008, |
|
"learning_rate": 9.18720746707136e-06, |
|
"loss": 0.4229, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 2.6646023468057365, |
|
"grad_norm": 5.921052932739258, |
|
"learning_rate": 9.179417808044897e-06, |
|
"loss": 0.4141, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 2.6727509778357237, |
|
"grad_norm": 10.444842338562012, |
|
"learning_rate": 9.17159433478633e-06, |
|
"loss": 0.4437, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 2.6808996088657104, |
|
"grad_norm": 7.524814605712891, |
|
"learning_rate": 9.163737110592697e-06, |
|
"loss": 0.4128, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 2.6890482398956976, |
|
"grad_norm": 10.936373710632324, |
|
"learning_rate": 9.155846199034086e-06, |
|
"loss": 0.4273, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.6971968709256844, |
|
"grad_norm": 7.02941370010376, |
|
"learning_rate": 9.147921663953157e-06, |
|
"loss": 0.4433, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 2.7053455019556716, |
|
"grad_norm": 10.595579147338867, |
|
"learning_rate": 9.139963569464593e-06, |
|
"loss": 0.4264, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 2.7134941329856583, |
|
"grad_norm": 5.312283992767334, |
|
"learning_rate": 9.131971979954603e-06, |
|
"loss": 0.4149, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 2.7216427640156455, |
|
"grad_norm": 7.464469909667969, |
|
"learning_rate": 9.123946960080387e-06, |
|
"loss": 0.4368, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 2.7297913950456323, |
|
"grad_norm": 7.507636547088623, |
|
"learning_rate": 9.115888574769623e-06, |
|
"loss": 0.4344, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.737940026075619, |
|
"grad_norm": 7.984206676483154, |
|
"learning_rate": 9.107796889219933e-06, |
|
"loss": 0.4165, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 2.7460886571056062, |
|
"grad_norm": 9.600481986999512, |
|
"learning_rate": 9.099671968898362e-06, |
|
"loss": 0.4212, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 2.7542372881355934, |
|
"grad_norm": 6.417558670043945, |
|
"learning_rate": 9.091513879540845e-06, |
|
"loss": 0.41, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 2.76238591916558, |
|
"grad_norm": 7.52598762512207, |
|
"learning_rate": 9.08332268715168e-06, |
|
"loss": 0.4443, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 2.770534550195567, |
|
"grad_norm": 8.766283988952637, |
|
"learning_rate": 9.075098458002988e-06, |
|
"loss": 0.4552, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.778683181225554, |
|
"grad_norm": 7.127804756164551, |
|
"learning_rate": 9.066841258634177e-06, |
|
"loss": 0.426, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 2.7868318122555413, |
|
"grad_norm": 8.190874099731445, |
|
"learning_rate": 9.058551155851405e-06, |
|
"loss": 0.4374, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 2.794980443285528, |
|
"grad_norm": 7.887624740600586, |
|
"learning_rate": 9.050228216727046e-06, |
|
"loss": 0.437, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 2.803129074315515, |
|
"grad_norm": 10.439249038696289, |
|
"learning_rate": 9.041872508599136e-06, |
|
"loss": 0.4165, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 2.811277705345502, |
|
"grad_norm": 9.891864776611328, |
|
"learning_rate": 9.033484099070839e-06, |
|
"loss": 0.4336, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.819426336375489, |
|
"grad_norm": 10.03987979888916, |
|
"learning_rate": 9.025063056009886e-06, |
|
"loss": 0.4365, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 2.827574967405476, |
|
"grad_norm": 6.188653469085693, |
|
"learning_rate": 9.016609447548046e-06, |
|
"loss": 0.41, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 2.8357235984354627, |
|
"grad_norm": 11.486917495727539, |
|
"learning_rate": 9.008123342080553e-06, |
|
"loss": 0.4343, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 2.84387222946545, |
|
"grad_norm": 9.972556114196777, |
|
"learning_rate": 8.99960480826557e-06, |
|
"loss": 0.4282, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 2.8520208604954367, |
|
"grad_norm": 7.771157741546631, |
|
"learning_rate": 8.991053915023625e-06, |
|
"loss": 0.4086, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.860169491525424, |
|
"grad_norm": 5.989213943481445, |
|
"learning_rate": 8.982470731537054e-06, |
|
"loss": 0.4647, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 2.8683181225554106, |
|
"grad_norm": 7.19948148727417, |
|
"learning_rate": 8.973855327249442e-06, |
|
"loss": 0.4086, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 2.8764667535853974, |
|
"grad_norm": 7.22706937789917, |
|
"learning_rate": 8.965207771865061e-06, |
|
"loss": 0.4225, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 2.8846153846153846, |
|
"grad_norm": 11.344962120056152, |
|
"learning_rate": 8.95652813534831e-06, |
|
"loss": 0.4275, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 2.8927640156453718, |
|
"grad_norm": 10.637499809265137, |
|
"learning_rate": 8.947816487923143e-06, |
|
"loss": 0.4347, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.9009126466753585, |
|
"grad_norm": 7.946286678314209, |
|
"learning_rate": 8.939072900072501e-06, |
|
"loss": 0.4218, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 2.9090612777053453, |
|
"grad_norm": 6.058999061584473, |
|
"learning_rate": 8.930297442537747e-06, |
|
"loss": 0.4212, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 2.9172099087353325, |
|
"grad_norm": 10.35421371459961, |
|
"learning_rate": 8.921490186318092e-06, |
|
"loss": 0.4028, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.9253585397653197, |
|
"grad_norm": 8.85345458984375, |
|
"learning_rate": 8.912651202670013e-06, |
|
"loss": 0.4455, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.9335071707953064, |
|
"grad_norm": 7.476600646972656, |
|
"learning_rate": 8.90378056310669e-06, |
|
"loss": 0.4212, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.941655801825293, |
|
"grad_norm": 8.27695369720459, |
|
"learning_rate": 8.894878339397416e-06, |
|
"loss": 0.4186, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.9498044328552804, |
|
"grad_norm": 8.344620704650879, |
|
"learning_rate": 8.885944603567023e-06, |
|
"loss": 0.4242, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.957953063885267, |
|
"grad_norm": 8.976387023925781, |
|
"learning_rate": 8.876979427895291e-06, |
|
"loss": 0.4359, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.9661016949152543, |
|
"grad_norm": 10.581543922424316, |
|
"learning_rate": 8.867982884916377e-06, |
|
"loss": 0.4171, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.974250325945241, |
|
"grad_norm": 6.423446178436279, |
|
"learning_rate": 8.858955047418217e-06, |
|
"loss": 0.4248, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.9823989569752283, |
|
"grad_norm": 6.647116184234619, |
|
"learning_rate": 8.849895988441933e-06, |
|
"loss": 0.4272, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.990547588005215, |
|
"grad_norm": 11.199699401855469, |
|
"learning_rate": 8.840805781281261e-06, |
|
"loss": 0.4336, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.9986962190352022, |
|
"grad_norm": 6.946083068847656, |
|
"learning_rate": 8.831684499481941e-06, |
|
"loss": 0.4278, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8088353413654619, |
|
"eval_loss": 0.5133101940155029, |
|
"eval_runtime": 6.8742, |
|
"eval_samples_per_second": 362.222, |
|
"eval_steps_per_second": 45.387, |
|
"step": 36816 |
|
}, |
|
{ |
|
"epoch": 3.006844850065189, |
|
"grad_norm": 8.117693901062012, |
|
"learning_rate": 8.822532216841124e-06, |
|
"loss": 0.3563, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 3.014993481095176, |
|
"grad_norm": 8.939483642578125, |
|
"learning_rate": 8.813349007406785e-06, |
|
"loss": 0.3693, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.023142112125163, |
|
"grad_norm": 5.619213104248047, |
|
"learning_rate": 8.80413494547711e-06, |
|
"loss": 0.359, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 3.03129074315515, |
|
"grad_norm": 7.458463191986084, |
|
"learning_rate": 8.794890105599905e-06, |
|
"loss": 0.3631, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 3.039439374185137, |
|
"grad_norm": 8.206454277038574, |
|
"learning_rate": 8.785614562571991e-06, |
|
"loss": 0.3513, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 3.047588005215124, |
|
"grad_norm": 8.663100242614746, |
|
"learning_rate": 8.776308391438597e-06, |
|
"loss": 0.3348, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 3.055736636245111, |
|
"grad_norm": 8.638208389282227, |
|
"learning_rate": 8.766971667492754e-06, |
|
"loss": 0.3618, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.0638852672750976, |
|
"grad_norm": 8.416321754455566, |
|
"learning_rate": 8.757604466274683e-06, |
|
"loss": 0.3671, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 3.0720338983050848, |
|
"grad_norm": 10.002084732055664, |
|
"learning_rate": 8.748206863571188e-06, |
|
"loss": 0.3462, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 3.0801825293350715, |
|
"grad_norm": 8.242202758789062, |
|
"learning_rate": 8.73877893541504e-06, |
|
"loss": 0.3524, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 3.0883311603650587, |
|
"grad_norm": 9.762850761413574, |
|
"learning_rate": 8.729320758084363e-06, |
|
"loss": 0.3844, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 3.0964797913950455, |
|
"grad_norm": 13.008197784423828, |
|
"learning_rate": 8.719832408102017e-06, |
|
"loss": 0.3489, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.1046284224250327, |
|
"grad_norm": 9.61468505859375, |
|
"learning_rate": 8.71031396223498e-06, |
|
"loss": 0.3386, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 3.1127770534550194, |
|
"grad_norm": 9.158555030822754, |
|
"learning_rate": 8.700765497493723e-06, |
|
"loss": 0.3542, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 3.1209256844850066, |
|
"grad_norm": 11.94726276397705, |
|
"learning_rate": 8.69118709113159e-06, |
|
"loss": 0.3591, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 3.1290743155149934, |
|
"grad_norm": 9.813300132751465, |
|
"learning_rate": 8.681578820644173e-06, |
|
"loss": 0.3625, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 3.1372229465449806, |
|
"grad_norm": 8.50658130645752, |
|
"learning_rate": 8.671940763768682e-06, |
|
"loss": 0.3789, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.1453715775749673, |
|
"grad_norm": 6.037990570068359, |
|
"learning_rate": 8.662272998483323e-06, |
|
"loss": 0.3635, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 3.1535202086049545, |
|
"grad_norm": 11.817001342773438, |
|
"learning_rate": 8.65257560300666e-06, |
|
"loss": 0.3526, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 3.1616688396349413, |
|
"grad_norm": 4.690389156341553, |
|
"learning_rate": 8.642848655796985e-06, |
|
"loss": 0.3634, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 3.1698174706649285, |
|
"grad_norm": 12.257222175598145, |
|
"learning_rate": 8.633092235551679e-06, |
|
"loss": 0.3626, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 3.1779661016949152, |
|
"grad_norm": 7.710871696472168, |
|
"learning_rate": 8.623306421206588e-06, |
|
"loss": 0.3571, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.1861147327249024, |
|
"grad_norm": 6.811945915222168, |
|
"learning_rate": 8.613491291935365e-06, |
|
"loss": 0.351, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 3.194263363754889, |
|
"grad_norm": 19.7229061126709, |
|
"learning_rate": 8.60364692714885e-06, |
|
"loss": 0.3348, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 3.2024119947848764, |
|
"grad_norm": 9.32421875, |
|
"learning_rate": 8.59377340649441e-06, |
|
"loss": 0.3437, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 3.210560625814863, |
|
"grad_norm": 9.309675216674805, |
|
"learning_rate": 8.583870809855306e-06, |
|
"loss": 0.3687, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 3.21870925684485, |
|
"grad_norm": 5.458558559417725, |
|
"learning_rate": 8.573939217350043e-06, |
|
"loss": 0.3584, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.226857887874837, |
|
"grad_norm": 8.717120170593262, |
|
"learning_rate": 8.563978709331717e-06, |
|
"loss": 0.3473, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 3.235006518904824, |
|
"grad_norm": 6.542947769165039, |
|
"learning_rate": 8.553989366387376e-06, |
|
"loss": 0.3806, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 3.243155149934811, |
|
"grad_norm": 11.504007339477539, |
|
"learning_rate": 8.543971269337355e-06, |
|
"loss": 0.3606, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 3.2513037809647978, |
|
"grad_norm": 9.393417358398438, |
|
"learning_rate": 8.533924499234633e-06, |
|
"loss": 0.3532, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 3.259452411994785, |
|
"grad_norm": 8.129273414611816, |
|
"learning_rate": 8.523849137364175e-06, |
|
"loss": 0.3473, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.2676010430247717, |
|
"grad_norm": 12.241875648498535, |
|
"learning_rate": 8.513745265242263e-06, |
|
"loss": 0.3576, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 3.275749674054759, |
|
"grad_norm": 9.895030975341797, |
|
"learning_rate": 8.503612964615858e-06, |
|
"loss": 0.3458, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 3.2838983050847457, |
|
"grad_norm": 5.42219877243042, |
|
"learning_rate": 8.493452317461914e-06, |
|
"loss": 0.3772, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 3.292046936114733, |
|
"grad_norm": 8.165868759155273, |
|
"learning_rate": 8.483263405986735e-06, |
|
"loss": 0.3561, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 3.3001955671447196, |
|
"grad_norm": 13.24457836151123, |
|
"learning_rate": 8.4730463126253e-06, |
|
"loss": 0.3587, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.308344198174707, |
|
"grad_norm": 12.287585258483887, |
|
"learning_rate": 8.462801120040595e-06, |
|
"loss": 0.3432, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 3.3164928292046936, |
|
"grad_norm": 8.932402610778809, |
|
"learning_rate": 8.452527911122953e-06, |
|
"loss": 0.3696, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 3.3246414602346808, |
|
"grad_norm": 8.847443580627441, |
|
"learning_rate": 8.442226768989373e-06, |
|
"loss": 0.362, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 3.3327900912646675, |
|
"grad_norm": 13.20019245147705, |
|
"learning_rate": 8.431897776982851e-06, |
|
"loss": 0.3543, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 3.3409387222946547, |
|
"grad_norm": 8.375232696533203, |
|
"learning_rate": 8.421541018671712e-06, |
|
"loss": 0.3741, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.3490873533246415, |
|
"grad_norm": 7.601521968841553, |
|
"learning_rate": 8.411156577848927e-06, |
|
"loss": 0.3518, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 3.3572359843546282, |
|
"grad_norm": 5.853700637817383, |
|
"learning_rate": 8.400744538531431e-06, |
|
"loss": 0.3556, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 3.3653846153846154, |
|
"grad_norm": 15.7562837600708, |
|
"learning_rate": 8.390304984959455e-06, |
|
"loss": 0.3591, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 3.373533246414602, |
|
"grad_norm": 7.048288822174072, |
|
"learning_rate": 8.379838001595837e-06, |
|
"loss": 0.3774, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 3.3816818774445894, |
|
"grad_norm": 8.532382011413574, |
|
"learning_rate": 8.369343673125339e-06, |
|
"loss": 0.3482, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"grad_norm": 5.468735218048096, |
|
"learning_rate": 8.358822084453964e-06, |
|
"loss": 0.3637, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 3.3979791395045633, |
|
"grad_norm": 7.324248313903809, |
|
"learning_rate": 8.348273320708269e-06, |
|
"loss": 0.365, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 3.40612777053455, |
|
"grad_norm": 8.06946849822998, |
|
"learning_rate": 8.33769746723467e-06, |
|
"loss": 0.3661, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 3.4142764015645373, |
|
"grad_norm": 11.85434341430664, |
|
"learning_rate": 8.32709460959876e-06, |
|
"loss": 0.3542, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 3.422425032594524, |
|
"grad_norm": 8.629081726074219, |
|
"learning_rate": 8.316464833584618e-06, |
|
"loss": 0.3476, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.430573663624511, |
|
"grad_norm": 7.888760566711426, |
|
"learning_rate": 8.305808225194103e-06, |
|
"loss": 0.3752, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 3.438722294654498, |
|
"grad_norm": 8.756083488464355, |
|
"learning_rate": 8.295124870646168e-06, |
|
"loss": 0.359, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 3.446870925684485, |
|
"grad_norm": 8.682005882263184, |
|
"learning_rate": 8.284414856376161e-06, |
|
"loss": 0.3607, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 3.455019556714472, |
|
"grad_norm": 14.85304069519043, |
|
"learning_rate": 8.273678269035126e-06, |
|
"loss": 0.3417, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 3.463168187744459, |
|
"grad_norm": 10.479057312011719, |
|
"learning_rate": 8.262915195489097e-06, |
|
"loss": 0.3571, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.471316818774446, |
|
"grad_norm": 9.107665061950684, |
|
"learning_rate": 8.2521257228184e-06, |
|
"loss": 0.3655, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 3.479465449804433, |
|
"grad_norm": 10.01933765411377, |
|
"learning_rate": 8.241309938316947e-06, |
|
"loss": 0.363, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 3.48761408083442, |
|
"grad_norm": 7.9999189376831055, |
|
"learning_rate": 8.230467929491533e-06, |
|
"loss": 0.3753, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 3.4957627118644066, |
|
"grad_norm": 9.211396217346191, |
|
"learning_rate": 8.219599784061124e-06, |
|
"loss": 0.3389, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 3.5039113428943938, |
|
"grad_norm": 9.140076637268066, |
|
"learning_rate": 8.20870558995614e-06, |
|
"loss": 0.3683, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.512059973924381, |
|
"grad_norm": 9.534440040588379, |
|
"learning_rate": 8.197785435317766e-06, |
|
"loss": 0.3585, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 3.5202086049543677, |
|
"grad_norm": 10.818157196044922, |
|
"learning_rate": 8.186839408497213e-06, |
|
"loss": 0.3546, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 3.5283572359843545, |
|
"grad_norm": 11.54218578338623, |
|
"learning_rate": 8.175867598055021e-06, |
|
"loss": 0.3818, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 3.5365058670143417, |
|
"grad_norm": 10.037505149841309, |
|
"learning_rate": 8.164870092760336e-06, |
|
"loss": 0.347, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 3.5446544980443284, |
|
"grad_norm": 11.143013000488281, |
|
"learning_rate": 8.153846981590191e-06, |
|
"loss": 0.3633, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.5528031290743156, |
|
"grad_norm": 9.558606147766113, |
|
"learning_rate": 8.142798353728786e-06, |
|
"loss": 0.373, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 3.5609517601043024, |
|
"grad_norm": 13.201570510864258, |
|
"learning_rate": 8.131724298566767e-06, |
|
"loss": 0.3611, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 3.5691003911342896, |
|
"grad_norm": 10.490971565246582, |
|
"learning_rate": 8.120624905700511e-06, |
|
"loss": 0.3292, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 3.5772490221642763, |
|
"grad_norm": 3.778831958770752, |
|
"learning_rate": 8.109500264931387e-06, |
|
"loss": 0.3731, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 3.5853976531942635, |
|
"grad_norm": 10.723892211914062, |
|
"learning_rate": 8.098350466265034e-06, |
|
"loss": 0.3783, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.5935462842242503, |
|
"grad_norm": 9.849285125732422, |
|
"learning_rate": 8.087175599910642e-06, |
|
"loss": 0.337, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 3.601694915254237, |
|
"grad_norm": 11.700067520141602, |
|
"learning_rate": 8.07597575628021e-06, |
|
"loss": 0.3639, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 3.609843546284224, |
|
"grad_norm": 37.506065368652344, |
|
"learning_rate": 8.064751025987822e-06, |
|
"loss": 0.3644, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 3.6179921773142114, |
|
"grad_norm": 9.770977973937988, |
|
"learning_rate": 8.053501499848907e-06, |
|
"loss": 0.3838, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 3.626140808344198, |
|
"grad_norm": 14.631871223449707, |
|
"learning_rate": 8.042227268879516e-06, |
|
"loss": 0.3732, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.634289439374185, |
|
"grad_norm": 7.656193256378174, |
|
"learning_rate": 8.030928424295572e-06, |
|
"loss": 0.358, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 3.642438070404172, |
|
"grad_norm": 9.974722862243652, |
|
"learning_rate": 8.019605057512144e-06, |
|
"loss": 0.3588, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 3.6505867014341593, |
|
"grad_norm": 12.311222076416016, |
|
"learning_rate": 8.008257260142693e-06, |
|
"loss": 0.362, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 3.658735332464146, |
|
"grad_norm": 11.374334335327148, |
|
"learning_rate": 7.99688512399835e-06, |
|
"loss": 0.385, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 3.666883963494133, |
|
"grad_norm": 7.951153755187988, |
|
"learning_rate": 7.985488741087153e-06, |
|
"loss": 0.352, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.67503259452412, |
|
"grad_norm": 5.6287384033203125, |
|
"learning_rate": 7.97406820361332e-06, |
|
"loss": 0.3763, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 3.6831812255541068, |
|
"grad_norm": 9.33438777923584, |
|
"learning_rate": 7.962623603976491e-06, |
|
"loss": 0.3852, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 3.691329856584094, |
|
"grad_norm": 12.365875244140625, |
|
"learning_rate": 7.951155034770983e-06, |
|
"loss": 0.3775, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 3.6994784876140807, |
|
"grad_norm": 9.91942024230957, |
|
"learning_rate": 7.93966258878505e-06, |
|
"loss": 0.3678, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 3.707627118644068, |
|
"grad_norm": 9.160215377807617, |
|
"learning_rate": 7.928146359000117e-06, |
|
"loss": 0.36, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.7157757496740547, |
|
"grad_norm": 11.565260887145996, |
|
"learning_rate": 7.91660643859004e-06, |
|
"loss": 0.3531, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 3.723924380704042, |
|
"grad_norm": 4.027003765106201, |
|
"learning_rate": 7.905042920920344e-06, |
|
"loss": 0.3722, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 3.7320730117340286, |
|
"grad_norm": 13.809627532958984, |
|
"learning_rate": 7.893455899547476e-06, |
|
"loss": 0.3524, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 3.740221642764016, |
|
"grad_norm": 13.452054023742676, |
|
"learning_rate": 7.881845468218039e-06, |
|
"loss": 0.375, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 3.7483702737940026, |
|
"grad_norm": 9.63260269165039, |
|
"learning_rate": 7.87021172086804e-06, |
|
"loss": 0.3636, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.7565189048239898, |
|
"grad_norm": 8.539379119873047, |
|
"learning_rate": 7.85855475162213e-06, |
|
"loss": 0.3687, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 3.7646675358539765, |
|
"grad_norm": 7.635307788848877, |
|
"learning_rate": 7.846874654792835e-06, |
|
"loss": 0.3709, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 3.7728161668839633, |
|
"grad_norm": 8.707938194274902, |
|
"learning_rate": 7.835171524879805e-06, |
|
"loss": 0.3466, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 3.7809647979139505, |
|
"grad_norm": 6.248547077178955, |
|
"learning_rate": 7.823445456569036e-06, |
|
"loss": 0.3706, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 3.7891134289439377, |
|
"grad_norm": 11.434155464172363, |
|
"learning_rate": 7.811696544732115e-06, |
|
"loss": 0.3907, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.7972620599739244, |
|
"grad_norm": 5.250894546508789, |
|
"learning_rate": 7.799924884425447e-06, |
|
"loss": 0.377, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 3.805410691003911, |
|
"grad_norm": 6.875328063964844, |
|
"learning_rate": 7.788130570889488e-06, |
|
"loss": 0.3569, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 3.8135593220338984, |
|
"grad_norm": 8.773159980773926, |
|
"learning_rate": 7.776313699547971e-06, |
|
"loss": 0.3635, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 3.821707953063885, |
|
"grad_norm": 4.8134002685546875, |
|
"learning_rate": 7.764474366007138e-06, |
|
"loss": 0.345, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 3.8298565840938723, |
|
"grad_norm": 6.085391998291016, |
|
"learning_rate": 7.752612666054963e-06, |
|
"loss": 0.3699, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.838005215123859, |
|
"grad_norm": 8.958887100219727, |
|
"learning_rate": 7.740728695660389e-06, |
|
"loss": 0.3407, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 6.2288994789123535, |
|
"learning_rate": 7.728822550972523e-06, |
|
"loss": 0.3633, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 3.854302477183833, |
|
"grad_norm": 9.540541648864746, |
|
"learning_rate": 7.716894328319893e-06, |
|
"loss": 0.3476, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 3.86245110821382, |
|
"grad_norm": 5.929731369018555, |
|
"learning_rate": 7.704944124209645e-06, |
|
"loss": 0.3929, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 3.870599739243807, |
|
"grad_norm": 7.797017574310303, |
|
"learning_rate": 7.692972035326772e-06, |
|
"loss": 0.3728, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.878748370273794, |
|
"grad_norm": 14.781734466552734, |
|
"learning_rate": 7.680978158533324e-06, |
|
"loss": 0.3546, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 3.886897001303781, |
|
"grad_norm": 10.41878890991211, |
|
"learning_rate": 7.668962590867636e-06, |
|
"loss": 0.3603, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 3.895045632333768, |
|
"grad_norm": 8.300308227539062, |
|
"learning_rate": 7.656925429543531e-06, |
|
"loss": 0.3546, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 3.903194263363755, |
|
"grad_norm": 9.709467887878418, |
|
"learning_rate": 7.644866771949544e-06, |
|
"loss": 0.3575, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 3.9113428943937416, |
|
"grad_norm": 7.606164455413818, |
|
"learning_rate": 7.632786715648128e-06, |
|
"loss": 0.3658, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.919491525423729, |
|
"grad_norm": 11.461851119995117, |
|
"learning_rate": 7.62068535837486e-06, |
|
"loss": 0.3653, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 3.927640156453716, |
|
"grad_norm": 11.35883617401123, |
|
"learning_rate": 7.608562798037662e-06, |
|
"loss": 0.3672, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 3.9357887874837028, |
|
"grad_norm": 9.994701385498047, |
|
"learning_rate": 7.596419132715997e-06, |
|
"loss": 0.3601, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 3.9439374185136895, |
|
"grad_norm": 12.242551803588867, |
|
"learning_rate": 7.584254460660092e-06, |
|
"loss": 0.3552, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 3.9520860495436767, |
|
"grad_norm": 11.628976821899414, |
|
"learning_rate": 7.572068880290118e-06, |
|
"loss": 0.3644, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 3.960234680573664, |
|
"grad_norm": 9.713350296020508, |
|
"learning_rate": 7.559862490195418e-06, |
|
"loss": 0.3463, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 3.9683833116036507, |
|
"grad_norm": 5.648345470428467, |
|
"learning_rate": 7.547635389133694e-06, |
|
"loss": 0.3483, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 3.9765319426336374, |
|
"grad_norm": 15.131999015808105, |
|
"learning_rate": 7.535387676030222e-06, |
|
"loss": 0.366, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 3.9846805736636246, |
|
"grad_norm": 8.72270393371582, |
|
"learning_rate": 7.523119449977028e-06, |
|
"loss": 0.3567, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 3.9928292046936114, |
|
"grad_norm": 10.733074188232422, |
|
"learning_rate": 7.510830810232112e-06, |
|
"loss": 0.37, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8188755020080322, |
|
"eval_loss": 0.529120922088623, |
|
"eval_runtime": 6.8942, |
|
"eval_samples_per_second": 361.175, |
|
"eval_steps_per_second": 45.256, |
|
"step": 49088 |
|
}, |
|
{ |
|
"epoch": 4.0009778357235986, |
|
"grad_norm": 7.13838529586792, |
|
"learning_rate": 7.498521856218637e-06, |
|
"loss": 0.355, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 4.009126466753585, |
|
"grad_norm": 5.439541816711426, |
|
"learning_rate": 7.486192687524112e-06, |
|
"loss": 0.3005, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 4.017275097783572, |
|
"grad_norm": 17.687950134277344, |
|
"learning_rate": 7.4738434038996e-06, |
|
"loss": 0.2864, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 4.02542372881356, |
|
"grad_norm": 11.162871360778809, |
|
"learning_rate": 7.461474105258911e-06, |
|
"loss": 0.3025, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 4.0335723598435465, |
|
"grad_norm": 9.104811668395996, |
|
"learning_rate": 7.449084891677785e-06, |
|
"loss": 0.2846, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.041720990873533, |
|
"grad_norm": 11.716981887817383, |
|
"learning_rate": 7.436675863393086e-06, |
|
"loss": 0.2984, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 4.04986962190352, |
|
"grad_norm": 6.521731376647949, |
|
"learning_rate": 7.424247120801997e-06, |
|
"loss": 0.2979, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 4.058018252933508, |
|
"grad_norm": 6.5696539878845215, |
|
"learning_rate": 7.4117987644611985e-06, |
|
"loss": 0.2898, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 4.066166883963494, |
|
"grad_norm": 9.98416805267334, |
|
"learning_rate": 7.399330895086061e-06, |
|
"loss": 0.3115, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 4.074315514993481, |
|
"grad_norm": 6.788928985595703, |
|
"learning_rate": 7.386843613549828e-06, |
|
"loss": 0.3158, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.082464146023468, |
|
"grad_norm": 9.002969741821289, |
|
"learning_rate": 7.374337020882798e-06, |
|
"loss": 0.2964, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 4.090612777053455, |
|
"grad_norm": 8.216889381408691, |
|
"learning_rate": 7.3618112182715115e-06, |
|
"loss": 0.3194, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 4.098761408083442, |
|
"grad_norm": 17.576051712036133, |
|
"learning_rate": 7.349266307057932e-06, |
|
"loss": 0.3093, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 4.106910039113429, |
|
"grad_norm": 14.113720893859863, |
|
"learning_rate": 7.336702388738619e-06, |
|
"loss": 0.2656, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 4.115058670143416, |
|
"grad_norm": 13.906309127807617, |
|
"learning_rate": 7.324119564963915e-06, |
|
"loss": 0.2977, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.1232073011734025, |
|
"grad_norm": 9.152776718139648, |
|
"learning_rate": 7.311517937537122e-06, |
|
"loss": 0.3067, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 4.13135593220339, |
|
"grad_norm": 10.242730140686035, |
|
"learning_rate": 7.29889760841367e-06, |
|
"loss": 0.301, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 4.139504563233377, |
|
"grad_norm": 11.567678451538086, |
|
"learning_rate": 7.2862586797003046e-06, |
|
"loss": 0.2997, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 4.147653194263364, |
|
"grad_norm": 6.842143535614014, |
|
"learning_rate": 7.27360125365425e-06, |
|
"loss": 0.3004, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 4.15580182529335, |
|
"grad_norm": 12.490499496459961, |
|
"learning_rate": 7.260925432682386e-06, |
|
"loss": 0.2959, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.163950456323338, |
|
"grad_norm": 7.078547477722168, |
|
"learning_rate": 7.248231319340422e-06, |
|
"loss": 0.2966, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 4.172099087353325, |
|
"grad_norm": 17.07299041748047, |
|
"learning_rate": 7.235519016332064e-06, |
|
"loss": 0.3241, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 4.1802477183833116, |
|
"grad_norm": 14.579496383666992, |
|
"learning_rate": 7.222788626508184e-06, |
|
"loss": 0.294, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 4.188396349413298, |
|
"grad_norm": 16.198028564453125, |
|
"learning_rate": 7.210040252865984e-06, |
|
"loss": 0.3049, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 4.196544980443286, |
|
"grad_norm": 12.001542091369629, |
|
"learning_rate": 7.197273998548174e-06, |
|
"loss": 0.2932, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.204693611473273, |
|
"grad_norm": 8.593428611755371, |
|
"learning_rate": 7.184489966842128e-06, |
|
"loss": 0.3147, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 4.2128422425032594, |
|
"grad_norm": 19.119985580444336, |
|
"learning_rate": 7.1716882611790475e-06, |
|
"loss": 0.2929, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 4.220990873533246, |
|
"grad_norm": 12.756973266601562, |
|
"learning_rate": 7.1588689851331305e-06, |
|
"loss": 0.2973, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 4.229139504563233, |
|
"grad_norm": 11.550286293029785, |
|
"learning_rate": 7.146032242420732e-06, |
|
"loss": 0.2996, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"grad_norm": 8.533171653747559, |
|
"learning_rate": 7.133178136899522e-06, |
|
"loss": 0.3094, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.245436766623207, |
|
"grad_norm": 11.978692054748535, |
|
"learning_rate": 7.120306772567647e-06, |
|
"loss": 0.3013, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 4.253585397653194, |
|
"grad_norm": 10.963492393493652, |
|
"learning_rate": 7.107418253562889e-06, |
|
"loss": 0.3081, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 4.261734028683181, |
|
"grad_norm": 11.645411491394043, |
|
"learning_rate": 7.0945126841618225e-06, |
|
"loss": 0.2867, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 4.2698826597131685, |
|
"grad_norm": 11.48385238647461, |
|
"learning_rate": 7.081590168778973e-06, |
|
"loss": 0.3088, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 4.278031290743155, |
|
"grad_norm": 10.083149909973145, |
|
"learning_rate": 7.068650811965967e-06, |
|
"loss": 0.2954, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.286179921773142, |
|
"grad_norm": 10.841811180114746, |
|
"learning_rate": 7.055694718410688e-06, |
|
"loss": 0.2944, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 4.294328552803129, |
|
"grad_norm": 12.332331657409668, |
|
"learning_rate": 7.042721992936438e-06, |
|
"loss": 0.2857, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 4.302477183833116, |
|
"grad_norm": 13.689620971679688, |
|
"learning_rate": 7.029732740501073e-06, |
|
"loss": 0.3024, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 4.310625814863103, |
|
"grad_norm": 13.064624786376953, |
|
"learning_rate": 7.016727066196168e-06, |
|
"loss": 0.2917, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 4.31877444589309, |
|
"grad_norm": 8.214381217956543, |
|
"learning_rate": 7.003705075246163e-06, |
|
"loss": 0.3173, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.326923076923077, |
|
"grad_norm": 14.797425270080566, |
|
"learning_rate": 6.990666873007506e-06, |
|
"loss": 0.2734, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 4.335071707953064, |
|
"grad_norm": 10.985969543457031, |
|
"learning_rate": 6.977612564967808e-06, |
|
"loss": 0.2958, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 4.343220338983051, |
|
"grad_norm": 12.808884620666504, |
|
"learning_rate": 6.964542256744986e-06, |
|
"loss": 0.3169, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 4.351368970013038, |
|
"grad_norm": 21.643781661987305, |
|
"learning_rate": 6.9514560540864095e-06, |
|
"loss": 0.3154, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 4.3595176010430245, |
|
"grad_norm": 7.609200477600098, |
|
"learning_rate": 6.938354062868041e-06, |
|
"loss": 0.2985, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.367666232073011, |
|
"grad_norm": 13.469466209411621, |
|
"learning_rate": 6.925236389093588e-06, |
|
"loss": 0.3063, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 4.375814863102999, |
|
"grad_norm": 12.873883247375488, |
|
"learning_rate": 6.912103138893636e-06, |
|
"loss": 0.2903, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 4.383963494132986, |
|
"grad_norm": 8.953607559204102, |
|
"learning_rate": 6.898954418524797e-06, |
|
"loss": 0.2897, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 4.3921121251629724, |
|
"grad_norm": 21.484949111938477, |
|
"learning_rate": 6.885790334368844e-06, |
|
"loss": 0.2989, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 4.400260756192959, |
|
"grad_norm": 8.624776840209961, |
|
"learning_rate": 6.872610992931857e-06, |
|
"loss": 0.2811, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.408409387222947, |
|
"grad_norm": 13.120560646057129, |
|
"learning_rate": 6.859416500843351e-06, |
|
"loss": 0.3003, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 4.416558018252934, |
|
"grad_norm": 8.616204261779785, |
|
"learning_rate": 6.846206964855426e-06, |
|
"loss": 0.3191, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 4.42470664928292, |
|
"grad_norm": 7.0158233642578125, |
|
"learning_rate": 6.832982491841894e-06, |
|
"loss": 0.31, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 4.432855280312907, |
|
"grad_norm": 9.716617584228516, |
|
"learning_rate": 6.819743188797419e-06, |
|
"loss": 0.2949, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 4.441003911342895, |
|
"grad_norm": 10.602276802062988, |
|
"learning_rate": 6.806489162836649e-06, |
|
"loss": 0.3037, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.4491525423728815, |
|
"grad_norm": 8.699592590332031, |
|
"learning_rate": 6.793220521193347e-06, |
|
"loss": 0.313, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 4.457301173402868, |
|
"grad_norm": 8.307058334350586, |
|
"learning_rate": 6.779937371219532e-06, |
|
"loss": 0.2924, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 4.465449804432855, |
|
"grad_norm": 10.045998573303223, |
|
"learning_rate": 6.766639820384602e-06, |
|
"loss": 0.3124, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 4.473598435462843, |
|
"grad_norm": 15.478697776794434, |
|
"learning_rate": 6.753327976274467e-06, |
|
"loss": 0.2892, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 4.481747066492829, |
|
"grad_norm": 12.46609878540039, |
|
"learning_rate": 6.740001946590675e-06, |
|
"loss": 0.2809, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.489895697522816, |
|
"grad_norm": 11.292198181152344, |
|
"learning_rate": 6.726661839149556e-06, |
|
"loss": 0.2915, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 4.498044328552803, |
|
"grad_norm": 15.23190689086914, |
|
"learning_rate": 6.71330776188133e-06, |
|
"loss": 0.306, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 4.5061929595827905, |
|
"grad_norm": 11.232503890991211, |
|
"learning_rate": 6.69993982282924e-06, |
|
"loss": 0.2979, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 4.514341590612777, |
|
"grad_norm": 11.436495780944824, |
|
"learning_rate": 6.686558130148687e-06, |
|
"loss": 0.2976, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 4.522490221642764, |
|
"grad_norm": 11.90659236907959, |
|
"learning_rate": 6.673162792106341e-06, |
|
"loss": 0.3106, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.530638852672751, |
|
"grad_norm": 9.979248046875, |
|
"learning_rate": 6.6597539170792795e-06, |
|
"loss": 0.2948, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 4.5387874837027375, |
|
"grad_norm": 19.104442596435547, |
|
"learning_rate": 6.646331613554094e-06, |
|
"loss": 0.3248, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 4.546936114732725, |
|
"grad_norm": 9.139418601989746, |
|
"learning_rate": 6.632895990126028e-06, |
|
"loss": 0.2996, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 4.555084745762712, |
|
"grad_norm": 9.373650550842285, |
|
"learning_rate": 6.619447155498091e-06, |
|
"loss": 0.3127, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 4.563233376792699, |
|
"grad_norm": 12.213810920715332, |
|
"learning_rate": 6.605985218480179e-06, |
|
"loss": 0.3113, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.5713820078226854, |
|
"grad_norm": 9.15962028503418, |
|
"learning_rate": 6.5925102879881915e-06, |
|
"loss": 0.311, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 4.579530638852673, |
|
"grad_norm": 11.712223052978516, |
|
"learning_rate": 6.579022473043159e-06, |
|
"loss": 0.3074, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 4.58767926988266, |
|
"grad_norm": 9.559146881103516, |
|
"learning_rate": 6.565521882770355e-06, |
|
"loss": 0.3065, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 4.595827900912647, |
|
"grad_norm": 8.07590389251709, |
|
"learning_rate": 6.552008626398409e-06, |
|
"loss": 0.3195, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 4.603976531942633, |
|
"grad_norm": 13.063721656799316, |
|
"learning_rate": 6.5384828132584335e-06, |
|
"loss": 0.2778, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.612125162972621, |
|
"grad_norm": 13.26430892944336, |
|
"learning_rate": 6.524944552783129e-06, |
|
"loss": 0.3081, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 4.620273794002608, |
|
"grad_norm": 14.221997261047363, |
|
"learning_rate": 6.511393954505906e-06, |
|
"loss": 0.3072, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 4.6284224250325945, |
|
"grad_norm": 10.34438705444336, |
|
"learning_rate": 6.497831128059993e-06, |
|
"loss": 0.3078, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 4.636571056062581, |
|
"grad_norm": 15.65034294128418, |
|
"learning_rate": 6.4842561831775575e-06, |
|
"loss": 0.3035, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 4.644719687092568, |
|
"grad_norm": 10.238895416259766, |
|
"learning_rate": 6.470669229688809e-06, |
|
"loss": 0.2962, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.652868318122556, |
|
"grad_norm": 16.671092987060547, |
|
"learning_rate": 6.457070377521111e-06, |
|
"loss": 0.307, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 4.661016949152542, |
|
"grad_norm": 11.118473052978516, |
|
"learning_rate": 6.443459736698106e-06, |
|
"loss": 0.3079, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 4.669165580182529, |
|
"grad_norm": 7.511115550994873, |
|
"learning_rate": 6.429837417338804e-06, |
|
"loss": 0.2959, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 4.677314211212517, |
|
"grad_norm": 14.2573881149292, |
|
"learning_rate": 6.416203529656707e-06, |
|
"loss": 0.2948, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 4.6854628422425035, |
|
"grad_norm": 11.03162956237793, |
|
"learning_rate": 6.40255818395891e-06, |
|
"loss": 0.3095, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.69361147327249, |
|
"grad_norm": 11.995973587036133, |
|
"learning_rate": 6.388901490645214e-06, |
|
"loss": 0.3099, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 4.701760104302477, |
|
"grad_norm": 9.43193244934082, |
|
"learning_rate": 6.375233560207229e-06, |
|
"loss": 0.3276, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 4.709908735332464, |
|
"grad_norm": 10.617565155029297, |
|
"learning_rate": 6.361554503227475e-06, |
|
"loss": 0.3149, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 4.718057366362451, |
|
"grad_norm": 16.004545211791992, |
|
"learning_rate": 6.347864430378501e-06, |
|
"loss": 0.2907, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 4.726205997392438, |
|
"grad_norm": 18.075027465820312, |
|
"learning_rate": 6.334163452421978e-06, |
|
"loss": 0.3168, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.734354628422425, |
|
"grad_norm": 19.736661911010742, |
|
"learning_rate": 6.320451680207805e-06, |
|
"loss": 0.3077, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 4.742503259452412, |
|
"grad_norm": 6.202484607696533, |
|
"learning_rate": 6.306729224673217e-06, |
|
"loss": 0.3022, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 4.750651890482399, |
|
"grad_norm": 4.973538398742676, |
|
"learning_rate": 6.29299619684188e-06, |
|
"loss": 0.3032, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 4.758800521512386, |
|
"grad_norm": 9.67834186553955, |
|
"learning_rate": 6.2792527078230024e-06, |
|
"loss": 0.2937, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 4.766949152542373, |
|
"grad_norm": 7.5604777336120605, |
|
"learning_rate": 6.265498868810424e-06, |
|
"loss": 0.3132, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.77509778357236, |
|
"grad_norm": 11.391521453857422, |
|
"learning_rate": 6.251734791081728e-06, |
|
"loss": 0.3249, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 4.783246414602347, |
|
"grad_norm": 16.40961265563965, |
|
"learning_rate": 6.237960585997334e-06, |
|
"loss": 0.2951, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 4.791395045632334, |
|
"grad_norm": 4.114518165588379, |
|
"learning_rate": 6.224176364999595e-06, |
|
"loss": 0.3091, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 4.799543676662321, |
|
"grad_norm": 9.569024085998535, |
|
"learning_rate": 6.210382239611906e-06, |
|
"loss": 0.3093, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 4.8076923076923075, |
|
"grad_norm": 30.753637313842773, |
|
"learning_rate": 6.1965783214377895e-06, |
|
"loss": 0.2982, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.815840938722294, |
|
"grad_norm": 7.500620365142822, |
|
"learning_rate": 6.18276472216e-06, |
|
"loss": 0.2956, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 4.823989569752282, |
|
"grad_norm": 14.710212707519531, |
|
"learning_rate": 6.16894155353962e-06, |
|
"loss": 0.3078, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 4.832138200782269, |
|
"grad_norm": 7.550549507141113, |
|
"learning_rate": 6.1551089274151525e-06, |
|
"loss": 0.3093, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 4.840286831812255, |
|
"grad_norm": 8.313648223876953, |
|
"learning_rate": 6.141266955701616e-06, |
|
"loss": 0.2872, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 4.848435462842242, |
|
"grad_norm": 3.505223274230957, |
|
"learning_rate": 6.127415750389645e-06, |
|
"loss": 0.2991, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.85658409387223, |
|
"grad_norm": 10.405817985534668, |
|
"learning_rate": 6.113555423544576e-06, |
|
"loss": 0.3083, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 4.8647327249022165, |
|
"grad_norm": 9.818922996520996, |
|
"learning_rate": 6.0996860873055505e-06, |
|
"loss": 0.3131, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 4.872881355932203, |
|
"grad_norm": 8.345934867858887, |
|
"learning_rate": 6.085807853884595e-06, |
|
"loss": 0.2963, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 4.88102998696219, |
|
"grad_norm": 10.804642677307129, |
|
"learning_rate": 6.071920835565724e-06, |
|
"loss": 0.315, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 4.889178617992178, |
|
"grad_norm": 10.550320625305176, |
|
"learning_rate": 6.058025144704026e-06, |
|
"loss": 0.288, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.897327249022164, |
|
"grad_norm": 7.386425018310547, |
|
"learning_rate": 6.044120893724758e-06, |
|
"loss": 0.3175, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 4.905475880052151, |
|
"grad_norm": 16.652528762817383, |
|
"learning_rate": 6.030208195122433e-06, |
|
"loss": 0.3218, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 4.913624511082138, |
|
"grad_norm": 15.053431510925293, |
|
"learning_rate": 6.016287161459907e-06, |
|
"loss": 0.2769, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 4.921773142112125, |
|
"grad_norm": 7.756086349487305, |
|
"learning_rate": 6.002357905367481e-06, |
|
"loss": 0.289, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 4.929921773142112, |
|
"grad_norm": 10.426520347595215, |
|
"learning_rate": 5.9884205395419725e-06, |
|
"loss": 0.3169, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 4.938070404172099, |
|
"grad_norm": 12.334880828857422, |
|
"learning_rate": 5.974475176745813e-06, |
|
"loss": 0.3093, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 4.946219035202086, |
|
"grad_norm": 14.239689826965332, |
|
"learning_rate": 5.960521929806141e-06, |
|
"loss": 0.3036, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 4.9543676662320735, |
|
"grad_norm": 12.593892097473145, |
|
"learning_rate": 5.946560911613877e-06, |
|
"loss": 0.2911, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 4.96251629726206, |
|
"grad_norm": 4.950251579284668, |
|
"learning_rate": 5.9325922351228186e-06, |
|
"loss": 0.2942, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 4.970664928292047, |
|
"grad_norm": 10.60743522644043, |
|
"learning_rate": 5.918616013348719e-06, |
|
"loss": 0.302, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 4.978813559322034, |
|
"grad_norm": 18.459735870361328, |
|
"learning_rate": 5.904632359368388e-06, |
|
"loss": 0.2806, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 4.9869621903520205, |
|
"grad_norm": 10.454113006591797, |
|
"learning_rate": 5.890641386318756e-06, |
|
"loss": 0.3009, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 4.995110821382008, |
|
"grad_norm": 12.8052396774292, |
|
"learning_rate": 5.876643207395976e-06, |
|
"loss": 0.3122, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8285140562248996, |
|
"eval_loss": 0.5347269773483276, |
|
"eval_runtime": 6.815, |
|
"eval_samples_per_second": 365.37, |
|
"eval_steps_per_second": 45.781, |
|
"step": 61360 |
|
}, |
|
{ |
|
"epoch": 5.003259452411995, |
|
"grad_norm": 3.523259162902832, |
|
"learning_rate": 5.862637935854502e-06, |
|
"loss": 0.289, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 5.011408083441982, |
|
"grad_norm": 14.498679161071777, |
|
"learning_rate": 5.848625685006164e-06, |
|
"loss": 0.2673, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.019556714471968, |
|
"grad_norm": 15.165558815002441, |
|
"learning_rate": 5.834606568219269e-06, |
|
"loss": 0.2499, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 5.027705345501956, |
|
"grad_norm": 12.705721855163574, |
|
"learning_rate": 5.820580698917666e-06, |
|
"loss": 0.2486, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 5.035853976531943, |
|
"grad_norm": 15.987256050109863, |
|
"learning_rate": 5.806548190579842e-06, |
|
"loss": 0.2417, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 5.0440026075619295, |
|
"grad_norm": 8.831116676330566, |
|
"learning_rate": 5.792509156737997e-06, |
|
"loss": 0.2265, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 5.052151238591916, |
|
"grad_norm": 12.182964324951172, |
|
"learning_rate": 5.7784637109771225e-06, |
|
"loss": 0.2538, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.060299869621904, |
|
"grad_norm": 10.809981346130371, |
|
"learning_rate": 5.764411966934092e-06, |
|
"loss": 0.2603, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 5.068448500651891, |
|
"grad_norm": 5.705296039581299, |
|
"learning_rate": 5.750354038296733e-06, |
|
"loss": 0.2438, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 5.076597131681877, |
|
"grad_norm": 9.95255184173584, |
|
"learning_rate": 5.736290038802911e-06, |
|
"loss": 0.234, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"grad_norm": 7.724064350128174, |
|
"learning_rate": 5.722220082239608e-06, |
|
"loss": 0.2488, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 5.092894393741851, |
|
"grad_norm": 10.82822036743164, |
|
"learning_rate": 5.708144282442006e-06, |
|
"loss": 0.2591, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.101043024771839, |
|
"grad_norm": 8.642077445983887, |
|
"learning_rate": 5.694062753292559e-06, |
|
"loss": 0.2581, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 5.109191655801825, |
|
"grad_norm": 10.630475044250488, |
|
"learning_rate": 5.679975608720078e-06, |
|
"loss": 0.2408, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 5.117340286831812, |
|
"grad_norm": 10.559286117553711, |
|
"learning_rate": 5.665882962698801e-06, |
|
"loss": 0.2417, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 5.125488917861799, |
|
"grad_norm": 6.505354881286621, |
|
"learning_rate": 5.651784929247486e-06, |
|
"loss": 0.2517, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 5.1336375488917865, |
|
"grad_norm": 10.710380554199219, |
|
"learning_rate": 5.637681622428468e-06, |
|
"loss": 0.235, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.141786179921773, |
|
"grad_norm": 4.721646785736084, |
|
"learning_rate": 5.6235731563467535e-06, |
|
"loss": 0.2577, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 5.14993481095176, |
|
"grad_norm": 8.588154792785645, |
|
"learning_rate": 5.609459645149089e-06, |
|
"loss": 0.2728, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 5.158083441981747, |
|
"grad_norm": 19.248777389526367, |
|
"learning_rate": 5.595341203023044e-06, |
|
"loss": 0.2371, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 5.166232073011734, |
|
"grad_norm": 8.45293140411377, |
|
"learning_rate": 5.581217944196071e-06, |
|
"loss": 0.264, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 5.174380704041721, |
|
"grad_norm": 8.333393096923828, |
|
"learning_rate": 5.567089982934605e-06, |
|
"loss": 0.2558, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.182529335071708, |
|
"grad_norm": 14.054290771484375, |
|
"learning_rate": 5.552957433543119e-06, |
|
"loss": 0.2524, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 5.190677966101695, |
|
"grad_norm": 12.668076515197754, |
|
"learning_rate": 5.538820410363214e-06, |
|
"loss": 0.2408, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 5.198826597131682, |
|
"grad_norm": 9.344785690307617, |
|
"learning_rate": 5.524679027772676e-06, |
|
"loss": 0.2538, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 5.206975228161669, |
|
"grad_norm": 9.552376747131348, |
|
"learning_rate": 5.510533400184572e-06, |
|
"loss": 0.2535, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 5.215123859191656, |
|
"grad_norm": 10.270748138427734, |
|
"learning_rate": 5.496383642046311e-06, |
|
"loss": 0.2672, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.2232724902216425, |
|
"grad_norm": 15.067427635192871, |
|
"learning_rate": 5.4822298678387174e-06, |
|
"loss": 0.2455, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 5.23142112125163, |
|
"grad_norm": 5.5667948722839355, |
|
"learning_rate": 5.468072192075111e-06, |
|
"loss": 0.2539, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 5.239569752281617, |
|
"grad_norm": 11.088788032531738, |
|
"learning_rate": 5.453910729300378e-06, |
|
"loss": 0.2523, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 5.247718383311604, |
|
"grad_norm": 24.676876068115234, |
|
"learning_rate": 5.439745594090042e-06, |
|
"loss": 0.2488, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 5.25586701434159, |
|
"grad_norm": 9.937374114990234, |
|
"learning_rate": 5.425576901049342e-06, |
|
"loss": 0.2575, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.264015645371577, |
|
"grad_norm": 13.66021728515625, |
|
"learning_rate": 5.411404764812299e-06, |
|
"loss": 0.2396, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 5.272164276401565, |
|
"grad_norm": 11.568852424621582, |
|
"learning_rate": 5.3972293000407945e-06, |
|
"loss": 0.2398, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 5.280312907431552, |
|
"grad_norm": 9.292428970336914, |
|
"learning_rate": 5.383050621423639e-06, |
|
"loss": 0.2696, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 5.288461538461538, |
|
"grad_norm": 21.01643180847168, |
|
"learning_rate": 5.368868843675642e-06, |
|
"loss": 0.2522, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 5.296610169491525, |
|
"grad_norm": 7.557727813720703, |
|
"learning_rate": 5.354684081536693e-06, |
|
"loss": 0.2709, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.304758800521513, |
|
"grad_norm": 7.703597545623779, |
|
"learning_rate": 5.340496449770824e-06, |
|
"loss": 0.2561, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 5.3129074315514995, |
|
"grad_norm": 11.133892059326172, |
|
"learning_rate": 5.3263060631652805e-06, |
|
"loss": 0.2595, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 5.321056062581486, |
|
"grad_norm": 15.144754409790039, |
|
"learning_rate": 5.312113036529604e-06, |
|
"loss": 0.2506, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 5.329204693611473, |
|
"grad_norm": 7.959693431854248, |
|
"learning_rate": 5.297917484694692e-06, |
|
"loss": 0.2644, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 5.337353324641461, |
|
"grad_norm": 15.450654029846191, |
|
"learning_rate": 5.28371952251187e-06, |
|
"loss": 0.2533, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.345501955671447, |
|
"grad_norm": 6.035745620727539, |
|
"learning_rate": 5.269519264851967e-06, |
|
"loss": 0.2507, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 5.353650586701434, |
|
"grad_norm": 8.266439437866211, |
|
"learning_rate": 5.255316826604385e-06, |
|
"loss": 0.2588, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 5.361799217731421, |
|
"grad_norm": 9.542835235595703, |
|
"learning_rate": 5.24111232267617e-06, |
|
"loss": 0.2584, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 5.369947848761408, |
|
"grad_norm": 11.433173179626465, |
|
"learning_rate": 5.2269058679910735e-06, |
|
"loss": 0.2451, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 5.378096479791395, |
|
"grad_norm": 12.72153377532959, |
|
"learning_rate": 5.212697577488635e-06, |
|
"loss": 0.2496, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.386245110821382, |
|
"grad_norm": 9.416111946105957, |
|
"learning_rate": 5.1984875661232495e-06, |
|
"loss": 0.2567, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 5.394393741851369, |
|
"grad_norm": 15.701902389526367, |
|
"learning_rate": 5.184275948863231e-06, |
|
"loss": 0.2432, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 5.4025423728813555, |
|
"grad_norm": 7.241784572601318, |
|
"learning_rate": 5.1700628406898835e-06, |
|
"loss": 0.2441, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 5.410691003911343, |
|
"grad_norm": 21.102312088012695, |
|
"learning_rate": 5.155848356596581e-06, |
|
"loss": 0.2695, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 5.41883963494133, |
|
"grad_norm": 12.834817886352539, |
|
"learning_rate": 5.1416326115878255e-06, |
|
"loss": 0.2705, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.426988265971317, |
|
"grad_norm": 29.203624725341797, |
|
"learning_rate": 5.127415720678319e-06, |
|
"loss": 0.2354, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 5.435136897001303, |
|
"grad_norm": 13.500927925109863, |
|
"learning_rate": 5.113197798892038e-06, |
|
"loss": 0.2508, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 5.443285528031291, |
|
"grad_norm": 7.524002552032471, |
|
"learning_rate": 5.098978961261296e-06, |
|
"loss": 0.2494, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 5.451434159061278, |
|
"grad_norm": 17.00074577331543, |
|
"learning_rate": 5.084759322825821e-06, |
|
"loss": 0.241, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 5.459582790091265, |
|
"grad_norm": 11.755769729614258, |
|
"learning_rate": 5.070538998631813e-06, |
|
"loss": 0.2658, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.467731421121251, |
|
"grad_norm": 13.64929485321045, |
|
"learning_rate": 5.056318103731028e-06, |
|
"loss": 0.2515, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 5.475880052151239, |
|
"grad_norm": 6.673364639282227, |
|
"learning_rate": 5.042096753179835e-06, |
|
"loss": 0.2505, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 5.484028683181226, |
|
"grad_norm": 6.764876365661621, |
|
"learning_rate": 5.02787506203829e-06, |
|
"loss": 0.2584, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 5.4921773142112125, |
|
"grad_norm": 11.133795738220215, |
|
"learning_rate": 5.013653145369204e-06, |
|
"loss": 0.2598, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 5.500325945241199, |
|
"grad_norm": 11.689901351928711, |
|
"learning_rate": 4.9994311182372145e-06, |
|
"loss": 0.2397, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.508474576271187, |
|
"grad_norm": 18.084266662597656, |
|
"learning_rate": 4.985209095707852e-06, |
|
"loss": 0.265, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 5.516623207301174, |
|
"grad_norm": 12.136353492736816, |
|
"learning_rate": 4.970987192846609e-06, |
|
"loss": 0.2372, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 5.52477183833116, |
|
"grad_norm": 14.060345649719238, |
|
"learning_rate": 4.95676552471801e-06, |
|
"loss": 0.2657, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 5.532920469361147, |
|
"grad_norm": 5.493065357208252, |
|
"learning_rate": 4.942544206384682e-06, |
|
"loss": 0.2377, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 5.541069100391134, |
|
"grad_norm": 13.543553352355957, |
|
"learning_rate": 4.928323352906421e-06, |
|
"loss": 0.2456, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.5492177314211215, |
|
"grad_norm": 12.011448860168457, |
|
"learning_rate": 4.9141030793392595e-06, |
|
"loss": 0.2695, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 5.557366362451108, |
|
"grad_norm": 7.862688064575195, |
|
"learning_rate": 4.899883500734542e-06, |
|
"loss": 0.2668, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 5.565514993481095, |
|
"grad_norm": 11.895374298095703, |
|
"learning_rate": 4.885664732137988e-06, |
|
"loss": 0.2581, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 5.573663624511082, |
|
"grad_norm": 19.049335479736328, |
|
"learning_rate": 4.871446888588762e-06, |
|
"loss": 0.2581, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 5.581812255541069, |
|
"grad_norm": 15.173524856567383, |
|
"learning_rate": 4.85723008511855e-06, |
|
"loss": 0.2374, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.589960886571056, |
|
"grad_norm": 15.82532024383545, |
|
"learning_rate": 4.84301443675062e-06, |
|
"loss": 0.2548, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 5.598109517601043, |
|
"grad_norm": 9.289793014526367, |
|
"learning_rate": 4.828800058498889e-06, |
|
"loss": 0.2585, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 5.60625814863103, |
|
"grad_norm": 13.010422706604004, |
|
"learning_rate": 4.814587065367009e-06, |
|
"loss": 0.264, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 5.614406779661017, |
|
"grad_norm": 10.556730270385742, |
|
"learning_rate": 4.800375572347414e-06, |
|
"loss": 0.2436, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 5.622555410691004, |
|
"grad_norm": 13.723767280578613, |
|
"learning_rate": 4.786165694420408e-06, |
|
"loss": 0.2477, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.630704041720991, |
|
"grad_norm": 11.722618103027344, |
|
"learning_rate": 4.771957546553226e-06, |
|
"loss": 0.2581, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 5.638852672750978, |
|
"grad_norm": 10.373120307922363, |
|
"learning_rate": 4.757751243699109e-06, |
|
"loss": 0.2606, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 5.647001303780964, |
|
"grad_norm": 15.857172966003418, |
|
"learning_rate": 4.743546900796364e-06, |
|
"loss": 0.2723, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 5.655149934810952, |
|
"grad_norm": 22.450532913208008, |
|
"learning_rate": 4.729344632767446e-06, |
|
"loss": 0.235, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 5.663298565840939, |
|
"grad_norm": 15.469109535217285, |
|
"learning_rate": 4.71514455451802e-06, |
|
"loss": 0.2455, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 5.6714471968709255, |
|
"grad_norm": 21.650880813598633, |
|
"learning_rate": 4.7009467809360375e-06, |
|
"loss": 0.2597, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 5.679595827900913, |
|
"grad_norm": 16.47661590576172, |
|
"learning_rate": 4.6867514268907995e-06, |
|
"loss": 0.2555, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 5.6877444589309, |
|
"grad_norm": 16.370121002197266, |
|
"learning_rate": 4.672558607232033e-06, |
|
"loss": 0.2411, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 5.695893089960887, |
|
"grad_norm": 10.867352485656738, |
|
"learning_rate": 4.658368436788963e-06, |
|
"loss": 0.2638, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 5.704041720990873, |
|
"grad_norm": 13.257880210876465, |
|
"learning_rate": 4.644181030369378e-06, |
|
"loss": 0.233, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.71219035202086, |
|
"grad_norm": 16.66828155517578, |
|
"learning_rate": 4.629996502758703e-06, |
|
"loss": 0.2549, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 5.720338983050848, |
|
"grad_norm": 8.0230712890625, |
|
"learning_rate": 4.615814968719071e-06, |
|
"loss": 0.251, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 5.7284876140808345, |
|
"grad_norm": 20.61688804626465, |
|
"learning_rate": 4.6016365429884e-06, |
|
"loss": 0.2617, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 5.736636245110821, |
|
"grad_norm": 4.916039943695068, |
|
"learning_rate": 4.587461340279457e-06, |
|
"loss": 0.2772, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 5.744784876140808, |
|
"grad_norm": 13.59726333618164, |
|
"learning_rate": 4.573289475278927e-06, |
|
"loss": 0.2654, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 5.752933507170796, |
|
"grad_norm": 21.178253173828125, |
|
"learning_rate": 4.559121062646499e-06, |
|
"loss": 0.237, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 5.761082138200782, |
|
"grad_norm": 15.958664894104004, |
|
"learning_rate": 4.544956217013927e-06, |
|
"loss": 0.2447, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 5.769230769230769, |
|
"grad_norm": 7.610626220703125, |
|
"learning_rate": 4.530795052984104e-06, |
|
"loss": 0.239, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 5.777379400260756, |
|
"grad_norm": 10.934889793395996, |
|
"learning_rate": 4.5166376851301385e-06, |
|
"loss": 0.2562, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 5.7855280312907436, |
|
"grad_norm": 7.9625244140625, |
|
"learning_rate": 4.502484227994426e-06, |
|
"loss": 0.2606, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 5.79367666232073, |
|
"grad_norm": 15.313315391540527, |
|
"learning_rate": 4.488334796087719e-06, |
|
"loss": 0.2454, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 5.801825293350717, |
|
"grad_norm": 16.183135986328125, |
|
"learning_rate": 4.474189503888207e-06, |
|
"loss": 0.2591, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 5.809973924380704, |
|
"grad_norm": 8.89918041229248, |
|
"learning_rate": 4.4600484658405815e-06, |
|
"loss": 0.2577, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 5.818122555410691, |
|
"grad_norm": 8.31811237335205, |
|
"learning_rate": 4.445911796355119e-06, |
|
"loss": 0.2382, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 5.826271186440678, |
|
"grad_norm": 9.141270637512207, |
|
"learning_rate": 4.431779609806751e-06, |
|
"loss": 0.2401, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 5.834419817470665, |
|
"grad_norm": 8.92165756225586, |
|
"learning_rate": 4.4176520205341365e-06, |
|
"loss": 0.2133, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 5.842568448500652, |
|
"grad_norm": 14.15666675567627, |
|
"learning_rate": 4.403529142838745e-06, |
|
"loss": 0.2536, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 5.8507170795306385, |
|
"grad_norm": 8.742586135864258, |
|
"learning_rate": 4.38941109098392e-06, |
|
"loss": 0.261, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 5.858865710560626, |
|
"grad_norm": 8.7103853225708, |
|
"learning_rate": 4.375297979193965e-06, |
|
"loss": 0.2331, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 5.867014341590613, |
|
"grad_norm": 13.822142601013184, |
|
"learning_rate": 4.361189921653215e-06, |
|
"loss": 0.2583, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 5.8751629726206, |
|
"grad_norm": 9.043753623962402, |
|
"learning_rate": 4.3470870325051084e-06, |
|
"loss": 0.2635, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 5.883311603650586, |
|
"grad_norm": 10.288004875183105, |
|
"learning_rate": 4.332989425851273e-06, |
|
"loss": 0.2644, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 5.891460234680574, |
|
"grad_norm": 18.826217651367188, |
|
"learning_rate": 4.318897215750593e-06, |
|
"loss": 0.2515, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 5.899608865710561, |
|
"grad_norm": 11.778913497924805, |
|
"learning_rate": 4.304810516218298e-06, |
|
"loss": 0.2628, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 5.9077574967405475, |
|
"grad_norm": 16.54121971130371, |
|
"learning_rate": 4.290729441225027e-06, |
|
"loss": 0.2792, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 5.915906127770534, |
|
"grad_norm": 12.631098747253418, |
|
"learning_rate": 4.276654104695915e-06, |
|
"loss": 0.2503, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 5.924054758800521, |
|
"grad_norm": 10.706419944763184, |
|
"learning_rate": 4.262584620509669e-06, |
|
"loss": 0.2564, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 5.932203389830509, |
|
"grad_norm": 8.69650650024414, |
|
"learning_rate": 4.248521102497649e-06, |
|
"loss": 0.2569, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 5.940352020860495, |
|
"grad_norm": 12.438202857971191, |
|
"learning_rate": 4.23446366444294e-06, |
|
"loss": 0.2531, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 5.948500651890482, |
|
"grad_norm": 22.02505874633789, |
|
"learning_rate": 4.220412420079438e-06, |
|
"loss": 0.2692, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 5.95664928292047, |
|
"grad_norm": 13.650114059448242, |
|
"learning_rate": 4.206367483090931e-06, |
|
"loss": 0.2663, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 5.9647979139504566, |
|
"grad_norm": 13.705251693725586, |
|
"learning_rate": 4.192328967110172e-06, |
|
"loss": 0.2295, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 5.972946544980443, |
|
"grad_norm": 7.683305263519287, |
|
"learning_rate": 4.178296985717967e-06, |
|
"loss": 0.2622, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 5.98109517601043, |
|
"grad_norm": 7.798497676849365, |
|
"learning_rate": 4.16427165244225e-06, |
|
"loss": 0.2431, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 5.989243807040417, |
|
"grad_norm": 8.129569053649902, |
|
"learning_rate": 4.150253080757172e-06, |
|
"loss": 0.2372, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 5.9973924380704045, |
|
"grad_norm": 14.516979217529297, |
|
"learning_rate": 4.136241384082174e-06, |
|
"loss": 0.2801, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8168674698795181, |
|
"eval_loss": 0.6053332686424255, |
|
"eval_runtime": 7.0202, |
|
"eval_samples_per_second": 354.691, |
|
"eval_steps_per_second": 44.443, |
|
"step": 73632 |
|
}, |
|
{ |
|
"epoch": 6.005541069100391, |
|
"grad_norm": 11.174201965332031, |
|
"learning_rate": 4.122236675781071e-06, |
|
"loss": 0.224, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 6.013689700130378, |
|
"grad_norm": 24.070091247558594, |
|
"learning_rate": 4.108239069161147e-06, |
|
"loss": 0.2289, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 6.021838331160365, |
|
"grad_norm": 14.804594993591309, |
|
"learning_rate": 4.09424867747222e-06, |
|
"loss": 0.2017, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 6.029986962190352, |
|
"grad_norm": 20.014951705932617, |
|
"learning_rate": 4.0802656139057385e-06, |
|
"loss": 0.2203, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 6.038135593220339, |
|
"grad_norm": 11.608116149902344, |
|
"learning_rate": 4.066289991593859e-06, |
|
"loss": 0.1983, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 6.046284224250326, |
|
"grad_norm": 10.88152027130127, |
|
"learning_rate": 4.052321923608539e-06, |
|
"loss": 0.2167, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 6.054432855280313, |
|
"grad_norm": 9.91988754272461, |
|
"learning_rate": 4.038361522960609e-06, |
|
"loss": 0.2114, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 6.0625814863103, |
|
"grad_norm": 10.7438383102417, |
|
"learning_rate": 4.024408902598871e-06, |
|
"loss": 0.2126, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 6.070730117340287, |
|
"grad_norm": 13.341911315917969, |
|
"learning_rate": 4.01046417540918e-06, |
|
"loss": 0.2099, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 6.078878748370274, |
|
"grad_norm": 14.30612564086914, |
|
"learning_rate": 3.996527454213522e-06, |
|
"loss": 0.2159, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 6.0870273794002605, |
|
"grad_norm": 14.352286338806152, |
|
"learning_rate": 3.98259885176912e-06, |
|
"loss": 0.2314, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 6.095176010430248, |
|
"grad_norm": 10.346816062927246, |
|
"learning_rate": 3.968678480767503e-06, |
|
"loss": 0.2111, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 6.103324641460235, |
|
"grad_norm": 16.672042846679688, |
|
"learning_rate": 3.954766453833608e-06, |
|
"loss": 0.199, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 6.111473272490222, |
|
"grad_norm": 14.719056129455566, |
|
"learning_rate": 3.94086288352486e-06, |
|
"loss": 0.1996, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.119621903520208, |
|
"grad_norm": 15.159549713134766, |
|
"learning_rate": 3.926967882330262e-06, |
|
"loss": 0.2246, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 6.127770534550195, |
|
"grad_norm": 8.278336524963379, |
|
"learning_rate": 3.913081562669492e-06, |
|
"loss": 0.229, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 6.135919165580183, |
|
"grad_norm": 17.559757232666016, |
|
"learning_rate": 3.899204036891989e-06, |
|
"loss": 0.2012, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 6.1440677966101696, |
|
"grad_norm": 11.502748489379883, |
|
"learning_rate": 3.885335417276037e-06, |
|
"loss": 0.202, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 6.152216427640156, |
|
"grad_norm": 10.84666633605957, |
|
"learning_rate": 3.871475816027868e-06, |
|
"loss": 0.2142, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.160365058670143, |
|
"grad_norm": 15.855389595031738, |
|
"learning_rate": 3.857625345280751e-06, |
|
"loss": 0.2287, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 6.168513689700131, |
|
"grad_norm": 12.554780960083008, |
|
"learning_rate": 3.843784117094081e-06, |
|
"loss": 0.1949, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 6.1766623207301175, |
|
"grad_norm": 7.536383628845215, |
|
"learning_rate": 3.829952243452475e-06, |
|
"loss": 0.2062, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 6.184810951760104, |
|
"grad_norm": 13.602145195007324, |
|
"learning_rate": 3.816129836264864e-06, |
|
"loss": 0.2211, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 6.192959582790091, |
|
"grad_norm": 10.88949966430664, |
|
"learning_rate": 3.802317007363593e-06, |
|
"loss": 0.2141, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.201108213820079, |
|
"grad_norm": 3.1079776287078857, |
|
"learning_rate": 3.7885138685035113e-06, |
|
"loss": 0.2121, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 6.209256844850065, |
|
"grad_norm": 10.546631813049316, |
|
"learning_rate": 3.774720531361063e-06, |
|
"loss": 0.2272, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 6.217405475880052, |
|
"grad_norm": 22.11454200744629, |
|
"learning_rate": 3.7609371075334e-06, |
|
"loss": 0.2118, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 6.225554106910039, |
|
"grad_norm": 16.33343505859375, |
|
"learning_rate": 3.7471637085374614e-06, |
|
"loss": 0.227, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 6.2337027379400265, |
|
"grad_norm": 14.43807315826416, |
|
"learning_rate": 3.7334004458090833e-06, |
|
"loss": 0.2287, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.241851368970013, |
|
"grad_norm": 14.813934326171875, |
|
"learning_rate": 3.719647430702089e-06, |
|
"loss": 0.2064, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 5.587681770324707, |
|
"learning_rate": 3.705904774487396e-06, |
|
"loss": 0.2051, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 6.258148631029987, |
|
"grad_norm": 7.330463409423828, |
|
"learning_rate": 3.6921725883521087e-06, |
|
"loss": 0.2225, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 6.2662972620599735, |
|
"grad_norm": 19.726444244384766, |
|
"learning_rate": 3.678450983398623e-06, |
|
"loss": 0.2131, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 6.274445893089961, |
|
"grad_norm": 15.526715278625488, |
|
"learning_rate": 3.664740070643723e-06, |
|
"loss": 0.2257, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.282594524119948, |
|
"grad_norm": 9.113424301147461, |
|
"learning_rate": 3.6510399610176906e-06, |
|
"loss": 0.2075, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 6.290743155149935, |
|
"grad_norm": 11.527823448181152, |
|
"learning_rate": 3.6373507653634e-06, |
|
"loss": 0.1921, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 6.298891786179921, |
|
"grad_norm": 5.839615345001221, |
|
"learning_rate": 3.6236725944354245e-06, |
|
"loss": 0.2426, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 6.307040417209909, |
|
"grad_norm": 16.31635284423828, |
|
"learning_rate": 3.6100055588991435e-06, |
|
"loss": 0.206, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 6.315189048239896, |
|
"grad_norm": 13.138345718383789, |
|
"learning_rate": 3.5963497693298386e-06, |
|
"loss": 0.2223, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.3233376792698825, |
|
"grad_norm": 3.202862024307251, |
|
"learning_rate": 3.5827053362118085e-06, |
|
"loss": 0.2095, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 6.331486310299869, |
|
"grad_norm": 11.949639320373535, |
|
"learning_rate": 3.5690723699374697e-06, |
|
"loss": 0.2176, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 6.339634941329857, |
|
"grad_norm": 17.555377960205078, |
|
"learning_rate": 3.5554509808064602e-06, |
|
"loss": 0.2204, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 6.347783572359844, |
|
"grad_norm": 6.945880889892578, |
|
"learning_rate": 3.5418412790247575e-06, |
|
"loss": 0.2006, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 6.3559322033898304, |
|
"grad_norm": 29.10856056213379, |
|
"learning_rate": 3.528243374703776e-06, |
|
"loss": 0.2089, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.364080834419817, |
|
"grad_norm": 21.48233413696289, |
|
"learning_rate": 3.5146573778594855e-06, |
|
"loss": 0.2091, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 6.372229465449805, |
|
"grad_norm": 10.77776050567627, |
|
"learning_rate": 3.5010833984115135e-06, |
|
"loss": 0.1919, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 6.380378096479792, |
|
"grad_norm": 22.155200958251953, |
|
"learning_rate": 3.4875215461822574e-06, |
|
"loss": 0.2269, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 6.388526727509778, |
|
"grad_norm": 12.029594421386719, |
|
"learning_rate": 3.473971930896001e-06, |
|
"loss": 0.2328, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 6.396675358539765, |
|
"grad_norm": 8.563623428344727, |
|
"learning_rate": 3.460434662178024e-06, |
|
"loss": 0.2202, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.404823989569753, |
|
"grad_norm": 6.394750118255615, |
|
"learning_rate": 3.4469098495537063e-06, |
|
"loss": 0.2324, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 6.4129726205997395, |
|
"grad_norm": 15.485038757324219, |
|
"learning_rate": 3.433397602447659e-06, |
|
"loss": 0.2152, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 6.421121251629726, |
|
"grad_norm": 15.386170387268066, |
|
"learning_rate": 3.4198980301828256e-06, |
|
"loss": 0.2065, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 6.429269882659713, |
|
"grad_norm": 11.893247604370117, |
|
"learning_rate": 3.406411241979603e-06, |
|
"loss": 0.2235, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 6.4374185136897, |
|
"grad_norm": 12.216060638427734, |
|
"learning_rate": 3.3929373469549554e-06, |
|
"loss": 0.211, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.445567144719687, |
|
"grad_norm": 9.018731117248535, |
|
"learning_rate": 3.379476454121533e-06, |
|
"loss": 0.2253, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 6.453715775749674, |
|
"grad_norm": 18.289003372192383, |
|
"learning_rate": 3.366028672386792e-06, |
|
"loss": 0.2265, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 6.461864406779661, |
|
"grad_norm": 6.403520584106445, |
|
"learning_rate": 3.35259411055211e-06, |
|
"loss": 0.2241, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 6.470013037809648, |
|
"grad_norm": 6.311509609222412, |
|
"learning_rate": 3.3391728773119037e-06, |
|
"loss": 0.2204, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 6.478161668839635, |
|
"grad_norm": 16.21648597717285, |
|
"learning_rate": 3.3257650812527566e-06, |
|
"loss": 0.2083, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.486310299869622, |
|
"grad_norm": 2.8797686100006104, |
|
"learning_rate": 3.3123708308525354e-06, |
|
"loss": 0.2152, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 6.494458930899609, |
|
"grad_norm": 12.40995979309082, |
|
"learning_rate": 3.298990234479514e-06, |
|
"loss": 0.2061, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 6.5026075619295955, |
|
"grad_norm": 13.1309814453125, |
|
"learning_rate": 3.2856234003914945e-06, |
|
"loss": 0.196, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 6.510756192959583, |
|
"grad_norm": 11.270479202270508, |
|
"learning_rate": 3.2722704367349357e-06, |
|
"loss": 0.1969, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 6.51890482398957, |
|
"grad_norm": 5.54075813293457, |
|
"learning_rate": 3.258931451544075e-06, |
|
"loss": 0.2345, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.527053455019557, |
|
"grad_norm": 9.90404987335205, |
|
"learning_rate": 3.245606552740053e-06, |
|
"loss": 0.2223, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 6.5352020860495434, |
|
"grad_norm": 16.18077850341797, |
|
"learning_rate": 3.2322958481300426e-06, |
|
"loss": 0.2163, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 6.54335071707953, |
|
"grad_norm": 6.288787841796875, |
|
"learning_rate": 3.2189994454063776e-06, |
|
"loss": 0.2093, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 6.551499348109518, |
|
"grad_norm": 21.265981674194336, |
|
"learning_rate": 3.205717452145679e-06, |
|
"loss": 0.1972, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 6.559647979139505, |
|
"grad_norm": 14.27213191986084, |
|
"learning_rate": 3.1924499758079863e-06, |
|
"loss": 0.2211, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.567796610169491, |
|
"grad_norm": 6.663931369781494, |
|
"learning_rate": 3.1791971237358893e-06, |
|
"loss": 0.2037, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 6.575945241199479, |
|
"grad_norm": 20.920997619628906, |
|
"learning_rate": 3.1659590031536546e-06, |
|
"loss": 0.2016, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 6.584093872229466, |
|
"grad_norm": 5.427749156951904, |
|
"learning_rate": 3.1527357211663647e-06, |
|
"loss": 0.2145, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 6.5922425032594525, |
|
"grad_norm": 5.944066524505615, |
|
"learning_rate": 3.1395273847590444e-06, |
|
"loss": 0.2243, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 6.600391134289439, |
|
"grad_norm": 4.4831366539001465, |
|
"learning_rate": 3.1263341007958015e-06, |
|
"loss": 0.2251, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 6.608539765319426, |
|
"grad_norm": 7.92203950881958, |
|
"learning_rate": 3.113155976018959e-06, |
|
"loss": 0.2202, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 6.616688396349414, |
|
"grad_norm": 7.755978584289551, |
|
"learning_rate": 3.0999931170481922e-06, |
|
"loss": 0.2099, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 6.6248370273794, |
|
"grad_norm": 12.123492240905762, |
|
"learning_rate": 3.086845630379668e-06, |
|
"loss": 0.2279, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 6.632985658409387, |
|
"grad_norm": 8.695425987243652, |
|
"learning_rate": 3.073713622385177e-06, |
|
"loss": 0.2171, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 6.641134289439374, |
|
"grad_norm": 12.858569145202637, |
|
"learning_rate": 3.0605971993112805e-06, |
|
"loss": 0.21, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 6.6492829204693615, |
|
"grad_norm": 20.741817474365234, |
|
"learning_rate": 3.0474964672784456e-06, |
|
"loss": 0.2101, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 6.657431551499348, |
|
"grad_norm": 12.694851875305176, |
|
"learning_rate": 3.034411532280193e-06, |
|
"loss": 0.2119, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 6.665580182529335, |
|
"grad_norm": 11.025914192199707, |
|
"learning_rate": 3.0213425001822266e-06, |
|
"loss": 0.1936, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 6.673728813559322, |
|
"grad_norm": 8.600627899169922, |
|
"learning_rate": 3.008289476721594e-06, |
|
"loss": 0.2239, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 6.681877444589309, |
|
"grad_norm": 5.949343681335449, |
|
"learning_rate": 2.9952525675058175e-06, |
|
"loss": 0.2103, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.690026075619296, |
|
"grad_norm": 9.281770706176758, |
|
"learning_rate": 2.9822318780120463e-06, |
|
"loss": 0.2252, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 6.698174706649283, |
|
"grad_norm": 8.222912788391113, |
|
"learning_rate": 2.9692275135862002e-06, |
|
"loss": 0.199, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 6.70632333767927, |
|
"grad_norm": 10.598749160766602, |
|
"learning_rate": 2.9562395794421193e-06, |
|
"loss": 0.2244, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 6.7144719687092564, |
|
"grad_norm": 11.608291625976562, |
|
"learning_rate": 2.9432681806607145e-06, |
|
"loss": 0.2176, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 6.722620599739244, |
|
"grad_norm": 9.24106216430664, |
|
"learning_rate": 2.9303134221891106e-06, |
|
"loss": 0.2222, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 6.730769230769231, |
|
"grad_norm": 2.6706371307373047, |
|
"learning_rate": 2.917375408839803e-06, |
|
"loss": 0.2159, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 6.738917861799218, |
|
"grad_norm": 11.834959030151367, |
|
"learning_rate": 2.904454245289805e-06, |
|
"loss": 0.216, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 6.747066492829204, |
|
"grad_norm": 3.9120168685913086, |
|
"learning_rate": 2.8915500360798117e-06, |
|
"loss": 0.2051, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 6.755215123859192, |
|
"grad_norm": 9.347685813903809, |
|
"learning_rate": 2.8786628856133404e-06, |
|
"loss": 0.238, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 6.763363754889179, |
|
"grad_norm": 7.142603874206543, |
|
"learning_rate": 2.8657928981558926e-06, |
|
"loss": 0.2076, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 6.7715123859191655, |
|
"grad_norm": 15.814796447753906, |
|
"learning_rate": 2.852940177834111e-06, |
|
"loss": 0.2018, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"grad_norm": 11.722209930419922, |
|
"learning_rate": 2.8401048286349353e-06, |
|
"loss": 0.2275, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 6.78780964797914, |
|
"grad_norm": 10.187668800354004, |
|
"learning_rate": 2.8272869544047622e-06, |
|
"loss": 0.2093, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 6.795958279009127, |
|
"grad_norm": 15.927581787109375, |
|
"learning_rate": 2.814486658848603e-06, |
|
"loss": 0.2065, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 6.804106910039113, |
|
"grad_norm": 12.883095741271973, |
|
"learning_rate": 2.8017040455292465e-06, |
|
"loss": 0.2108, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 6.8122555410691, |
|
"grad_norm": 7.530974864959717, |
|
"learning_rate": 2.788939217866422e-06, |
|
"loss": 0.2139, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 6.820404172099087, |
|
"grad_norm": 20.07868766784668, |
|
"learning_rate": 2.7761922791359596e-06, |
|
"loss": 0.2205, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 6.8285528031290745, |
|
"grad_norm": 7.615067481994629, |
|
"learning_rate": 2.7634633324689563e-06, |
|
"loss": 0.2067, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 6.836701434159061, |
|
"grad_norm": 10.10435962677002, |
|
"learning_rate": 2.7507524808509416e-06, |
|
"loss": 0.2284, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 6.844850065189048, |
|
"grad_norm": 12.469111442565918, |
|
"learning_rate": 2.738059827121046e-06, |
|
"loss": 0.2086, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 6.852998696219036, |
|
"grad_norm": 8.140021324157715, |
|
"learning_rate": 2.7253854739711634e-06, |
|
"loss": 0.2162, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 6.861147327249022, |
|
"grad_norm": 14.818914413452148, |
|
"learning_rate": 2.7127295239451273e-06, |
|
"loss": 0.2153, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 6.869295958279009, |
|
"grad_norm": 8.947492599487305, |
|
"learning_rate": 2.700092079437877e-06, |
|
"loss": 0.2073, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 6.877444589308996, |
|
"grad_norm": 8.173857688903809, |
|
"learning_rate": 2.687473242694629e-06, |
|
"loss": 0.2136, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 6.885593220338983, |
|
"grad_norm": 4.175146579742432, |
|
"learning_rate": 2.6748731158100528e-06, |
|
"loss": 0.2082, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 6.89374185136897, |
|
"grad_norm": 8.696370124816895, |
|
"learning_rate": 2.6622918007274406e-06, |
|
"loss": 0.2128, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 6.901890482398957, |
|
"grad_norm": 8.253527641296387, |
|
"learning_rate": 2.649729399237886e-06, |
|
"loss": 0.1985, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 6.910039113428944, |
|
"grad_norm": 9.825946807861328, |
|
"learning_rate": 2.6371860129794585e-06, |
|
"loss": 0.2084, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 6.918187744458931, |
|
"grad_norm": 21.79430389404297, |
|
"learning_rate": 2.624661743436383e-06, |
|
"loss": 0.2154, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 6.926336375488918, |
|
"grad_norm": 17.554534912109375, |
|
"learning_rate": 2.6121566919382168e-06, |
|
"loss": 0.2073, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 6.934485006518905, |
|
"grad_norm": 14.525189399719238, |
|
"learning_rate": 2.599670959659032e-06, |
|
"loss": 0.2136, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 6.942633637548892, |
|
"grad_norm": 17.66045570373535, |
|
"learning_rate": 2.5872046476165926e-06, |
|
"loss": 0.2259, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 6.9507822685788785, |
|
"grad_norm": 12.12194538116455, |
|
"learning_rate": 2.574757856671542e-06, |
|
"loss": 0.2303, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 6.958930899608866, |
|
"grad_norm": 16.121667861938477, |
|
"learning_rate": 2.5623306875265865e-06, |
|
"loss": 0.209, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 6.967079530638853, |
|
"grad_norm": 37.0359001159668, |
|
"learning_rate": 2.5499232407256764e-06, |
|
"loss": 0.2135, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 6.97522816166884, |
|
"grad_norm": 9.753621101379395, |
|
"learning_rate": 2.5375356166531974e-06, |
|
"loss": 0.2246, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 6.983376792698826, |
|
"grad_norm": 11.933328628540039, |
|
"learning_rate": 2.525167915533153e-06, |
|
"loss": 0.2083, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 6.991525423728813, |
|
"grad_norm": 11.32873821258545, |
|
"learning_rate": 2.512820237428366e-06, |
|
"loss": 0.221, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 6.999674054758801, |
|
"grad_norm": 10.335704803466797, |
|
"learning_rate": 2.5004926822396468e-06, |
|
"loss": 0.218, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8200803212851405, |
|
"eval_loss": 0.6657418608665466, |
|
"eval_runtime": 6.9032, |
|
"eval_samples_per_second": 360.703, |
|
"eval_steps_per_second": 45.196, |
|
"step": 85904 |
|
}, |
|
{ |
|
"epoch": 7.0078226857887875, |
|
"grad_norm": 13.04452133178711, |
|
"learning_rate": 2.4881853497050074e-06, |
|
"loss": 0.1828, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 7.015971316818774, |
|
"grad_norm": 11.350065231323242, |
|
"learning_rate": 2.475898339398842e-06, |
|
"loss": 0.1981, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 7.024119947848761, |
|
"grad_norm": 3.5544838905334473, |
|
"learning_rate": 2.463631750731125e-06, |
|
"loss": 0.1873, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 7.032268578878749, |
|
"grad_norm": 6.474255084991455, |
|
"learning_rate": 2.451385682946606e-06, |
|
"loss": 0.205, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 7.040417209908735, |
|
"grad_norm": 10.676136016845703, |
|
"learning_rate": 2.43916023512401e-06, |
|
"loss": 0.1702, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 7.048565840938722, |
|
"grad_norm": 6.142400741577148, |
|
"learning_rate": 2.4269555061752303e-06, |
|
"loss": 0.2017, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 7.056714471968709, |
|
"grad_norm": 16.273656845092773, |
|
"learning_rate": 2.4147715948445323e-06, |
|
"loss": 0.1776, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 7.064863102998697, |
|
"grad_norm": 22.690208435058594, |
|
"learning_rate": 2.4026085997077486e-06, |
|
"loss": 0.1762, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 7.073011734028683, |
|
"grad_norm": 14.49307632446289, |
|
"learning_rate": 2.390466619171492e-06, |
|
"loss": 0.1664, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 7.08116036505867, |
|
"grad_norm": 14.948646545410156, |
|
"learning_rate": 2.378345751472351e-06, |
|
"loss": 0.1953, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 7.089308996088657, |
|
"grad_norm": 12.674484252929688, |
|
"learning_rate": 2.3662460946760962e-06, |
|
"loss": 0.1932, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 7.0974576271186445, |
|
"grad_norm": 14.729815483093262, |
|
"learning_rate": 2.354167746676892e-06, |
|
"loss": 0.1814, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 7.105606258148631, |
|
"grad_norm": 16.739356994628906, |
|
"learning_rate": 2.3421108051964974e-06, |
|
"loss": 0.1761, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 7.113754889178618, |
|
"grad_norm": 16.266368865966797, |
|
"learning_rate": 2.330075367783479e-06, |
|
"loss": 0.1947, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 7.121903520208605, |
|
"grad_norm": 12.137019157409668, |
|
"learning_rate": 2.318061531812422e-06, |
|
"loss": 0.2017, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 7.130052151238592, |
|
"grad_norm": 7.073469161987305, |
|
"learning_rate": 2.3060693944831404e-06, |
|
"loss": 0.1746, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.138200782268579, |
|
"grad_norm": 7.888490200042725, |
|
"learning_rate": 2.294099052819893e-06, |
|
"loss": 0.1882, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 7.146349413298566, |
|
"grad_norm": 18.83835792541504, |
|
"learning_rate": 2.282150603670596e-06, |
|
"loss": 0.182, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 7.154498044328553, |
|
"grad_norm": 9.491145133972168, |
|
"learning_rate": 2.2702241437060463e-06, |
|
"loss": 0.1817, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 7.162646675358539, |
|
"grad_norm": 11.629495620727539, |
|
"learning_rate": 2.2583197694191272e-06, |
|
"loss": 0.1737, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 7.170795306388527, |
|
"grad_norm": 3.3986611366271973, |
|
"learning_rate": 2.246437577124038e-06, |
|
"loss": 0.1839, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 7.178943937418514, |
|
"grad_norm": 3.2696523666381836, |
|
"learning_rate": 2.2345776629555085e-06, |
|
"loss": 0.1896, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 7.1870925684485005, |
|
"grad_norm": 9.869660377502441, |
|
"learning_rate": 2.2227401228680275e-06, |
|
"loss": 0.2028, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 7.195241199478487, |
|
"grad_norm": 8.699070930480957, |
|
"learning_rate": 2.2109250526350584e-06, |
|
"loss": 0.2025, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 7.203389830508475, |
|
"grad_norm": 3.9306254386901855, |
|
"learning_rate": 2.1991325478482695e-06, |
|
"loss": 0.1827, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 7.211538461538462, |
|
"grad_norm": 18.14926528930664, |
|
"learning_rate": 2.187362703916766e-06, |
|
"loss": 0.1843, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.219687092568448, |
|
"grad_norm": 15.083455085754395, |
|
"learning_rate": 2.175615616066305e-06, |
|
"loss": 0.1932, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 7.227835723598435, |
|
"grad_norm": 14.958844184875488, |
|
"learning_rate": 2.163891379338535e-06, |
|
"loss": 0.1839, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 7.235984354628423, |
|
"grad_norm": 9.219823837280273, |
|
"learning_rate": 2.1521900885902214e-06, |
|
"loss": 0.205, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 7.24413298565841, |
|
"grad_norm": 10.361544609069824, |
|
"learning_rate": 2.1405118384924858e-06, |
|
"loss": 0.1942, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 7.252281616688396, |
|
"grad_norm": 7.847745418548584, |
|
"learning_rate": 2.128856723530033e-06, |
|
"loss": 0.2046, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.260430247718383, |
|
"grad_norm": 8.953947067260742, |
|
"learning_rate": 2.1172248380003853e-06, |
|
"loss": 0.1903, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 7.26857887874837, |
|
"grad_norm": 6.825370788574219, |
|
"learning_rate": 2.105616276013133e-06, |
|
"loss": 0.178, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 7.2767275097783575, |
|
"grad_norm": 10.48969554901123, |
|
"learning_rate": 2.0940311314891574e-06, |
|
"loss": 0.1778, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 7.284876140808344, |
|
"grad_norm": 13.994695663452148, |
|
"learning_rate": 2.082469498159879e-06, |
|
"loss": 0.1673, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 7.293024771838331, |
|
"grad_norm": 17.321313858032227, |
|
"learning_rate": 2.0709314695664957e-06, |
|
"loss": 0.2043, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.301173402868318, |
|
"grad_norm": 10.52856731414795, |
|
"learning_rate": 2.0594171390592294e-06, |
|
"loss": 0.1942, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 7.309322033898305, |
|
"grad_norm": 23.261329650878906, |
|
"learning_rate": 2.047926599796568e-06, |
|
"loss": 0.1816, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 7.317470664928292, |
|
"grad_norm": 6.534886360168457, |
|
"learning_rate": 2.0364599447445126e-06, |
|
"loss": 0.1808, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 7.325619295958279, |
|
"grad_norm": 12.067914962768555, |
|
"learning_rate": 2.0250172666758267e-06, |
|
"loss": 0.187, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 7.333767926988266, |
|
"grad_norm": 11.018478393554688, |
|
"learning_rate": 2.0135986581692817e-06, |
|
"loss": 0.1865, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.341916558018253, |
|
"grad_norm": 9.79710865020752, |
|
"learning_rate": 2.002204211608913e-06, |
|
"loss": 0.1987, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 7.35006518904824, |
|
"grad_norm": 15.164643287658691, |
|
"learning_rate": 1.990834019183268e-06, |
|
"loss": 0.1973, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 7.358213820078227, |
|
"grad_norm": 22.170740127563477, |
|
"learning_rate": 1.9794881728846642e-06, |
|
"loss": 0.1702, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 7.3663624511082135, |
|
"grad_norm": 8.200043678283691, |
|
"learning_rate": 1.968166764508442e-06, |
|
"loss": 0.183, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 7.374511082138201, |
|
"grad_norm": 6.145725250244141, |
|
"learning_rate": 1.9568698856522215e-06, |
|
"loss": 0.1906, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.382659713168188, |
|
"grad_norm": 22.14548683166504, |
|
"learning_rate": 1.945597627715166e-06, |
|
"loss": 0.1947, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 7.390808344198175, |
|
"grad_norm": 10.075164794921875, |
|
"learning_rate": 1.934350081897237e-06, |
|
"loss": 0.171, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 7.398956975228161, |
|
"grad_norm": 6.933922290802002, |
|
"learning_rate": 1.923127339198459e-06, |
|
"loss": 0.1845, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 7.407105606258149, |
|
"grad_norm": 26.223041534423828, |
|
"learning_rate": 1.9119294904181847e-06, |
|
"loss": 0.1852, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 7.415254237288136, |
|
"grad_norm": 4.778967380523682, |
|
"learning_rate": 1.900756626154356e-06, |
|
"loss": 0.1958, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.423402868318123, |
|
"grad_norm": 29.773698806762695, |
|
"learning_rate": 1.889608836802776e-06, |
|
"loss": 0.1809, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 7.431551499348109, |
|
"grad_norm": 8.9940767288208, |
|
"learning_rate": 1.8784862125563734e-06, |
|
"loss": 0.1869, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 7.439700130378096, |
|
"grad_norm": 15.34753704071045, |
|
"learning_rate": 1.8673888434044756e-06, |
|
"loss": 0.1863, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 7.447848761408084, |
|
"grad_norm": 19.44320297241211, |
|
"learning_rate": 1.8563168191320823e-06, |
|
"loss": 0.1798, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 7.4559973924380705, |
|
"grad_norm": 12.468984603881836, |
|
"learning_rate": 1.8452702293191339e-06, |
|
"loss": 0.1808, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.464146023468057, |
|
"grad_norm": 8.79600715637207, |
|
"learning_rate": 1.8342491633397863e-06, |
|
"loss": 0.1823, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 7.472294654498044, |
|
"grad_norm": 15.76307487487793, |
|
"learning_rate": 1.8232537103616953e-06, |
|
"loss": 0.1959, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 7.480443285528032, |
|
"grad_norm": 9.05780029296875, |
|
"learning_rate": 1.8122839593452902e-06, |
|
"loss": 0.1797, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 7.488591916558018, |
|
"grad_norm": 11.826004981994629, |
|
"learning_rate": 1.8013399990430525e-06, |
|
"loss": 0.1639, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 7.496740547588005, |
|
"grad_norm": 20.31383514404297, |
|
"learning_rate": 1.7904219179988007e-06, |
|
"loss": 0.1916, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.504889178617992, |
|
"grad_norm": 18.240629196166992, |
|
"learning_rate": 1.7795298045469766e-06, |
|
"loss": 0.1791, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 7.5130378096479795, |
|
"grad_norm": 20.392873764038086, |
|
"learning_rate": 1.7686637468119223e-06, |
|
"loss": 0.2021, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 7.521186440677966, |
|
"grad_norm": 9.732405662536621, |
|
"learning_rate": 1.757823832707175e-06, |
|
"loss": 0.1818, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 7.529335071707953, |
|
"grad_norm": 21.23190689086914, |
|
"learning_rate": 1.7470101499347498e-06, |
|
"loss": 0.1692, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 7.53748370273794, |
|
"grad_norm": 7.4514641761779785, |
|
"learning_rate": 1.736222785984435e-06, |
|
"loss": 0.2084, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 7.5456323337679265, |
|
"grad_norm": 13.29001522064209, |
|
"learning_rate": 1.7254618281330838e-06, |
|
"loss": 0.1897, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 7.553780964797914, |
|
"grad_norm": 9.683525085449219, |
|
"learning_rate": 1.7147273634439021e-06, |
|
"loss": 0.156, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 7.561929595827901, |
|
"grad_norm": 12.022348403930664, |
|
"learning_rate": 1.7040194787657566e-06, |
|
"loss": 0.2136, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 7.570078226857888, |
|
"grad_norm": 11.087843894958496, |
|
"learning_rate": 1.6933382607324572e-06, |
|
"loss": 0.171, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 7.578226857887875, |
|
"grad_norm": 20.101045608520508, |
|
"learning_rate": 1.6826837957620662e-06, |
|
"loss": 0.2131, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 7.586375488917862, |
|
"grad_norm": 13.087589263916016, |
|
"learning_rate": 1.672056170056196e-06, |
|
"loss": 0.1791, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 7.594524119947849, |
|
"grad_norm": 9.458551406860352, |
|
"learning_rate": 1.6614554695993085e-06, |
|
"loss": 0.1746, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 7.602672750977836, |
|
"grad_norm": 12.884553909301758, |
|
"learning_rate": 1.6508817801580268e-06, |
|
"loss": 0.1673, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 7.610821382007822, |
|
"grad_norm": 10.40186595916748, |
|
"learning_rate": 1.6403351872804347e-06, |
|
"loss": 0.1659, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 7.61897001303781, |
|
"grad_norm": 12.832286834716797, |
|
"learning_rate": 1.6298157762953897e-06, |
|
"loss": 0.1693, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 7.627118644067797, |
|
"grad_norm": 13.989652633666992, |
|
"learning_rate": 1.6193236323118283e-06, |
|
"loss": 0.203, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 7.6352672750977835, |
|
"grad_norm": 13.184144020080566, |
|
"learning_rate": 1.6088588402180783e-06, |
|
"loss": 0.1983, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 7.64341590612777, |
|
"grad_norm": 50.71080017089844, |
|
"learning_rate": 1.5984214846811735e-06, |
|
"loss": 0.1837, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 7.651564537157758, |
|
"grad_norm": 8.608222007751465, |
|
"learning_rate": 1.588011650146169e-06, |
|
"loss": 0.1786, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 7.659713168187745, |
|
"grad_norm": 9.973206520080566, |
|
"learning_rate": 1.5776294208354537e-06, |
|
"loss": 0.1873, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.667861799217731, |
|
"grad_norm": 3.6279351711273193, |
|
"learning_rate": 1.5672748807480736e-06, |
|
"loss": 0.1754, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 7.676010430247718, |
|
"grad_norm": 13.710479736328125, |
|
"learning_rate": 1.5569481136590554e-06, |
|
"loss": 0.1973, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 7.684159061277706, |
|
"grad_norm": 20.849790573120117, |
|
"learning_rate": 1.5466492031187174e-06, |
|
"loss": 0.1953, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 16.05866241455078, |
|
"learning_rate": 1.5363782324520033e-06, |
|
"loss": 0.1834, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 7.700456323337679, |
|
"grad_norm": 10.594083786010742, |
|
"learning_rate": 1.5261352847578044e-06, |
|
"loss": 0.196, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 7.708604954367666, |
|
"grad_norm": 14.200790405273438, |
|
"learning_rate": 1.5159204429082874e-06, |
|
"loss": 0.1793, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 7.716753585397653, |
|
"grad_norm": 3.8873071670532227, |
|
"learning_rate": 1.5057337895482255e-06, |
|
"loss": 0.1865, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 7.72490221642764, |
|
"grad_norm": 13.96704387664795, |
|
"learning_rate": 1.4955754070943268e-06, |
|
"loss": 0.1653, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 7.733050847457627, |
|
"grad_norm": 23.539247512817383, |
|
"learning_rate": 1.48544537773457e-06, |
|
"loss": 0.1713, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 7.741199478487614, |
|
"grad_norm": 14.154293060302734, |
|
"learning_rate": 1.4753437834275397e-06, |
|
"loss": 0.1894, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 7.749348109517601, |
|
"grad_norm": 8.608110427856445, |
|
"learning_rate": 1.4652707059017607e-06, |
|
"loss": 0.1887, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 7.757496740547588, |
|
"grad_norm": 9.453892707824707, |
|
"learning_rate": 1.4552262266550382e-06, |
|
"loss": 0.1769, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 7.765645371577575, |
|
"grad_norm": 12.239083290100098, |
|
"learning_rate": 1.4452104269538009e-06, |
|
"loss": 0.1699, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 7.773794002607562, |
|
"grad_norm": 10.937909126281738, |
|
"learning_rate": 1.4352233878324384e-06, |
|
"loss": 0.1667, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 7.781942633637549, |
|
"grad_norm": 21.223346710205078, |
|
"learning_rate": 1.4252651900926496e-06, |
|
"loss": 0.182, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 7.790091264667536, |
|
"grad_norm": 7.070313453674316, |
|
"learning_rate": 1.4153359143027879e-06, |
|
"loss": 0.1896, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 7.798239895697523, |
|
"grad_norm": 14.346339225769043, |
|
"learning_rate": 1.4054356407972086e-06, |
|
"loss": 0.1743, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 7.80638852672751, |
|
"grad_norm": 15.966556549072266, |
|
"learning_rate": 1.3955644496756199e-06, |
|
"loss": 0.1902, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 7.8145371577574965, |
|
"grad_norm": 16.198644638061523, |
|
"learning_rate": 1.3857224208024345e-06, |
|
"loss": 0.1945, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 7.822685788787483, |
|
"grad_norm": 8.803377151489258, |
|
"learning_rate": 1.3759096338061222e-06, |
|
"loss": 0.1793, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 7.830834419817471, |
|
"grad_norm": 19.771717071533203, |
|
"learning_rate": 1.3661261680785693e-06, |
|
"loss": 0.1809, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 7.838983050847458, |
|
"grad_norm": 11.52552318572998, |
|
"learning_rate": 1.3563721027744309e-06, |
|
"loss": 0.1887, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 7.847131681877444, |
|
"grad_norm": 17.998104095458984, |
|
"learning_rate": 1.3466475168104953e-06, |
|
"loss": 0.2107, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 7.855280312907432, |
|
"grad_norm": 6.081639289855957, |
|
"learning_rate": 1.3369524888650437e-06, |
|
"loss": 0.1849, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 7.863428943937419, |
|
"grad_norm": 6.099484443664551, |
|
"learning_rate": 1.3272870973772118e-06, |
|
"loss": 0.1847, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 7.8715775749674055, |
|
"grad_norm": 19.433902740478516, |
|
"learning_rate": 1.3176514205463586e-06, |
|
"loss": 0.2, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 7.879726205997392, |
|
"grad_norm": 6.365217208862305, |
|
"learning_rate": 1.3080455363314309e-06, |
|
"loss": 0.2062, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 7.887874837027379, |
|
"grad_norm": 9.893994331359863, |
|
"learning_rate": 1.2984695224503351e-06, |
|
"loss": 0.1721, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 7.896023468057367, |
|
"grad_norm": 22.75550079345703, |
|
"learning_rate": 1.2889234563793058e-06, |
|
"loss": 0.204, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 7.904172099087353, |
|
"grad_norm": 2.8168067932128906, |
|
"learning_rate": 1.279407415352279e-06, |
|
"loss": 0.1963, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 7.91232073011734, |
|
"grad_norm": 19.346757888793945, |
|
"learning_rate": 1.2699214763602741e-06, |
|
"loss": 0.1845, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 7.920469361147327, |
|
"grad_norm": 13.861513137817383, |
|
"learning_rate": 1.2604657161507566e-06, |
|
"loss": 0.1934, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 7.9286179921773146, |
|
"grad_norm": 12.996659278869629, |
|
"learning_rate": 1.2510402112270326e-06, |
|
"loss": 0.1808, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 7.936766623207301, |
|
"grad_norm": 16.255569458007812, |
|
"learning_rate": 1.2416450378476196e-06, |
|
"loss": 0.1919, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 7.944915254237288, |
|
"grad_norm": 9.47265625, |
|
"learning_rate": 1.2322802720256355e-06, |
|
"loss": 0.1887, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 7.953063885267275, |
|
"grad_norm": 13.006512641906738, |
|
"learning_rate": 1.2229459895281787e-06, |
|
"loss": 0.1927, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 7.9612125162972625, |
|
"grad_norm": 13.849684715270996, |
|
"learning_rate": 1.213642265875718e-06, |
|
"loss": 0.1906, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 7.969361147327249, |
|
"grad_norm": 25.117225646972656, |
|
"learning_rate": 1.2043691763414844e-06, |
|
"loss": 0.1659, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 7.977509778357236, |
|
"grad_norm": 9.633444786071777, |
|
"learning_rate": 1.1951267959508562e-06, |
|
"loss": 0.1923, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 7.985658409387223, |
|
"grad_norm": 9.853534698486328, |
|
"learning_rate": 1.185915199480751e-06, |
|
"loss": 0.1969, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 7.9938070404172095, |
|
"grad_norm": 12.424792289733887, |
|
"learning_rate": 1.1767344614590303e-06, |
|
"loss": 0.1772, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8200803212851405, |
|
"eval_loss": 0.7215536236763, |
|
"eval_runtime": 7.0555, |
|
"eval_samples_per_second": 352.917, |
|
"eval_steps_per_second": 44.221, |
|
"step": 98176 |
|
}, |
|
{ |
|
"epoch": 8.001955671447197, |
|
"grad_norm": 13.437636375427246, |
|
"learning_rate": 1.167584656163887e-06, |
|
"loss": 0.1774, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 8.010104302477183, |
|
"grad_norm": 14.577449798583984, |
|
"learning_rate": 1.1584658576232482e-06, |
|
"loss": 0.1693, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 8.01825293350717, |
|
"grad_norm": 18.45952606201172, |
|
"learning_rate": 1.1493781396141795e-06, |
|
"loss": 0.17, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 8.026401564537158, |
|
"grad_norm": 18.29120635986328, |
|
"learning_rate": 1.1403215756622804e-06, |
|
"loss": 0.178, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 8.034550195567144, |
|
"grad_norm": 11.486896514892578, |
|
"learning_rate": 1.1312962390410954e-06, |
|
"loss": 0.1815, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 8.042698826597132, |
|
"grad_norm": 19.90141487121582, |
|
"learning_rate": 1.1223022027715197e-06, |
|
"loss": 0.1682, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 8.05084745762712, |
|
"grad_norm": 11.248079299926758, |
|
"learning_rate": 1.1133395396212048e-06, |
|
"loss": 0.169, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 8.058996088657105, |
|
"grad_norm": 7.839399814605713, |
|
"learning_rate": 1.104408322103978e-06, |
|
"loss": 0.1684, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 8.067144719687093, |
|
"grad_norm": 8.082372665405273, |
|
"learning_rate": 1.095508622479247e-06, |
|
"loss": 0.1769, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 8.075293350717079, |
|
"grad_norm": 9.952238082885742, |
|
"learning_rate": 1.0866405127514234e-06, |
|
"loss": 0.1866, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 8.083441981747066, |
|
"grad_norm": 5.250309467315674, |
|
"learning_rate": 1.0778040646693316e-06, |
|
"loss": 0.162, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 8.091590612777054, |
|
"grad_norm": 9.988779067993164, |
|
"learning_rate": 1.0689993497256336e-06, |
|
"loss": 0.177, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 8.09973924380704, |
|
"grad_norm": 8.978513717651367, |
|
"learning_rate": 1.0602264391562506e-06, |
|
"loss": 0.151, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 8.107887874837028, |
|
"grad_norm": 23.60556983947754, |
|
"learning_rate": 1.051485403939786e-06, |
|
"loss": 0.1734, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 8.116036505867015, |
|
"grad_norm": 10.938061714172363, |
|
"learning_rate": 1.0427763147969467e-06, |
|
"loss": 0.1733, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 8.124185136897001, |
|
"grad_norm": 5.527510643005371, |
|
"learning_rate": 1.0340992421899776e-06, |
|
"loss": 0.1565, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 8.132333767926989, |
|
"grad_norm": 9.493518829345703, |
|
"learning_rate": 1.0254542563220922e-06, |
|
"loss": 0.181, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 8.140482398956975, |
|
"grad_norm": 7.9793548583984375, |
|
"learning_rate": 1.0168414271368953e-06, |
|
"loss": 0.1837, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 8.148631029986962, |
|
"grad_norm": 11.252303123474121, |
|
"learning_rate": 1.0082608243178276e-06, |
|
"loss": 0.1708, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.15677966101695, |
|
"grad_norm": 14.102470397949219, |
|
"learning_rate": 9.997125172875943e-07, |
|
"loss": 0.1884, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 8.164928292046936, |
|
"grad_norm": 38.51998519897461, |
|
"learning_rate": 9.91196575207608e-07, |
|
"loss": 0.184, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 8.173076923076923, |
|
"grad_norm": 7.0270466804504395, |
|
"learning_rate": 9.82713066977427e-07, |
|
"loss": 0.1489, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 8.18122555410691, |
|
"grad_norm": 14.944999694824219, |
|
"learning_rate": 9.742620612341992e-07, |
|
"loss": 0.1835, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 8.189374185136897, |
|
"grad_norm": 7.147238731384277, |
|
"learning_rate": 9.658436263521048e-07, |
|
"loss": 0.1512, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 8.197522816166884, |
|
"grad_norm": 5.465837001800537, |
|
"learning_rate": 9.574578304418063e-07, |
|
"loss": 0.1702, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 8.20567144719687, |
|
"grad_norm": 4.3965630531311035, |
|
"learning_rate": 9.491047413498933e-07, |
|
"loss": 0.1619, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 8.213820078226858, |
|
"grad_norm": 21.602157592773438, |
|
"learning_rate": 9.407844266583377e-07, |
|
"loss": 0.1726, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 8.221968709256846, |
|
"grad_norm": 16.533201217651367, |
|
"learning_rate": 9.324969536839435e-07, |
|
"loss": 0.1564, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 8.230117340286832, |
|
"grad_norm": 17.454898834228516, |
|
"learning_rate": 9.242423894778046e-07, |
|
"loss": 0.1847, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.23826597131682, |
|
"grad_norm": 17.726686477661133, |
|
"learning_rate": 9.160208008247618e-07, |
|
"loss": 0.1695, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 8.246414602346805, |
|
"grad_norm": 31.844257354736328, |
|
"learning_rate": 9.078322542428597e-07, |
|
"loss": 0.1698, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 8.254563233376793, |
|
"grad_norm": 9.689949989318848, |
|
"learning_rate": 8.99676815982814e-07, |
|
"loss": 0.153, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 8.26271186440678, |
|
"grad_norm": 13.61907958984375, |
|
"learning_rate": 8.915545520274699e-07, |
|
"loss": 0.177, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 8.270860495436766, |
|
"grad_norm": 11.14121150970459, |
|
"learning_rate": 8.834655280912718e-07, |
|
"loss": 0.1674, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.279009126466754, |
|
"grad_norm": 12.197967529296875, |
|
"learning_rate": 8.754098096197312e-07, |
|
"loss": 0.1787, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 8.28715775749674, |
|
"grad_norm": 12.565035820007324, |
|
"learning_rate": 8.67387461788895e-07, |
|
"loss": 0.1679, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 8.295306388526727, |
|
"grad_norm": 21.256549835205078, |
|
"learning_rate": 8.593985495048201e-07, |
|
"loss": 0.1695, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 8.303455019556715, |
|
"grad_norm": 4.485990524291992, |
|
"learning_rate": 8.514431374030496e-07, |
|
"loss": 0.1654, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 8.3116036505867, |
|
"grad_norm": 13.213761329650879, |
|
"learning_rate": 8.435212898480855e-07, |
|
"loss": 0.1626, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.319752281616688, |
|
"grad_norm": 19.035646438598633, |
|
"learning_rate": 8.356330709328725e-07, |
|
"loss": 0.1611, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 8.327900912646676, |
|
"grad_norm": 21.1912841796875, |
|
"learning_rate": 8.277785444782765e-07, |
|
"loss": 0.1607, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 8.336049543676662, |
|
"grad_norm": 19.324132919311523, |
|
"learning_rate": 8.199577740325703e-07, |
|
"loss": 0.1741, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 8.34419817470665, |
|
"grad_norm": 8.325228691101074, |
|
"learning_rate": 8.121708228709174e-07, |
|
"loss": 0.1808, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 8.352346805736635, |
|
"grad_norm": 11.028812408447266, |
|
"learning_rate": 8.044177539948617e-07, |
|
"loss": 0.169, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.360495436766623, |
|
"grad_norm": 20.587303161621094, |
|
"learning_rate": 7.966986301318158e-07, |
|
"loss": 0.1569, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 8.36864406779661, |
|
"grad_norm": 8.49282455444336, |
|
"learning_rate": 7.890135137345589e-07, |
|
"loss": 0.1584, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 8.376792698826597, |
|
"grad_norm": 14.866241455078125, |
|
"learning_rate": 7.813624669807246e-07, |
|
"loss": 0.1608, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 8.384941329856584, |
|
"grad_norm": 3.761150598526001, |
|
"learning_rate": 7.73745551772298e-07, |
|
"loss": 0.1533, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 8.393089960886572, |
|
"grad_norm": 17.36056900024414, |
|
"learning_rate": 7.66162829735122e-07, |
|
"loss": 0.1723, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.401238591916558, |
|
"grad_norm": 14.63774585723877, |
|
"learning_rate": 7.586143622183922e-07, |
|
"loss": 0.1769, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 8.409387222946545, |
|
"grad_norm": 15.453008651733398, |
|
"learning_rate": 7.511002102941639e-07, |
|
"loss": 0.1845, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 8.417535853976531, |
|
"grad_norm": 23.958969116210938, |
|
"learning_rate": 7.436204347568548e-07, |
|
"loss": 0.1829, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 8.425684485006519, |
|
"grad_norm": 22.29449462890625, |
|
"learning_rate": 7.361750961227587e-07, |
|
"loss": 0.1722, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 8.433833116036507, |
|
"grad_norm": 12.636420249938965, |
|
"learning_rate": 7.287642546295487e-07, |
|
"loss": 0.1614, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.441981747066492, |
|
"grad_norm": 12.580671310424805, |
|
"learning_rate": 7.213879702357951e-07, |
|
"loss": 0.1713, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 8.45013037809648, |
|
"grad_norm": 9.213543891906738, |
|
"learning_rate": 7.140463026204764e-07, |
|
"loss": 0.1619, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 8.458279009126466, |
|
"grad_norm": 15.926830291748047, |
|
"learning_rate": 7.067393111825016e-07, |
|
"loss": 0.1748, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 8.466427640156454, |
|
"grad_norm": 22.008920669555664, |
|
"learning_rate": 6.994670550402249e-07, |
|
"loss": 0.1926, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 8.474576271186441, |
|
"grad_norm": 4.002703666687012, |
|
"learning_rate": 6.922295930309691e-07, |
|
"loss": 0.1613, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 8.482724902216427, |
|
"grad_norm": 10.932751655578613, |
|
"learning_rate": 6.850269837105522e-07, |
|
"loss": 0.1635, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 8.490873533246415, |
|
"grad_norm": 20.70867347717285, |
|
"learning_rate": 6.778592853528077e-07, |
|
"loss": 0.1708, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 8.499022164276402, |
|
"grad_norm": 9.567403793334961, |
|
"learning_rate": 6.707265559491188e-07, |
|
"loss": 0.1814, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 8.507170795306388, |
|
"grad_norm": 24.9285888671875, |
|
"learning_rate": 6.63628853207946e-07, |
|
"loss": 0.1746, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 8.515319426336376, |
|
"grad_norm": 12.97628402709961, |
|
"learning_rate": 6.565662345543595e-07, |
|
"loss": 0.17, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 8.523468057366362, |
|
"grad_norm": 5.221209526062012, |
|
"learning_rate": 6.495387571295785e-07, |
|
"loss": 0.1726, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 8.53161668839635, |
|
"grad_norm": 12.438835144042969, |
|
"learning_rate": 6.42546477790506e-07, |
|
"loss": 0.1703, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 8.539765319426337, |
|
"grad_norm": 9.98957633972168, |
|
"learning_rate": 6.355894531092705e-07, |
|
"loss": 0.1883, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 8.547913950456323, |
|
"grad_norm": 8.844900131225586, |
|
"learning_rate": 6.286677393727653e-07, |
|
"loss": 0.1623, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 8.55606258148631, |
|
"grad_norm": 5.921658039093018, |
|
"learning_rate": 6.217813925821958e-07, |
|
"loss": 0.16, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 8.564211212516298, |
|
"grad_norm": 12.132319450378418, |
|
"learning_rate": 6.149304684526253e-07, |
|
"loss": 0.1843, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 8.572359843546284, |
|
"grad_norm": 13.31769847869873, |
|
"learning_rate": 6.081150224125254e-07, |
|
"loss": 0.1586, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 8.580508474576272, |
|
"grad_norm": 21.240800857543945, |
|
"learning_rate": 6.013351096033254e-07, |
|
"loss": 0.1783, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 8.588657105606258, |
|
"grad_norm": 9.178833961486816, |
|
"learning_rate": 5.945907848789667e-07, |
|
"loss": 0.1847, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 8.596805736636245, |
|
"grad_norm": 7.893414497375488, |
|
"learning_rate": 5.878821028054637e-07, |
|
"loss": 0.1474, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 8.604954367666233, |
|
"grad_norm": 17.363147735595703, |
|
"learning_rate": 5.812091176604551e-07, |
|
"loss": 0.1567, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 8.613102998696219, |
|
"grad_norm": 7.612610340118408, |
|
"learning_rate": 5.745718834327679e-07, |
|
"loss": 0.158, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 8.621251629726206, |
|
"grad_norm": 12.395828247070312, |
|
"learning_rate": 5.679704538219827e-07, |
|
"loss": 0.1817, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 8.629400260756192, |
|
"grad_norm": 2.951467514038086, |
|
"learning_rate": 5.614048822379947e-07, |
|
"loss": 0.1731, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 8.63754889178618, |
|
"grad_norm": 14.023295402526855, |
|
"learning_rate": 5.548752218005882e-07, |
|
"loss": 0.1638, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 8.645697522816167, |
|
"grad_norm": 21.505937576293945, |
|
"learning_rate": 5.483815253389957e-07, |
|
"loss": 0.1529, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 8.653846153846153, |
|
"grad_norm": 8.31225299835205, |
|
"learning_rate": 5.41923845391486e-07, |
|
"loss": 0.1563, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 8.661994784876141, |
|
"grad_norm": 9.446884155273438, |
|
"learning_rate": 5.355022342049249e-07, |
|
"loss": 0.1622, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 8.670143415906129, |
|
"grad_norm": 21.06761360168457, |
|
"learning_rate": 5.291167437343608e-07, |
|
"loss": 0.1602, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 8.678292046936114, |
|
"grad_norm": 13.025223731994629, |
|
"learning_rate": 5.227674256426002e-07, |
|
"loss": 0.1611, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 8.686440677966102, |
|
"grad_norm": 6.65778923034668, |
|
"learning_rate": 5.164543312997922e-07, |
|
"loss": 0.1677, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 8.694589308996088, |
|
"grad_norm": 25.8751220703125, |
|
"learning_rate": 5.101775117830121e-07, |
|
"loss": 0.1639, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 8.702737940026076, |
|
"grad_norm": 18.437524795532227, |
|
"learning_rate": 5.039370178758485e-07, |
|
"loss": 0.1651, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 8.710886571056063, |
|
"grad_norm": 31.746627807617188, |
|
"learning_rate": 4.977329000679903e-07, |
|
"loss": 0.1758, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 8.719035202086049, |
|
"grad_norm": 12.55679988861084, |
|
"learning_rate": 4.915652085548217e-07, |
|
"loss": 0.1571, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 8.727183833116037, |
|
"grad_norm": 1.4074722528457642, |
|
"learning_rate": 4.854339932370134e-07, |
|
"loss": 0.1526, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 8.735332464146023, |
|
"grad_norm": 5.811018466949463, |
|
"learning_rate": 4.793393037201194e-07, |
|
"loss": 0.1745, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 8.74348109517601, |
|
"grad_norm": 2.8639020919799805, |
|
"learning_rate": 4.7328118931417753e-07, |
|
"loss": 0.1695, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 8.751629726205998, |
|
"grad_norm": 20.180130004882812, |
|
"learning_rate": 4.672596990333073e-07, |
|
"loss": 0.1758, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 8.759778357235984, |
|
"grad_norm": 19.003700256347656, |
|
"learning_rate": 4.6127488159531495e-07, |
|
"loss": 0.1669, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 8.767926988265971, |
|
"grad_norm": 12.393278121948242, |
|
"learning_rate": 4.553267854213017e-07, |
|
"loss": 0.1827, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 8.776075619295959, |
|
"grad_norm": 23.79950714111328, |
|
"learning_rate": 4.494154586352667e-07, |
|
"loss": 0.1571, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 8.784224250325945, |
|
"grad_norm": 21.107633590698242, |
|
"learning_rate": 4.435409490637227e-07, |
|
"loss": 0.1744, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 8.792372881355933, |
|
"grad_norm": 15.573356628417969, |
|
"learning_rate": 4.3770330423530626e-07, |
|
"loss": 0.1675, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 8.800521512385918, |
|
"grad_norm": 14.63633918762207, |
|
"learning_rate": 4.3190257138039313e-07, |
|
"loss": 0.1667, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 8.808670143415906, |
|
"grad_norm": 15.823701858520508, |
|
"learning_rate": 4.2613879743071907e-07, |
|
"loss": 0.164, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 8.816818774445894, |
|
"grad_norm": 7.163984775543213, |
|
"learning_rate": 4.204120290189956e-07, |
|
"loss": 0.1648, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 8.82496740547588, |
|
"grad_norm": 10.87267780303955, |
|
"learning_rate": 4.147223124785366e-07, |
|
"loss": 0.1767, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 8.833116036505867, |
|
"grad_norm": 13.024577140808105, |
|
"learning_rate": 4.0906969384288396e-07, |
|
"loss": 0.1561, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 8.841264667535853, |
|
"grad_norm": 15.831514358520508, |
|
"learning_rate": 4.034542188454282e-07, |
|
"loss": 0.2002, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 8.84941329856584, |
|
"grad_norm": 8.199058532714844, |
|
"learning_rate": 3.9787593291904793e-07, |
|
"loss": 0.1823, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 8.857561929595828, |
|
"grad_norm": 14.69583511352539, |
|
"learning_rate": 3.9233488119573506e-07, |
|
"loss": 0.1779, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 8.865710560625814, |
|
"grad_norm": 12.765257835388184, |
|
"learning_rate": 3.868311085062337e-07, |
|
"loss": 0.1626, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 8.873859191655802, |
|
"grad_norm": 31.990026473999023, |
|
"learning_rate": 3.8136465937967657e-07, |
|
"loss": 0.1856, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 8.88200782268579, |
|
"grad_norm": 24.627126693725586, |
|
"learning_rate": 3.7593557804322167e-07, |
|
"loss": 0.1518, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 8.890156453715775, |
|
"grad_norm": 32.763092041015625, |
|
"learning_rate": 3.705439084217016e-07, |
|
"loss": 0.1526, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 8.898305084745763, |
|
"grad_norm": 14.418821334838867, |
|
"learning_rate": 3.6518969413725905e-07, |
|
"loss": 0.1602, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 8.906453715775749, |
|
"grad_norm": 9.382340431213379, |
|
"learning_rate": 3.5987297850900217e-07, |
|
"loss": 0.1742, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 8.914602346805736, |
|
"grad_norm": 22.482595443725586, |
|
"learning_rate": 3.5459380455264594e-07, |
|
"loss": 0.1737, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 8.922750977835724, |
|
"grad_norm": 18.5339412689209, |
|
"learning_rate": 3.4935221498017316e-07, |
|
"loss": 0.1581, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 8.93089960886571, |
|
"grad_norm": 21.965267181396484, |
|
"learning_rate": 3.4414825219948153e-07, |
|
"loss": 0.1597, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 8.939048239895698, |
|
"grad_norm": 13.353527069091797, |
|
"learning_rate": 3.3898195831404354e-07, |
|
"loss": 0.1747, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 8.947196870925685, |
|
"grad_norm": 7.977973461151123, |
|
"learning_rate": 3.3385337512256863e-07, |
|
"loss": 0.1562, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 8.955345501955671, |
|
"grad_norm": 9.263310432434082, |
|
"learning_rate": 3.287625441186576e-07, |
|
"loss": 0.1772, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 8.963494132985659, |
|
"grad_norm": 13.787714958190918, |
|
"learning_rate": 3.2370950649047383e-07, |
|
"loss": 0.1976, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 8.971642764015645, |
|
"grad_norm": 20.066761016845703, |
|
"learning_rate": 3.1869430312040816e-07, |
|
"loss": 0.1596, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 8.979791395045632, |
|
"grad_norm": 20.64689826965332, |
|
"learning_rate": 3.137169745847435e-07, |
|
"loss": 0.1704, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 8.98794002607562, |
|
"grad_norm": 46.617713928222656, |
|
"learning_rate": 3.08777561153335e-07, |
|
"loss": 0.1889, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 8.996088657105606, |
|
"grad_norm": 14.401327133178711, |
|
"learning_rate": 3.0387610278927725e-07, |
|
"loss": 0.1702, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8196787148594378, |
|
"eval_loss": 0.7465346455574036, |
|
"eval_runtime": 7.168, |
|
"eval_samples_per_second": 347.378, |
|
"eval_steps_per_second": 43.527, |
|
"step": 110448 |
|
}, |
|
{ |
|
"epoch": 9.004237288135593, |
|
"grad_norm": 15.593995094299316, |
|
"learning_rate": 2.990126391485848e-07, |
|
"loss": 0.1722, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 9.01238591916558, |
|
"grad_norm": 4.0746636390686035, |
|
"learning_rate": 2.941872095798698e-07, |
|
"loss": 0.1346, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 9.020534550195567, |
|
"grad_norm": 6.78621768951416, |
|
"learning_rate": 2.893998531240222e-07, |
|
"loss": 0.1819, |
|
"step": 110700 |
|
}, |
|
{ |
|
"epoch": 9.028683181225555, |
|
"grad_norm": 16.810945510864258, |
|
"learning_rate": 2.8465060851389725e-07, |
|
"loss": 0.152, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 9.03683181225554, |
|
"grad_norm": 2.5170655250549316, |
|
"learning_rate": 2.7993951417400025e-07, |
|
"loss": 0.1737, |
|
"step": 110900 |
|
}, |
|
{ |
|
"epoch": 9.044980443285528, |
|
"grad_norm": 5.630674362182617, |
|
"learning_rate": 2.752666082201727e-07, |
|
"loss": 0.1703, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 9.053129074315516, |
|
"grad_norm": 29.249120712280273, |
|
"learning_rate": 2.7063192845929286e-07, |
|
"loss": 0.1648, |
|
"step": 111100 |
|
}, |
|
{ |
|
"epoch": 9.061277705345502, |
|
"grad_norm": 7.27542781829834, |
|
"learning_rate": 2.660355123889585e-07, |
|
"loss": 0.1483, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 9.06942633637549, |
|
"grad_norm": 27.242809295654297, |
|
"learning_rate": 2.614773971971929e-07, |
|
"loss": 0.1693, |
|
"step": 111300 |
|
}, |
|
{ |
|
"epoch": 9.077574967405475, |
|
"grad_norm": 15.899724006652832, |
|
"learning_rate": 2.5695761976213704e-07, |
|
"loss": 0.1562, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 9.085723598435463, |
|
"grad_norm": 20.975248336791992, |
|
"learning_rate": 2.5247621665175636e-07, |
|
"loss": 0.1558, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 9.09387222946545, |
|
"grad_norm": 17.303001403808594, |
|
"learning_rate": 2.4803322412354227e-07, |
|
"loss": 0.1594, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 9.102020860495436, |
|
"grad_norm": 14.3364839553833, |
|
"learning_rate": 2.436286781242192e-07, |
|
"loss": 0.1558, |
|
"step": 111700 |
|
}, |
|
{ |
|
"epoch": 9.110169491525424, |
|
"grad_norm": 18.47357940673828, |
|
"learning_rate": 2.3926261428945386e-07, |
|
"loss": 0.1713, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 9.118318122555412, |
|
"grad_norm": 2.021436929702759, |
|
"learning_rate": 2.3493506794356745e-07, |
|
"loss": 0.1577, |
|
"step": 111900 |
|
}, |
|
{ |
|
"epoch": 9.126466753585397, |
|
"grad_norm": 4.512004852294922, |
|
"learning_rate": 2.3064607409924888e-07, |
|
"loss": 0.1552, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 9.134615384615385, |
|
"grad_norm": 21.13969612121582, |
|
"learning_rate": 2.2639566745727203e-07, |
|
"loss": 0.1504, |
|
"step": 112100 |
|
}, |
|
{ |
|
"epoch": 9.142764015645371, |
|
"grad_norm": 17.030675888061523, |
|
"learning_rate": 2.2218388240621558e-07, |
|
"loss": 0.1785, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 9.150912646675359, |
|
"grad_norm": 11.586610794067383, |
|
"learning_rate": 2.1801075302218423e-07, |
|
"loss": 0.174, |
|
"step": 112300 |
|
}, |
|
{ |
|
"epoch": 9.159061277705346, |
|
"grad_norm": 19.795167922973633, |
|
"learning_rate": 2.1387631306853174e-07, |
|
"loss": 0.1672, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 9.167209908735332, |
|
"grad_norm": 23.909713745117188, |
|
"learning_rate": 2.0978059599559065e-07, |
|
"loss": 0.1684, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.17535853976532, |
|
"grad_norm": 5.545074939727783, |
|
"learning_rate": 2.057236349403985e-07, |
|
"loss": 0.165, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 9.183507170795306, |
|
"grad_norm": 12.588091850280762, |
|
"learning_rate": 2.0170546272643256e-07, |
|
"loss": 0.167, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 9.191655801825293, |
|
"grad_norm": 12.73204517364502, |
|
"learning_rate": 1.9772611186334168e-07, |
|
"loss": 0.1535, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 9.19980443285528, |
|
"grad_norm": 11.712594985961914, |
|
"learning_rate": 1.9378561454668598e-07, |
|
"loss": 0.1629, |
|
"step": 112900 |
|
}, |
|
{ |
|
"epoch": 9.207953063885267, |
|
"grad_norm": 6.922073841094971, |
|
"learning_rate": 1.8988400265767316e-07, |
|
"loss": 0.1544, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 9.216101694915254, |
|
"grad_norm": 14.258295059204102, |
|
"learning_rate": 1.8602130776290362e-07, |
|
"loss": 0.1575, |
|
"step": 113100 |
|
}, |
|
{ |
|
"epoch": 9.224250325945242, |
|
"grad_norm": 20.113460540771484, |
|
"learning_rate": 1.8219756111411357e-07, |
|
"loss": 0.151, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 9.232398956975228, |
|
"grad_norm": 9.496116638183594, |
|
"learning_rate": 1.784127936479213e-07, |
|
"loss": 0.1791, |
|
"step": 113300 |
|
}, |
|
{ |
|
"epoch": 9.240547588005215, |
|
"grad_norm": 7.643208026885986, |
|
"learning_rate": 1.7466703598557898e-07, |
|
"loss": 0.1752, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 9.248696219035201, |
|
"grad_norm": 21.511184692382812, |
|
"learning_rate": 1.709603184327241e-07, |
|
"loss": 0.1538, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.256844850065189, |
|
"grad_norm": 18.147607803344727, |
|
"learning_rate": 1.6729267097913338e-07, |
|
"loss": 0.1606, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 9.264993481095177, |
|
"grad_norm": 13.48155689239502, |
|
"learning_rate": 1.6366412329848035e-07, |
|
"loss": 0.1661, |
|
"step": 113700 |
|
}, |
|
{ |
|
"epoch": 9.273142112125162, |
|
"grad_norm": 21.713895797729492, |
|
"learning_rate": 1.6007470474809772e-07, |
|
"loss": 0.157, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 9.28129074315515, |
|
"grad_norm": 11.30298137664795, |
|
"learning_rate": 1.565244443687347e-07, |
|
"loss": 0.1802, |
|
"step": 113900 |
|
}, |
|
{ |
|
"epoch": 9.289439374185136, |
|
"grad_norm": 15.809433937072754, |
|
"learning_rate": 1.5301337088432787e-07, |
|
"loss": 0.1723, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.297588005215124, |
|
"grad_norm": 8.747072219848633, |
|
"learning_rate": 1.4954151270176686e-07, |
|
"loss": 0.1616, |
|
"step": 114100 |
|
}, |
|
{ |
|
"epoch": 9.305736636245111, |
|
"grad_norm": 1.6549293994903564, |
|
"learning_rate": 1.4610889791066008e-07, |
|
"loss": 0.1732, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 9.313885267275097, |
|
"grad_norm": 13.10067367553711, |
|
"learning_rate": 1.4271555428311323e-07, |
|
"loss": 0.1618, |
|
"step": 114300 |
|
}, |
|
{ |
|
"epoch": 9.322033898305085, |
|
"grad_norm": 13.006690979003906, |
|
"learning_rate": 1.39361509273504e-07, |
|
"loss": 0.1806, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 9.330182529335072, |
|
"grad_norm": 23.973905563354492, |
|
"learning_rate": 1.3604679001825605e-07, |
|
"loss": 0.1678, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 9.338331160365058, |
|
"grad_norm": 10.249641418457031, |
|
"learning_rate": 1.3277142333562253e-07, |
|
"loss": 0.1646, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 9.346479791395046, |
|
"grad_norm": 30.132413864135742, |
|
"learning_rate": 1.2953543572546968e-07, |
|
"loss": 0.1635, |
|
"step": 114700 |
|
}, |
|
{ |
|
"epoch": 9.354628422425032, |
|
"grad_norm": 13.259139060974121, |
|
"learning_rate": 1.2633885336906014e-07, |
|
"loss": 0.172, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 9.36277705345502, |
|
"grad_norm": 19.1724853515625, |
|
"learning_rate": 1.2318170212884285e-07, |
|
"loss": 0.1633, |
|
"step": 114900 |
|
}, |
|
{ |
|
"epoch": 9.370925684485007, |
|
"grad_norm": 14.311450004577637, |
|
"learning_rate": 1.2006400754824177e-07, |
|
"loss": 0.1747, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 9.379074315514993, |
|
"grad_norm": 8.39560317993164, |
|
"learning_rate": 1.1698579485145134e-07, |
|
"loss": 0.1441, |
|
"step": 115100 |
|
}, |
|
{ |
|
"epoch": 9.38722294654498, |
|
"grad_norm": 10.600957870483398, |
|
"learning_rate": 1.1394708894323314e-07, |
|
"loss": 0.1923, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 9.395371577574968, |
|
"grad_norm": 9.45894718170166, |
|
"learning_rate": 1.1094791440871e-07, |
|
"loss": 0.1476, |
|
"step": 115300 |
|
}, |
|
{ |
|
"epoch": 9.403520208604954, |
|
"grad_norm": 6.497547149658203, |
|
"learning_rate": 1.079882955131728e-07, |
|
"loss": 0.1621, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 9.411668839634942, |
|
"grad_norm": 5.700404644012451, |
|
"learning_rate": 1.0506825620187954e-07, |
|
"loss": 0.1569, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 9.419817470664928, |
|
"grad_norm": 5.055960655212402, |
|
"learning_rate": 1.0218782009986494e-07, |
|
"loss": 0.1439, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 9.427966101694915, |
|
"grad_norm": 0.8036000728607178, |
|
"learning_rate": 9.93470105117461e-08, |
|
"loss": 0.163, |
|
"step": 115700 |
|
}, |
|
{ |
|
"epoch": 9.436114732724903, |
|
"grad_norm": 21.1984920501709, |
|
"learning_rate": 9.654585042153663e-08, |
|
"loss": 0.153, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 9.444263363754889, |
|
"grad_norm": 3.3010308742523193, |
|
"learning_rate": 9.378436249245892e-08, |
|
"loss": 0.1584, |
|
"step": 115900 |
|
}, |
|
{ |
|
"epoch": 9.452411994784876, |
|
"grad_norm": 9.636171340942383, |
|
"learning_rate": 9.106256906676159e-08, |
|
"loss": 0.1765, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 9.460560625814864, |
|
"grad_norm": 1.7043323516845703, |
|
"learning_rate": 8.838049216554123e-08, |
|
"loss": 0.1604, |
|
"step": 116100 |
|
}, |
|
{ |
|
"epoch": 9.46870925684485, |
|
"grad_norm": 9.73293399810791, |
|
"learning_rate": 8.573815348855818e-08, |
|
"loss": 0.1703, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 9.476857887874838, |
|
"grad_norm": 7.777896404266357, |
|
"learning_rate": 8.313557441406606e-08, |
|
"loss": 0.1632, |
|
"step": 116300 |
|
}, |
|
{ |
|
"epoch": 9.485006518904823, |
|
"grad_norm": 17.46415901184082, |
|
"learning_rate": 8.057277599863744e-08, |
|
"loss": 0.1536, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 9.493155149934811, |
|
"grad_norm": 10.912395477294922, |
|
"learning_rate": 7.804977897699295e-08, |
|
"loss": 0.1611, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 9.501303780964799, |
|
"grad_norm": 12.858296394348145, |
|
"learning_rate": 7.556660376183301e-08, |
|
"loss": 0.1458, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 9.509452411994785, |
|
"grad_norm": 7.577301025390625, |
|
"learning_rate": 7.312327044367463e-08, |
|
"loss": 0.1408, |
|
"step": 116700 |
|
}, |
|
{ |
|
"epoch": 9.517601043024772, |
|
"grad_norm": 13.470318794250488, |
|
"learning_rate": 7.071979879068769e-08, |
|
"loss": 0.1568, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 9.525749674054758, |
|
"grad_norm": 16.199295043945312, |
|
"learning_rate": 6.835620824853451e-08, |
|
"loss": 0.161, |
|
"step": 116900 |
|
}, |
|
{ |
|
"epoch": 9.533898305084746, |
|
"grad_norm": 15.154216766357422, |
|
"learning_rate": 6.603251794021381e-08, |
|
"loss": 0.1783, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 9.542046936114733, |
|
"grad_norm": 9.926989555358887, |
|
"learning_rate": 6.374874666590369e-08, |
|
"loss": 0.149, |
|
"step": 117100 |
|
}, |
|
{ |
|
"epoch": 9.55019556714472, |
|
"grad_norm": 14.719680786132812, |
|
"learning_rate": 6.15049129028128e-08, |
|
"loss": 0.1459, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 9.558344198174707, |
|
"grad_norm": 23.45909881591797, |
|
"learning_rate": 5.93010348050288e-08, |
|
"loss": 0.1624, |
|
"step": 117300 |
|
}, |
|
{ |
|
"epoch": 9.566492829204694, |
|
"grad_norm": 22.256080627441406, |
|
"learning_rate": 5.7137130203370194e-08, |
|
"loss": 0.1536, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 9.57464146023468, |
|
"grad_norm": 5.540316581726074, |
|
"learning_rate": 5.501321660524583e-08, |
|
"loss": 0.1541, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 9.582790091264668, |
|
"grad_norm": 3.839772939682007, |
|
"learning_rate": 5.292931119451006e-08, |
|
"loss": 0.1577, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 9.590938722294654, |
|
"grad_norm": 4.665050029754639, |
|
"learning_rate": 5.088543083132502e-08, |
|
"loss": 0.1547, |
|
"step": 117700 |
|
}, |
|
{ |
|
"epoch": 9.599087353324641, |
|
"grad_norm": 18.975759506225586, |
|
"learning_rate": 4.888159205202303e-08, |
|
"loss": 0.1652, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 9.607235984354629, |
|
"grad_norm": 13.844809532165527, |
|
"learning_rate": 4.691781106897497e-08, |
|
"loss": 0.1528, |
|
"step": 117900 |
|
}, |
|
{ |
|
"epoch": 9.615384615384615, |
|
"grad_norm": 5.203334331512451, |
|
"learning_rate": 4.499410377045765e-08, |
|
"loss": 0.1484, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 9.623533246414603, |
|
"grad_norm": 17.595108032226562, |
|
"learning_rate": 4.311048572052501e-08, |
|
"loss": 0.1547, |
|
"step": 118100 |
|
}, |
|
{ |
|
"epoch": 9.631681877444588, |
|
"grad_norm": 10.652242660522461, |
|
"learning_rate": 4.1266972158883204e-08, |
|
"loss": 0.1658, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 9.639830508474576, |
|
"grad_norm": 15.711381912231445, |
|
"learning_rate": 3.9463578000765724e-08, |
|
"loss": 0.1493, |
|
"step": 118300 |
|
}, |
|
{ |
|
"epoch": 9.647979139504564, |
|
"grad_norm": 18.064918518066406, |
|
"learning_rate": 3.7700317836814605e-08, |
|
"loss": 0.1558, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 9.65612777053455, |
|
"grad_norm": 11.699357986450195, |
|
"learning_rate": 3.5977205932962164e-08, |
|
"loss": 0.1465, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 9.664276401564537, |
|
"grad_norm": 9.775052070617676, |
|
"learning_rate": 3.429425623031335e-08, |
|
"loss": 0.1456, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 9.672425032594525, |
|
"grad_norm": 19.886598587036133, |
|
"learning_rate": 3.265148234503579e-08, |
|
"loss": 0.165, |
|
"step": 118700 |
|
}, |
|
{ |
|
"epoch": 9.68057366362451, |
|
"grad_norm": 13.31386661529541, |
|
"learning_rate": 3.104889756824825e-08, |
|
"loss": 0.1682, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 9.688722294654498, |
|
"grad_norm": 16.752405166625977, |
|
"learning_rate": 2.9486514865912364e-08, |
|
"loss": 0.1498, |
|
"step": 118900 |
|
}, |
|
{ |
|
"epoch": 9.696870925684484, |
|
"grad_norm": 12.920425415039062, |
|
"learning_rate": 2.7964346878729952e-08, |
|
"loss": 0.1573, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 9.705019556714472, |
|
"grad_norm": 5.0780110359191895, |
|
"learning_rate": 2.64824059220381e-08, |
|
"loss": 0.159, |
|
"step": 119100 |
|
}, |
|
{ |
|
"epoch": 9.71316818774446, |
|
"grad_norm": 13.475509643554688, |
|
"learning_rate": 2.504070398571201e-08, |
|
"loss": 0.1997, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 9.721316818774445, |
|
"grad_norm": 20.931211471557617, |
|
"learning_rate": 2.3639252734065644e-08, |
|
"loss": 0.1957, |
|
"step": 119300 |
|
}, |
|
{ |
|
"epoch": 9.729465449804433, |
|
"grad_norm": 20.29063606262207, |
|
"learning_rate": 2.227806350575956e-08, |
|
"loss": 0.1388, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 9.737614080834419, |
|
"grad_norm": 0.7664732336997986, |
|
"learning_rate": 2.0957147313707127e-08, |
|
"loss": 0.166, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 9.745762711864407, |
|
"grad_norm": 18.868257522583008, |
|
"learning_rate": 1.9676514844987338e-08, |
|
"loss": 0.1618, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 9.753911342894394, |
|
"grad_norm": 15.741533279418945, |
|
"learning_rate": 1.8436176460756572e-08, |
|
"loss": 0.1589, |
|
"step": 119700 |
|
}, |
|
{ |
|
"epoch": 9.76205997392438, |
|
"grad_norm": 11.955362319946289, |
|
"learning_rate": 1.723614219616754e-08, |
|
"loss": 0.168, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 9.770208604954368, |
|
"grad_norm": 26.171483993530273, |
|
"learning_rate": 1.6076421760283234e-08, |
|
"loss": 0.157, |
|
"step": 119900 |
|
}, |
|
{ |
|
"epoch": 9.778357235984355, |
|
"grad_norm": 14.887884140014648, |
|
"learning_rate": 1.4957024536003674e-08, |
|
"loss": 0.1383, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 9.786505867014341, |
|
"grad_norm": 9.518312454223633, |
|
"learning_rate": 1.3877959579985944e-08, |
|
"loss": 0.1385, |
|
"step": 120100 |
|
}, |
|
{ |
|
"epoch": 9.794654498044329, |
|
"grad_norm": 18.155826568603516, |
|
"learning_rate": 1.283923562257483e-08, |
|
"loss": 0.1623, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 9.802803129074315, |
|
"grad_norm": 17.2945613861084, |
|
"learning_rate": 1.1840861067727306e-08, |
|
"loss": 0.1551, |
|
"step": 120300 |
|
}, |
|
{ |
|
"epoch": 9.810951760104302, |
|
"grad_norm": 24.658214569091797, |
|
"learning_rate": 1.0882843992949255e-08, |
|
"loss": 0.1499, |
|
"step": 120400 |
|
}, |
|
{ |
|
"epoch": 9.81910039113429, |
|
"grad_norm": 6.880736351013184, |
|
"learning_rate": 9.9651921492272e-09, |
|
"loss": 0.1501, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 9.827249022164276, |
|
"grad_norm": 25.12505531311035, |
|
"learning_rate": 9.087912960967227e-09, |
|
"loss": 0.1571, |
|
"step": 120600 |
|
}, |
|
{ |
|
"epoch": 9.835397653194264, |
|
"grad_norm": 28.05438995361328, |
|
"learning_rate": 8.251013525932273e-09, |
|
"loss": 0.1637, |
|
"step": 120700 |
|
}, |
|
{ |
|
"epoch": 9.843546284224251, |
|
"grad_norm": 10.58689022064209, |
|
"learning_rate": 7.454500615188264e-09, |
|
"loss": 0.1509, |
|
"step": 120800 |
|
}, |
|
{ |
|
"epoch": 9.851694915254237, |
|
"grad_norm": 24.10919761657715, |
|
"learning_rate": 6.698380673048066e-09, |
|
"loss": 0.1691, |
|
"step": 120900 |
|
}, |
|
{ |
|
"epoch": 9.859843546284225, |
|
"grad_norm": 0.43672606348991394, |
|
"learning_rate": 5.982659817017067e-09, |
|
"loss": 0.1746, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 9.86799217731421, |
|
"grad_norm": 12.899723052978516, |
|
"learning_rate": 5.307343837747115e-09, |
|
"loss": 0.1497, |
|
"step": 121100 |
|
}, |
|
{ |
|
"epoch": 9.876140808344198, |
|
"grad_norm": 18.292190551757812, |
|
"learning_rate": 4.672438198987661e-09, |
|
"loss": 0.1594, |
|
"step": 121200 |
|
}, |
|
{ |
|
"epoch": 9.884289439374186, |
|
"grad_norm": 18.396923065185547, |
|
"learning_rate": 4.077948037541357e-09, |
|
"loss": 0.1574, |
|
"step": 121300 |
|
}, |
|
{ |
|
"epoch": 9.892438070404172, |
|
"grad_norm": 22.605993270874023, |
|
"learning_rate": 3.5238781632240813e-09, |
|
"loss": 0.1642, |
|
"step": 121400 |
|
}, |
|
{ |
|
"epoch": 9.90058670143416, |
|
"grad_norm": 23.427574157714844, |
|
"learning_rate": 3.010233058824419e-09, |
|
"loss": 0.1765, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 9.908735332464147, |
|
"grad_norm": 3.2891268730163574, |
|
"learning_rate": 2.5370168800681325e-09, |
|
"loss": 0.1743, |
|
"step": 121600 |
|
}, |
|
{ |
|
"epoch": 9.916883963494133, |
|
"grad_norm": 19.58220100402832, |
|
"learning_rate": 2.1042334555848585e-09, |
|
"loss": 0.1596, |
|
"step": 121700 |
|
}, |
|
{ |
|
"epoch": 9.92503259452412, |
|
"grad_norm": 15.260977745056152, |
|
"learning_rate": 1.711886286876463e-09, |
|
"loss": 0.1486, |
|
"step": 121800 |
|
}, |
|
{ |
|
"epoch": 9.933181225554106, |
|
"grad_norm": 5.988215446472168, |
|
"learning_rate": 1.3599785482881767e-09, |
|
"loss": 0.1518, |
|
"step": 121900 |
|
}, |
|
{ |
|
"epoch": 9.941329856584094, |
|
"grad_norm": 5.850574970245361, |
|
"learning_rate": 1.0485130869858362e-09, |
|
"loss": 0.1588, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 9.949478487614082, |
|
"grad_norm": 11.288055419921875, |
|
"learning_rate": 7.774924229281278e-10, |
|
"loss": 0.1734, |
|
"step": 122100 |
|
}, |
|
{ |
|
"epoch": 9.957627118644067, |
|
"grad_norm": 10.142143249511719, |
|
"learning_rate": 5.469187488510441e-10, |
|
"loss": 0.173, |
|
"step": 122200 |
|
}, |
|
{ |
|
"epoch": 9.965775749674055, |
|
"grad_norm": 14.460721015930176, |
|
"learning_rate": 3.5679393024623533e-10, |
|
"loss": 0.1603, |
|
"step": 122300 |
|
}, |
|
{ |
|
"epoch": 9.973924380704041, |
|
"grad_norm": 23.698572158813477, |
|
"learning_rate": 2.071195053482411e-10, |
|
"loss": 0.1616, |
|
"step": 122400 |
|
}, |
|
{ |
|
"epoch": 9.982073011734029, |
|
"grad_norm": 9.158120155334473, |
|
"learning_rate": 9.789668512116823e-11, |
|
"loss": 0.1702, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 9.990221642764016, |
|
"grad_norm": 32.79683303833008, |
|
"learning_rate": 2.912635325036384e-11, |
|
"loss": 0.1718, |
|
"step": 122600 |
|
}, |
|
{ |
|
"epoch": 9.998370273794002, |
|
"grad_norm": 17.385313034057617, |
|
"learning_rate": 8.090661318682636e-13, |
|
"loss": 0.1656, |
|
"step": 122700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8172690763052208, |
|
"eval_loss": 0.7754501700401306, |
|
"eval_runtime": 7.1115, |
|
"eval_samples_per_second": 350.138, |
|
"eval_steps_per_second": 43.873, |
|
"step": 122720 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 122720, |
|
"total_flos": 1.617427903829713e+17, |
|
"train_loss": 0.3309724763735177, |
|
"train_runtime": 41426.0671, |
|
"train_samples_per_second": 94.796, |
|
"train_steps_per_second": 2.962 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 122720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.617427903829713e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|