{
  "best_metric": 0.22902172803878784,
  "best_model_checkpoint": "./checkpoint-11500",
  "epoch": 15.0,
  "global_step": 11670,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13,
      "learning_rate": 1.2933333333333334e-05,
      "loss": 14.1317,
      "step": 100
    },
    {
      "epoch": 0.26,
      "learning_rate": 2.6266666666666667e-05,
      "loss": 5.2635,
      "step": 200
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.960000000000001e-05,
      "loss": 3.8268,
      "step": 300
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.293333333333334e-05,
      "loss": 3.2266,
      "step": 400
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.626666666666666e-05,
      "loss": 3.0952,
      "step": 500
    },
    {
      "epoch": 0.64,
      "eval_loss": 3.0981762409210205,
      "eval_runtime": 326.629,
      "eval_samples_per_second": 25.414,
      "eval_steps_per_second": 0.796,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.960000000000001e-05,
      "loss": 3.0583,
      "step": 600
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.293333333333333e-05,
      "loss": 3.0349,
      "step": 700
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.00010626666666666667,
      "loss": 2.9355,
      "step": 800
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.00011960000000000001,
      "loss": 2.3372,
      "step": 900
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.00013293333333333333,
      "loss": 1.7975,
      "step": 1000
    },
    {
      "epoch": 1.29,
      "eval_loss": 0.7887413501739502,
      "eval_runtime": 341.1915,
      "eval_samples_per_second": 24.329,
      "eval_steps_per_second": 0.762,
      "eval_wer": 0.5651080072872386,
      "step": 1000
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.00014626666666666665,
      "loss": 1.6159,
      "step": 1100
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.0001596,
      "loss": 1.5287,
      "step": 1200
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00017293333333333335,
      "loss": 1.4876,
      "step": 1300
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.00018626666666666668,
      "loss": 1.4606,
      "step": 1400
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0001996,
      "loss": 1.4138,
      "step": 1500
    },
    {
      "epoch": 1.93,
      "eval_loss": 0.523814857006073,
      "eval_runtime": 341.7084,
      "eval_samples_per_second": 24.293,
      "eval_steps_per_second": 0.761,
      "eval_wer": 0.43889997397414765,
      "step": 1500
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.00019809242871189774,
      "loss": 1.4258,
      "step": 1600
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.000196125860373648,
      "loss": 1.3778,
      "step": 1700
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00019415929203539823,
      "loss": 1.3518,
      "step": 1800
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.00019219272369714848,
      "loss": 1.3692,
      "step": 1900
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.00019022615535889875,
      "loss": 1.344,
      "step": 2000
    },
    {
      "epoch": 2.57,
      "eval_loss": 0.4774917662143707,
      "eval_runtime": 337.4262,
      "eval_samples_per_second": 24.601,
      "eval_steps_per_second": 0.771,
      "eval_wer": 0.4318209421358549,
      "step": 2000
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.000188259587020649,
      "loss": 1.3167,
      "step": 2100
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.00018629301868239921,
      "loss": 1.3126,
      "step": 2200
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.00018432645034414946,
      "loss": 1.3161,
      "step": 2300
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.0001823598820058997,
      "loss": 1.2738,
      "step": 2400
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.00018039331366764995,
      "loss": 1.2737,
      "step": 2500
    },
    {
      "epoch": 3.21,
      "eval_loss": 0.46475061774253845,
      "eval_runtime": 336.0925,
      "eval_samples_per_second": 24.699,
      "eval_steps_per_second": 0.774,
      "eval_wer": 0.4074954454758393,
      "step": 2500
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.0001784267453294002,
      "loss": 1.262,
      "step": 2600
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.00017646017699115044,
      "loss": 1.2496,
      "step": 2700
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.0001744936086529007,
      "loss": 1.2612,
      "step": 2800
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.00017252704031465093,
      "loss": 1.2197,
      "step": 2900
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.0001705604719764012,
      "loss": 1.2554,
      "step": 3000
    },
    {
      "epoch": 3.86,
      "eval_loss": 0.4068518280982971,
      "eval_runtime": 331.555,
      "eval_samples_per_second": 25.037,
      "eval_steps_per_second": 0.784,
      "eval_wer": 0.36781469593129174,
      "step": 3000
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.00016859390363815145,
      "loss": 1.2204,
      "step": 3100
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.0001666273352999017,
      "loss": 1.2113,
      "step": 3200
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.00016466076696165194,
      "loss": 1.2053,
      "step": 3300
    },
    {
      "epoch": 4.37,
      "learning_rate": 0.00016269419862340216,
      "loss": 1.2019,
      "step": 3400
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.0001607276302851524,
      "loss": 1.1996,
      "step": 3500
    },
    {
      "epoch": 4.5,
      "eval_loss": 0.3914338946342468,
      "eval_runtime": 333.5366,
      "eval_samples_per_second": 24.888,
      "eval_steps_per_second": 0.78,
      "eval_wer": 0.36679101240565626,
      "step": 3500
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.00015876106194690265,
      "loss": 1.1903,
      "step": 3600
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.0001567944936086529,
      "loss": 1.1866,
      "step": 3700
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.00015482792527040314,
      "loss": 1.1678,
      "step": 3800
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.0001528613569321534,
      "loss": 1.1854,
      "step": 3900
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.00015089478859390363,
      "loss": 1.1427,
      "step": 4000
    },
    {
      "epoch": 5.14,
      "eval_loss": 0.36938655376434326,
      "eval_runtime": 326.2453,
      "eval_samples_per_second": 25.444,
      "eval_steps_per_second": 0.797,
      "eval_wer": 0.3571614470373905,
      "step": 4000
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.0001489282202556539,
      "loss": 1.1393,
      "step": 4100
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.00014696165191740415,
      "loss": 1.1182,
      "step": 4200
    },
    {
      "epoch": 5.53,
      "learning_rate": 0.0001449950835791544,
      "loss": 1.127,
      "step": 4300
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.00014302851524090464,
      "loss": 1.1431,
      "step": 4400
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.0001410619469026549,
      "loss": 1.1372,
      "step": 4500
    },
    {
      "epoch": 5.78,
      "eval_loss": 0.3567572236061096,
      "eval_runtime": 325.458,
      "eval_samples_per_second": 25.506,
      "eval_steps_per_second": 0.799,
      "eval_wer": 0.3500824151990978,
      "step": 4500
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.0001390953785644051,
      "loss": 1.1226,
      "step": 4600
    },
    {
      "epoch": 6.04,
      "learning_rate": 0.00013712881022615535,
      "loss": 1.1019,
      "step": 4700
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.0001351622418879056,
      "loss": 1.1031,
      "step": 4800
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.00013321533923303834,
      "loss": 1.0882,
      "step": 4900
    },
    {
      "epoch": 6.43,
      "learning_rate": 0.00013124877089478858,
      "loss": 1.0831,
      "step": 5000
    },
    {
      "epoch": 6.43,
      "eval_loss": 0.3331395387649536,
      "eval_runtime": 327.2346,
      "eval_samples_per_second": 25.367,
      "eval_steps_per_second": 0.795,
      "eval_wer": 0.3253058037650733,
      "step": 5000
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.00012928220255653886,
      "loss": 1.1039,
      "step": 5100
    },
    {
      "epoch": 6.68,
      "learning_rate": 0.0001273156342182891,
      "loss": 1.0683,
      "step": 5200
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.00012534906588003935,
      "loss": 1.0773,
      "step": 5300
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.0001233824975417896,
      "loss": 1.1002,
      "step": 5400
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.00012141592920353984,
      "loss": 1.1074,
      "step": 5500
    },
    {
      "epoch": 7.07,
      "eval_loss": 0.333199679851532,
      "eval_runtime": 329.8287,
      "eval_samples_per_second": 25.168,
      "eval_steps_per_second": 0.788,
      "eval_wer": 0.3352129782250369,
      "step": 5500
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.00011944936086529008,
      "loss": 1.0341,
      "step": 5600
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.00011748279252704033,
      "loss": 1.0812,
      "step": 5700
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.00011551622418879056,
      "loss": 1.0709,
      "step": 5800
    },
    {
      "epoch": 7.58,
      "learning_rate": 0.00011354965585054081,
      "loss": 1.0843,
      "step": 5900
    },
    {
      "epoch": 7.71,
      "learning_rate": 0.00011158308751229105,
      "loss": 1.0536,
      "step": 6000
    },
    {
      "epoch": 7.71,
      "eval_loss": 0.3130946755409241,
      "eval_runtime": 327.9895,
      "eval_samples_per_second": 25.309,
      "eval_steps_per_second": 0.793,
      "eval_wer": 0.3151557213498742,
      "step": 6000
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.0001096165191740413,
      "loss": 1.0239,
      "step": 6100
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.00010764995083579154,
      "loss": 1.0383,
      "step": 6200
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.00010568338249754179,
      "loss": 1.0157,
      "step": 6300
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.00010371681415929205,
      "loss": 1.0128,
      "step": 6400
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.0001017502458210423,
      "loss": 1.0248,
      "step": 6500
    },
    {
      "epoch": 8.35,
      "eval_loss": 0.30239033699035645,
      "eval_runtime": 328.0553,
      "eval_samples_per_second": 25.304,
      "eval_steps_per_second": 0.793,
      "eval_wer": 0.3022642491541598,
      "step": 6500
    },
    {
      "epoch": 8.48,
      "learning_rate": 9.978367748279254e-05,
      "loss": 0.9989,
      "step": 6600
    },
    {
      "epoch": 8.61,
      "learning_rate": 9.781710914454277e-05,
      "loss": 1.0151,
      "step": 6700
    },
    {
      "epoch": 8.74,
      "learning_rate": 9.585054080629302e-05,
      "loss": 0.9914,
      "step": 6800
    },
    {
      "epoch": 8.87,
      "learning_rate": 9.388397246804326e-05,
      "loss": 0.9893,
      "step": 6900
    },
    {
      "epoch": 9.0,
      "learning_rate": 9.193706981317602e-05,
      "loss": 1.0075,
      "step": 7000
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.2947603166103363,
      "eval_runtime": 326.8764,
      "eval_samples_per_second": 25.395,
      "eval_steps_per_second": 0.795,
      "eval_wer": 0.3028368179057864,
      "step": 7000
    },
    {
      "epoch": 9.13,
      "learning_rate": 8.997050147492626e-05,
      "loss": 0.9851,
      "step": 7100
    },
    {
      "epoch": 9.25,
      "learning_rate": 8.800393313667651e-05,
      "loss": 0.973,
      "step": 7200
    },
    {
      "epoch": 9.38,
      "learning_rate": 8.605703048180925e-05,
      "loss": 0.9623,
      "step": 7300
    },
    {
      "epoch": 9.51,
      "learning_rate": 8.40904621435595e-05,
      "loss": 0.9598,
      "step": 7400
    },
    {
      "epoch": 9.64,
      "learning_rate": 8.212389380530974e-05,
      "loss": 0.979,
      "step": 7500
    },
    {
      "epoch": 9.64,
      "eval_loss": 0.27962473034858704,
      "eval_runtime": 329.4084,
      "eval_samples_per_second": 25.2,
      "eval_steps_per_second": 0.789,
      "eval_wer": 0.2852953934241346,
      "step": 7500
    },
    {
      "epoch": 9.77,
      "learning_rate": 8.015732546705999e-05,
      "loss": 0.9582,
      "step": 7600
    },
    {
      "epoch": 9.9,
      "learning_rate": 7.819075712881023e-05,
      "loss": 0.9467,
      "step": 7700
    },
    {
      "epoch": 10.03,
      "learning_rate": 7.622418879056048e-05,
      "loss": 0.907,
      "step": 7800
    },
    {
      "epoch": 10.15,
      "learning_rate": 7.425762045231072e-05,
      "loss": 0.944,
      "step": 7900
    },
    {
      "epoch": 10.28,
      "learning_rate": 7.229105211406097e-05,
      "loss": 0.9594,
      "step": 8000
    },
    {
      "epoch": 10.28,
      "eval_loss": 0.2719425559043884,
      "eval_runtime": 329.4201,
      "eval_samples_per_second": 25.199,
      "eval_steps_per_second": 0.789,
      "eval_wer": 0.2789103843150863,
      "step": 8000
    },
    {
      "epoch": 10.41,
      "learning_rate": 7.032448377581121e-05,
      "loss": 0.9559,
      "step": 8100
    },
    {
      "epoch": 10.54,
      "learning_rate": 6.835791543756146e-05,
      "loss": 0.9368,
      "step": 8200
    },
    {
      "epoch": 10.67,
      "learning_rate": 6.63913470993117e-05,
      "loss": 0.9362,
      "step": 8300
    },
    {
      "epoch": 10.8,
      "learning_rate": 6.442477876106195e-05,
      "loss": 0.9134,
      "step": 8400
    },
    {
      "epoch": 10.93,
      "learning_rate": 6.24582104228122e-05,
      "loss": 0.9172,
      "step": 8500
    },
    {
      "epoch": 10.93,
      "eval_loss": 0.2620205879211426,
      "eval_runtime": 325.6384,
      "eval_samples_per_second": 25.491,
      "eval_steps_per_second": 0.798,
      "eval_wer": 0.26950637633382496,
      "step": 8500
    },
    {
      "epoch": 11.05,
      "learning_rate": 6.049164208456244e-05,
      "loss": 0.9179,
      "step": 8600
    },
    {
      "epoch": 11.18,
      "learning_rate": 5.8525073746312686e-05,
      "loss": 0.9157,
      "step": 8700
    },
    {
      "epoch": 11.31,
      "learning_rate": 5.655850540806293e-05,
      "loss": 0.8974,
      "step": 8800
    },
    {
      "epoch": 11.44,
      "learning_rate": 5.459193706981318e-05,
      "loss": 0.8994,
      "step": 8900
    },
    {
      "epoch": 11.57,
      "learning_rate": 5.262536873156343e-05,
      "loss": 0.9047,
      "step": 9000
    },
    {
      "epoch": 11.57,
      "eval_loss": 0.25371646881103516,
      "eval_runtime": 328.7713,
      "eval_samples_per_second": 25.249,
      "eval_steps_per_second": 0.791,
      "eval_wer": 0.2596165524420925,
      "step": 9000
    },
    {
      "epoch": 11.7,
      "learning_rate": 5.065880039331367e-05,
      "loss": 0.8816,
      "step": 9100
    },
    {
      "epoch": 11.83,
      "learning_rate": 4.869223205506391e-05,
      "loss": 0.9007,
      "step": 9200
    },
    {
      "epoch": 11.95,
      "learning_rate": 4.672566371681416e-05,
      "loss": 0.9035,
      "step": 9300
    },
    {
      "epoch": 12.08,
      "learning_rate": 4.475909537856441e-05,
      "loss": 0.8705,
      "step": 9400
    },
    {
      "epoch": 12.21,
      "learning_rate": 4.279252704031465e-05,
      "loss": 0.8777,
      "step": 9500
    },
    {
      "epoch": 12.21,
      "eval_loss": 0.24379895627498627,
      "eval_runtime": 329.8391,
      "eval_samples_per_second": 25.167,
      "eval_steps_per_second": 0.788,
      "eval_wer": 0.25250281946733755,
      "step": 9500
    },
    {
      "epoch": 12.34,
      "learning_rate": 4.0825958702064895e-05,
      "loss": 0.8734,
      "step": 9600
    },
    {
      "epoch": 12.47,
      "learning_rate": 3.887905604719764e-05,
      "loss": 0.8886,
      "step": 9700
    },
    {
      "epoch": 12.6,
      "learning_rate": 3.691248770894789e-05,
      "loss": 0.8626,
      "step": 9800
    },
    {
      "epoch": 12.72,
      "learning_rate": 3.4945919370698134e-05,
      "loss": 0.8669,
      "step": 9900
    },
    {
      "epoch": 12.85,
      "learning_rate": 3.297935103244838e-05,
      "loss": 0.8629,
      "step": 10000
    },
    {
      "epoch": 12.85,
      "eval_loss": 0.2408979833126068,
      "eval_runtime": 327.8875,
      "eval_samples_per_second": 25.317,
      "eval_steps_per_second": 0.793,
      "eval_wer": 0.24934501604927561,
      "step": 10000
    },
    {
      "epoch": 12.98,
      "learning_rate": 3.1012782694198625e-05,
      "loss": 0.8489,
      "step": 10100
    },
    {
      "epoch": 13.11,
      "learning_rate": 2.904621435594887e-05,
      "loss": 0.8356,
      "step": 10200
    },
    {
      "epoch": 13.24,
      "learning_rate": 2.7079646017699116e-05,
      "loss": 0.8596,
      "step": 10300
    },
    {
      "epoch": 13.37,
      "learning_rate": 2.5113077679449358e-05,
      "loss": 0.8401,
      "step": 10400
    },
    {
      "epoch": 13.5,
      "learning_rate": 2.3146509341199607e-05,
      "loss": 0.8575,
      "step": 10500
    },
    {
      "epoch": 13.5,
      "eval_loss": 0.2366442084312439,
      "eval_runtime": 327.7324,
      "eval_samples_per_second": 25.329,
      "eval_steps_per_second": 0.793,
      "eval_wer": 0.24396633989763164,
      "step": 10500
    },
    {
      "epoch": 13.62,
      "learning_rate": 2.1179941002949856e-05,
      "loss": 0.8343,
      "step": 10600
    },
    {
      "epoch": 13.75,
      "learning_rate": 1.9213372664700098e-05,
      "loss": 0.8308,
      "step": 10700
    },
    {
      "epoch": 13.88,
      "learning_rate": 1.7246804326450343e-05,
      "loss": 0.8431,
      "step": 10800
    },
    {
      "epoch": 14.01,
      "learning_rate": 1.5280235988200592e-05,
      "loss": 0.8468,
      "step": 10900
    },
    {
      "epoch": 14.14,
      "learning_rate": 1.3313667649950836e-05,
      "loss": 0.8361,
      "step": 11000
    },
    {
      "epoch": 14.14,
      "eval_loss": 0.23166431486606598,
      "eval_runtime": 329.2059,
      "eval_samples_per_second": 25.215,
      "eval_steps_per_second": 0.79,
      "eval_wer": 0.23848356033660104,
      "step": 11000
    },
    {
      "epoch": 14.27,
      "learning_rate": 1.1347099311701081e-05,
      "loss": 0.824,
      "step": 11100
    },
    {
      "epoch": 14.4,
      "learning_rate": 9.380530973451327e-06,
      "loss": 0.8252,
      "step": 11200
    },
    {
      "epoch": 14.52,
      "learning_rate": 7.4336283185840714e-06,
      "loss": 0.8286,
      "step": 11300
    },
    {
      "epoch": 14.65,
      "learning_rate": 5.467059980334317e-06,
      "loss": 0.8077,
      "step": 11400
    },
    {
      "epoch": 14.78,
      "learning_rate": 3.500491642084563e-06,
      "loss": 0.8126,
      "step": 11500
    },
    {
      "epoch": 14.78,
      "eval_loss": 0.22902172803878784,
      "eval_runtime": 327.6124,
      "eval_samples_per_second": 25.338,
      "eval_steps_per_second": 0.794,
      "eval_wer": 0.23818860067667216,
      "step": 11500
    },
    {
      "epoch": 14.91,
      "learning_rate": 1.5339233038348083e-06,
      "loss": 0.8118,
      "step": 11600
    },
    {
      "epoch": 15.0,
      "step": 11670,
      "total_flos": 4.014305196020058e+19,
      "train_loss": 1.3307904394651542,
      "train_runtime": 23640.5221,
      "train_samples_per_second": 15.785,
      "train_steps_per_second": 0.494
    }
  ],
  "max_steps": 11670,
  "num_train_epochs": 15,
  "total_flos": 4.014305196020058e+19,
  "trial_name": null,
  "trial_params": null
}