{ "best_metric": 0.3793087899684906, "best_model_checkpoint": "/data/users/yanyang/Projects/COCO_Caption_Refine/debug/git/2023-09-11-23-23-40/checkpoint-4800", "epoch": 2.9702970297029703, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.948432343234324e-05, "loss": 6.0088, "step": 50 }, { "epoch": 0.03, "eval_loss": 2.9973134994506836, "eval_runtime": 2.3906, "eval_samples_per_second": 13.386, "eval_steps_per_second": 6.693, "eval_wer_score": 3.802087864288821, "step": 50 }, { "epoch": 0.06, "learning_rate": 4.8968646864686466e-05, "loss": 1.3576, "step": 100 }, { "epoch": 0.06, "eval_loss": 0.5444818139076233, "eval_runtime": 2.2435, "eval_samples_per_second": 14.264, "eval_steps_per_second": 7.132, "eval_wer_score": 3.5702479338842976, "step": 100 }, { "epoch": 0.09, "learning_rate": 4.8452970297029704e-05, "loss": 0.5214, "step": 150 }, { "epoch": 0.09, "eval_loss": 0.4757494628429413, "eval_runtime": 2.3157, "eval_samples_per_second": 13.818, "eval_steps_per_second": 6.909, "eval_wer_score": 4.052196607220531, "step": 150 }, { "epoch": 0.12, "learning_rate": 4.793729372937294e-05, "loss": 0.4845, "step": 200 }, { "epoch": 0.12, "eval_loss": 0.4562256336212158, "eval_runtime": 2.3849, "eval_samples_per_second": 13.418, "eval_steps_per_second": 6.709, "eval_wer_score": 4.051326663766855, "step": 200 }, { "epoch": 0.15, "learning_rate": 4.7421617161716174e-05, "loss": 0.4694, "step": 250 }, { "epoch": 0.15, "eval_loss": 0.4434777796268463, "eval_runtime": 2.3886, "eval_samples_per_second": 13.397, "eval_steps_per_second": 6.698, "eval_wer_score": 4.0582862113962594, "step": 250 }, { "epoch": 0.19, "learning_rate": 4.6905940594059406e-05, "loss": 0.4598, "step": 300 }, { "epoch": 0.19, "eval_loss": 0.43529027700424194, "eval_runtime": 2.3401, "eval_samples_per_second": 13.674, "eval_steps_per_second": 6.837, "eval_wer_score": 4.02131361461505, "step": 300 }, { "epoch": 0.22, "learning_rate": 4.6390264026402644e-05, "loss": 0.4499, "step": 350 }, { "epoch": 0.22, "eval_loss": 0.4303966462612152, "eval_runtime": 2.386, "eval_samples_per_second": 13.412, "eval_steps_per_second": 6.706, "eval_wer_score": 4.097433666811657, "step": 350 }, { "epoch": 0.25, "learning_rate": 4.5874587458745876e-05, "loss": 0.4444, "step": 400 }, { "epoch": 0.25, "eval_loss": 0.4260537624359131, "eval_runtime": 2.2892, "eval_samples_per_second": 13.979, "eval_steps_per_second": 6.989, "eval_wer_score": 4.073510221835581, "step": 400 }, { "epoch": 0.28, "learning_rate": 4.5358910891089114e-05, "loss": 0.4419, "step": 450 }, { "epoch": 0.28, "eval_loss": 0.4221154451370239, "eval_runtime": 2.3303, "eval_samples_per_second": 13.732, "eval_steps_per_second": 6.866, "eval_wer_score": 4.101348412353197, "step": 450 }, { "epoch": 0.31, "learning_rate": 4.4843234323432346e-05, "loss": 0.439, "step": 500 }, { "epoch": 0.31, "eval_loss": 0.42153507471084595, "eval_runtime": 2.3513, "eval_samples_per_second": 13.61, "eval_steps_per_second": 6.805, "eval_wer_score": 4.062200956937799, "step": 500 }, { "epoch": 0.34, "learning_rate": 4.432755775577558e-05, "loss": 0.4339, "step": 550 }, { "epoch": 0.34, "eval_loss": 0.42045870423316956, "eval_runtime": 2.2803, "eval_samples_per_second": 14.034, "eval_steps_per_second": 7.017, "eval_wer_score": 4.056546324488909, "step": 550 }, { "epoch": 0.37, "learning_rate": 4.3811881188118816e-05, "loss": 0.4295, "step": 600 }, { "epoch": 0.37, "eval_loss": 0.4192642569541931, "eval_runtime": 2.3998, "eval_samples_per_second": 13.335, "eval_steps_per_second": 6.667, "eval_wer_score": 4.061765985210961, "step": 600 }, { "epoch": 0.4, "learning_rate": 4.329620462046205e-05, "loss": 0.4275, "step": 650 }, { "epoch": 0.4, "eval_loss": 0.4134916663169861, "eval_runtime": 1.7005, "eval_samples_per_second": 18.818, "eval_steps_per_second": 9.409, "eval_wer_score": 4.1200521966072206, "step": 650 }, { "epoch": 0.43, "learning_rate": 4.278052805280528e-05, "loss": 0.4278, "step": 700 }, { "epoch": 0.43, "eval_loss": 0.4126208424568176, "eval_runtime": 2.7168, "eval_samples_per_second": 11.779, "eval_steps_per_second": 5.889, "eval_wer_score": 4.035232709873858, "step": 700 }, { "epoch": 0.46, "learning_rate": 4.226485148514852e-05, "loss": 0.4251, "step": 750 }, { "epoch": 0.46, "eval_loss": 0.41297051310539246, "eval_runtime": 2.3567, "eval_samples_per_second": 13.578, "eval_steps_per_second": 6.789, "eval_wer_score": 4.082644628099174, "step": 750 }, { "epoch": 0.5, "learning_rate": 4.174917491749175e-05, "loss": 0.4224, "step": 800 }, { "epoch": 0.5, "eval_loss": 0.41018015146255493, "eval_runtime": 1.6739, "eval_samples_per_second": 19.117, "eval_steps_per_second": 9.558, "eval_wer_score": 4.034362766420183, "step": 800 }, { "epoch": 0.53, "learning_rate": 4.123349834983499e-05, "loss": 0.4186, "step": 850 }, { "epoch": 0.53, "eval_loss": 0.40729203820228577, "eval_runtime": 3.0395, "eval_samples_per_second": 10.528, "eval_steps_per_second": 5.264, "eval_wer_score": 4.054806437581557, "step": 850 }, { "epoch": 0.56, "learning_rate": 4.071782178217822e-05, "loss": 0.4186, "step": 900 }, { "epoch": 0.56, "eval_loss": 0.4050724506378174, "eval_runtime": 1.6968, "eval_samples_per_second": 18.859, "eval_steps_per_second": 9.429, "eval_wer_score": 4.0669856459330145, "step": 900 }, { "epoch": 0.59, "learning_rate": 4.020214521452145e-05, "loss": 0.4159, "step": 950 }, { "epoch": 0.59, "eval_loss": 0.4054299592971802, "eval_runtime": 2.3306, "eval_samples_per_second": 13.73, "eval_steps_per_second": 6.865, "eval_wer_score": 4.100043497172684, "step": 950 }, { "epoch": 0.62, "learning_rate": 3.968646864686469e-05, "loss": 0.4157, "step": 1000 }, { "epoch": 0.62, "eval_loss": 0.4036748707294464, "eval_runtime": 2.3387, "eval_samples_per_second": 13.683, "eval_steps_per_second": 6.841, "eval_wer_score": 4.08133971291866, "step": 1000 }, { "epoch": 0.65, "learning_rate": 3.917079207920793e-05, "loss": 0.4141, "step": 1050 }, { "epoch": 0.65, "eval_loss": 0.40399593114852905, "eval_runtime": 1.6717, "eval_samples_per_second": 19.142, "eval_steps_per_second": 9.571, "eval_wer_score": 4.090474119182253, "step": 1050 }, { "epoch": 0.68, "learning_rate": 3.865511551155115e-05, "loss": 0.4119, "step": 1100 }, { "epoch": 0.68, "eval_loss": 0.4020461142063141, "eval_runtime": 2.2547, "eval_samples_per_second": 14.192, "eval_steps_per_second": 7.096, "eval_wer_score": 4.063505872118312, "step": 1100 }, { "epoch": 0.71, "learning_rate": 3.813943894389439e-05, "loss": 0.4104, "step": 1150 }, { "epoch": 0.71, "eval_loss": 0.39923593401908875, "eval_runtime": 2.3937, "eval_samples_per_second": 13.368, "eval_steps_per_second": 6.684, "eval_wer_score": 4.0487168334058286, "step": 1150 }, { "epoch": 0.74, "learning_rate": 3.762376237623763e-05, "loss": 0.4104, "step": 1200 }, { "epoch": 0.74, "eval_loss": 0.39828553795814514, "eval_runtime": 1.6946, "eval_samples_per_second": 18.883, "eval_steps_per_second": 9.442, "eval_wer_score": 4.107438016528926, "step": 1200 }, { "epoch": 0.77, "learning_rate": 3.710808580858086e-05, "loss": 0.4086, "step": 1250 }, { "epoch": 0.77, "eval_loss": 0.3990360200405121, "eval_runtime": 2.3013, "eval_samples_per_second": 13.905, "eval_steps_per_second": 6.953, "eval_wer_score": 4.1022183558068726, "step": 1250 }, { "epoch": 0.8, "learning_rate": 3.659240924092409e-05, "loss": 0.4082, "step": 1300 }, { "epoch": 0.8, "eval_loss": 0.39795249700546265, "eval_runtime": 2.3895, "eval_samples_per_second": 13.392, "eval_steps_per_second": 6.696, "eval_wer_score": 4.086124401913875, "step": 1300 }, { "epoch": 0.84, "learning_rate": 3.607673267326733e-05, "loss": 0.4071, "step": 1350 }, { "epoch": 0.84, "eval_loss": 0.39599987864494324, "eval_runtime": 1.6611, "eval_samples_per_second": 19.265, "eval_steps_per_second": 9.632, "eval_wer_score": 4.075250108742932, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.556105610561056e-05, "loss": 0.4057, "step": 1400 }, { "epoch": 0.87, "eval_loss": 0.39804807305336, "eval_runtime": 2.3409, "eval_samples_per_second": 13.67, "eval_steps_per_second": 6.835, "eval_wer_score": 4.056546324488909, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.50453795379538e-05, "loss": 0.4067, "step": 1450 }, { "epoch": 0.9, "eval_loss": 0.3962915241718292, "eval_runtime": 2.295, "eval_samples_per_second": 13.944, "eval_steps_per_second": 6.972, "eval_wer_score": 4.103088299260548, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.452970297029703e-05, "loss": 0.404, "step": 1500 }, { "epoch": 0.93, "eval_loss": 0.3967008888721466, "eval_runtime": 2.3182, "eval_samples_per_second": 13.804, "eval_steps_per_second": 6.902, "eval_wer_score": 4.110047846889952, "step": 1500 }, { "epoch": 0.96, "learning_rate": 3.4014026402640264e-05, "loss": 0.4019, "step": 1550 }, { "epoch": 0.96, "eval_loss": 0.39423680305480957, "eval_runtime": 2.2298, "eval_samples_per_second": 14.351, "eval_steps_per_second": 7.176, "eval_wer_score": 4.082644628099174, "step": 1550 }, { "epoch": 0.99, "learning_rate": 3.34983498349835e-05, "loss": 0.4002, "step": 1600 }, { "epoch": 0.99, "eval_loss": 0.39309296011924744, "eval_runtime": 2.3184, "eval_samples_per_second": 13.803, "eval_steps_per_second": 6.901, "eval_wer_score": 4.07742496737712, "step": 1600 }, { "epoch": 1.02, "learning_rate": 3.2982673267326734e-05, "loss": 0.3975, "step": 1650 }, { "epoch": 1.02, "eval_loss": 0.39335134625434875, "eval_runtime": 2.2755, "eval_samples_per_second": 14.063, "eval_steps_per_second": 7.032, "eval_wer_score": 4.10656807307525, "step": 1650 }, { "epoch": 1.05, "learning_rate": 3.2466996699669965e-05, "loss": 0.3932, "step": 1700 }, { "epoch": 1.05, "eval_loss": 0.3922082781791687, "eval_runtime": 2.3564, "eval_samples_per_second": 13.58, "eval_steps_per_second": 6.79, "eval_wer_score": 4.048281861678991, "step": 1700 }, { "epoch": 1.08, "learning_rate": 3.1951320132013203e-05, "loss": 0.3922, "step": 1750 }, { "epoch": 1.08, "eval_loss": 0.392187237739563, "eval_runtime": 2.3537, "eval_samples_per_second": 13.596, "eval_steps_per_second": 6.798, "eval_wer_score": 4.079164854284471, "step": 1750 }, { "epoch": 1.11, "learning_rate": 3.1435643564356435e-05, "loss": 0.3929, "step": 1800 }, { "epoch": 1.11, "eval_loss": 0.39260441064834595, "eval_runtime": 2.3232, "eval_samples_per_second": 13.774, "eval_steps_per_second": 6.887, "eval_wer_score": 4.020443671161375, "step": 1800 }, { "epoch": 1.14, "learning_rate": 3.0919966996699673e-05, "loss": 0.3914, "step": 1850 }, { "epoch": 1.14, "eval_loss": 0.3927755355834961, "eval_runtime": 2.2418, "eval_samples_per_second": 14.274, "eval_steps_per_second": 7.137, "eval_wer_score": 4.090474119182253, "step": 1850 }, { "epoch": 1.18, "learning_rate": 3.0404290429042902e-05, "loss": 0.3911, "step": 1900 }, { "epoch": 1.18, "eval_loss": 0.39090630412101746, "eval_runtime": 2.3278, "eval_samples_per_second": 13.747, "eval_steps_per_second": 6.873, "eval_wer_score": 4.040887342322749, "step": 1900 }, { "epoch": 1.21, "learning_rate": 2.988861386138614e-05, "loss": 0.3903, "step": 1950 }, { "epoch": 1.21, "eval_loss": 0.3916274607181549, "eval_runtime": 2.2878, "eval_samples_per_second": 13.987, "eval_steps_per_second": 6.994, "eval_wer_score": 4.0426272292301, "step": 1950 }, { "epoch": 1.24, "learning_rate": 2.9372937293729375e-05, "loss": 0.3906, "step": 2000 }, { "epoch": 1.24, "eval_loss": 0.39041662216186523, "eval_runtime": 2.3191, "eval_samples_per_second": 13.798, "eval_steps_per_second": 6.899, "eval_wer_score": 4.063505872118312, "step": 2000 }, { "epoch": 1.27, "learning_rate": 2.885726072607261e-05, "loss": 0.3887, "step": 2050 }, { "epoch": 1.27, "eval_loss": 0.39140254259109497, "eval_runtime": 2.3005, "eval_samples_per_second": 13.91, "eval_steps_per_second": 6.955, "eval_wer_score": 4.08133971291866, "step": 2050 }, { "epoch": 1.3, "learning_rate": 2.834158415841584e-05, "loss": 0.3899, "step": 2100 }, { "epoch": 1.3, "eval_loss": 0.39112603664398193, "eval_runtime": 2.3634, "eval_samples_per_second": 13.54, "eval_steps_per_second": 6.77, "eval_wer_score": 4.0491518051326665, "step": 2100 }, { "epoch": 1.33, "learning_rate": 2.7825907590759077e-05, "loss": 0.389, "step": 2150 }, { "epoch": 1.33, "eval_loss": 0.3917953073978424, "eval_runtime": 2.2409, "eval_samples_per_second": 14.28, "eval_steps_per_second": 7.14, "eval_wer_score": 4.098738581992171, "step": 2150 }, { "epoch": 1.36, "learning_rate": 2.731023102310231e-05, "loss": 0.3878, "step": 2200 }, { "epoch": 1.36, "eval_loss": 0.38973185420036316, "eval_runtime": 2.3148, "eval_samples_per_second": 13.824, "eval_steps_per_second": 6.912, "eval_wer_score": 4.091779034362767, "step": 2200 }, { "epoch": 1.39, "learning_rate": 2.6794554455445547e-05, "loss": 0.3877, "step": 2250 }, { "epoch": 1.39, "eval_loss": 0.387917160987854, "eval_runtime": 2.3247, "eval_samples_per_second": 13.765, "eval_steps_per_second": 6.883, "eval_wer_score": 4.068290561113527, "step": 2250 }, { "epoch": 1.42, "learning_rate": 2.6278877887788778e-05, "loss": 0.3871, "step": 2300 }, { "epoch": 1.42, "eval_loss": 0.3881215453147888, "eval_runtime": 2.2878, "eval_samples_per_second": 13.987, "eval_steps_per_second": 6.994, "eval_wer_score": 4.076989995650282, "step": 2300 }, { "epoch": 1.45, "learning_rate": 2.5763201320132013e-05, "loss": 0.387, "step": 2350 }, { "epoch": 1.45, "eval_loss": 0.3859871029853821, "eval_runtime": 2.3063, "eval_samples_per_second": 13.875, "eval_steps_per_second": 6.937, "eval_wer_score": 4.069160504567203, "step": 2350 }, { "epoch": 1.49, "learning_rate": 2.5247524752475248e-05, "loss": 0.3887, "step": 2400 }, { "epoch": 1.49, "eval_loss": 0.38679689168930054, "eval_runtime": 2.3448, "eval_samples_per_second": 13.647, "eval_steps_per_second": 6.824, "eval_wer_score": 4.063070900391475, "step": 2400 }, { "epoch": 1.52, "learning_rate": 2.4731848184818483e-05, "loss": 0.3847, "step": 2450 }, { "epoch": 1.52, "eval_loss": 0.3876645267009735, "eval_runtime": 1.6183, "eval_samples_per_second": 19.774, "eval_steps_per_second": 9.887, "eval_wer_score": 4.091344062635929, "step": 2450 }, { "epoch": 1.55, "learning_rate": 2.4216171617161718e-05, "loss": 0.3854, "step": 2500 }, { "epoch": 1.55, "eval_loss": 0.3880044221878052, "eval_runtime": 1.6125, "eval_samples_per_second": 19.845, "eval_steps_per_second": 9.923, "eval_wer_score": 4.040887342322749, "step": 2500 }, { "epoch": 1.58, "learning_rate": 2.370049504950495e-05, "loss": 0.3841, "step": 2550 }, { "epoch": 1.58, "eval_loss": 0.38710057735443115, "eval_runtime": 1.6138, "eval_samples_per_second": 19.829, "eval_steps_per_second": 9.914, "eval_wer_score": 4.064810787298826, "step": 2550 }, { "epoch": 1.61, "learning_rate": 2.3184818481848185e-05, "loss": 0.384, "step": 2600 }, { "epoch": 1.61, "eval_loss": 0.3867349326610565, "eval_runtime": 1.6339, "eval_samples_per_second": 19.585, "eval_steps_per_second": 9.793, "eval_wer_score": 4.063070900391475, "step": 2600 }, { "epoch": 1.64, "learning_rate": 2.266914191419142e-05, "loss": 0.3839, "step": 2650 }, { "epoch": 1.64, "eval_loss": 0.38524535298347473, "eval_runtime": 1.6377, "eval_samples_per_second": 19.539, "eval_steps_per_second": 9.77, "eval_wer_score": 4.076989995650282, "step": 2650 }, { "epoch": 1.67, "learning_rate": 2.2153465346534655e-05, "loss": 0.3833, "step": 2700 }, { "epoch": 1.67, "eval_loss": 0.3858553171157837, "eval_runtime": 1.6388, "eval_samples_per_second": 19.526, "eval_steps_per_second": 9.763, "eval_wer_score": 4.033492822966507, "step": 2700 }, { "epoch": 1.7, "learning_rate": 2.1637788778877886e-05, "loss": 0.3819, "step": 2750 }, { "epoch": 1.7, "eval_loss": 0.38464829325675964, "eval_runtime": 1.6686, "eval_samples_per_second": 19.178, "eval_steps_per_second": 9.589, "eval_wer_score": 4.073510221835581, "step": 2750 }, { "epoch": 1.73, "learning_rate": 2.1122112211221125e-05, "loss": 0.3831, "step": 2800 }, { "epoch": 1.73, "eval_loss": 0.3838270902633667, "eval_runtime": 1.6915, "eval_samples_per_second": 18.918, "eval_steps_per_second": 9.459, "eval_wer_score": 4.0678555893866895, "step": 2800 }, { "epoch": 1.76, "learning_rate": 2.0606435643564356e-05, "loss": 0.3817, "step": 2850 }, { "epoch": 1.76, "eval_loss": 0.38364166021347046, "eval_runtime": 1.6576, "eval_samples_per_second": 19.305, "eval_steps_per_second": 9.653, "eval_wer_score": 4.051761635493693, "step": 2850 }, { "epoch": 1.79, "learning_rate": 2.009075907590759e-05, "loss": 0.3811, "step": 2900 }, { "epoch": 1.79, "eval_loss": 0.38430899381637573, "eval_runtime": 1.6858, "eval_samples_per_second": 18.982, "eval_steps_per_second": 9.491, "eval_wer_score": 4.053936494127882, "step": 2900 }, { "epoch": 1.83, "learning_rate": 1.9575082508250826e-05, "loss": 0.3821, "step": 2950 }, { "epoch": 1.83, "eval_loss": 0.3839759826660156, "eval_runtime": 1.6393, "eval_samples_per_second": 19.52, "eval_steps_per_second": 9.76, "eval_wer_score": 4.08133971291866, "step": 2950 }, { "epoch": 1.86, "learning_rate": 1.905940594059406e-05, "loss": 0.3803, "step": 3000 }, { "epoch": 1.86, "eval_loss": 0.3829007148742676, "eval_runtime": 1.757, "eval_samples_per_second": 18.212, "eval_steps_per_second": 9.106, "eval_wer_score": 4.043932144410613, "step": 3000 }, { "epoch": 1.89, "learning_rate": 1.8543729372937293e-05, "loss": 0.3804, "step": 3050 }, { "epoch": 1.89, "eval_loss": 0.3814784288406372, "eval_runtime": 1.7626, "eval_samples_per_second": 18.155, "eval_steps_per_second": 9.078, "eval_wer_score": 4.068725532840365, "step": 3050 }, { "epoch": 1.92, "learning_rate": 1.8028052805280528e-05, "loss": 0.3803, "step": 3100 }, { "epoch": 1.92, "eval_loss": 0.382568895816803, "eval_runtime": 1.7262, "eval_samples_per_second": 18.538, "eval_steps_per_second": 9.269, "eval_wer_score": 4.033927794693345, "step": 3100 }, { "epoch": 1.95, "learning_rate": 1.7512376237623763e-05, "loss": 0.3774, "step": 3150 }, { "epoch": 1.95, "eval_loss": 0.38378775119781494, "eval_runtime": 1.776, "eval_samples_per_second": 18.018, "eval_steps_per_second": 9.009, "eval_wer_score": 4.053936494127882, "step": 3150 }, { "epoch": 1.98, "learning_rate": 1.6996699669966998e-05, "loss": 0.379, "step": 3200 }, { "epoch": 1.98, "eval_loss": 0.3831249475479126, "eval_runtime": 1.7324, "eval_samples_per_second": 18.471, "eval_steps_per_second": 9.236, "eval_wer_score": 4.062635928664637, "step": 3200 }, { "epoch": 2.01, "learning_rate": 1.648102310231023e-05, "loss": 0.3779, "step": 3250 }, { "epoch": 2.01, "eval_loss": 0.3829612135887146, "eval_runtime": 1.7483, "eval_samples_per_second": 18.304, "eval_steps_per_second": 9.152, "eval_wer_score": 4.070465419747716, "step": 3250 }, { "epoch": 2.04, "learning_rate": 1.5965346534653468e-05, "loss": 0.3714, "step": 3300 }, { "epoch": 2.04, "eval_loss": 0.38411805033683777, "eval_runtime": 1.7491, "eval_samples_per_second": 18.295, "eval_steps_per_second": 9.147, "eval_wer_score": 4.074380165289257, "step": 3300 }, { "epoch": 2.07, "learning_rate": 1.54496699669967e-05, "loss": 0.3718, "step": 3350 }, { "epoch": 2.07, "eval_loss": 0.38236159086227417, "eval_runtime": 1.7284, "eval_samples_per_second": 18.514, "eval_steps_per_second": 9.257, "eval_wer_score": 4.076555023923445, "step": 3350 }, { "epoch": 2.1, "learning_rate": 1.4933993399339935e-05, "loss": 0.3705, "step": 3400 }, { "epoch": 2.1, "eval_loss": 0.3845175504684448, "eval_runtime": 1.748, "eval_samples_per_second": 18.307, "eval_steps_per_second": 9.153, "eval_wer_score": 4.075250108742932, "step": 3400 }, { "epoch": 2.13, "learning_rate": 1.4418316831683168e-05, "loss": 0.3719, "step": 3450 }, { "epoch": 2.13, "eval_loss": 0.382493793964386, "eval_runtime": 1.7164, "eval_samples_per_second": 18.643, "eval_steps_per_second": 9.322, "eval_wer_score": 4.0848194867333625, "step": 3450 }, { "epoch": 2.17, "learning_rate": 1.3902640264026403e-05, "loss": 0.3715, "step": 3500 }, { "epoch": 2.17, "eval_loss": 0.3831576108932495, "eval_runtime": 1.7528, "eval_samples_per_second": 18.257, "eval_steps_per_second": 9.128, "eval_wer_score": 4.061765985210961, "step": 3500 }, { "epoch": 2.2, "learning_rate": 1.3386963696369636e-05, "loss": 0.3704, "step": 3550 }, { "epoch": 2.2, "eval_loss": 0.38182923197746277, "eval_runtime": 1.8646, "eval_samples_per_second": 17.161, "eval_steps_per_second": 8.581, "eval_wer_score": 4.0843845150065246, "step": 3550 }, { "epoch": 2.23, "learning_rate": 1.2871287128712873e-05, "loss": 0.3703, "step": 3600 }, { "epoch": 2.23, "eval_loss": 0.38130998611450195, "eval_runtime": 1.7229, "eval_samples_per_second": 18.574, "eval_steps_per_second": 9.287, "eval_wer_score": 4.063070900391475, "step": 3600 }, { "epoch": 2.26, "learning_rate": 1.2355610561056106e-05, "loss": 0.3697, "step": 3650 }, { "epoch": 2.26, "eval_loss": 0.381599485874176, "eval_runtime": 1.7655, "eval_samples_per_second": 18.126, "eval_steps_per_second": 9.063, "eval_wer_score": 4.073510221835581, "step": 3650 }, { "epoch": 2.29, "learning_rate": 1.1839933993399341e-05, "loss": 0.3706, "step": 3700 }, { "epoch": 2.29, "eval_loss": 0.3811095952987671, "eval_runtime": 1.7447, "eval_samples_per_second": 18.341, "eval_steps_per_second": 9.171, "eval_wer_score": 4.06394084384515, "step": 3700 }, { "epoch": 2.32, "learning_rate": 1.1324257425742574e-05, "loss": 0.3709, "step": 3750 }, { "epoch": 2.32, "eval_loss": 0.3830665946006775, "eval_runtime": 1.7245, "eval_samples_per_second": 18.556, "eval_steps_per_second": 9.278, "eval_wer_score": 4.070900391474554, "step": 3750 }, { "epoch": 2.35, "learning_rate": 1.080858085808581e-05, "loss": 0.3698, "step": 3800 }, { "epoch": 2.35, "eval_loss": 0.38286030292510986, "eval_runtime": 1.7434, "eval_samples_per_second": 18.355, "eval_steps_per_second": 9.178, "eval_wer_score": 4.066115702479339, "step": 3800 }, { "epoch": 2.38, "learning_rate": 1.0292904290429044e-05, "loss": 0.3692, "step": 3850 }, { "epoch": 2.38, "eval_loss": 0.38252702355384827, "eval_runtime": 1.7413, "eval_samples_per_second": 18.377, "eval_steps_per_second": 9.189, "eval_wer_score": 4.063070900391475, "step": 3850 }, { "epoch": 2.41, "learning_rate": 9.777227722772278e-06, "loss": 0.3695, "step": 3900 }, { "epoch": 2.41, "eval_loss": 0.3817928731441498, "eval_runtime": 1.7445, "eval_samples_per_second": 18.343, "eval_steps_per_second": 9.172, "eval_wer_score": 4.0665506742061766, "step": 3900 }, { "epoch": 2.44, "learning_rate": 9.261551155115513e-06, "loss": 0.3678, "step": 3950 }, { "epoch": 2.44, "eval_loss": 0.38285502791404724, "eval_runtime": 1.7607, "eval_samples_per_second": 18.174, "eval_steps_per_second": 9.087, "eval_wer_score": 4.053936494127882, "step": 3950 }, { "epoch": 2.48, "learning_rate": 8.745874587458746e-06, "loss": 0.3679, "step": 4000 }, { "epoch": 2.48, "eval_loss": 0.38176512718200684, "eval_runtime": 1.731, "eval_samples_per_second": 18.487, "eval_steps_per_second": 9.243, "eval_wer_score": 4.0939538929969554, "step": 4000 }, { "epoch": 2.51, "learning_rate": 8.230198019801981e-06, "loss": 0.3692, "step": 4050 }, { "epoch": 2.51, "eval_loss": 0.3833461403846741, "eval_runtime": 1.7399, "eval_samples_per_second": 18.392, "eval_steps_per_second": 9.196, "eval_wer_score": 4.0856894301870375, "step": 4050 }, { "epoch": 2.54, "learning_rate": 7.714521452145216e-06, "loss": 0.3691, "step": 4100 }, { "epoch": 2.54, "eval_loss": 0.381513774394989, "eval_runtime": 1.7336, "eval_samples_per_second": 18.458, "eval_steps_per_second": 9.229, "eval_wer_score": 4.061765985210961, "step": 4100 }, { "epoch": 2.57, "learning_rate": 7.198844884488449e-06, "loss": 0.3674, "step": 4150 }, { "epoch": 2.57, "eval_loss": 0.38187551498413086, "eval_runtime": 1.7502, "eval_samples_per_second": 18.284, "eval_steps_per_second": 9.142, "eval_wer_score": 4.069595476294041, "step": 4150 }, { "epoch": 2.6, "learning_rate": 6.6831683168316835e-06, "loss": 0.3675, "step": 4200 }, { "epoch": 2.6, "eval_loss": 0.38118019700050354, "eval_runtime": 1.7491, "eval_samples_per_second": 18.295, "eval_steps_per_second": 9.147, "eval_wer_score": 4.0491518051326665, "step": 4200 }, { "epoch": 2.63, "learning_rate": 6.167491749174918e-06, "loss": 0.3683, "step": 4250 }, { "epoch": 2.63, "eval_loss": 0.38107800483703613, "eval_runtime": 1.7521, "eval_samples_per_second": 18.264, "eval_steps_per_second": 9.132, "eval_wer_score": 4.057416267942584, "step": 4250 }, { "epoch": 2.66, "learning_rate": 5.651815181518152e-06, "loss": 0.3688, "step": 4300 }, { "epoch": 2.66, "eval_loss": 0.3809460401535034, "eval_runtime": 1.7731, "eval_samples_per_second": 18.047, "eval_steps_per_second": 9.024, "eval_wer_score": 4.06002609830361, "step": 4300 }, { "epoch": 2.69, "learning_rate": 5.136138613861386e-06, "loss": 0.3661, "step": 4350 }, { "epoch": 2.69, "eval_loss": 0.3806704878807068, "eval_runtime": 1.7466, "eval_samples_per_second": 18.321, "eval_steps_per_second": 9.16, "eval_wer_score": 4.070030448020878, "step": 4350 }, { "epoch": 2.72, "learning_rate": 4.62046204620462e-06, "loss": 0.3685, "step": 4400 }, { "epoch": 2.72, "eval_loss": 0.3806542456150055, "eval_runtime": 1.7323, "eval_samples_per_second": 18.473, "eval_steps_per_second": 9.236, "eval_wer_score": 4.0400173988690735, "step": 4400 }, { "epoch": 2.75, "learning_rate": 4.104785478547855e-06, "loss": 0.3665, "step": 4450 }, { "epoch": 2.75, "eval_loss": 0.3803941607475281, "eval_runtime": 1.8056, "eval_samples_per_second": 17.723, "eval_steps_per_second": 8.861, "eval_wer_score": 4.07177033492823, "step": 4450 }, { "epoch": 2.78, "learning_rate": 3.589108910891089e-06, "loss": 0.3665, "step": 4500 }, { "epoch": 2.78, "eval_loss": 0.38016611337661743, "eval_runtime": 1.7258, "eval_samples_per_second": 18.542, "eval_steps_per_second": 9.271, "eval_wer_score": 4.04654197477164, "step": 4500 }, { "epoch": 2.82, "learning_rate": 3.073432343234324e-06, "loss": 0.3651, "step": 4550 }, { "epoch": 2.82, "eval_loss": 0.38055145740509033, "eval_runtime": 1.7219, "eval_samples_per_second": 18.584, "eval_steps_per_second": 9.292, "eval_wer_score": 4.047411918225316, "step": 4550 }, { "epoch": 2.85, "learning_rate": 2.557755775577558e-06, "loss": 0.3658, "step": 4600 }, { "epoch": 2.85, "eval_loss": 0.3797268867492676, "eval_runtime": 1.7777, "eval_samples_per_second": 18.0, "eval_steps_per_second": 9.0, "eval_wer_score": 4.038712483688561, "step": 4600 }, { "epoch": 2.88, "learning_rate": 2.042079207920792e-06, "loss": 0.3661, "step": 4650 }, { "epoch": 2.88, "eval_loss": 0.37951183319091797, "eval_runtime": 1.7219, "eval_samples_per_second": 18.584, "eval_steps_per_second": 9.292, "eval_wer_score": 4.044367116137451, "step": 4650 }, { "epoch": 2.91, "learning_rate": 1.5264026402640265e-06, "loss": 0.3679, "step": 4700 }, { "epoch": 2.91, "eval_loss": 0.37964126467704773, "eval_runtime": 1.7634, "eval_samples_per_second": 18.146, "eval_steps_per_second": 9.073, "eval_wer_score": 4.043062200956938, "step": 4700 }, { "epoch": 2.94, "learning_rate": 1.0107260726072606e-06, "loss": 0.3667, "step": 4750 }, { "epoch": 2.94, "eval_loss": 0.37969282269477844, "eval_runtime": 1.7225, "eval_samples_per_second": 18.577, "eval_steps_per_second": 9.289, "eval_wer_score": 4.039582427142236, "step": 4750 }, { "epoch": 2.97, "learning_rate": 4.950495049504951e-07, "loss": 0.3671, "step": 4800 }, { "epoch": 2.97, "eval_loss": 0.3793087899684906, "eval_runtime": 1.7729, "eval_samples_per_second": 18.049, "eval_steps_per_second": 9.025, "eval_wer_score": 4.044802087864289, "step": 4800 } ], "max_steps": 4848, "num_train_epochs": 3, "total_flos": 1.39557801614635e+18, "trial_name": null, "trial_params": null }