{ "best_metric": 0.3922309875488281, "best_model_checkpoint": "/data/users/yanyang/Projects/COCO_Caption_Refine/debug/git/2023-09-12-11-13-17_git-base/checkpoint-4800", "epoch": 2.9702970297029703, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.948432343234324e-05, "loss": 6.7185, "step": 50 }, { "epoch": 0.03, "eval_loss": 4.150215148925781, "eval_runtime": 2.7394, "eval_samples_per_second": 11.682, "eval_steps_per_second": 5.841, "eval_wer_score": 2.6076555023923444, "step": 50 }, { "epoch": 0.06, "learning_rate": 4.8968646864686466e-05, "loss": 2.2563, "step": 100 }, { "epoch": 0.06, "eval_loss": 0.7511033415794373, "eval_runtime": 2.511, "eval_samples_per_second": 12.744, "eval_steps_per_second": 6.372, "eval_wer_score": 1.8782079164854284, "step": 100 }, { "epoch": 0.09, "learning_rate": 4.8452970297029704e-05, "loss": 0.6084, "step": 150 }, { "epoch": 0.09, "eval_loss": 0.5207427144050598, "eval_runtime": 3.1225, "eval_samples_per_second": 10.248, "eval_steps_per_second": 5.124, "eval_wer_score": 2.3458025228360158, "step": 150 }, { "epoch": 0.12, "learning_rate": 4.793729372937294e-05, "loss": 0.5236, "step": 200 }, { "epoch": 0.12, "eval_loss": 0.48739808797836304, "eval_runtime": 3.4202, "eval_samples_per_second": 9.356, "eval_steps_per_second": 4.678, "eval_wer_score": 2.6250543714658545, "step": 200 }, { "epoch": 0.15, "learning_rate": 4.7421617161716174e-05, "loss": 0.5022, "step": 250 }, { "epoch": 0.15, "eval_loss": 0.4747964143753052, "eval_runtime": 3.5933, "eval_samples_per_second": 8.906, "eval_steps_per_second": 4.453, "eval_wer_score": 2.889952153110048, "step": 250 }, { "epoch": 0.19, "learning_rate": 4.6905940594059406e-05, "loss": 0.489, "step": 300 }, { "epoch": 0.19, "eval_loss": 0.4658946096897125, "eval_runtime": 3.2552, "eval_samples_per_second": 9.83, "eval_steps_per_second": 4.915, "eval_wer_score": 3.058721183123097, "step": 300 }, { "epoch": 0.22, "learning_rate": 4.6390264026402644e-05, "loss": 0.477, "step": 350 }, { "epoch": 0.22, "eval_loss": 0.46055227518081665, "eval_runtime": 3.7594, "eval_samples_per_second": 8.512, "eval_steps_per_second": 4.256, "eval_wer_score": 2.9904306220095696, "step": 350 }, { "epoch": 0.25, "learning_rate": 4.5874587458745876e-05, "loss": 0.4702, "step": 400 }, { "epoch": 0.25, "eval_loss": 0.4569026827812195, "eval_runtime": 3.9386, "eval_samples_per_second": 8.125, "eval_steps_per_second": 4.062, "eval_wer_score": 2.941278816876903, "step": 400 }, { "epoch": 0.28, "learning_rate": 4.5358910891089114e-05, "loss": 0.4673, "step": 450 }, { "epoch": 0.28, "eval_loss": 0.45087775588035583, "eval_runtime": 2.9403, "eval_samples_per_second": 10.883, "eval_steps_per_second": 5.442, "eval_wer_score": 3.01739886907351, "step": 450 }, { "epoch": 0.31, "learning_rate": 4.4843234323432346e-05, "loss": 0.4633, "step": 500 }, { "epoch": 0.31, "eval_loss": 0.4465155601501465, "eval_runtime": 3.3332, "eval_samples_per_second": 9.6, "eval_steps_per_second": 4.8, "eval_wer_score": 3.111787733797303, "step": 500 }, { "epoch": 0.34, "learning_rate": 4.432755775577558e-05, "loss": 0.4579, "step": 550 }, { "epoch": 0.34, "eval_loss": 0.44231322407722473, "eval_runtime": 3.3294, "eval_samples_per_second": 9.611, "eval_steps_per_second": 4.806, "eval_wer_score": 3.1004784688995217, "step": 550 }, { "epoch": 0.37, "learning_rate": 4.3811881188118816e-05, "loss": 0.4522, "step": 600 }, { "epoch": 0.37, "eval_loss": 0.4409943222999573, "eval_runtime": 4.0034, "eval_samples_per_second": 7.993, "eval_steps_per_second": 3.997, "eval_wer_score": 3.08916920400174, "step": 600 }, { "epoch": 0.4, "learning_rate": 4.329620462046205e-05, "loss": 0.4495, "step": 650 }, { "epoch": 0.4, "eval_loss": 0.4370802044868469, "eval_runtime": 2.7858, "eval_samples_per_second": 11.487, "eval_steps_per_second": 5.743, "eval_wer_score": 3.1792083514571554, "step": 650 }, { "epoch": 0.43, "learning_rate": 4.278052805280528e-05, "loss": 0.4498, "step": 700 }, { "epoch": 0.43, "eval_loss": 0.43357548117637634, "eval_runtime": 2.6149, "eval_samples_per_second": 12.238, "eval_steps_per_second": 6.119, "eval_wer_score": 3.1222270552414093, "step": 700 }, { "epoch": 0.46, "learning_rate": 4.226485148514852e-05, "loss": 0.4461, "step": 750 }, { "epoch": 0.46, "eval_loss": 0.4354948401451111, "eval_runtime": 2.7939, "eval_samples_per_second": 11.454, "eval_steps_per_second": 5.727, "eval_wer_score": 3.187037842540235, "step": 750 }, { "epoch": 0.5, "learning_rate": 4.174917491749175e-05, "loss": 0.4435, "step": 800 }, { "epoch": 0.5, "eval_loss": 0.4297381043434143, "eval_runtime": 2.5337, "eval_samples_per_second": 12.63, "eval_steps_per_second": 6.315, "eval_wer_score": 3.207046541974772, "step": 800 }, { "epoch": 0.53, "learning_rate": 4.123349834983499e-05, "loss": 0.4392, "step": 850 }, { "epoch": 0.53, "eval_loss": 0.4316774606704712, "eval_runtime": 2.6742, "eval_samples_per_second": 11.966, "eval_steps_per_second": 5.983, "eval_wer_score": 3.1857329273597217, "step": 850 }, { "epoch": 0.56, "learning_rate": 4.071782178217822e-05, "loss": 0.4385, "step": 900 }, { "epoch": 0.56, "eval_loss": 0.42789211869239807, "eval_runtime": 2.5419, "eval_samples_per_second": 12.589, "eval_steps_per_second": 6.294, "eval_wer_score": 3.186602870813397, "step": 900 }, { "epoch": 0.59, "learning_rate": 4.020214521452145e-05, "loss": 0.4352, "step": 950 }, { "epoch": 0.59, "eval_loss": 0.4274422526359558, "eval_runtime": 2.5697, "eval_samples_per_second": 12.453, "eval_steps_per_second": 6.226, "eval_wer_score": 3.23836450630709, "step": 950 }, { "epoch": 0.62, "learning_rate": 3.968646864686469e-05, "loss": 0.4354, "step": 1000 }, { "epoch": 0.62, "eval_loss": 0.42688965797424316, "eval_runtime": 2.9492, "eval_samples_per_second": 10.85, "eval_steps_per_second": 5.425, "eval_wer_score": 3.192692474989126, "step": 1000 }, { "epoch": 0.65, "learning_rate": 3.917079207920793e-05, "loss": 0.4336, "step": 1050 }, { "epoch": 0.65, "eval_loss": 0.42364591360092163, "eval_runtime": 3.3776, "eval_samples_per_second": 9.474, "eval_steps_per_second": 4.737, "eval_wer_score": 3.1705089169204004, "step": 1050 }, { "epoch": 0.68, "learning_rate": 3.865511551155115e-05, "loss": 0.4315, "step": 1100 }, { "epoch": 0.68, "eval_loss": 0.42294472455978394, "eval_runtime": 3.4043, "eval_samples_per_second": 9.4, "eval_steps_per_second": 4.7, "eval_wer_score": 3.2618529795563287, "step": 1100 }, { "epoch": 0.71, "learning_rate": 3.813943894389439e-05, "loss": 0.4297, "step": 1150 }, { "epoch": 0.71, "eval_loss": 0.4200877547264099, "eval_runtime": 3.2244, "eval_samples_per_second": 9.924, "eval_steps_per_second": 4.962, "eval_wer_score": 3.2818616789908654, "step": 1150 }, { "epoch": 0.74, "learning_rate": 3.762376237623763e-05, "loss": 0.429, "step": 1200 }, { "epoch": 0.74, "eval_loss": 0.4193739593029022, "eval_runtime": 3.0348, "eval_samples_per_second": 10.544, "eval_steps_per_second": 5.272, "eval_wer_score": 3.281426707264028, "step": 1200 }, { "epoch": 0.77, "learning_rate": 3.710808580858086e-05, "loss": 0.4272, "step": 1250 }, { "epoch": 0.77, "eval_loss": 0.41733482480049133, "eval_runtime": 3.4043, "eval_samples_per_second": 9.4, "eval_steps_per_second": 4.7, "eval_wer_score": 3.2501087429317095, "step": 1250 }, { "epoch": 0.8, "learning_rate": 3.659240924092409e-05, "loss": 0.4266, "step": 1300 }, { "epoch": 0.8, "eval_loss": 0.4167550206184387, "eval_runtime": 3.1991, "eval_samples_per_second": 10.003, "eval_steps_per_second": 5.001, "eval_wer_score": 3.240539364941279, "step": 1300 }, { "epoch": 0.84, "learning_rate": 3.607673267326733e-05, "loss": 0.4257, "step": 1350 }, { "epoch": 0.84, "eval_loss": 0.4143298268318176, "eval_runtime": 3.1177, "eval_samples_per_second": 10.264, "eval_steps_per_second": 5.132, "eval_wer_score": 3.2570682905611137, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.556105610561056e-05, "loss": 0.424, "step": 1400 }, { "epoch": 0.87, "eval_loss": 0.4156000018119812, "eval_runtime": 3.2132, "eval_samples_per_second": 9.959, "eval_steps_per_second": 4.979, "eval_wer_score": 3.2035667681600697, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.50453795379538e-05, "loss": 0.4249, "step": 1450 }, { "epoch": 0.9, "eval_loss": 0.4149695038795471, "eval_runtime": 3.367, "eval_samples_per_second": 9.504, "eval_steps_per_second": 4.752, "eval_wer_score": 3.2470639408438453, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.452970297029703e-05, "loss": 0.422, "step": 1500 }, { "epoch": 0.93, "eval_loss": 0.4136950373649597, "eval_runtime": 3.0485, "eval_samples_per_second": 10.497, "eval_steps_per_second": 5.249, "eval_wer_score": 3.257938234014789, "step": 1500 }, { "epoch": 0.96, "learning_rate": 3.4014026402640264e-05, "loss": 0.4193, "step": 1550 }, { "epoch": 0.96, "eval_loss": 0.41170167922973633, "eval_runtime": 3.4654, "eval_samples_per_second": 9.234, "eval_steps_per_second": 4.617, "eval_wer_score": 3.2470639408438453, "step": 1550 }, { "epoch": 0.99, "learning_rate": 3.34983498349835e-05, "loss": 0.4179, "step": 1600 }, { "epoch": 0.99, "eval_loss": 0.412392795085907, "eval_runtime": 3.3054, "eval_samples_per_second": 9.681, "eval_steps_per_second": 4.841, "eval_wer_score": 3.2440191387559807, "step": 1600 }, { "epoch": 1.02, "learning_rate": 3.2982673267326734e-05, "loss": 0.4164, "step": 1650 }, { "epoch": 1.02, "eval_loss": 0.41018491983413696, "eval_runtime": 3.215, "eval_samples_per_second": 9.953, "eval_steps_per_second": 4.977, "eval_wer_score": 3.2679425837320575, "step": 1650 }, { "epoch": 1.05, "learning_rate": 3.2466996699669965e-05, "loss": 0.4121, "step": 1700 }, { "epoch": 1.05, "eval_loss": 0.4093266427516937, "eval_runtime": 3.3525, "eval_samples_per_second": 9.545, "eval_steps_per_second": 4.773, "eval_wer_score": 3.222705524140931, "step": 1700 }, { "epoch": 1.08, "learning_rate": 3.1951320132013203e-05, "loss": 0.4103, "step": 1750 }, { "epoch": 1.08, "eval_loss": 0.4072987139225006, "eval_runtime": 3.3812, "eval_samples_per_second": 9.464, "eval_steps_per_second": 4.732, "eval_wer_score": 3.1874728142670725, "step": 1750 }, { "epoch": 1.11, "learning_rate": 3.1435643564356435e-05, "loss": 0.411, "step": 1800 }, { "epoch": 1.11, "eval_loss": 0.4119817614555359, "eval_runtime": 1.9138, "eval_samples_per_second": 16.721, "eval_steps_per_second": 8.36, "eval_wer_score": 3.201391909525881, "step": 1800 }, { "epoch": 1.14, "learning_rate": 3.0919966996699673e-05, "loss": 0.4095, "step": 1850 }, { "epoch": 1.14, "eval_loss": 0.4093400537967682, "eval_runtime": 2.0537, "eval_samples_per_second": 15.582, "eval_steps_per_second": 7.791, "eval_wer_score": 3.218790778599391, "step": 1850 }, { "epoch": 1.18, "learning_rate": 3.0404290429042902e-05, "loss": 0.4093, "step": 1900 }, { "epoch": 1.18, "eval_loss": 0.4096407890319824, "eval_runtime": 1.9992, "eval_samples_per_second": 16.006, "eval_steps_per_second": 8.003, "eval_wer_score": 3.1705089169204004, "step": 1900 }, { "epoch": 1.21, "learning_rate": 2.988861386138614e-05, "loss": 0.4081, "step": 1950 }, { "epoch": 1.21, "eval_loss": 0.4094192683696747, "eval_runtime": 2.0819, "eval_samples_per_second": 15.371, "eval_steps_per_second": 7.685, "eval_wer_score": 3.1966072205306655, "step": 1950 }, { "epoch": 1.24, "learning_rate": 2.9372937293729375e-05, "loss": 0.4083, "step": 2000 }, { "epoch": 1.24, "eval_loss": 0.4079236090183258, "eval_runtime": 1.9742, "eval_samples_per_second": 16.209, "eval_steps_per_second": 8.104, "eval_wer_score": 3.2231404958677685, "step": 2000 }, { "epoch": 1.27, "learning_rate": 2.885726072607261e-05, "loss": 0.4065, "step": 2050 }, { "epoch": 1.27, "eval_loss": 0.4078274965286255, "eval_runtime": 1.9767, "eval_samples_per_second": 16.189, "eval_steps_per_second": 8.094, "eval_wer_score": 3.2292301000434973, "step": 2050 }, { "epoch": 1.3, "learning_rate": 2.834158415841584e-05, "loss": 0.4074, "step": 2100 }, { "epoch": 1.3, "eval_loss": 0.40426379442214966, "eval_runtime": 2.1917, "eval_samples_per_second": 14.601, "eval_steps_per_second": 7.3, "eval_wer_score": 3.2127011744236627, "step": 2100 }, { "epoch": 1.33, "learning_rate": 2.7825907590759077e-05, "loss": 0.4066, "step": 2150 }, { "epoch": 1.33, "eval_loss": 0.40665364265441895, "eval_runtime": 2.255, "eval_samples_per_second": 14.191, "eval_steps_per_second": 7.095, "eval_wer_score": 3.2053066550674205, "step": 2150 }, { "epoch": 1.36, "learning_rate": 2.731023102310231e-05, "loss": 0.405, "step": 2200 }, { "epoch": 1.36, "eval_loss": 0.4042993485927582, "eval_runtime": 2.1192, "eval_samples_per_second": 15.1, "eval_steps_per_second": 7.55, "eval_wer_score": 3.2448890822096566, "step": 2200 }, { "epoch": 1.39, "learning_rate": 2.6794554455445547e-05, "loss": 0.4051, "step": 2250 }, { "epoch": 1.39, "eval_loss": 0.4049427807331085, "eval_runtime": 1.9064, "eval_samples_per_second": 16.786, "eval_steps_per_second": 8.393, "eval_wer_score": 3.2109612875163114, "step": 2250 }, { "epoch": 1.42, "learning_rate": 2.6278877887788778e-05, "loss": 0.4045, "step": 2300 }, { "epoch": 1.42, "eval_loss": 0.4028187394142151, "eval_runtime": 2.0181, "eval_samples_per_second": 15.856, "eval_steps_per_second": 7.928, "eval_wer_score": 3.2035667681600697, "step": 2300 }, { "epoch": 1.45, "learning_rate": 2.5763201320132013e-05, "loss": 0.4045, "step": 2350 }, { "epoch": 1.45, "eval_loss": 0.4024648070335388, "eval_runtime": 2.027, "eval_samples_per_second": 15.787, "eval_steps_per_second": 7.894, "eval_wer_score": 3.1757285776424533, "step": 2350 }, { "epoch": 1.49, "learning_rate": 2.5247524752475248e-05, "loss": 0.406, "step": 2400 }, { "epoch": 1.49, "eval_loss": 0.400738000869751, "eval_runtime": 2.1415, "eval_samples_per_second": 14.942, "eval_steps_per_second": 7.471, "eval_wer_score": 3.204001739886907, "step": 2400 }, { "epoch": 1.52, "learning_rate": 2.4731848184818483e-05, "loss": 0.4021, "step": 2450 }, { "epoch": 1.52, "eval_loss": 0.40221601724624634, "eval_runtime": 2.237, "eval_samples_per_second": 14.305, "eval_steps_per_second": 7.152, "eval_wer_score": 3.144410613310135, "step": 2450 }, { "epoch": 1.55, "learning_rate": 2.4216171617161718e-05, "loss": 0.4026, "step": 2500 }, { "epoch": 1.55, "eval_loss": 0.4028313159942627, "eval_runtime": 2.2063, "eval_samples_per_second": 14.504, "eval_steps_per_second": 7.252, "eval_wer_score": 3.168769030013049, "step": 2500 }, { "epoch": 1.58, "learning_rate": 2.370049504950495e-05, "loss": 0.4014, "step": 2550 }, { "epoch": 1.58, "eval_loss": 0.4026516079902649, "eval_runtime": 2.131, "eval_samples_per_second": 15.016, "eval_steps_per_second": 7.508, "eval_wer_score": 3.2000869943453676, "step": 2550 }, { "epoch": 1.61, "learning_rate": 2.3184818481848185e-05, "loss": 0.4015, "step": 2600 }, { "epoch": 1.61, "eval_loss": 0.402204692363739, "eval_runtime": 2.0851, "eval_samples_per_second": 15.347, "eval_steps_per_second": 7.673, "eval_wer_score": 3.19182253153545, "step": 2600 }, { "epoch": 1.64, "learning_rate": 2.266914191419142e-05, "loss": 0.401, "step": 2650 }, { "epoch": 1.64, "eval_loss": 0.40174347162246704, "eval_runtime": 2.1944, "eval_samples_per_second": 14.583, "eval_steps_per_second": 7.291, "eval_wer_score": 3.189212701174424, "step": 2650 }, { "epoch": 1.67, "learning_rate": 2.2153465346534655e-05, "loss": 0.4007, "step": 2700 }, { "epoch": 1.67, "eval_loss": 0.40014830231666565, "eval_runtime": 2.1544, "eval_samples_per_second": 14.853, "eval_steps_per_second": 7.427, "eval_wer_score": 3.2148760330578514, "step": 2700 }, { "epoch": 1.7, "learning_rate": 2.1637788778877886e-05, "loss": 0.399, "step": 2750 }, { "epoch": 1.7, "eval_loss": 0.3999301791191101, "eval_runtime": 2.196, "eval_samples_per_second": 14.572, "eval_steps_per_second": 7.286, "eval_wer_score": 3.177468464549804, "step": 2750 }, { "epoch": 1.73, "learning_rate": 2.1122112211221125e-05, "loss": 0.4004, "step": 2800 }, { "epoch": 1.73, "eval_loss": 0.40041935443878174, "eval_runtime": 2.2406, "eval_samples_per_second": 14.282, "eval_steps_per_second": 7.141, "eval_wer_score": 3.186602870813397, "step": 2800 }, { "epoch": 1.76, "learning_rate": 2.0606435643564356e-05, "loss": 0.3988, "step": 2850 }, { "epoch": 1.76, "eval_loss": 0.4005739092826843, "eval_runtime": 2.1589, "eval_samples_per_second": 14.822, "eval_steps_per_second": 7.411, "eval_wer_score": 3.2235754675946064, "step": 2850 }, { "epoch": 1.79, "learning_rate": 2.009075907590759e-05, "loss": 0.3985, "step": 2900 }, { "epoch": 1.79, "eval_loss": 0.4012880325317383, "eval_runtime": 2.1243, "eval_samples_per_second": 15.064, "eval_steps_per_second": 7.532, "eval_wer_score": 3.2083514571552847, "step": 2900 }, { "epoch": 1.83, "learning_rate": 1.9575082508250826e-05, "loss": 0.3995, "step": 2950 }, { "epoch": 1.83, "eval_loss": 0.3977855443954468, "eval_runtime": 2.1918, "eval_samples_per_second": 14.6, "eval_steps_per_second": 7.3, "eval_wer_score": 3.192257503262288, "step": 2950 }, { "epoch": 1.86, "learning_rate": 1.905940594059406e-05, "loss": 0.3975, "step": 3000 }, { "epoch": 1.86, "eval_loss": 0.39725542068481445, "eval_runtime": 2.2663, "eval_samples_per_second": 14.12, "eval_steps_per_second": 7.06, "eval_wer_score": 3.174858634188778, "step": 3000 }, { "epoch": 1.89, "learning_rate": 1.8543729372937293e-05, "loss": 0.3976, "step": 3050 }, { "epoch": 1.89, "eval_loss": 0.39580366015434265, "eval_runtime": 2.2795, "eval_samples_per_second": 14.038, "eval_steps_per_second": 7.019, "eval_wer_score": 3.12396694214876, "step": 3050 }, { "epoch": 1.92, "learning_rate": 1.8028052805280528e-05, "loss": 0.3977, "step": 3100 }, { "epoch": 1.92, "eval_loss": 0.3961202800273895, "eval_runtime": 2.1696, "eval_samples_per_second": 14.749, "eval_steps_per_second": 7.374, "eval_wer_score": 3.165724227925185, "step": 3100 }, { "epoch": 1.95, "learning_rate": 1.7512376237623763e-05, "loss": 0.3945, "step": 3150 }, { "epoch": 1.95, "eval_loss": 0.396453857421875, "eval_runtime": 1.988, "eval_samples_per_second": 16.097, "eval_steps_per_second": 8.048, "eval_wer_score": 3.1805132666376688, "step": 3150 }, { "epoch": 1.98, "learning_rate": 1.6996699669966998e-05, "loss": 0.3962, "step": 3200 }, { "epoch": 1.98, "eval_loss": 0.39566469192504883, "eval_runtime": 1.9188, "eval_samples_per_second": 16.677, "eval_steps_per_second": 8.339, "eval_wer_score": 3.1852979556328838, "step": 3200 }, { "epoch": 2.01, "learning_rate": 1.648102310231023e-05, "loss": 0.3953, "step": 3250 }, { "epoch": 2.01, "eval_loss": 0.39734578132629395, "eval_runtime": 2.1888, "eval_samples_per_second": 14.62, "eval_steps_per_second": 7.31, "eval_wer_score": 3.13571117877338, "step": 3250 }, { "epoch": 2.04, "learning_rate": 1.5965346534653468e-05, "loss": 0.3896, "step": 3300 }, { "epoch": 2.04, "eval_loss": 0.3978061079978943, "eval_runtime": 2.2259, "eval_samples_per_second": 14.376, "eval_steps_per_second": 7.188, "eval_wer_score": 3.110047846889952, "step": 3300 }, { "epoch": 2.07, "learning_rate": 1.54496699669967e-05, "loss": 0.3907, "step": 3350 }, { "epoch": 2.07, "eval_loss": 0.3961105942726135, "eval_runtime": 2.5092, "eval_samples_per_second": 12.753, "eval_steps_per_second": 6.377, "eval_wer_score": 3.1274467159634622, "step": 3350 }, { "epoch": 2.1, "learning_rate": 1.4933993399339935e-05, "loss": 0.3889, "step": 3400 }, { "epoch": 2.1, "eval_loss": 0.3963559865951538, "eval_runtime": 2.1557, "eval_samples_per_second": 14.845, "eval_steps_per_second": 7.422, "eval_wer_score": 3.190517616354937, "step": 3400 }, { "epoch": 2.13, "learning_rate": 1.4418316831683168e-05, "loss": 0.3902, "step": 3450 }, { "epoch": 2.13, "eval_loss": 0.3959140479564667, "eval_runtime": 2.1754, "eval_samples_per_second": 14.71, "eval_steps_per_second": 7.355, "eval_wer_score": 3.1857329273597217, "step": 3450 }, { "epoch": 2.17, "learning_rate": 1.3902640264026403e-05, "loss": 0.3902, "step": 3500 }, { "epoch": 2.17, "eval_loss": 0.3955221176147461, "eval_runtime": 2.075, "eval_samples_per_second": 15.422, "eval_steps_per_second": 7.711, "eval_wer_score": 3.162244454110483, "step": 3500 }, { "epoch": 2.2, "learning_rate": 1.3386963696369636e-05, "loss": 0.3891, "step": 3550 }, { "epoch": 2.2, "eval_loss": 0.39533841609954834, "eval_runtime": 2.1894, "eval_samples_per_second": 14.616, "eval_steps_per_second": 7.308, "eval_wer_score": 3.17442366246194, "step": 3550 }, { "epoch": 2.23, "learning_rate": 1.2871287128712873e-05, "loss": 0.3886, "step": 3600 }, { "epoch": 2.23, "eval_loss": 0.3946349620819092, "eval_runtime": 2.1424, "eval_samples_per_second": 14.936, "eval_steps_per_second": 7.468, "eval_wer_score": 3.142670726402784, "step": 3600 }, { "epoch": 2.26, "learning_rate": 1.2355610561056106e-05, "loss": 0.388, "step": 3650 }, { "epoch": 2.26, "eval_loss": 0.3959529995918274, "eval_runtime": 2.1922, "eval_samples_per_second": 14.597, "eval_steps_per_second": 7.299, "eval_wer_score": 3.1309264897781643, "step": 3650 }, { "epoch": 2.29, "learning_rate": 1.1839933993399341e-05, "loss": 0.3888, "step": 3700 }, { "epoch": 2.29, "eval_loss": 0.39452987909317017, "eval_runtime": 2.1798, "eval_samples_per_second": 14.68, "eval_steps_per_second": 7.34, "eval_wer_score": 3.13571117877338, "step": 3700 }, { "epoch": 2.32, "learning_rate": 1.1324257425742574e-05, "loss": 0.3891, "step": 3750 }, { "epoch": 2.32, "eval_loss": 0.39532509446144104, "eval_runtime": 2.0097, "eval_samples_per_second": 15.923, "eval_steps_per_second": 7.961, "eval_wer_score": 3.121357111787734, "step": 3750 }, { "epoch": 2.35, "learning_rate": 1.080858085808581e-05, "loss": 0.3883, "step": 3800 }, { "epoch": 2.35, "eval_loss": 0.39508694410324097, "eval_runtime": 2.1667, "eval_samples_per_second": 14.769, "eval_steps_per_second": 7.385, "eval_wer_score": 3.1361461505002173, "step": 3800 }, { "epoch": 2.38, "learning_rate": 1.0292904290429044e-05, "loss": 0.3876, "step": 3850 }, { "epoch": 2.38, "eval_loss": 0.39381179213523865, "eval_runtime": 2.1778, "eval_samples_per_second": 14.693, "eval_steps_per_second": 7.347, "eval_wer_score": 3.1309264897781643, "step": 3850 }, { "epoch": 2.41, "learning_rate": 9.777227722772278e-06, "loss": 0.3879, "step": 3900 }, { "epoch": 2.41, "eval_loss": 0.39416271448135376, "eval_runtime": 2.2392, "eval_samples_per_second": 14.291, "eval_steps_per_second": 7.145, "eval_wer_score": 3.148325358851675, "step": 3900 }, { "epoch": 2.44, "learning_rate": 9.261551155115513e-06, "loss": 0.386, "step": 3950 }, { "epoch": 2.44, "eval_loss": 0.39405977725982666, "eval_runtime": 2.2548, "eval_samples_per_second": 14.192, "eval_steps_per_second": 7.096, "eval_wer_score": 3.15311004784689, "step": 3950 }, { "epoch": 2.48, "learning_rate": 8.745874587458746e-06, "loss": 0.3862, "step": 4000 }, { "epoch": 2.48, "eval_loss": 0.3948515057563782, "eval_runtime": 2.2104, "eval_samples_per_second": 14.477, "eval_steps_per_second": 7.239, "eval_wer_score": 3.1378860374075686, "step": 4000 }, { "epoch": 2.51, "learning_rate": 8.230198019801981e-06, "loss": 0.3876, "step": 4050 }, { "epoch": 2.51, "eval_loss": 0.3954794704914093, "eval_runtime": 2.1178, "eval_samples_per_second": 15.11, "eval_steps_per_second": 7.555, "eval_wer_score": 3.1470204436711615, "step": 4050 }, { "epoch": 2.54, "learning_rate": 7.714521452145216e-06, "loss": 0.3876, "step": 4100 }, { "epoch": 2.54, "eval_loss": 0.3942318856716156, "eval_runtime": 2.1511, "eval_samples_per_second": 14.876, "eval_steps_per_second": 7.438, "eval_wer_score": 3.1326663766855156, "step": 4100 }, { "epoch": 2.57, "learning_rate": 7.198844884488449e-06, "loss": 0.3858, "step": 4150 }, { "epoch": 2.57, "eval_loss": 0.39369016885757446, "eval_runtime": 2.2193, "eval_samples_per_second": 14.419, "eval_steps_per_second": 7.21, "eval_wer_score": 3.1270117442366248, "step": 4150 }, { "epoch": 2.6, "learning_rate": 6.6831683168316835e-06, "loss": 0.3855, "step": 4200 }, { "epoch": 2.6, "eval_loss": 0.3940153121948242, "eval_runtime": 2.2424, "eval_samples_per_second": 14.27, "eval_steps_per_second": 7.135, "eval_wer_score": 3.1491953023053503, "step": 4200 }, { "epoch": 2.63, "learning_rate": 6.167491749174918e-06, "loss": 0.3864, "step": 4250 }, { "epoch": 2.63, "eval_loss": 0.3938477337360382, "eval_runtime": 2.1981, "eval_samples_per_second": 14.558, "eval_steps_per_second": 7.279, "eval_wer_score": 3.1431056981296215, "step": 4250 }, { "epoch": 2.66, "learning_rate": 5.651815181518152e-06, "loss": 0.3869, "step": 4300 }, { "epoch": 2.66, "eval_loss": 0.3936881422996521, "eval_runtime": 2.2164, "eval_samples_per_second": 14.438, "eval_steps_per_second": 7.219, "eval_wer_score": 3.1583297085689432, "step": 4300 }, { "epoch": 2.69, "learning_rate": 5.136138613861386e-06, "loss": 0.3841, "step": 4350 }, { "epoch": 2.69, "eval_loss": 0.39349794387817383, "eval_runtime": 2.2175, "eval_samples_per_second": 14.43, "eval_steps_per_second": 7.215, "eval_wer_score": 3.1278816876903, "step": 4350 }, { "epoch": 2.72, "learning_rate": 4.62046204620462e-06, "loss": 0.3866, "step": 4400 }, { "epoch": 2.72, "eval_loss": 0.3936805725097656, "eval_runtime": 1.9094, "eval_samples_per_second": 16.759, "eval_steps_per_second": 8.38, "eval_wer_score": 3.119182253153545, "step": 4400 }, { "epoch": 2.75, "learning_rate": 4.104785478547855e-06, "loss": 0.3848, "step": 4450 }, { "epoch": 2.75, "eval_loss": 0.3931500315666199, "eval_runtime": 2.1055, "eval_samples_per_second": 15.198, "eval_steps_per_second": 7.599, "eval_wer_score": 3.1235319704219227, "step": 4450 }, { "epoch": 2.78, "learning_rate": 3.589108910891089e-06, "loss": 0.3849, "step": 4500 }, { "epoch": 2.78, "eval_loss": 0.39277058839797974, "eval_runtime": 2.1028, "eval_samples_per_second": 15.218, "eval_steps_per_second": 7.609, "eval_wer_score": 3.1313614615050023, "step": 4500 }, { "epoch": 2.82, "learning_rate": 3.073432343234324e-06, "loss": 0.383, "step": 4550 }, { "epoch": 2.82, "eval_loss": 0.3926939368247986, "eval_runtime": 2.1315, "eval_samples_per_second": 15.013, "eval_steps_per_second": 7.506, "eval_wer_score": 3.1230969986950847, "step": 4550 }, { "epoch": 2.85, "learning_rate": 2.557755775577558e-06, "loss": 0.3839, "step": 4600 }, { "epoch": 2.85, "eval_loss": 0.39277368783950806, "eval_runtime": 2.1656, "eval_samples_per_second": 14.776, "eval_steps_per_second": 7.388, "eval_wer_score": 3.115702479338843, "step": 4600 }, { "epoch": 2.88, "learning_rate": 2.042079207920792e-06, "loss": 0.3843, "step": 4650 }, { "epoch": 2.88, "eval_loss": 0.3926578164100647, "eval_runtime": 2.2527, "eval_samples_per_second": 14.205, "eval_steps_per_second": 7.103, "eval_wer_score": 3.1226620269682472, "step": 4650 }, { "epoch": 2.91, "learning_rate": 1.5264026402640265e-06, "loss": 0.3862, "step": 4700 }, { "epoch": 2.91, "eval_loss": 0.3923312723636627, "eval_runtime": 2.1236, "eval_samples_per_second": 15.069, "eval_steps_per_second": 7.534, "eval_wer_score": 3.1287516311439756, "step": 4700 }, { "epoch": 2.94, "learning_rate": 1.0107260726072606e-06, "loss": 0.3848, "step": 4750 }, { "epoch": 2.94, "eval_loss": 0.3923192024230957, "eval_runtime": 2.1085, "eval_samples_per_second": 15.177, "eval_steps_per_second": 7.588, "eval_wer_score": 3.1448455850369728, "step": 4750 }, { "epoch": 2.97, "learning_rate": 4.950495049504951e-07, "loss": 0.3856, "step": 4800 }, { "epoch": 2.97, "eval_loss": 0.3922309875488281, "eval_runtime": 2.9258, "eval_samples_per_second": 10.937, "eval_steps_per_second": 5.469, "eval_wer_score": 3.133536320139191, "step": 4800 } ], "max_steps": 4848, "num_train_epochs": 3, "total_flos": 5.746828131664773e+17, "trial_name": null, "trial_params": null }