{
  "best_metric": 0.9083333333333333,
  "best_model_checkpoint": "deverta_60k_fillna_val100/checkpoint-7600",
  "epoch": 1.9984551415846303,
  "eval_steps": 100,
  "global_step": 7600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 1.717791411042945e-06,
      "loss": 1.615,
      "step": 100
    },
    {
      "epoch": 0.03,
      "eval_loss": 1.6094433069229126,
      "eval_map@3": 0.36999999999999994,
      "eval_runtime": 3.9269,
      "eval_samples_per_second": 25.465,
      "eval_steps_per_second": 25.465,
      "step": 100
    },
    {
      "epoch": 0.05,
      "learning_rate": 3.470639789658195e-06,
      "loss": 1.615,
      "step": 200
    },
    {
      "epoch": 0.05,
      "eval_loss": 1.6090234518051147,
      "eval_map@3": 0.5433333333333333,
      "eval_runtime": 3.9569,
      "eval_samples_per_second": 25.272,
      "eval_steps_per_second": 25.272,
      "step": 200
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.2234881682734454e-06,
      "loss": 1.6106,
      "step": 300
    },
    {
      "epoch": 0.08,
      "eval_loss": 1.6112011671066284,
      "eval_map@3": 0.6250000000000001,
      "eval_runtime": 4.2374,
      "eval_samples_per_second": 23.6,
      "eval_steps_per_second": 23.6,
      "step": 300
    },
    {
      "epoch": 0.11,
      "learning_rate": 6.976336546888695e-06,
      "loss": 1.4954,
      "step": 400
    },
    {
      "epoch": 0.11,
      "eval_loss": 1.2830108404159546,
      "eval_map@3": 0.755,
      "eval_runtime": 4.1043,
      "eval_samples_per_second": 24.364,
      "eval_steps_per_second": 24.364,
      "step": 400
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.729184925503945e-06,
      "loss": 1.0519,
      "step": 500
    },
    {
      "epoch": 0.13,
      "eval_loss": 0.8515128493309021,
      "eval_map@3": 0.8183333333333335,
      "eval_runtime": 4.0929,
      "eval_samples_per_second": 24.432,
      "eval_steps_per_second": 24.432,
      "step": 500
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.0482033304119195e-05,
      "loss": 0.8846,
      "step": 600
    },
    {
      "epoch": 0.16,
      "eval_loss": 0.7408291101455688,
      "eval_map@3": 0.8366666666666667,
      "eval_runtime": 4.1086,
      "eval_samples_per_second": 24.339,
      "eval_steps_per_second": 24.339,
      "step": 600
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.2234881682734446e-05,
      "loss": 0.8651,
      "step": 700
    },
    {
      "epoch": 0.18,
      "eval_loss": 0.7649513483047485,
      "eval_map@3": 0.8633333333333334,
      "eval_runtime": 4.0899,
      "eval_samples_per_second": 24.45,
      "eval_steps_per_second": 24.45,
      "step": 700
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.3987730061349694e-05,
      "loss": 0.8166,
      "step": 800
    },
    {
      "epoch": 0.21,
      "eval_loss": 0.7141894698143005,
      "eval_map@3": 0.8583333333333334,
      "eval_runtime": 4.1369,
      "eval_samples_per_second": 24.173,
      "eval_steps_per_second": 24.173,
      "step": 800
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.5740578439964945e-05,
      "loss": 0.8283,
      "step": 900
    },
    {
      "epoch": 0.24,
      "eval_loss": 0.6661826968193054,
      "eval_map@3": 0.845,
      "eval_runtime": 4.1543,
      "eval_samples_per_second": 24.071,
      "eval_steps_per_second": 24.071,
      "step": 900
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.7493426818580194e-05,
      "loss": 0.7593,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "eval_loss": 0.6564372181892395,
      "eval_map@3": 0.8483333333333334,
      "eval_runtime": 4.076,
      "eval_samples_per_second": 24.534,
      "eval_steps_per_second": 24.534,
      "step": 1000
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9246275197195444e-05,
      "loss": 0.7401,
      "step": 1100
    },
    {
      "epoch": 0.29,
      "eval_loss": 0.6686395406723022,
      "eval_map@3": 0.85,
      "eval_runtime": 4.0963,
      "eval_samples_per_second": 24.412,
      "eval_steps_per_second": 24.412,
      "step": 1100
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.9998531353530498e-05,
      "loss": 0.7791,
      "step": 1200
    },
    {
      "epoch": 0.32,
      "eval_loss": 0.608403742313385,
      "eval_map@3": 0.875,
      "eval_runtime": 4.1272,
      "eval_samples_per_second": 24.229,
      "eval_steps_per_second": 24.229,
      "step": 1200
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.998860488949288e-05,
      "loss": 0.7868,
      "step": 1300
    },
    {
      "epoch": 0.34,
      "eval_loss": 0.6702239513397217,
      "eval_map@3": 0.87,
      "eval_runtime": 4.0746,
      "eval_samples_per_second": 24.542,
      "eval_steps_per_second": 24.542,
      "step": 1300
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.9969323231985812e-05,
      "loss": 0.7567,
      "step": 1400
    },
    {
      "epoch": 0.37,
      "eval_loss": 0.6087381839752197,
      "eval_map@3": 0.8683333333333334,
      "eval_runtime": 3.8341,
      "eval_samples_per_second": 26.081,
      "eval_steps_per_second": 26.081,
      "step": 1400
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.994070443995129e-05,
      "loss": 0.7488,
      "step": 1500
    },
    {
      "epoch": 0.39,
      "eval_loss": 0.6629943251609802,
      "eval_map@3": 0.8616666666666667,
      "eval_runtime": 4.0681,
      "eval_samples_per_second": 24.581,
      "eval_steps_per_second": 24.581,
      "step": 1500
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.990277531736638e-05,
      "loss": 0.7833,
      "step": 1600
    },
    {
      "epoch": 0.42,
      "eval_loss": 0.592115581035614,
      "eval_map@3": 0.8566666666666667,
      "eval_runtime": 4.0797,
      "eval_samples_per_second": 24.512,
      "eval_steps_per_second": 24.512,
      "step": 1600
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9855571388138954e-05,
      "loss": 0.7706,
      "step": 1700
    },
    {
      "epoch": 0.45,
      "eval_loss": 0.6199390888214111,
      "eval_map@3": 0.8733333333333333,
      "eval_runtime": 3.9265,
      "eval_samples_per_second": 25.468,
      "eval_steps_per_second": 25.468,
      "step": 1700
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.979913686283649e-05,
      "loss": 0.7422,
      "step": 1800
    },
    {
      "epoch": 0.47,
      "eval_loss": 0.6019750237464905,
      "eval_map@3": 0.8833333333333334,
      "eval_runtime": 3.9986,
      "eval_samples_per_second": 25.009,
      "eval_steps_per_second": 25.009,
      "step": 1800
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9733524597279037e-05,
      "loss": 0.7129,
      "step": 1900
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.5683853626251221,
      "eval_map@3": 0.8766666666666666,
      "eval_runtime": 4.0628,
      "eval_samples_per_second": 24.613,
      "eval_steps_per_second": 24.613,
      "step": 1900
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.965879604303516e-05,
      "loss": 0.753,
      "step": 2000
    },
    {
      "epoch": 0.53,
      "eval_loss": 0.536411464214325,
      "eval_map@3": 0.8733333333333333,
      "eval_runtime": 3.989,
      "eval_samples_per_second": 25.069,
      "eval_steps_per_second": 25.069,
      "step": 2000
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.957502118986723e-05,
      "loss": 0.7057,
      "step": 2100
    },
    {
      "epoch": 0.55,
      "eval_loss": 0.5491364002227783,
      "eval_map@3": 0.8583333333333333,
      "eval_runtime": 3.9154,
      "eval_samples_per_second": 25.54,
      "eval_steps_per_second": 25.54,
      "step": 2100
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.9482278500179953e-05,
      "loss": 0.73,
      "step": 2200
    },
    {
      "epoch": 0.58,
      "eval_loss": 0.5584819316864014,
      "eval_map@3": 0.8766666666666667,
      "eval_runtime": 3.9164,
      "eval_samples_per_second": 25.534,
      "eval_steps_per_second": 25.534,
      "step": 2200
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.9380654835533523e-05,
      "loss": 0.6898,
      "step": 2300
    },
    {
      "epoch": 0.6,
      "eval_loss": 0.5588586330413818,
      "eval_map@3": 0.88,
      "eval_runtime": 3.8846,
      "eval_samples_per_second": 25.743,
      "eval_steps_per_second": 25.743,
      "step": 2300
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.9270245375290263e-05,
      "loss": 0.6842,
      "step": 2400
    },
    {
      "epoch": 0.63,
      "eval_loss": 0.573777437210083,
      "eval_map@3": 0.8683333333333334,
      "eval_runtime": 4.0374,
      "eval_samples_per_second": 24.768,
      "eval_steps_per_second": 24.768,
      "step": 2400
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.9151153527470895e-05,
      "loss": 0.728,
      "step": 2500
    },
    {
      "epoch": 0.66,
      "eval_loss": 0.5937029123306274,
      "eval_map@3": 0.8866666666666666,
      "eval_runtime": 3.9109,
      "eval_samples_per_second": 25.57,
      "eval_steps_per_second": 25.57,
      "step": 2500
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.902349083190399e-05,
      "loss": 0.7047,
      "step": 2600
    },
    {
      "epoch": 0.68,
      "eval_loss": 0.5620361566543579,
      "eval_map@3": 0.885,
      "eval_runtime": 3.9171,
      "eval_samples_per_second": 25.529,
      "eval_steps_per_second": 25.529,
      "step": 2600
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.888737685575924e-05,
      "loss": 0.7554,
      "step": 2700
    },
    {
      "epoch": 0.71,
      "eval_loss": 0.5551219582557678,
      "eval_map@3": 0.8883333333333333,
      "eval_runtime": 3.9451,
      "eval_samples_per_second": 25.348,
      "eval_steps_per_second": 25.348,
      "step": 2700
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.874293908156247e-05,
      "loss": 0.7252,
      "step": 2800
    },
    {
      "epoch": 0.74,
      "eval_loss": 0.5565034747123718,
      "eval_map@3": 0.8983333333333331,
      "eval_runtime": 4.1016,
      "eval_samples_per_second": 24.381,
      "eval_steps_per_second": 24.381,
      "step": 2800
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.8590312787797196e-05,
      "loss": 0.7245,
      "step": 2900
    },
    {
      "epoch": 0.76,
      "eval_loss": 0.5758660435676575,
      "eval_map@3": 0.8816666666666667,
      "eval_runtime": 3.9367,
      "eval_samples_per_second": 25.402,
      "eval_steps_per_second": 25.402,
      "step": 2900
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.8429640922204614e-05,
      "loss": 0.725,
      "step": 3000
    },
    {
      "epoch": 0.79,
      "eval_loss": 0.5699021816253662,
      "eval_map@3": 0.8883333333333333,
      "eval_runtime": 3.8274,
      "eval_samples_per_second": 26.128,
      "eval_steps_per_second": 26.128,
      "step": 3000
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.8261073967900676e-05,
      "loss": 0.6983,
      "step": 3100
    },
    {
      "epoch": 0.82,
      "eval_loss": 0.5696993470191956,
      "eval_map@3": 0.8883333333333333,
      "eval_runtime": 3.8632,
      "eval_samples_per_second": 25.885,
      "eval_steps_per_second": 25.885,
      "step": 3100
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.80847698024356e-05,
      "loss": 0.7055,
      "step": 3200
    },
    {
      "epoch": 0.84,
      "eval_loss": 0.5589382648468018,
      "eval_map@3": 0.8933333333333333,
      "eval_runtime": 4.205,
      "eval_samples_per_second": 23.781,
      "eval_steps_per_second": 23.781,
      "step": 3200
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.790089354992788e-05,
      "loss": 0.7091,
      "step": 3300
    },
    {
      "epoch": 0.87,
      "eval_loss": 0.5517598986625671,
      "eval_map@3": 0.8883333333333333,
      "eval_runtime": 4.2776,
      "eval_samples_per_second": 23.378,
      "eval_steps_per_second": 23.378,
      "step": 3300
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.7711566234443824e-05,
      "loss": 0.7128,
      "step": 3400
    },
    {
      "epoch": 0.89,
      "eval_loss": 0.5628377795219421,
      "eval_map@3": 0.8866666666666667,
      "eval_runtime": 4.1647,
      "eval_samples_per_second": 24.011,
      "eval_steps_per_second": 24.011,
      "step": 3400
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.751314068469163e-05,
      "loss": 0.7465,
      "step": 3500
    },
    {
      "epoch": 0.92,
      "eval_loss": 0.5919340252876282,
      "eval_map@3": 0.8833333333333333,
      "eval_runtime": 4.0418,
      "eval_samples_per_second": 24.742,
      "eval_steps_per_second": 24.742,
      "step": 3500
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.730767842806733e-05,
      "loss": 0.708,
      "step": 3600
    },
    {
      "epoch": 0.95,
      "eval_loss": 0.554470419883728,
      "eval_map@3": 0.895,
      "eval_runtime": 4.0703,
      "eval_samples_per_second": 24.568,
      "eval_steps_per_second": 24.568,
      "step": 3600
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.709537189776684e-05,
      "loss": 0.7017,
      "step": 3700
    },
    {
      "epoch": 0.97,
      "eval_loss": 0.5175462365150452,
      "eval_map@3": 0.895,
      "eval_runtime": 4.1999,
      "eval_samples_per_second": 23.81,
      "eval_steps_per_second": 23.81,
      "step": 3700
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.6876419937241033e-05,
      "loss": 0.7,
      "step": 3800
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5037916302680969,
      "eval_map@3": 0.8916666666666667,
      "eval_runtime": 4.1774,
      "eval_samples_per_second": 23.938,
      "eval_steps_per_second": 23.938,
      "step": 3800
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.665102761396169e-05,
      "loss": 0.5923,
      "step": 3900
    },
    {
      "epoch": 1.03,
      "eval_loss": 0.48426902294158936,
      "eval_map@3": 0.8916666666666667,
      "eval_runtime": 4.139,
      "eval_samples_per_second": 24.16,
      "eval_steps_per_second": 24.16,
      "step": 3900
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.641940602735802e-05,
      "loss": 0.594,
      "step": 4000
    },
    {
      "epoch": 1.05,
      "eval_loss": 0.48445701599121094,
      "eval_map@3": 0.895,
      "eval_runtime": 4.2445,
      "eval_samples_per_second": 23.56,
      "eval_steps_per_second": 23.56,
      "step": 4000
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.6181772111103775e-05,
      "loss": 0.6075,
      "step": 4100
    },
    {
      "epoch": 1.08,
      "eval_loss": 0.5103535652160645,
      "eval_map@3": 0.8899999999999999,
      "eval_runtime": 4.059,
      "eval_samples_per_second": 24.636,
      "eval_steps_per_second": 24.636,
      "step": 4100
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.5938348429940095e-05,
      "loss": 0.5226,
      "step": 4200
    },
    {
      "epoch": 1.1,
      "eval_loss": 0.5009714961051941,
      "eval_map@3": 0.8883333333333333,
      "eval_runtime": 4.0879,
      "eval_samples_per_second": 24.463,
      "eval_steps_per_second": 24.463,
      "step": 4200
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.568936297122432e-05,
      "loss": 0.5886,
      "step": 4300
    },
    {
      "epoch": 1.13,
      "eval_loss": 0.49200114607810974,
      "eval_map@3": 0.9033333333333333,
      "eval_runtime": 4.1867,
      "eval_samples_per_second": 23.885,
      "eval_steps_per_second": 23.885,
      "step": 4300
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.543504893140011e-05,
      "loss": 0.5619,
      "step": 4400
    },
    {
      "epoch": 1.16,
      "eval_loss": 0.49913597106933594,
      "eval_map@3": 0.89,
      "eval_runtime": 4.0492,
      "eval_samples_per_second": 24.696,
      "eval_steps_per_second": 24.696,
      "step": 4400
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.517564449758878e-05,
      "loss": 0.6017,
      "step": 4500
    },
    {
      "epoch": 1.18,
      "eval_loss": 0.4905971586704254,
      "eval_map@3": 0.9033333333333333,
      "eval_runtime": 4.0275,
      "eval_samples_per_second": 24.83,
      "eval_steps_per_second": 24.83,
      "step": 4500
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.4911392624506427e-05,
      "loss": 0.534,
      "step": 4600
    },
    {
      "epoch": 1.21,
      "eval_loss": 0.4901256263256073,
      "eval_map@3": 0.89,
      "eval_runtime": 5.205,
      "eval_samples_per_second": 19.212,
      "eval_steps_per_second": 19.212,
      "step": 4600
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.4642540806915802e-05,
      "loss": 0.5914,
      "step": 4700
    },
    {
      "epoch": 1.24,
      "eval_loss": 0.4857093095779419,
      "eval_map@3": 0.8916666666666666,
      "eval_runtime": 4.357,
      "eval_samples_per_second": 22.952,
      "eval_steps_per_second": 22.952,
      "step": 4700
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.4369340847826037e-05,
      "loss": 0.6155,
      "step": 4800
    },
    {
      "epoch": 1.26,
      "eval_loss": 0.49262702465057373,
      "eval_map@3": 0.8849999999999999,
      "eval_runtime": 4.3241,
      "eval_samples_per_second": 23.126,
      "eval_steps_per_second": 23.126,
      "step": 4800
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.4092048622657309e-05,
      "loss": 0.5779,
      "step": 4900
    },
    {
      "epoch": 1.29,
      "eval_loss": 0.4796687960624695,
      "eval_map@3": 0.8849999999999999,
      "eval_runtime": 4.3677,
      "eval_samples_per_second": 22.895,
      "eval_steps_per_second": 22.895,
      "step": 4900
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.3810923839591368e-05,
      "loss": 0.6144,
      "step": 5000
    },
    {
      "epoch": 1.31,
      "eval_loss": 0.4864266514778137,
      "eval_map@3": 0.8916666666666666,
      "eval_runtime": 4.1788,
      "eval_samples_per_second": 23.93,
      "eval_steps_per_second": 23.93,
      "step": 5000
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.3526229796332322e-05,
      "loss": 0.5532,
      "step": 5100
    },
    {
      "epoch": 1.34,
      "eval_loss": 0.5059311389923096,
      "eval_map@3": 0.9033333333333334,
      "eval_runtime": 4.1314,
      "eval_samples_per_second": 24.205,
      "eval_steps_per_second": 24.205,
      "step": 5100
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.3238233133505538e-05,
      "loss": 0.5238,
      "step": 5200
    },
    {
      "epoch": 1.37,
      "eval_loss": 0.5035010576248169,
      "eval_map@3": 0.8933333333333334,
      "eval_runtime": 4.0704,
      "eval_samples_per_second": 24.568,
      "eval_steps_per_second": 24.568,
      "step": 5200
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.2947203584925607e-05,
      "loss": 0.5993,
      "step": 5300
    },
    {
      "epoch": 1.39,
      "eval_loss": 0.5209221839904785,
      "eval_map@3": 0.8983333333333334,
      "eval_runtime": 4.0192,
      "eval_samples_per_second": 24.881,
      "eval_steps_per_second": 24.881,
      "step": 5300
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.2653413724967272e-05,
      "loss": 0.5443,
      "step": 5400
    },
    {
      "epoch": 1.42,
      "eval_loss": 0.5166578888893127,
      "eval_map@3": 0.8883333333333334,
      "eval_runtime": 4.0162,
      "eval_samples_per_second": 24.899,
      "eval_steps_per_second": 24.899,
      "step": 5400
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.2357138713275916e-05,
      "loss": 0.5564,
      "step": 5500
    },
    {
      "epoch": 1.45,
      "eval_loss": 0.50209641456604,
      "eval_map@3": 0.8883333333333334,
      "eval_runtime": 4.0769,
      "eval_samples_per_second": 24.529,
      "eval_steps_per_second": 24.529,
      "step": 5500
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.2058656037056733e-05,
      "loss": 0.5962,
      "step": 5600
    },
    {
      "epoch": 1.47,
      "eval_loss": 0.5111860632896423,
      "eval_map@3": 0.8933333333333334,
      "eval_runtime": 4.1245,
      "eval_samples_per_second": 24.245,
      "eval_steps_per_second": 24.245,
      "step": 5600
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.1758245251183917e-05,
      "loss": 0.5824,
      "step": 5700
    },
    {
      "epoch": 1.5,
      "eval_loss": 0.5007131695747375,
      "eval_map@3": 0.8966666666666666,
      "eval_runtime": 4.0577,
      "eval_samples_per_second": 24.644,
      "eval_steps_per_second": 24.644,
      "step": 5700
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.1456187716373334e-05,
      "loss": 0.5327,
      "step": 5800
    },
    {
      "epoch": 1.53,
      "eval_loss": 0.5062887072563171,
      "eval_map@3": 0.8883333333333334,
      "eval_runtime": 3.9467,
      "eval_samples_per_second": 25.337,
      "eval_steps_per_second": 25.337,
      "step": 5800
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.1152766335663825e-05,
      "loss": 0.5649,
      "step": 5900
    },
    {
      "epoch": 1.55,
      "eval_loss": 0.5016913414001465,
      "eval_map@3": 0.8883333333333334,
      "eval_runtime": 4.1523,
      "eval_samples_per_second": 24.083,
      "eval_steps_per_second": 24.083,
      "step": 5900
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.0848265289454028e-05,
      "loss": 0.5717,
      "step": 6000
    },
    {
      "epoch": 1.58,
      "eval_loss": 0.4974103271961212,
      "eval_map@3": 0.9033333333333334,
      "eval_runtime": 4.0909,
      "eval_samples_per_second": 24.444,
      "eval_steps_per_second": 24.444,
      "step": 6000
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.054296976934281e-05,
      "loss": 0.5737,
      "step": 6100
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.49186939001083374,
      "eval_map@3": 0.8933333333333334,
      "eval_runtime": 4.2135,
      "eval_samples_per_second": 23.733,
      "eval_steps_per_second": 23.733,
      "step": 6100
    },
    {
      "epoch": 1.63,
      "learning_rate": 1.0237165711022661e-05,
      "loss": 0.5351,
      "step": 6200
    },
    {
      "epoch": 1.63,
      "eval_loss": 0.4999094009399414,
      "eval_map@3": 0.895,
      "eval_runtime": 3.9962,
      "eval_samples_per_second": 25.024,
      "eval_steps_per_second": 25.024,
      "step": 6200
    },
    {
      "epoch": 1.66,
      "learning_rate": 9.934199946768965e-06,
      "loss": 0.5341,
      "step": 6300
    },
    {
      "epoch": 1.66,
      "eval_loss": 0.4991361200809479,
      "eval_map@3": 0.9016666666666667,
      "eval_runtime": 4.1686,
      "eval_samples_per_second": 23.989,
      "eval_steps_per_second": 23.989,
      "step": 6300
    },
    {
      "epoch": 1.68,
      "learning_rate": 9.628236192287725e-06,
      "loss": 0.5501,
      "step": 6400
    },
    {
      "epoch": 1.68,
      "eval_loss": 0.5340888500213623,
      "eval_map@3": 0.8866666666666667,
      "eval_runtime": 4.1621,
      "eval_samples_per_second": 24.027,
      "eval_steps_per_second": 24.027,
      "step": 6400
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.32262062680631e-06,
      "loss": 0.5552,
      "step": 6500
    },
    {
      "epoch": 1.71,
      "eval_loss": 0.4900314211845398,
      "eval_map@3": 0.9016666666666667,
      "eval_runtime": 4.0101,
      "eval_samples_per_second": 24.937,
      "eval_steps_per_second": 24.937,
      "step": 6500
    },
    {
      "epoch": 1.74,
      "learning_rate": 9.017639485767398e-06,
      "loss": 0.5362,
      "step": 6600
    },
    {
      "epoch": 1.74,
      "eval_loss": 0.5052759647369385,
      "eval_map@3": 0.905,
      "eval_runtime": 4.083,
      "eval_samples_per_second": 24.492,
      "eval_steps_per_second": 24.492,
      "step": 6600
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.713578410420254e-06,
      "loss": 0.5474,
      "step": 6700
    },
    {
      "epoch": 1.76,
      "eval_loss": 0.5121599435806274,
      "eval_map@3": 0.905,
      "eval_runtime": 3.9444,
      "eval_samples_per_second": 25.352,
      "eval_steps_per_second": 25.352,
      "step": 6700
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.41072218029293e-06,
      "loss": 0.5134,
      "step": 6800
    },
    {
      "epoch": 1.79,
      "eval_loss": 0.510312020778656,
      "eval_map@3": 0.8983333333333334,
      "eval_runtime": 4.0574,
      "eval_samples_per_second": 24.647,
      "eval_steps_per_second": 24.647,
      "step": 6800
    },
    {
      "epoch": 1.81,
      "learning_rate": 8.1093544464716e-06,
      "loss": 0.5454,
      "step": 6900
    },
    {
      "epoch": 1.81,
      "eval_loss": 0.5062999725341797,
      "eval_map@3": 0.9,
      "eval_runtime": 4.0245,
      "eval_samples_per_second": 24.848,
      "eval_steps_per_second": 24.848,
      "step": 6900
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.809757465936754e-06,
      "loss": 0.5726,
      "step": 7000
    },
    {
      "epoch": 1.84,
      "eval_loss": 0.5135546326637268,
      "eval_map@3": 0.905,
      "eval_runtime": 4.053,
      "eval_samples_per_second": 24.673,
      "eval_steps_per_second": 24.673,
      "step": 7000
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.512211837205075e-06,
      "loss": 0.5173,
      "step": 7100
    },
    {
      "epoch": 1.87,
      "eval_loss": 0.49537014961242676,
      "eval_map@3": 0.9,
      "eval_runtime": 4.1604,
      "eval_samples_per_second": 24.036,
      "eval_steps_per_second": 24.036,
      "step": 7100
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.216996237524646e-06,
      "loss": 0.5118,
      "step": 7200
    },
    {
      "epoch": 1.89,
      "eval_loss": 0.5108374357223511,
      "eval_map@3": 0.9066666666666667,
      "eval_runtime": 4.0629,
      "eval_samples_per_second": 24.613,
      "eval_steps_per_second": 24.613,
      "step": 7200
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.924387161869565e-06,
      "loss": 0.5372,
      "step": 7300
    },
    {
      "epoch": 1.92,
      "eval_loss": 0.5062543749809265,
      "eval_map@3": 0.8866666666666666,
      "eval_runtime": 3.9525,
      "eval_samples_per_second": 25.3,
      "eval_steps_per_second": 25.3,
      "step": 7300
    },
    {
      "epoch": 1.95,
      "learning_rate": 6.634658663978463e-06,
      "loss": 0.4689,
      "step": 7400
    },
    {
      "epoch": 1.95,
      "eval_loss": 0.5014258623123169,
      "eval_map@3": 0.8933333333333333,
      "eval_runtime": 4.084,
      "eval_samples_per_second": 24.486,
      "eval_steps_per_second": 24.486,
      "step": 7400
    },
    {
      "epoch": 1.97,
      "learning_rate": 6.348082099679445e-06,
      "loss": 0.5096,
      "step": 7500
    },
    {
      "epoch": 1.97,
      "eval_loss": 0.5079813003540039,
      "eval_map@3": 0.905,
      "eval_runtime": 4.207,
      "eval_samples_per_second": 23.77,
      "eval_steps_per_second": 23.77,
      "step": 7500
    },
    {
      "epoch": 2.0,
      "learning_rate": 6.0649258727418535e-06,
      "loss": 0.5272,
      "step": 7600
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.4955463409423828,
      "eval_map@3": 0.9083333333333333,
      "eval_runtime": 4.0938,
      "eval_samples_per_second": 24.427,
      "eval_steps_per_second": 24.427,
      "step": 7600
    }
  ],
  "logging_steps": 100,
  "max_steps": 11406,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 3.996618693693601e+17,
  "trial_name": null,
  "trial_params": null
}