{ "best_metric": 0.44485729932785034, "best_model_checkpoint": "model_output/e2e_opentable_5_way__19684-shot__seed-66__lstm/checkpoint-1650", "epoch": 10.714285714285714, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 4.9783362218370885e-05, "loss": 19.0082, "step": 20 }, { "epoch": 0.26, "learning_rate": 4.956672443674177e-05, "loss": 18.4578, "step": 40 }, { "epoch": 0.32, "eval_distillation_accuracy_counterfactual": 0.4474089276552078, "eval_distillation_accuracy_factual": 0.87891226269882, "eval_distillation_f1_counterfactual": 0.4340733851900615, "eval_distillation_f1_factual": 0.8800573438030022, "eval_groundtruth_accuracy_counterfactual": 0.36685479733196513, "eval_groundtruth_f1_counterfactual": 0.3475149282833413, "eval_groundtruth_f1_factual": 0.5718715108033396, "eval_icace_cosine": 0.6489389538764954, "eval_icace_l2": 0.7266151905059814, "eval_icace_normdiff": 0.6456053256988525, "eval_loss": 19.274616241455078, "eval_runtime": 4.1804, "eval_samples_per_second": 932.439, "eval_steps_per_second": 7.416, "step": 50 }, { "epoch": 0.39, "learning_rate": 4.935008665511265e-05, "loss": 16.6971, "step": 60 }, { "epoch": 0.52, "learning_rate": 4.913344887348354e-05, "loss": 16.0217, "step": 80 }, { "epoch": 0.65, "learning_rate": 4.891681109185442e-05, "loss": 14.7054, "step": 100 }, { "epoch": 0.65, "eval_distillation_accuracy_counterfactual": 0.4663930220625962, "eval_distillation_accuracy_factual": 0.8155464340687532, "eval_distillation_f1_counterfactual": 0.4524610795367727, "eval_distillation_f1_factual": 0.8181025319127461, "eval_groundtruth_accuracy_counterfactual": 0.387891226269882, "eval_groundtruth_f1_counterfactual": 0.3650946763135223, "eval_groundtruth_f1_factual": 0.5588856066889839, "eval_icace_cosine": 0.5872419476509094, "eval_icace_l2": 0.6969975233078003, "eval_icace_normdiff": 0.5919448733329773, "eval_loss": 15.743273735046387, "eval_runtime": 4.4635, "eval_samples_per_second": 873.3, "eval_steps_per_second": 6.945, "step": 100 }, { "epoch": 0.78, "learning_rate": 4.8700173310225307e-05, "loss": 13.7184, "step": 120 }, { "epoch": 0.91, "learning_rate": 4.848353552859619e-05, "loss": 12.0381, "step": 140 }, { "epoch": 0.97, "eval_distillation_accuracy_counterfactual": 0.49640841457157514, "eval_distillation_accuracy_factual": 0.7883530015392509, "eval_distillation_f1_counterfactual": 0.4823375562217772, "eval_distillation_f1_factual": 0.7879840021980078, "eval_groundtruth_accuracy_counterfactual": 0.4089276552077989, "eval_groundtruth_f1_counterfactual": 0.38657714790711345, "eval_groundtruth_f1_factual": 0.5442379284746408, "eval_icace_cosine": 0.5305299758911133, "eval_icace_l2": 0.6551415920257568, "eval_icace_normdiff": 0.5328993201255798, "eval_loss": 12.449591636657715, "eval_runtime": 4.0979, "eval_samples_per_second": 951.214, "eval_steps_per_second": 7.565, "step": 150 }, { "epoch": 1.04, "learning_rate": 4.826689774696707e-05, "loss": 10.7403, "step": 160 }, { "epoch": 1.17, "learning_rate": 4.8050259965337955e-05, "loss": 10.1961, "step": 180 }, { "epoch": 1.3, "learning_rate": 4.7833622183708845e-05, "loss": 9.0642, "step": 200 }, { "epoch": 1.3, "eval_distillation_accuracy_counterfactual": 0.5608004104669061, "eval_distillation_accuracy_factual": 0.7837352488455618, "eval_distillation_f1_counterfactual": 0.5400836383519623, "eval_distillation_f1_factual": 0.7725193659488229, "eval_groundtruth_accuracy_counterfactual": 0.4789635710620831, "eval_groundtruth_f1_counterfactual": 0.44404241289909907, "eval_groundtruth_f1_factual": 0.5322559226557246, "eval_icace_cosine": 0.48327478766441345, "eval_icace_l2": 0.5759322643280029, "eval_icace_normdiff": 0.43174561858177185, "eval_loss": 9.551604270935059, "eval_runtime": 4.2248, "eval_samples_per_second": 922.636, "eval_steps_per_second": 7.338, "step": 200 }, { "epoch": 1.43, "learning_rate": 4.761698440207972e-05, "loss": 8.7283, "step": 220 }, { "epoch": 1.56, "learning_rate": 4.740034662045061e-05, "loss": 8.4523, "step": 240 }, { "epoch": 1.62, "eval_distillation_accuracy_counterfactual": 0.5695228322216521, "eval_distillation_accuracy_factual": 0.8114417650076963, "eval_distillation_f1_counterfactual": 0.5467305836862474, "eval_distillation_f1_factual": 0.8027820833992848, "eval_groundtruth_accuracy_counterfactual": 0.4953822473063109, "eval_groundtruth_f1_counterfactual": 0.4591082174445691, "eval_groundtruth_f1_factual": 0.550223520551177, "eval_icace_cosine": 0.4666258990764618, "eval_icace_l2": 0.5574371814727783, "eval_icace_normdiff": 0.3956855237483978, "eval_loss": 8.846331596374512, "eval_runtime": 4.3676, "eval_samples_per_second": 892.49, "eval_steps_per_second": 7.098, "step": 250 }, { "epoch": 1.69, "learning_rate": 4.7183708838821494e-05, "loss": 7.9309, "step": 260 }, { "epoch": 1.82, "learning_rate": 4.6967071057192376e-05, "loss": 8.1515, "step": 280 }, { "epoch": 1.95, "learning_rate": 4.675043327556326e-05, "loss": 8.2538, "step": 300 }, { "epoch": 1.95, "eval_distillation_accuracy_counterfactual": 0.5785017957927142, "eval_distillation_accuracy_factual": 0.8099025141097999, "eval_distillation_f1_counterfactual": 0.5510551496999538, "eval_distillation_f1_factual": 0.8034508877679645, "eval_groundtruth_accuracy_counterfactual": 0.5192406362237044, "eval_groundtruth_f1_counterfactual": 0.4773009173751467, "eval_groundtruth_f1_factual": 0.5601908475598169, "eval_icace_cosine": 0.46449822187423706, "eval_icace_l2": 0.5464323163032532, "eval_icace_normdiff": 0.3835403621196747, "eval_loss": 8.654885292053223, "eval_runtime": 4.2096, "eval_samples_per_second": 925.989, "eval_steps_per_second": 7.364, "step": 300 }, { "epoch": 2.08, "learning_rate": 4.653379549393415e-05, "loss": 7.67, "step": 320 }, { "epoch": 2.21, "learning_rate": 4.6317157712305025e-05, "loss": 7.2333, "step": 340 }, { "epoch": 2.27, "eval_distillation_accuracy_counterfactual": 0.5828630066700872, "eval_distillation_accuracy_factual": 0.8129810159055926, "eval_distillation_f1_counterfactual": 0.5580116722898498, "eval_distillation_f1_factual": 0.8059470822588534, "eval_groundtruth_accuracy_counterfactual": 0.5161621344279117, "eval_groundtruth_f1_counterfactual": 0.47873711691724863, "eval_groundtruth_f1_factual": 0.5669218861407466, "eval_icace_cosine": 0.4622446298599243, "eval_icace_l2": 0.5438258051872253, "eval_icace_normdiff": 0.3780592679977417, "eval_loss": 8.574095726013184, "eval_runtime": 4.5769, "eval_samples_per_second": 851.663, "eval_steps_per_second": 6.773, "step": 350 }, { "epoch": 2.34, "learning_rate": 4.6100519930675915e-05, "loss": 7.5403, "step": 360 }, { "epoch": 2.47, "learning_rate": 4.58838821490468e-05, "loss": 7.5941, "step": 380 }, { "epoch": 2.6, "learning_rate": 4.566724436741768e-05, "loss": 7.4074, "step": 400 }, { "epoch": 2.6, "eval_distillation_accuracy_counterfactual": 0.5913288866085172, "eval_distillation_accuracy_factual": 0.8127244740892765, "eval_distillation_f1_counterfactual": 0.5655321920235192, "eval_distillation_f1_factual": 0.8043989295732594, "eval_groundtruth_accuracy_counterfactual": 0.5236018471010775, "eval_groundtruth_f1_counterfactual": 0.483629889760966, "eval_groundtruth_f1_factual": 0.566873960250893, "eval_icace_cosine": 0.4574372470378876, "eval_icace_l2": 0.5330100059509277, "eval_icace_normdiff": 0.3700498342514038, "eval_loss": 8.39880084991455, "eval_runtime": 4.3152, "eval_samples_per_second": 903.312, "eval_steps_per_second": 7.184, "step": 400 }, { "epoch": 2.73, "learning_rate": 4.5450606585788563e-05, "loss": 7.4824, "step": 420 }, { "epoch": 2.86, "learning_rate": 4.5233968804159446e-05, "loss": 7.3475, "step": 440 }, { "epoch": 2.92, "eval_distillation_accuracy_counterfactual": 0.5872242175474602, "eval_distillation_accuracy_factual": 0.8124679322729605, "eval_distillation_f1_counterfactual": 0.5625259640226865, "eval_distillation_f1_factual": 0.8042766212220256, "eval_groundtruth_accuracy_counterfactual": 0.5246280143663418, "eval_groundtruth_f1_counterfactual": 0.48658870418382777, "eval_groundtruth_f1_factual": 0.5685471377307885, "eval_icace_cosine": 0.45905160903930664, "eval_icace_l2": 0.5355878472328186, "eval_icace_normdiff": 0.36660289764404297, "eval_loss": 8.368142127990723, "eval_runtime": 4.1481, "eval_samples_per_second": 939.704, "eval_steps_per_second": 7.473, "step": 450 }, { "epoch": 2.99, "learning_rate": 4.501733102253033e-05, "loss": 7.0594, "step": 460 }, { "epoch": 3.12, "learning_rate": 4.480069324090121e-05, "loss": 6.9375, "step": 480 }, { "epoch": 3.25, "learning_rate": 4.45840554592721e-05, "loss": 6.7366, "step": 500 }, { "epoch": 3.25, "eval_distillation_accuracy_counterfactual": 0.595177013853258, "eval_distillation_accuracy_factual": 0.8070805541303232, "eval_distillation_f1_counterfactual": 0.5692331323641199, "eval_distillation_f1_factual": 0.7998207924147414, "eval_groundtruth_accuracy_counterfactual": 0.5361723961005644, "eval_groundtruth_f1_counterfactual": 0.4974673759555204, "eval_groundtruth_f1_factual": 0.5562996925682391, "eval_icace_cosine": 0.4534068703651428, "eval_icace_l2": 0.526775062084198, "eval_icace_normdiff": 0.3587745130062103, "eval_loss": 8.27935791015625, "eval_runtime": 4.1869, "eval_samples_per_second": 931.008, "eval_steps_per_second": 7.404, "step": 500 }, { "epoch": 3.38, "learning_rate": 4.436741767764298e-05, "loss": 7.1133, "step": 520 }, { "epoch": 3.51, "learning_rate": 4.415077989601387e-05, "loss": 6.8301, "step": 540 }, { "epoch": 3.57, "eval_distillation_accuracy_counterfactual": 0.5974858902001026, "eval_distillation_accuracy_factual": 0.801693175987686, "eval_distillation_f1_counterfactual": 0.5726258996562326, "eval_distillation_f1_factual": 0.7907913618161765, "eval_groundtruth_accuracy_counterfactual": 0.5320677270395074, "eval_groundtruth_f1_counterfactual": 0.4923485191341066, "eval_groundtruth_f1_factual": 0.5537961164314067, "eval_icace_cosine": 0.45169007778167725, "eval_icace_l2": 0.5279433727264404, "eval_icace_normdiff": 0.36275714635849, "eval_loss": 8.32939338684082, "eval_runtime": 5.2077, "eval_samples_per_second": 748.509, "eval_steps_per_second": 5.953, "step": 550 }, { "epoch": 3.64, "learning_rate": 4.393414211438475e-05, "loss": 6.7981, "step": 560 }, { "epoch": 3.77, "learning_rate": 4.371750433275563e-05, "loss": 7.1438, "step": 580 }, { "epoch": 3.9, "learning_rate": 4.3500866551126516e-05, "loss": 6.8997, "step": 600 }, { "epoch": 3.9, "eval_distillation_accuracy_counterfactual": 0.5926115956900975, "eval_distillation_accuracy_factual": 0.8004104669061057, "eval_distillation_f1_counterfactual": 0.5673896946111567, "eval_distillation_f1_factual": 0.7916000675958308, "eval_groundtruth_accuracy_counterfactual": 0.534633145202668, "eval_groundtruth_f1_counterfactual": 0.49769412855789863, "eval_groundtruth_f1_factual": 0.5643847406573682, "eval_icace_cosine": 0.456226110458374, "eval_icace_l2": 0.529480516910553, "eval_icace_normdiff": 0.3591635823249817, "eval_loss": 8.281085014343262, "eval_runtime": 4.129, "eval_samples_per_second": 944.045, "eval_steps_per_second": 7.508, "step": 600 }, { "epoch": 4.03, "learning_rate": 4.3284228769497406e-05, "loss": 6.9036, "step": 620 }, { "epoch": 4.16, "learning_rate": 4.306759098786828e-05, "loss": 6.5021, "step": 640 }, { "epoch": 4.22, "eval_distillation_accuracy_counterfactual": 0.6010774756285274, "eval_distillation_accuracy_factual": 0.8068240123140071, "eval_distillation_f1_counterfactual": 0.5748719382246691, "eval_distillation_f1_factual": 0.7986215841545606, "eval_groundtruth_accuracy_counterfactual": 0.5366854797331965, "eval_groundtruth_f1_counterfactual": 0.496750865177183, "eval_groundtruth_f1_factual": 0.5667016917523627, "eval_icace_cosine": 0.4511929750442505, "eval_icace_l2": 0.5219342708587646, "eval_icace_normdiff": 0.35563668608665466, "eval_loss": 8.183207511901855, "eval_runtime": 4.6568, "eval_samples_per_second": 837.051, "eval_steps_per_second": 6.657, "step": 650 }, { "epoch": 4.29, "learning_rate": 4.285095320623917e-05, "loss": 6.6648, "step": 660 }, { "epoch": 4.42, "learning_rate": 4.2634315424610055e-05, "loss": 6.5765, "step": 680 }, { "epoch": 4.55, "learning_rate": 4.241767764298094e-05, "loss": 6.8262, "step": 700 }, { "epoch": 4.55, "eval_distillation_accuracy_counterfactual": 0.6021036428937917, "eval_distillation_accuracy_factual": 0.8011800923550538, "eval_distillation_f1_counterfactual": 0.5782623323950351, "eval_distillation_f1_factual": 0.792234544848666, "eval_groundtruth_accuracy_counterfactual": 0.534633145202668, "eval_groundtruth_f1_counterfactual": 0.4980566768981932, "eval_groundtruth_f1_factual": 0.5659870209728001, "eval_icace_cosine": 0.45206764340400696, "eval_icace_l2": 0.5196635723114014, "eval_icace_normdiff": 0.3521099090576172, "eval_loss": 8.192151069641113, "eval_runtime": 4.2168, "eval_samples_per_second": 924.404, "eval_steps_per_second": 7.352, "step": 700 }, { "epoch": 4.68, "learning_rate": 4.220103986135182e-05, "loss": 6.3849, "step": 720 }, { "epoch": 4.81, "learning_rate": 4.198440207972271e-05, "loss": 6.2939, "step": 740 }, { "epoch": 4.87, "eval_distillation_accuracy_counterfactual": 0.6015905592611596, "eval_distillation_accuracy_factual": 0.7947665469471524, "eval_distillation_f1_counterfactual": 0.5720518381601882, "eval_distillation_f1_factual": 0.7847422669196575, "eval_groundtruth_accuracy_counterfactual": 0.5443817342226783, "eval_groundtruth_f1_counterfactual": 0.5024275126053424, "eval_groundtruth_f1_factual": 0.5643444437069502, "eval_icace_cosine": 0.45017117261886597, "eval_icace_l2": 0.52012699842453, "eval_icace_normdiff": 0.3526724576950073, "eval_loss": 8.209163665771484, "eval_runtime": 4.3873, "eval_samples_per_second": 888.48, "eval_steps_per_second": 7.066, "step": 750 }, { "epoch": 4.94, "learning_rate": 4.1767764298093586e-05, "loss": 6.5551, "step": 760 }, { "epoch": 5.06, "learning_rate": 4.1551126516464476e-05, "loss": 6.1778, "step": 780 }, { "epoch": 5.19, "learning_rate": 4.133448873483536e-05, "loss": 6.4037, "step": 800 }, { "epoch": 5.19, "eval_distillation_accuracy_counterfactual": 0.5987685992816829, "eval_distillation_accuracy_factual": 0.7981015905592611, "eval_distillation_f1_counterfactual": 0.5722989846682422, "eval_distillation_f1_factual": 0.7868708739280531, "eval_groundtruth_accuracy_counterfactual": 0.538481272447409, "eval_groundtruth_f1_counterfactual": 0.5003432328028673, "eval_groundtruth_f1_factual": 0.5649331255060308, "eval_icace_cosine": 0.4531785547733307, "eval_icace_l2": 0.5209774374961853, "eval_icace_normdiff": 0.35254132747650146, "eval_loss": 8.21574878692627, "eval_runtime": 4.3077, "eval_samples_per_second": 904.888, "eval_steps_per_second": 7.196, "step": 800 }, { "epoch": 5.32, "learning_rate": 4.111785095320624e-05, "loss": 6.3784, "step": 820 }, { "epoch": 5.45, "learning_rate": 4.0901213171577124e-05, "loss": 6.2024, "step": 840 }, { "epoch": 5.52, "eval_distillation_accuracy_counterfactual": 0.6033863519753719, "eval_distillation_accuracy_factual": 0.78450487429451, "eval_distillation_f1_counterfactual": 0.5779296645132648, "eval_distillation_f1_factual": 0.7716967495103348, "eval_groundtruth_accuracy_counterfactual": 0.5425859415084658, "eval_groundtruth_f1_counterfactual": 0.5062616807207319, "eval_groundtruth_f1_factual": 0.5623510278704009, "eval_icace_cosine": 0.45046135783195496, "eval_icace_l2": 0.5194692611694336, "eval_icace_normdiff": 0.349165141582489, "eval_loss": 8.216791152954102, "eval_runtime": 4.7068, "eval_samples_per_second": 828.166, "eval_steps_per_second": 6.586, "step": 850 }, { "epoch": 5.58, "learning_rate": 4.068457538994801e-05, "loss": 6.0701, "step": 860 }, { "epoch": 5.71, "learning_rate": 4.04679376083189e-05, "loss": 6.347, "step": 880 }, { "epoch": 5.84, "learning_rate": 4.025129982668977e-05, "loss": 6.2456, "step": 900 }, { "epoch": 5.84, "eval_distillation_accuracy_counterfactual": 0.6059517701385326, "eval_distillation_accuracy_factual": 0.7880964597229349, "eval_distillation_f1_counterfactual": 0.5782256234051215, "eval_distillation_f1_factual": 0.7772220422916071, "eval_groundtruth_accuracy_counterfactual": 0.5448948178553105, "eval_groundtruth_f1_counterfactual": 0.5055672923847118, "eval_groundtruth_f1_factual": 0.5609910180857884, "eval_icace_cosine": 0.448850154876709, "eval_icace_l2": 0.5155501365661621, "eval_icace_normdiff": 0.34565335512161255, "eval_loss": 8.134432792663574, "eval_runtime": 4.6313, "eval_samples_per_second": 841.656, "eval_steps_per_second": 6.694, "step": 900 }, { "epoch": 5.97, "learning_rate": 4.003466204506066e-05, "loss": 6.2184, "step": 920 }, { "epoch": 6.1, "learning_rate": 3.9818024263431546e-05, "loss": 6.167, "step": 940 }, { "epoch": 6.17, "eval_distillation_accuracy_counterfactual": 0.607747562852745, "eval_distillation_accuracy_factual": 0.7904053360697794, "eval_distillation_f1_counterfactual": 0.5830284801765655, "eval_distillation_f1_factual": 0.7803493947200735, "eval_groundtruth_accuracy_counterfactual": 0.5410466906105695, "eval_groundtruth_f1_counterfactual": 0.5040873537566849, "eval_groundtruth_f1_factual": 0.5644313788434017, "eval_icace_cosine": 0.4496186673641205, "eval_icace_l2": 0.5139620304107666, "eval_icace_normdiff": 0.34691381454467773, "eval_loss": 8.117876052856445, "eval_runtime": 4.5117, "eval_samples_per_second": 863.979, "eval_steps_per_second": 6.871, "step": 950 }, { "epoch": 6.23, "learning_rate": 3.960138648180243e-05, "loss": 6.0826, "step": 960 }, { "epoch": 6.36, "learning_rate": 3.938474870017331e-05, "loss": 6.0767, "step": 980 }, { "epoch": 6.49, "learning_rate": 3.91681109185442e-05, "loss": 5.8502, "step": 1000 }, { "epoch": 6.49, "eval_distillation_accuracy_counterfactual": 0.6062083119548486, "eval_distillation_accuracy_factual": 0.7834787070292457, "eval_distillation_f1_counterfactual": 0.580469937457371, "eval_distillation_f1_factual": 0.7727583593749504, "eval_groundtruth_accuracy_counterfactual": 0.5487429451000513, "eval_groundtruth_f1_counterfactual": 0.5097369423713092, "eval_groundtruth_f1_factual": 0.5561243921701791, "eval_icace_cosine": 0.44893112778663635, "eval_icace_l2": 0.5155116319656372, "eval_icace_normdiff": 0.3455946445465088, "eval_loss": 8.187788963317871, "eval_runtime": 4.1698, "eval_samples_per_second": 934.828, "eval_steps_per_second": 7.434, "step": 1000 }, { "epoch": 6.62, "learning_rate": 3.895147313691508e-05, "loss": 5.9917, "step": 1020 }, { "epoch": 6.75, "learning_rate": 3.873483535528597e-05, "loss": 6.1073, "step": 1040 }, { "epoch": 6.82, "eval_distillation_accuracy_counterfactual": 0.607747562852745, "eval_distillation_accuracy_factual": 0.7873268342739866, "eval_distillation_f1_counterfactual": 0.5805601357737593, "eval_distillation_f1_factual": 0.7743939373190072, "eval_groundtruth_accuracy_counterfactual": 0.5407901487942535, "eval_groundtruth_f1_counterfactual": 0.5018039142178429, "eval_groundtruth_f1_factual": 0.5561166543910189, "eval_icace_cosine": 0.4540616571903229, "eval_icace_l2": 0.5179798007011414, "eval_icace_normdiff": 0.34727445244789124, "eval_loss": 8.15165901184082, "eval_runtime": 4.1114, "eval_samples_per_second": 948.103, "eval_steps_per_second": 7.54, "step": 1050 }, { "epoch": 6.88, "learning_rate": 3.851819757365685e-05, "loss": 5.8656, "step": 1060 }, { "epoch": 7.01, "learning_rate": 3.830155979202773e-05, "loss": 5.8636, "step": 1080 }, { "epoch": 7.14, "learning_rate": 3.8084922010398616e-05, "loss": 5.7869, "step": 1100 }, { "epoch": 7.14, "eval_distillation_accuracy_counterfactual": 0.6062083119548486, "eval_distillation_accuracy_factual": 0.7811698306824012, "eval_distillation_f1_counterfactual": 0.5821710152799346, "eval_distillation_f1_factual": 0.7697086841325075, "eval_groundtruth_accuracy_counterfactual": 0.5489994869163674, "eval_groundtruth_f1_counterfactual": 0.5136511907225106, "eval_groundtruth_f1_factual": 0.5620812168179984, "eval_icace_cosine": 0.4501829743385315, "eval_icace_l2": 0.5179650187492371, "eval_icace_normdiff": 0.34533482789993286, "eval_loss": 8.237076759338379, "eval_runtime": 4.8254, "eval_samples_per_second": 807.813, "eval_steps_per_second": 6.424, "step": 1100 }, { "epoch": 7.27, "learning_rate": 3.78682842287695e-05, "loss": 5.7736, "step": 1120 }, { "epoch": 7.4, "learning_rate": 3.765164644714038e-05, "loss": 5.8242, "step": 1140 }, { "epoch": 7.47, "eval_distillation_accuracy_counterfactual": 0.6080041046690611, "eval_distillation_accuracy_factual": 0.7819394561313494, "eval_distillation_f1_counterfactual": 0.5840769716039321, "eval_distillation_f1_factual": 0.772426412144742, "eval_groundtruth_accuracy_counterfactual": 0.5461775269368907, "eval_groundtruth_f1_counterfactual": 0.5101523591778137, "eval_groundtruth_f1_factual": 0.5610800146045478, "eval_icace_cosine": 0.45291048288345337, "eval_icace_l2": 0.5156511068344116, "eval_icace_normdiff": 0.3455710709095001, "eval_loss": 8.170882225036621, "eval_runtime": 4.1782, "eval_samples_per_second": 932.944, "eval_steps_per_second": 7.42, "step": 1150 }, { "epoch": 7.53, "learning_rate": 3.7435008665511264e-05, "loss": 5.8566, "step": 1160 }, { "epoch": 7.66, "learning_rate": 3.7218370883882154e-05, "loss": 5.9544, "step": 1180 }, { "epoch": 7.79, "learning_rate": 3.700173310225303e-05, "loss": 5.6634, "step": 1200 }, { "epoch": 7.79, "eval_distillation_accuracy_counterfactual": 0.603129810159056, "eval_distillation_accuracy_factual": 0.7850179579271421, "eval_distillation_f1_counterfactual": 0.5749303676931736, "eval_distillation_f1_factual": 0.7744112177462537, "eval_groundtruth_accuracy_counterfactual": 0.5464340687532068, "eval_groundtruth_f1_counterfactual": 0.5066990026042995, "eval_groundtruth_f1_factual": 0.5557605077495628, "eval_icace_cosine": 0.4530617296695709, "eval_icace_l2": 0.5165730714797974, "eval_icace_normdiff": 0.3451988697052002, "eval_loss": 8.165973663330078, "eval_runtime": 4.1529, "eval_samples_per_second": 938.624, "eval_steps_per_second": 7.465, "step": 1200 }, { "epoch": 7.92, "learning_rate": 3.678509532062392e-05, "loss": 5.77, "step": 1220 }, { "epoch": 8.05, "learning_rate": 3.65684575389948e-05, "loss": 5.7482, "step": 1240 }, { "epoch": 8.12, "eval_distillation_accuracy_counterfactual": 0.6126218573627501, "eval_distillation_accuracy_factual": 0.7909184197024115, "eval_distillation_f1_counterfactual": 0.5843182944697987, "eval_distillation_f1_factual": 0.7795092000338903, "eval_groundtruth_accuracy_counterfactual": 0.5543868650590046, "eval_groundtruth_f1_counterfactual": 0.5137168302820662, "eval_groundtruth_f1_factual": 0.5678129066363756, "eval_icace_cosine": 0.4470921456813812, "eval_icace_l2": 0.5108927488327026, "eval_icace_normdiff": 0.3416588604450226, "eval_loss": 8.150604248046875, "eval_runtime": 4.251, "eval_samples_per_second": 916.95, "eval_steps_per_second": 7.292, "step": 1250 }, { "epoch": 8.18, "learning_rate": 3.6351819757365686e-05, "loss": 5.5205, "step": 1260 }, { "epoch": 8.31, "learning_rate": 3.613518197573657e-05, "loss": 5.8927, "step": 1280 }, { "epoch": 8.44, "learning_rate": 3.591854419410746e-05, "loss": 5.5395, "step": 1300 }, { "epoch": 8.44, "eval_distillation_accuracy_counterfactual": 0.6103129810159056, "eval_distillation_accuracy_factual": 0.7809132888660851, "eval_distillation_f1_counterfactual": 0.5841396061759035, "eval_distillation_f1_factual": 0.7714136537747058, "eval_groundtruth_accuracy_counterfactual": 0.5497691123653156, "eval_groundtruth_f1_counterfactual": 0.5118760188223795, "eval_groundtruth_f1_factual": 0.5589055520339826, "eval_icace_cosine": 0.45225635170936584, "eval_icace_l2": 0.5154218077659607, "eval_icace_normdiff": 0.344652384519577, "eval_loss": 8.205676078796387, "eval_runtime": 4.091, "eval_samples_per_second": 952.826, "eval_steps_per_second": 7.578, "step": 1300 }, { "epoch": 8.57, "learning_rate": 3.5701906412478334e-05, "loss": 5.8121, "step": 1320 }, { "epoch": 8.7, "learning_rate": 3.5485268630849224e-05, "loss": 5.441, "step": 1340 }, { "epoch": 8.77, "eval_distillation_accuracy_counterfactual": 0.6087737301180093, "eval_distillation_accuracy_factual": 0.7806567470497691, "eval_distillation_f1_counterfactual": 0.5802259630201693, "eval_distillation_f1_factual": 0.769666178731678, "eval_groundtruth_accuracy_counterfactual": 0.5497691123653156, "eval_groundtruth_f1_counterfactual": 0.5083817804397743, "eval_groundtruth_f1_factual": 0.5560730633022766, "eval_icace_cosine": 0.45125773549079895, "eval_icace_l2": 0.5135722160339355, "eval_icace_normdiff": 0.342280775308609, "eval_loss": 8.19408893585205, "eval_runtime": 4.2164, "eval_samples_per_second": 924.475, "eval_steps_per_second": 7.352, "step": 1350 }, { "epoch": 8.83, "learning_rate": 3.526863084922011e-05, "loss": 5.7596, "step": 1360 }, { "epoch": 8.96, "learning_rate": 3.505199306759099e-05, "loss": 5.4167, "step": 1380 }, { "epoch": 9.09, "learning_rate": 3.483535528596187e-05, "loss": 5.4932, "step": 1400 }, { "epoch": 9.09, "eval_distillation_accuracy_counterfactual": 0.6074910210364289, "eval_distillation_accuracy_factual": 0.7865572088250384, "eval_distillation_f1_counterfactual": 0.5796303315597431, "eval_distillation_f1_factual": 0.7750390231154594, "eval_groundtruth_accuracy_counterfactual": 0.5477167778347871, "eval_groundtruth_f1_counterfactual": 0.5087150951199725, "eval_groundtruth_f1_factual": 0.5570574749661429, "eval_icace_cosine": 0.4498434364795685, "eval_icace_l2": 0.516267716884613, "eval_icace_normdiff": 0.3403330147266388, "eval_loss": 8.22058391571045, "eval_runtime": 4.1848, "eval_samples_per_second": 931.47, "eval_steps_per_second": 7.408, "step": 1400 }, { "epoch": 9.22, "learning_rate": 3.461871750433276e-05, "loss": 5.5678, "step": 1420 }, { "epoch": 9.35, "learning_rate": 3.440207972270364e-05, "loss": 5.4716, "step": 1440 }, { "epoch": 9.42, "eval_distillation_accuracy_counterfactual": 0.6108260646485377, "eval_distillation_accuracy_factual": 0.780143663417137, "eval_distillation_f1_counterfactual": 0.5842917139467018, "eval_distillation_f1_factual": 0.7709900996479784, "eval_groundtruth_accuracy_counterfactual": 0.5533606977937404, "eval_groundtruth_f1_counterfactual": 0.5127818121914747, "eval_groundtruth_f1_factual": 0.5573652255180004, "eval_icace_cosine": 0.44960257411003113, "eval_icace_l2": 0.5159686207771301, "eval_icace_normdiff": 0.3424537181854248, "eval_loss": 8.257763862609863, "eval_runtime": 4.3143, "eval_samples_per_second": 903.512, "eval_steps_per_second": 7.185, "step": 1450 }, { "epoch": 9.48, "learning_rate": 3.418544194107453e-05, "loss": 5.4555, "step": 1460 }, { "epoch": 9.61, "learning_rate": 3.396880415944541e-05, "loss": 5.3255, "step": 1480 }, { "epoch": 9.74, "learning_rate": 3.3752166377816294e-05, "loss": 5.3938, "step": 1500 }, { "epoch": 9.74, "eval_distillation_accuracy_counterfactual": 0.6090302719343252, "eval_distillation_accuracy_factual": 0.7811698306824012, "eval_distillation_f1_counterfactual": 0.5842265693397752, "eval_distillation_f1_factual": 0.7722150004829093, "eval_groundtruth_accuracy_counterfactual": 0.5477167778347871, "eval_groundtruth_f1_counterfactual": 0.5107558580095164, "eval_groundtruth_f1_factual": 0.5685387585098263, "eval_icace_cosine": 0.4469769597053528, "eval_icace_l2": 0.5149244666099548, "eval_icace_normdiff": 0.3419099450111389, "eval_loss": 8.219366073608398, "eval_runtime": 4.196, "eval_samples_per_second": 928.989, "eval_steps_per_second": 7.388, "step": 1500 }, { "epoch": 9.87, "learning_rate": 3.353552859618718e-05, "loss": 5.5647, "step": 1520 }, { "epoch": 10.0, "learning_rate": 3.331889081455806e-05, "loss": 5.6336, "step": 1540 }, { "epoch": 10.06, "eval_distillation_accuracy_counterfactual": 0.6074910210364289, "eval_distillation_accuracy_factual": 0.7834787070292457, "eval_distillation_f1_counterfactual": 0.5825764249392762, "eval_distillation_f1_factual": 0.7747389963457406, "eval_groundtruth_accuracy_counterfactual": 0.5456644433042586, "eval_groundtruth_f1_counterfactual": 0.5091238986988211, "eval_groundtruth_f1_factual": 0.5599697616151967, "eval_icace_cosine": 0.4470330774784088, "eval_icace_l2": 0.5163958668708801, "eval_icace_normdiff": 0.3423549234867096, "eval_loss": 8.21544361114502, "eval_runtime": 4.0325, "eval_samples_per_second": 966.654, "eval_steps_per_second": 7.688, "step": 1550 }, { "epoch": 10.13, "learning_rate": 3.310225303292894e-05, "loss": 5.3259, "step": 1560 }, { "epoch": 10.26, "learning_rate": 3.2885615251299825e-05, "loss": 5.2595, "step": 1580 }, { "epoch": 10.39, "learning_rate": 3.2668977469670715e-05, "loss": 5.4352, "step": 1600 }, { "epoch": 10.39, "eval_distillation_accuracy_counterfactual": 0.6087737301180093, "eval_distillation_accuracy_factual": 0.7773217034376604, "eval_distillation_f1_counterfactual": 0.5823531075369515, "eval_distillation_f1_factual": 0.7693792326965714, "eval_groundtruth_accuracy_counterfactual": 0.5477167778347871, "eval_groundtruth_f1_counterfactual": 0.508966719485753, "eval_groundtruth_f1_factual": 0.5640053248952209, "eval_icace_cosine": 0.4500087797641754, "eval_icace_l2": 0.5169069170951843, "eval_icace_normdiff": 0.3419443964958191, "eval_loss": 8.280415534973145, "eval_runtime": 4.4038, "eval_samples_per_second": 885.139, "eval_steps_per_second": 7.039, "step": 1600 }, { "epoch": 10.52, "learning_rate": 3.245233968804159e-05, "loss": 5.5017, "step": 1620 }, { "epoch": 10.65, "learning_rate": 3.223570190641248e-05, "loss": 5.57, "step": 1640 }, { "epoch": 10.71, "eval_distillation_accuracy_counterfactual": 0.6082606464853771, "eval_distillation_accuracy_factual": 0.7857875833760903, "eval_distillation_f1_counterfactual": 0.5800740243677368, "eval_distillation_f1_factual": 0.7760634987859257, "eval_groundtruth_accuracy_counterfactual": 0.5500256541816316, "eval_groundtruth_f1_counterfactual": 0.508281184676912, "eval_groundtruth_f1_factual": 0.5590992844943354, "eval_icace_cosine": 0.44485729932785034, "eval_icace_l2": 0.5137429237365723, "eval_icace_normdiff": 0.33999693393707275, "eval_loss": 8.240898132324219, "eval_runtime": 4.1787, "eval_samples_per_second": 932.837, "eval_steps_per_second": 7.419, "step": 1650 } ], "max_steps": 4616, "num_train_epochs": 30, "total_flos": 249055307919360.0, "trial_name": null, "trial_params": null }