{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998551424432641, "eval_steps": 500, "global_step": 3105, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009657170449058426, "grad_norm": 3.7852027137880246, "learning_rate": 5e-06, "loss": 0.6254, "step": 10 }, { "epoch": 0.01931434089811685, "grad_norm": 2.0620749606772866, "learning_rate": 5e-06, "loss": 0.5439, "step": 20 }, { "epoch": 0.028971511347175277, "grad_norm": 2.085747916186358, "learning_rate": 5e-06, "loss": 0.5216, "step": 30 }, { "epoch": 0.0386286817962337, "grad_norm": 1.8268450890377532, "learning_rate": 5e-06, "loss": 0.5097, "step": 40 }, { "epoch": 0.04828585224529213, "grad_norm": 2.0581630455526123, "learning_rate": 5e-06, "loss": 0.504, "step": 50 }, { "epoch": 0.05794302269435055, "grad_norm": 1.602128099496524, "learning_rate": 5e-06, "loss": 0.5013, "step": 60 }, { "epoch": 0.06760019314340898, "grad_norm": 1.6673623755384994, "learning_rate": 5e-06, "loss": 0.4878, "step": 70 }, { "epoch": 0.0772573635924674, "grad_norm": 1.8459613232584249, "learning_rate": 5e-06, "loss": 0.4878, "step": 80 }, { "epoch": 0.08691453404152583, "grad_norm": 1.93193366344165, "learning_rate": 5e-06, "loss": 0.4879, "step": 90 }, { "epoch": 0.09657170449058426, "grad_norm": 1.5465868737214155, "learning_rate": 5e-06, "loss": 0.4808, "step": 100 }, { "epoch": 0.10622887493964268, "grad_norm": 1.6271945980777192, "learning_rate": 5e-06, "loss": 0.4782, "step": 110 }, { "epoch": 0.1158860453887011, "grad_norm": 1.8402435224859226, "learning_rate": 5e-06, "loss": 0.4767, "step": 120 }, { "epoch": 0.12554321583775954, "grad_norm": 1.7618646649255147, "learning_rate": 5e-06, "loss": 0.4745, "step": 130 }, { "epoch": 0.13520038628681796, "grad_norm": 1.5315385028605073, "learning_rate": 5e-06, "loss": 0.4733, "step": 140 }, { "epoch": 0.14485755673587639, "grad_norm": 1.6814511784859512, "learning_rate": 5e-06, "loss": 0.4733, "step": 150 }, { "epoch": 0.1545147271849348, "grad_norm": 1.5271479672490604, "learning_rate": 5e-06, "loss": 0.4716, "step": 160 }, { "epoch": 0.16417189763399323, "grad_norm": 1.6777577770303629, "learning_rate": 5e-06, "loss": 0.471, "step": 170 }, { "epoch": 0.17382906808305165, "grad_norm": 1.5955962931770318, "learning_rate": 5e-06, "loss": 0.4733, "step": 180 }, { "epoch": 0.1834862385321101, "grad_norm": 1.6328687609928156, "learning_rate": 5e-06, "loss": 0.4711, "step": 190 }, { "epoch": 0.19314340898116852, "grad_norm": 1.6077537949384313, "learning_rate": 5e-06, "loss": 0.4669, "step": 200 }, { "epoch": 0.20280057943022695, "grad_norm": 1.5250866943562713, "learning_rate": 5e-06, "loss": 0.4647, "step": 210 }, { "epoch": 0.21245774987928537, "grad_norm": 1.4889880734895962, "learning_rate": 5e-06, "loss": 0.4585, "step": 220 }, { "epoch": 0.2221149203283438, "grad_norm": 1.6089621732620885, "learning_rate": 5e-06, "loss": 0.4578, "step": 230 }, { "epoch": 0.2317720907774022, "grad_norm": 1.4899292253368934, "learning_rate": 5e-06, "loss": 0.4555, "step": 240 }, { "epoch": 0.24142926122646063, "grad_norm": 1.5257308623369976, "learning_rate": 5e-06, "loss": 0.4663, "step": 250 }, { "epoch": 0.2510864316755191, "grad_norm": 1.8459225661699417, "learning_rate": 5e-06, "loss": 0.4588, "step": 260 }, { "epoch": 0.2607436021245775, "grad_norm": 1.723748524461411, "learning_rate": 5e-06, "loss": 0.4622, "step": 270 }, { "epoch": 0.27040077257363593, "grad_norm": 1.6111594714193933, "learning_rate": 5e-06, "loss": 0.4562, "step": 280 }, { "epoch": 0.2800579430226944, "grad_norm": 1.489881509289716, "learning_rate": 5e-06, "loss": 0.4632, "step": 290 }, { "epoch": 0.28971511347175277, "grad_norm": 1.7437076586484017, "learning_rate": 5e-06, "loss": 0.4498, "step": 300 }, { "epoch": 0.2993722839208112, "grad_norm": 1.5023208696211863, "learning_rate": 5e-06, "loss": 0.4547, "step": 310 }, { "epoch": 0.3090294543698696, "grad_norm": 1.47145559032755, "learning_rate": 5e-06, "loss": 0.448, "step": 320 }, { "epoch": 0.31868662481892807, "grad_norm": 1.493111331589544, "learning_rate": 5e-06, "loss": 0.4493, "step": 330 }, { "epoch": 0.32834379526798646, "grad_norm": 1.3731787738087704, "learning_rate": 5e-06, "loss": 0.4548, "step": 340 }, { "epoch": 0.3380009657170449, "grad_norm": 1.5097895917724191, "learning_rate": 5e-06, "loss": 0.4468, "step": 350 }, { "epoch": 0.3476581361661033, "grad_norm": 1.3977565649576211, "learning_rate": 5e-06, "loss": 0.4497, "step": 360 }, { "epoch": 0.35731530661516175, "grad_norm": 1.597963004052136, "learning_rate": 5e-06, "loss": 0.446, "step": 370 }, { "epoch": 0.3669724770642202, "grad_norm": 1.3464438194432793, "learning_rate": 5e-06, "loss": 0.4462, "step": 380 }, { "epoch": 0.3766296475132786, "grad_norm": 2.13016639276551, "learning_rate": 5e-06, "loss": 0.4471, "step": 390 }, { "epoch": 0.38628681796233705, "grad_norm": 1.459345081562004, "learning_rate": 5e-06, "loss": 0.4449, "step": 400 }, { "epoch": 0.39594398841139544, "grad_norm": 1.4268521503710732, "learning_rate": 5e-06, "loss": 0.4484, "step": 410 }, { "epoch": 0.4056011588604539, "grad_norm": 1.5213089527197126, "learning_rate": 5e-06, "loss": 0.442, "step": 420 }, { "epoch": 0.4152583293095123, "grad_norm": 1.4107454080123742, "learning_rate": 5e-06, "loss": 0.4471, "step": 430 }, { "epoch": 0.42491549975857074, "grad_norm": 1.4224146665144493, "learning_rate": 5e-06, "loss": 0.4421, "step": 440 }, { "epoch": 0.4345726702076292, "grad_norm": 1.339648419205393, "learning_rate": 5e-06, "loss": 0.4459, "step": 450 }, { "epoch": 0.4442298406566876, "grad_norm": 1.541975682641463, "learning_rate": 5e-06, "loss": 0.4416, "step": 460 }, { "epoch": 0.45388701110574603, "grad_norm": 1.447132284857592, "learning_rate": 5e-06, "loss": 0.4529, "step": 470 }, { "epoch": 0.4635441815548044, "grad_norm": 1.385982642001702, "learning_rate": 5e-06, "loss": 0.4458, "step": 480 }, { "epoch": 0.4732013520038629, "grad_norm": 2.0640182165227428, "learning_rate": 5e-06, "loss": 0.4426, "step": 490 }, { "epoch": 0.48285852245292127, "grad_norm": 1.348125498390211, "learning_rate": 5e-06, "loss": 0.4329, "step": 500 }, { "epoch": 0.4925156929019797, "grad_norm": 1.509261123647944, "learning_rate": 5e-06, "loss": 0.4437, "step": 510 }, { "epoch": 0.5021728633510382, "grad_norm": 2.7536862793707493, "learning_rate": 5e-06, "loss": 0.4359, "step": 520 }, { "epoch": 0.5118300338000966, "grad_norm": 1.4717359737134839, "learning_rate": 5e-06, "loss": 0.4382, "step": 530 }, { "epoch": 0.521487204249155, "grad_norm": 1.442405927212471, "learning_rate": 5e-06, "loss": 0.4394, "step": 540 }, { "epoch": 0.5311443746982134, "grad_norm": 1.3948076832604182, "learning_rate": 5e-06, "loss": 0.4407, "step": 550 }, { "epoch": 0.5408015451472719, "grad_norm": 1.3948240431568104, "learning_rate": 5e-06, "loss": 0.4366, "step": 560 }, { "epoch": 0.5504587155963303, "grad_norm": 1.417839986279595, "learning_rate": 5e-06, "loss": 0.4369, "step": 570 }, { "epoch": 0.5601158860453888, "grad_norm": 1.5109506580715066, "learning_rate": 5e-06, "loss": 0.4353, "step": 580 }, { "epoch": 0.5697730564944471, "grad_norm": 1.4806935542579658, "learning_rate": 5e-06, "loss": 0.4378, "step": 590 }, { "epoch": 0.5794302269435055, "grad_norm": 1.5398960352117772, "learning_rate": 5e-06, "loss": 0.4352, "step": 600 }, { "epoch": 0.589087397392564, "grad_norm": 1.2925767785772035, "learning_rate": 5e-06, "loss": 0.4366, "step": 610 }, { "epoch": 0.5987445678416224, "grad_norm": 1.5201357387636154, "learning_rate": 5e-06, "loss": 0.4296, "step": 620 }, { "epoch": 0.6084017382906808, "grad_norm": 1.4717871703145071, "learning_rate": 5e-06, "loss": 0.431, "step": 630 }, { "epoch": 0.6180589087397392, "grad_norm": 1.3687678242722554, "learning_rate": 5e-06, "loss": 0.4345, "step": 640 }, { "epoch": 0.6277160791887977, "grad_norm": 1.3591593745283608, "learning_rate": 5e-06, "loss": 0.4313, "step": 650 }, { "epoch": 0.6373732496378561, "grad_norm": 1.4717658524480044, "learning_rate": 5e-06, "loss": 0.4291, "step": 660 }, { "epoch": 0.6470304200869146, "grad_norm": 1.398916059634764, "learning_rate": 5e-06, "loss": 0.4323, "step": 670 }, { "epoch": 0.6566875905359729, "grad_norm": 1.4300679905819138, "learning_rate": 5e-06, "loss": 0.4271, "step": 680 }, { "epoch": 0.6663447609850314, "grad_norm": 1.4886395606416152, "learning_rate": 5e-06, "loss": 0.4297, "step": 690 }, { "epoch": 0.6760019314340898, "grad_norm": 1.5034980432589369, "learning_rate": 5e-06, "loss": 0.4272, "step": 700 }, { "epoch": 0.6856591018831483, "grad_norm": 1.5962520697846152, "learning_rate": 5e-06, "loss": 0.4316, "step": 710 }, { "epoch": 0.6953162723322066, "grad_norm": 1.5447138904473863, "learning_rate": 5e-06, "loss": 0.4324, "step": 720 }, { "epoch": 0.7049734427812651, "grad_norm": 1.3937453432394886, "learning_rate": 5e-06, "loss": 0.4311, "step": 730 }, { "epoch": 0.7146306132303235, "grad_norm": 1.3011464123427467, "learning_rate": 5e-06, "loss": 0.4292, "step": 740 }, { "epoch": 0.724287783679382, "grad_norm": 1.4931973618415784, "learning_rate": 5e-06, "loss": 0.4295, "step": 750 }, { "epoch": 0.7339449541284404, "grad_norm": 1.3582576932610695, "learning_rate": 5e-06, "loss": 0.4322, "step": 760 }, { "epoch": 0.7436021245774987, "grad_norm": 1.5128036261098081, "learning_rate": 5e-06, "loss": 0.4306, "step": 770 }, { "epoch": 0.7532592950265572, "grad_norm": 1.3047038324570048, "learning_rate": 5e-06, "loss": 0.43, "step": 780 }, { "epoch": 0.7629164654756156, "grad_norm": 1.3637121209115217, "learning_rate": 5e-06, "loss": 0.4273, "step": 790 }, { "epoch": 0.7725736359246741, "grad_norm": 1.3919305669020587, "learning_rate": 5e-06, "loss": 0.4314, "step": 800 }, { "epoch": 0.7822308063737325, "grad_norm": 1.3941190921525122, "learning_rate": 5e-06, "loss": 0.4255, "step": 810 }, { "epoch": 0.7918879768227909, "grad_norm": 1.362917573056491, "learning_rate": 5e-06, "loss": 0.4333, "step": 820 }, { "epoch": 0.8015451472718493, "grad_norm": 1.415609597869972, "learning_rate": 5e-06, "loss": 0.4225, "step": 830 }, { "epoch": 0.8112023177209078, "grad_norm": 1.489818925031249, "learning_rate": 5e-06, "loss": 0.425, "step": 840 }, { "epoch": 0.8208594881699662, "grad_norm": 1.4658100940320271, "learning_rate": 5e-06, "loss": 0.4304, "step": 850 }, { "epoch": 0.8305166586190246, "grad_norm": 1.3385021753495208, "learning_rate": 5e-06, "loss": 0.4263, "step": 860 }, { "epoch": 0.840173829068083, "grad_norm": 1.4519295448528378, "learning_rate": 5e-06, "loss": 0.4274, "step": 870 }, { "epoch": 0.8498309995171415, "grad_norm": 1.5271445690200918, "learning_rate": 5e-06, "loss": 0.4244, "step": 880 }, { "epoch": 0.8594881699661999, "grad_norm": 9.54446637099851, "learning_rate": 5e-06, "loss": 0.4294, "step": 890 }, { "epoch": 0.8691453404152584, "grad_norm": 1.4822497861334036, "learning_rate": 5e-06, "loss": 0.4206, "step": 900 }, { "epoch": 0.8788025108643167, "grad_norm": 1.295865945244785, "learning_rate": 5e-06, "loss": 0.4223, "step": 910 }, { "epoch": 0.8884596813133752, "grad_norm": 1.4498222196547434, "learning_rate": 5e-06, "loss": 0.4222, "step": 920 }, { "epoch": 0.8981168517624336, "grad_norm": 1.4109015353277008, "learning_rate": 5e-06, "loss": 0.424, "step": 930 }, { "epoch": 0.9077740222114921, "grad_norm": 1.3782840436535198, "learning_rate": 5e-06, "loss": 0.4224, "step": 940 }, { "epoch": 0.9174311926605505, "grad_norm": 1.3492925117698618, "learning_rate": 5e-06, "loss": 0.4209, "step": 950 }, { "epoch": 0.9270883631096088, "grad_norm": 1.3072263370484556, "learning_rate": 5e-06, "loss": 0.419, "step": 960 }, { "epoch": 0.9367455335586673, "grad_norm": 1.3359087020152296, "learning_rate": 5e-06, "loss": 0.4213, "step": 970 }, { "epoch": 0.9464027040077257, "grad_norm": 1.4027836366902715, "learning_rate": 5e-06, "loss": 0.4264, "step": 980 }, { "epoch": 0.9560598744567842, "grad_norm": 1.3083915796128625, "learning_rate": 5e-06, "loss": 0.4213, "step": 990 }, { "epoch": 0.9657170449058425, "grad_norm": 1.283807856099459, "learning_rate": 5e-06, "loss": 0.4226, "step": 1000 }, { "epoch": 0.975374215354901, "grad_norm": 1.318305259783526, "learning_rate": 5e-06, "loss": 0.4177, "step": 1010 }, { "epoch": 0.9850313858039594, "grad_norm": 1.351081315674525, "learning_rate": 5e-06, "loss": 0.4184, "step": 1020 }, { "epoch": 0.9946885562530179, "grad_norm": 1.286367123540384, "learning_rate": 5e-06, "loss": 0.417, "step": 1030 }, { "epoch": 0.9995171414775471, "eval_loss": 0.4186817705631256, "eval_runtime": 178.2472, "eval_samples_per_second": 156.541, "eval_steps_per_second": 0.612, "step": 1035 }, { "epoch": 1.0043457267020763, "grad_norm": 2.083458671261006, "learning_rate": 5e-06, "loss": 0.3804, "step": 1040 }, { "epoch": 1.0140028971511348, "grad_norm": 1.6710464918103058, "learning_rate": 5e-06, "loss": 0.3159, "step": 1050 }, { "epoch": 1.0236600676001932, "grad_norm": 1.365743230792304, "learning_rate": 5e-06, "loss": 0.3138, "step": 1060 }, { "epoch": 1.0333172380492515, "grad_norm": 1.39749368883128, "learning_rate": 5e-06, "loss": 0.3146, "step": 1070 }, { "epoch": 1.04297440849831, "grad_norm": 1.5821963324713015, "learning_rate": 5e-06, "loss": 0.3101, "step": 1080 }, { "epoch": 1.0526315789473684, "grad_norm": 1.6454840741114403, "learning_rate": 5e-06, "loss": 0.3152, "step": 1090 }, { "epoch": 1.0622887493964268, "grad_norm": 1.4356941510220327, "learning_rate": 5e-06, "loss": 0.3211, "step": 1100 }, { "epoch": 1.0719459198454853, "grad_norm": 1.421015975147645, "learning_rate": 5e-06, "loss": 0.3185, "step": 1110 }, { "epoch": 1.0816030902945437, "grad_norm": 1.3637757751998314, "learning_rate": 5e-06, "loss": 0.3177, "step": 1120 }, { "epoch": 1.0912602607436022, "grad_norm": 1.4492110772881415, "learning_rate": 5e-06, "loss": 0.319, "step": 1130 }, { "epoch": 1.1009174311926606, "grad_norm": 1.4113019794162502, "learning_rate": 5e-06, "loss": 0.3183, "step": 1140 }, { "epoch": 1.110574601641719, "grad_norm": 1.4617901618486682, "learning_rate": 5e-06, "loss": 0.3206, "step": 1150 }, { "epoch": 1.1202317720907775, "grad_norm": 1.4362393573916201, "learning_rate": 5e-06, "loss": 0.3161, "step": 1160 }, { "epoch": 1.1298889425398357, "grad_norm": 1.480531545050256, "learning_rate": 5e-06, "loss": 0.3176, "step": 1170 }, { "epoch": 1.1395461129888942, "grad_norm": 1.4429791008774708, "learning_rate": 5e-06, "loss": 0.3206, "step": 1180 }, { "epoch": 1.1492032834379526, "grad_norm": 1.580292399548444, "learning_rate": 5e-06, "loss": 0.3189, "step": 1190 }, { "epoch": 1.158860453887011, "grad_norm": 1.4710472342154686, "learning_rate": 5e-06, "loss": 0.3176, "step": 1200 }, { "epoch": 1.1685176243360695, "grad_norm": 1.5510111053127804, "learning_rate": 5e-06, "loss": 0.3256, "step": 1210 }, { "epoch": 1.178174794785128, "grad_norm": 1.479606545432114, "learning_rate": 5e-06, "loss": 0.3194, "step": 1220 }, { "epoch": 1.1878319652341864, "grad_norm": 1.4470509715427249, "learning_rate": 5e-06, "loss": 0.315, "step": 1230 }, { "epoch": 1.1974891356832449, "grad_norm": 1.688180637289201, "learning_rate": 5e-06, "loss": 0.3203, "step": 1240 }, { "epoch": 1.2071463061323033, "grad_norm": 1.4261984744846448, "learning_rate": 5e-06, "loss": 0.3182, "step": 1250 }, { "epoch": 1.2168034765813616, "grad_norm": 1.5604626368447907, "learning_rate": 5e-06, "loss": 0.3204, "step": 1260 }, { "epoch": 1.22646064703042, "grad_norm": 1.5055974462561796, "learning_rate": 5e-06, "loss": 0.3194, "step": 1270 }, { "epoch": 1.2361178174794785, "grad_norm": 1.472413847791833, "learning_rate": 5e-06, "loss": 0.3164, "step": 1280 }, { "epoch": 1.245774987928537, "grad_norm": 1.4227053664977902, "learning_rate": 5e-06, "loss": 0.3195, "step": 1290 }, { "epoch": 1.2554321583775954, "grad_norm": 1.4555119482883845, "learning_rate": 5e-06, "loss": 0.3206, "step": 1300 }, { "epoch": 1.2650893288266538, "grad_norm": 1.3939863833537502, "learning_rate": 5e-06, "loss": 0.3174, "step": 1310 }, { "epoch": 1.2747464992757123, "grad_norm": 1.6503658733951097, "learning_rate": 5e-06, "loss": 0.3212, "step": 1320 }, { "epoch": 1.2844036697247707, "grad_norm": 1.5193288982909279, "learning_rate": 5e-06, "loss": 0.3185, "step": 1330 }, { "epoch": 1.2940608401738292, "grad_norm": 1.8992907121697116, "learning_rate": 5e-06, "loss": 0.3195, "step": 1340 }, { "epoch": 1.3037180106228874, "grad_norm": 1.462324131369204, "learning_rate": 5e-06, "loss": 0.3181, "step": 1350 }, { "epoch": 1.3133751810719458, "grad_norm": 1.5578779143588841, "learning_rate": 5e-06, "loss": 0.3203, "step": 1360 }, { "epoch": 1.3230323515210043, "grad_norm": 1.5253185582586557, "learning_rate": 5e-06, "loss": 0.3226, "step": 1370 }, { "epoch": 1.3326895219700627, "grad_norm": 1.598448718585017, "learning_rate": 5e-06, "loss": 0.3237, "step": 1380 }, { "epoch": 1.3423466924191212, "grad_norm": 1.4405857624240854, "learning_rate": 5e-06, "loss": 0.3223, "step": 1390 }, { "epoch": 1.3520038628681796, "grad_norm": 1.517684638613212, "learning_rate": 5e-06, "loss": 0.3231, "step": 1400 }, { "epoch": 1.361661033317238, "grad_norm": 1.483051649435277, "learning_rate": 5e-06, "loss": 0.3239, "step": 1410 }, { "epoch": 1.3713182037662965, "grad_norm": 1.5390732079855134, "learning_rate": 5e-06, "loss": 0.3218, "step": 1420 }, { "epoch": 1.380975374215355, "grad_norm": 1.5122394104164938, "learning_rate": 5e-06, "loss": 0.322, "step": 1430 }, { "epoch": 1.3906325446644132, "grad_norm": 1.5432820441426014, "learning_rate": 5e-06, "loss": 0.3145, "step": 1440 }, { "epoch": 1.4002897151134719, "grad_norm": 1.4643744673012662, "learning_rate": 5e-06, "loss": 0.3178, "step": 1450 }, { "epoch": 1.4099468855625301, "grad_norm": 1.4925264181574245, "learning_rate": 5e-06, "loss": 0.3243, "step": 1460 }, { "epoch": 1.4196040560115886, "grad_norm": 1.4092376086387746, "learning_rate": 5e-06, "loss": 0.3211, "step": 1470 }, { "epoch": 1.429261226460647, "grad_norm": 1.4232165437715756, "learning_rate": 5e-06, "loss": 0.3221, "step": 1480 }, { "epoch": 1.4389183969097055, "grad_norm": 1.4049090252857714, "learning_rate": 5e-06, "loss": 0.3259, "step": 1490 }, { "epoch": 1.448575567358764, "grad_norm": 1.5709064686147567, "learning_rate": 5e-06, "loss": 0.3192, "step": 1500 }, { "epoch": 1.4582327378078224, "grad_norm": 1.530143106655807, "learning_rate": 5e-06, "loss": 0.3198, "step": 1510 }, { "epoch": 1.4678899082568808, "grad_norm": 1.4642072322356405, "learning_rate": 5e-06, "loss": 0.3242, "step": 1520 }, { "epoch": 1.477547078705939, "grad_norm": 1.5078287858320247, "learning_rate": 5e-06, "loss": 0.3229, "step": 1530 }, { "epoch": 1.4872042491549977, "grad_norm": 1.6543135999245175, "learning_rate": 5e-06, "loss": 0.3258, "step": 1540 }, { "epoch": 1.496861419604056, "grad_norm": 1.4353311149318353, "learning_rate": 5e-06, "loss": 0.3218, "step": 1550 }, { "epoch": 1.5065185900531144, "grad_norm": 1.427362675163451, "learning_rate": 5e-06, "loss": 0.3203, "step": 1560 }, { "epoch": 1.5161757605021728, "grad_norm": 1.469685263972175, "learning_rate": 5e-06, "loss": 0.3233, "step": 1570 }, { "epoch": 1.5258329309512313, "grad_norm": 1.3556540711826985, "learning_rate": 5e-06, "loss": 0.32, "step": 1580 }, { "epoch": 1.5354901014002897, "grad_norm": 1.7785146026384562, "learning_rate": 5e-06, "loss": 0.3255, "step": 1590 }, { "epoch": 1.5451472718493482, "grad_norm": 1.6406483748665126, "learning_rate": 5e-06, "loss": 0.325, "step": 1600 }, { "epoch": 1.5548044422984066, "grad_norm": 1.487355254108548, "learning_rate": 5e-06, "loss": 0.3195, "step": 1610 }, { "epoch": 1.5644616127474649, "grad_norm": 1.5580699551469968, "learning_rate": 5e-06, "loss": 0.324, "step": 1620 }, { "epoch": 1.5741187831965235, "grad_norm": 1.6649624650081492, "learning_rate": 5e-06, "loss": 0.319, "step": 1630 }, { "epoch": 1.5837759536455818, "grad_norm": 1.5071762619332458, "learning_rate": 5e-06, "loss": 0.319, "step": 1640 }, { "epoch": 1.5934331240946402, "grad_norm": 1.4857917379863423, "learning_rate": 5e-06, "loss": 0.3246, "step": 1650 }, { "epoch": 1.6030902945436987, "grad_norm": 1.539444045216992, "learning_rate": 5e-06, "loss": 0.325, "step": 1660 }, { "epoch": 1.6127474649927571, "grad_norm": 1.4202629921913885, "learning_rate": 5e-06, "loss": 0.3249, "step": 1670 }, { "epoch": 1.6224046354418156, "grad_norm": 1.3824989312845557, "learning_rate": 5e-06, "loss": 0.3174, "step": 1680 }, { "epoch": 1.632061805890874, "grad_norm": 1.4180567800715427, "learning_rate": 5e-06, "loss": 0.32, "step": 1690 }, { "epoch": 1.6417189763399325, "grad_norm": 1.5877926258862851, "learning_rate": 5e-06, "loss": 0.3251, "step": 1700 }, { "epoch": 1.6513761467889907, "grad_norm": 1.4625682063733663, "learning_rate": 5e-06, "loss": 0.3251, "step": 1710 }, { "epoch": 1.6610333172380494, "grad_norm": 1.4726154892277907, "learning_rate": 5e-06, "loss": 0.3217, "step": 1720 }, { "epoch": 1.6706904876871076, "grad_norm": 1.4737546342638634, "learning_rate": 5e-06, "loss": 0.3241, "step": 1730 }, { "epoch": 1.6803476581361663, "grad_norm": 1.5913560262828996, "learning_rate": 5e-06, "loss": 0.3216, "step": 1740 }, { "epoch": 1.6900048285852245, "grad_norm": 1.6014273903495235, "learning_rate": 5e-06, "loss": 0.3228, "step": 1750 }, { "epoch": 1.699661999034283, "grad_norm": 1.4444614753017166, "learning_rate": 5e-06, "loss": 0.3219, "step": 1760 }, { "epoch": 1.7093191694833414, "grad_norm": 1.4711587303385112, "learning_rate": 5e-06, "loss": 0.3204, "step": 1770 }, { "epoch": 1.7189763399323998, "grad_norm": 1.4911136718247058, "learning_rate": 5e-06, "loss": 0.3225, "step": 1780 }, { "epoch": 1.7286335103814583, "grad_norm": 1.4836367449856465, "learning_rate": 5e-06, "loss": 0.3275, "step": 1790 }, { "epoch": 1.7382906808305165, "grad_norm": 1.5104260942145018, "learning_rate": 5e-06, "loss": 0.3261, "step": 1800 }, { "epoch": 1.7479478512795752, "grad_norm": 1.4587337741103181, "learning_rate": 5e-06, "loss": 0.3247, "step": 1810 }, { "epoch": 1.7576050217286334, "grad_norm": 1.5339818170022481, "learning_rate": 5e-06, "loss": 0.3237, "step": 1820 }, { "epoch": 1.767262192177692, "grad_norm": 1.4650422841731612, "learning_rate": 5e-06, "loss": 0.3218, "step": 1830 }, { "epoch": 1.7769193626267503, "grad_norm": 1.6273768535871191, "learning_rate": 5e-06, "loss": 0.3239, "step": 1840 }, { "epoch": 1.7865765330758088, "grad_norm": 1.6280662513787525, "learning_rate": 5e-06, "loss": 0.3267, "step": 1850 }, { "epoch": 1.7962337035248672, "grad_norm": 1.469702386246834, "learning_rate": 5e-06, "loss": 0.322, "step": 1860 }, { "epoch": 1.8058908739739257, "grad_norm": 1.6861190386571836, "learning_rate": 5e-06, "loss": 0.3246, "step": 1870 }, { "epoch": 1.8155480444229841, "grad_norm": 1.481497242440989, "learning_rate": 5e-06, "loss": 0.3226, "step": 1880 }, { "epoch": 1.8252052148720423, "grad_norm": 1.6155053021371237, "learning_rate": 5e-06, "loss": 0.3236, "step": 1890 }, { "epoch": 1.834862385321101, "grad_norm": 1.416112013187697, "learning_rate": 5e-06, "loss": 0.3228, "step": 1900 }, { "epoch": 1.8445195557701592, "grad_norm": 1.581676674672017, "learning_rate": 5e-06, "loss": 0.3241, "step": 1910 }, { "epoch": 1.854176726219218, "grad_norm": 1.5569951920933318, "learning_rate": 5e-06, "loss": 0.3259, "step": 1920 }, { "epoch": 1.8638338966682761, "grad_norm": 1.6829953573077656, "learning_rate": 5e-06, "loss": 0.3239, "step": 1930 }, { "epoch": 1.8734910671173346, "grad_norm": 1.4499124168214599, "learning_rate": 5e-06, "loss": 0.3237, "step": 1940 }, { "epoch": 1.883148237566393, "grad_norm": 1.3921897240084418, "learning_rate": 5e-06, "loss": 0.3202, "step": 1950 }, { "epoch": 1.8928054080154515, "grad_norm": 1.7399387456220219, "learning_rate": 5e-06, "loss": 0.3185, "step": 1960 }, { "epoch": 1.90246257846451, "grad_norm": 1.5558707224477395, "learning_rate": 5e-06, "loss": 0.323, "step": 1970 }, { "epoch": 1.9121197489135682, "grad_norm": 1.49802902853701, "learning_rate": 5e-06, "loss": 0.3234, "step": 1980 }, { "epoch": 1.9217769193626268, "grad_norm": 1.4807948737028658, "learning_rate": 5e-06, "loss": 0.322, "step": 1990 }, { "epoch": 1.931434089811685, "grad_norm": 1.3868348286955445, "learning_rate": 5e-06, "loss": 0.3247, "step": 2000 }, { "epoch": 1.9410912602607437, "grad_norm": 1.6163693821676344, "learning_rate": 5e-06, "loss": 0.3248, "step": 2010 }, { "epoch": 1.950748430709802, "grad_norm": 1.552439354775519, "learning_rate": 5e-06, "loss": 0.3233, "step": 2020 }, { "epoch": 1.9604056011588604, "grad_norm": 1.5224507289579325, "learning_rate": 5e-06, "loss": 0.3311, "step": 2030 }, { "epoch": 1.9700627716079189, "grad_norm": 1.3715156278923533, "learning_rate": 5e-06, "loss": 0.3224, "step": 2040 }, { "epoch": 1.9797199420569773, "grad_norm": 1.4690837804253396, "learning_rate": 5e-06, "loss": 0.3245, "step": 2050 }, { "epoch": 1.9893771125060358, "grad_norm": 1.5304270102797093, "learning_rate": 5e-06, "loss": 0.3283, "step": 2060 }, { "epoch": 1.999034282955094, "grad_norm": 1.5064039907716613, "learning_rate": 5e-06, "loss": 0.326, "step": 2070 }, { "epoch": 2.0, "eval_loss": 0.41363242268562317, "eval_runtime": 183.9864, "eval_samples_per_second": 151.658, "eval_steps_per_second": 0.592, "step": 2071 }, { "epoch": 2.0086914534041527, "grad_norm": 1.9529357489293813, "learning_rate": 5e-06, "loss": 0.2222, "step": 2080 }, { "epoch": 2.018348623853211, "grad_norm": 1.755518669301023, "learning_rate": 5e-06, "loss": 0.2019, "step": 2090 }, { "epoch": 2.0280057943022696, "grad_norm": 1.740635003243075, "learning_rate": 5e-06, "loss": 0.203, "step": 2100 }, { "epoch": 2.037662964751328, "grad_norm": 1.8237819578036996, "learning_rate": 5e-06, "loss": 0.1982, "step": 2110 }, { "epoch": 2.0473201352003865, "grad_norm": 1.5602633442114324, "learning_rate": 5e-06, "loss": 0.1992, "step": 2120 }, { "epoch": 2.0569773056494447, "grad_norm": 1.816471476491465, "learning_rate": 5e-06, "loss": 0.2049, "step": 2130 }, { "epoch": 2.066634476098503, "grad_norm": 1.7673442658203458, "learning_rate": 5e-06, "loss": 0.1993, "step": 2140 }, { "epoch": 2.0762916465475616, "grad_norm": 1.6973737687900528, "learning_rate": 5e-06, "loss": 0.1998, "step": 2150 }, { "epoch": 2.08594881699662, "grad_norm": 1.5869449008271677, "learning_rate": 5e-06, "loss": 0.202, "step": 2160 }, { "epoch": 2.0956059874456785, "grad_norm": 1.64683922032891, "learning_rate": 5e-06, "loss": 0.2002, "step": 2170 }, { "epoch": 2.1052631578947367, "grad_norm": 1.9386924431067427, "learning_rate": 5e-06, "loss": 0.2076, "step": 2180 }, { "epoch": 2.1149203283437954, "grad_norm": 1.6210647566768155, "learning_rate": 5e-06, "loss": 0.2028, "step": 2190 }, { "epoch": 2.1245774987928536, "grad_norm": 1.6287574889203644, "learning_rate": 5e-06, "loss": 0.2042, "step": 2200 }, { "epoch": 2.1342346692419123, "grad_norm": 1.655477861818872, "learning_rate": 5e-06, "loss": 0.2013, "step": 2210 }, { "epoch": 2.1438918396909705, "grad_norm": 1.5729371185522592, "learning_rate": 5e-06, "loss": 0.2015, "step": 2220 }, { "epoch": 2.153549010140029, "grad_norm": 1.6476425123948135, "learning_rate": 5e-06, "loss": 0.2012, "step": 2230 }, { "epoch": 2.1632061805890874, "grad_norm": 1.8001638906124682, "learning_rate": 5e-06, "loss": 0.2008, "step": 2240 }, { "epoch": 2.1728633510381457, "grad_norm": 1.7044240854710664, "learning_rate": 5e-06, "loss": 0.2054, "step": 2250 }, { "epoch": 2.1825205214872043, "grad_norm": 1.7542655066679125, "learning_rate": 5e-06, "loss": 0.2034, "step": 2260 }, { "epoch": 2.1921776919362626, "grad_norm": 1.6322702947383616, "learning_rate": 5e-06, "loss": 0.2057, "step": 2270 }, { "epoch": 2.2018348623853212, "grad_norm": 1.6677542060673687, "learning_rate": 5e-06, "loss": 0.207, "step": 2280 }, { "epoch": 2.2114920328343795, "grad_norm": 1.7199005988447569, "learning_rate": 5e-06, "loss": 0.2078, "step": 2290 }, { "epoch": 2.221149203283438, "grad_norm": 1.7417097237727022, "learning_rate": 5e-06, "loss": 0.2056, "step": 2300 }, { "epoch": 2.2308063737324964, "grad_norm": 1.7881569830854456, "learning_rate": 5e-06, "loss": 0.2018, "step": 2310 }, { "epoch": 2.240463544181555, "grad_norm": 1.778258239931456, "learning_rate": 5e-06, "loss": 0.2085, "step": 2320 }, { "epoch": 2.2501207146306133, "grad_norm": 1.7066617550978782, "learning_rate": 5e-06, "loss": 0.2081, "step": 2330 }, { "epoch": 2.2597778850796715, "grad_norm": 1.6359235244009738, "learning_rate": 5e-06, "loss": 0.2068, "step": 2340 }, { "epoch": 2.26943505552873, "grad_norm": 1.9690603812490861, "learning_rate": 5e-06, "loss": 0.2093, "step": 2350 }, { "epoch": 2.2790922259777884, "grad_norm": 1.8155935869005713, "learning_rate": 5e-06, "loss": 0.2086, "step": 2360 }, { "epoch": 2.288749396426847, "grad_norm": 1.7505898538326212, "learning_rate": 5e-06, "loss": 0.2082, "step": 2370 }, { "epoch": 2.2984065668759053, "grad_norm": 1.687851682529015, "learning_rate": 5e-06, "loss": 0.2094, "step": 2380 }, { "epoch": 2.308063737324964, "grad_norm": 2.0069895205569535, "learning_rate": 5e-06, "loss": 0.2114, "step": 2390 }, { "epoch": 2.317720907774022, "grad_norm": 1.8064841179245696, "learning_rate": 5e-06, "loss": 0.2059, "step": 2400 }, { "epoch": 2.327378078223081, "grad_norm": 1.7513896709251378, "learning_rate": 5e-06, "loss": 0.2073, "step": 2410 }, { "epoch": 2.337035248672139, "grad_norm": 1.7915692908106093, "learning_rate": 5e-06, "loss": 0.2097, "step": 2420 }, { "epoch": 2.3466924191211973, "grad_norm": 1.6328915879521833, "learning_rate": 5e-06, "loss": 0.2126, "step": 2430 }, { "epoch": 2.356349589570256, "grad_norm": 1.6676621870989963, "learning_rate": 5e-06, "loss": 0.2082, "step": 2440 }, { "epoch": 2.366006760019314, "grad_norm": 1.71621065929775, "learning_rate": 5e-06, "loss": 0.2079, "step": 2450 }, { "epoch": 2.375663930468373, "grad_norm": 1.6107515870333837, "learning_rate": 5e-06, "loss": 0.213, "step": 2460 }, { "epoch": 2.385321100917431, "grad_norm": 1.723592646527568, "learning_rate": 5e-06, "loss": 0.2112, "step": 2470 }, { "epoch": 2.3949782713664898, "grad_norm": 1.810751615032428, "learning_rate": 5e-06, "loss": 0.214, "step": 2480 }, { "epoch": 2.404635441815548, "grad_norm": 1.8583457997365094, "learning_rate": 5e-06, "loss": 0.2133, "step": 2490 }, { "epoch": 2.4142926122646067, "grad_norm": 1.8098882466647033, "learning_rate": 5e-06, "loss": 0.2118, "step": 2500 }, { "epoch": 2.423949782713665, "grad_norm": 1.7310394353267804, "learning_rate": 5e-06, "loss": 0.2091, "step": 2510 }, { "epoch": 2.433606953162723, "grad_norm": 1.9482929822769175, "learning_rate": 5e-06, "loss": 0.2103, "step": 2520 }, { "epoch": 2.443264123611782, "grad_norm": 1.8075435579847086, "learning_rate": 5e-06, "loss": 0.2111, "step": 2530 }, { "epoch": 2.45292129406084, "grad_norm": 1.6649997057492705, "learning_rate": 5e-06, "loss": 0.2119, "step": 2540 }, { "epoch": 2.4625784645098987, "grad_norm": 1.595570416233996, "learning_rate": 5e-06, "loss": 0.209, "step": 2550 }, { "epoch": 2.472235634958957, "grad_norm": 1.707621313978413, "learning_rate": 5e-06, "loss": 0.2124, "step": 2560 }, { "epoch": 2.4818928054080156, "grad_norm": 1.7540598817249424, "learning_rate": 5e-06, "loss": 0.2134, "step": 2570 }, { "epoch": 2.491549975857074, "grad_norm": 1.7644906677930094, "learning_rate": 5e-06, "loss": 0.212, "step": 2580 }, { "epoch": 2.5012071463061325, "grad_norm": 1.7556689404760746, "learning_rate": 5e-06, "loss": 0.2141, "step": 2590 }, { "epoch": 2.5108643167551907, "grad_norm": 1.7473267171283928, "learning_rate": 5e-06, "loss": 0.2124, "step": 2600 }, { "epoch": 2.520521487204249, "grad_norm": 1.9136137702521907, "learning_rate": 5e-06, "loss": 0.2127, "step": 2610 }, { "epoch": 2.5301786576533076, "grad_norm": 1.7481532118907182, "learning_rate": 5e-06, "loss": 0.2131, "step": 2620 }, { "epoch": 2.539835828102366, "grad_norm": 1.832460065168467, "learning_rate": 5e-06, "loss": 0.2153, "step": 2630 }, { "epoch": 2.5494929985514245, "grad_norm": 1.6966236245813655, "learning_rate": 5e-06, "loss": 0.2131, "step": 2640 }, { "epoch": 2.5591501690004828, "grad_norm": 1.7181221486120444, "learning_rate": 5e-06, "loss": 0.213, "step": 2650 }, { "epoch": 2.5688073394495414, "grad_norm": 1.6118010065233517, "learning_rate": 5e-06, "loss": 0.2148, "step": 2660 }, { "epoch": 2.5784645098985997, "grad_norm": 1.7015205827064956, "learning_rate": 5e-06, "loss": 0.219, "step": 2670 }, { "epoch": 2.5881216803476583, "grad_norm": 1.688200340928073, "learning_rate": 5e-06, "loss": 0.2145, "step": 2680 }, { "epoch": 2.5977788507967166, "grad_norm": 1.628479976082271, "learning_rate": 5e-06, "loss": 0.2173, "step": 2690 }, { "epoch": 2.607436021245775, "grad_norm": 1.6284654190992232, "learning_rate": 5e-06, "loss": 0.2166, "step": 2700 }, { "epoch": 2.6170931916948335, "grad_norm": 1.7027701458933544, "learning_rate": 5e-06, "loss": 0.2154, "step": 2710 }, { "epoch": 2.6267503621438917, "grad_norm": 1.6155718880952934, "learning_rate": 5e-06, "loss": 0.2154, "step": 2720 }, { "epoch": 2.6364075325929504, "grad_norm": 1.6351349351929865, "learning_rate": 5e-06, "loss": 0.2161, "step": 2730 }, { "epoch": 2.6460647030420086, "grad_norm": 1.7361275613850788, "learning_rate": 5e-06, "loss": 0.2183, "step": 2740 }, { "epoch": 2.6557218734910673, "grad_norm": 1.9309759687598864, "learning_rate": 5e-06, "loss": 0.2183, "step": 2750 }, { "epoch": 2.6653790439401255, "grad_norm": 1.7778715949832347, "learning_rate": 5e-06, "loss": 0.2164, "step": 2760 }, { "epoch": 2.675036214389184, "grad_norm": 1.689951172416981, "learning_rate": 5e-06, "loss": 0.2181, "step": 2770 }, { "epoch": 2.6846933848382424, "grad_norm": 1.9355370897589272, "learning_rate": 5e-06, "loss": 0.2205, "step": 2780 }, { "epoch": 2.6943505552873006, "grad_norm": 1.8460056442038406, "learning_rate": 5e-06, "loss": 0.2153, "step": 2790 }, { "epoch": 2.7040077257363593, "grad_norm": 1.857187825531335, "learning_rate": 5e-06, "loss": 0.2194, "step": 2800 }, { "epoch": 2.7136648961854175, "grad_norm": 1.7784483414745131, "learning_rate": 5e-06, "loss": 0.2155, "step": 2810 }, { "epoch": 2.723322066634476, "grad_norm": 1.6570596632282855, "learning_rate": 5e-06, "loss": 0.2204, "step": 2820 }, { "epoch": 2.7329792370835344, "grad_norm": 1.8006740077758066, "learning_rate": 5e-06, "loss": 0.2185, "step": 2830 }, { "epoch": 2.742636407532593, "grad_norm": 1.6910488555694076, "learning_rate": 5e-06, "loss": 0.2215, "step": 2840 }, { "epoch": 2.7522935779816513, "grad_norm": 1.6824061864717337, "learning_rate": 5e-06, "loss": 0.2168, "step": 2850 }, { "epoch": 2.76195074843071, "grad_norm": 1.667248411860486, "learning_rate": 5e-06, "loss": 0.2171, "step": 2860 }, { "epoch": 2.771607918879768, "grad_norm": 1.6621742506671506, "learning_rate": 5e-06, "loss": 0.2172, "step": 2870 }, { "epoch": 2.7812650893288264, "grad_norm": 1.951777361512585, "learning_rate": 5e-06, "loss": 0.2169, "step": 2880 }, { "epoch": 2.790922259777885, "grad_norm": 1.6376961183273515, "learning_rate": 5e-06, "loss": 0.2184, "step": 2890 }, { "epoch": 2.8005794302269438, "grad_norm": 1.7499302221356705, "learning_rate": 5e-06, "loss": 0.2172, "step": 2900 }, { "epoch": 2.810236600676002, "grad_norm": 1.6961296830905148, "learning_rate": 5e-06, "loss": 0.2191, "step": 2910 }, { "epoch": 2.8198937711250602, "grad_norm": 1.7673008414488, "learning_rate": 5e-06, "loss": 0.2203, "step": 2920 }, { "epoch": 2.829550941574119, "grad_norm": 1.756708895885833, "learning_rate": 5e-06, "loss": 0.2192, "step": 2930 }, { "epoch": 2.839208112023177, "grad_norm": 1.7064534219401073, "learning_rate": 5e-06, "loss": 0.2194, "step": 2940 }, { "epoch": 2.848865282472236, "grad_norm": 1.854536004459549, "learning_rate": 5e-06, "loss": 0.2207, "step": 2950 }, { "epoch": 2.858522452921294, "grad_norm": 1.7590173226380559, "learning_rate": 5e-06, "loss": 0.2234, "step": 2960 }, { "epoch": 2.8681796233703523, "grad_norm": 1.6888816007203014, "learning_rate": 5e-06, "loss": 0.2206, "step": 2970 }, { "epoch": 2.877836793819411, "grad_norm": 1.8848356855139519, "learning_rate": 5e-06, "loss": 0.2199, "step": 2980 }, { "epoch": 2.8874939642684696, "grad_norm": 1.7081747642661718, "learning_rate": 5e-06, "loss": 0.2181, "step": 2990 }, { "epoch": 2.897151134717528, "grad_norm": 1.8139898022279473, "learning_rate": 5e-06, "loss": 0.221, "step": 3000 }, { "epoch": 2.906808305166586, "grad_norm": 1.8123670196961439, "learning_rate": 5e-06, "loss": 0.2176, "step": 3010 }, { "epoch": 2.9164654756156447, "grad_norm": 1.6673754695154868, "learning_rate": 5e-06, "loss": 0.2232, "step": 3020 }, { "epoch": 2.926122646064703, "grad_norm": 1.6571101014785201, "learning_rate": 5e-06, "loss": 0.2203, "step": 3030 }, { "epoch": 2.9357798165137616, "grad_norm": 1.7173214731808049, "learning_rate": 5e-06, "loss": 0.2215, "step": 3040 }, { "epoch": 2.94543698696282, "grad_norm": 1.74112205509197, "learning_rate": 5e-06, "loss": 0.2193, "step": 3050 }, { "epoch": 2.955094157411878, "grad_norm": 1.6720045527666803, "learning_rate": 5e-06, "loss": 0.2215, "step": 3060 }, { "epoch": 2.9647513278609368, "grad_norm": 1.699005956548853, "learning_rate": 5e-06, "loss": 0.2208, "step": 3070 }, { "epoch": 2.9744084983099954, "grad_norm": 1.65145518846451, "learning_rate": 5e-06, "loss": 0.2229, "step": 3080 }, { "epoch": 2.9840656687590537, "grad_norm": 1.7864091472536987, "learning_rate": 5e-06, "loss": 0.2221, "step": 3090 }, { "epoch": 2.993722839208112, "grad_norm": 1.7192522447102805, "learning_rate": 5e-06, "loss": 0.2213, "step": 3100 }, { "epoch": 2.998551424432641, "eval_loss": 0.4467960000038147, "eval_runtime": 177.447, "eval_samples_per_second": 157.247, "eval_steps_per_second": 0.614, "step": 3105 }, { "epoch": 2.998551424432641, "step": 3105, "total_flos": 5200153128468480.0, "train_loss": 0.3273588392085691, "train_runtime": 26340.0491, "train_samples_per_second": 60.381, "train_steps_per_second": 0.118 } ], "logging_steps": 10, "max_steps": 3105, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5200153128468480.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }