Qwen2.5-1.5B-Open-R1-Distill / trainer_state.json
jeff-gao's picture
Model save
973fe9f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9988901220865705,
"eval_steps": 100,
"global_step": 450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011098779134295227,
"grad_norm": 0.32028938698735326,
"learning_rate": 2.222222222222222e-06,
"loss": 1.1057,
"mean_token_accuracy": 0.7074955803776843,
"step": 5
},
{
"epoch": 0.022197558268590455,
"grad_norm": 0.19534757580522985,
"learning_rate": 4.444444444444444e-06,
"loss": 1.0688,
"mean_token_accuracy": 0.7163213776876576,
"step": 10
},
{
"epoch": 0.033296337402885685,
"grad_norm": 0.1962807122200424,
"learning_rate": 6.666666666666667e-06,
"loss": 1.0285,
"mean_token_accuracy": 0.7219675252672673,
"step": 15
},
{
"epoch": 0.04439511653718091,
"grad_norm": 0.17661044382753963,
"learning_rate": 8.888888888888888e-06,
"loss": 0.9569,
"mean_token_accuracy": 0.7338179788852269,
"step": 20
},
{
"epoch": 0.05549389567147614,
"grad_norm": 0.13298871259606682,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.96,
"mean_token_accuracy": 0.7289285538378297,
"step": 25
},
{
"epoch": 0.06659267480577137,
"grad_norm": 0.10139226456438258,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.9003,
"mean_token_accuracy": 0.7429319064764655,
"step": 30
},
{
"epoch": 0.07769145394006659,
"grad_norm": 0.09671244610636456,
"learning_rate": 1.555555555555556e-05,
"loss": 0.8848,
"mean_token_accuracy": 0.7450362054794413,
"step": 35
},
{
"epoch": 0.08879023307436182,
"grad_norm": 0.10358003016336022,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.8681,
"mean_token_accuracy": 0.7483839065381315,
"step": 40
},
{
"epoch": 0.09988901220865705,
"grad_norm": 0.07881610121393638,
"learning_rate": 2e-05,
"loss": 0.8442,
"mean_token_accuracy": 0.7542158990266088,
"step": 45
},
{
"epoch": 0.11098779134295228,
"grad_norm": 0.07695709459998314,
"learning_rate": 1.9992479525042305e-05,
"loss": 0.8202,
"mean_token_accuracy": 0.7594429564506759,
"step": 50
},
{
"epoch": 0.1220865704772475,
"grad_norm": 0.0770940463572419,
"learning_rate": 1.996992941167792e-05,
"loss": 0.8204,
"mean_token_accuracy": 0.7596416663786116,
"step": 55
},
{
"epoch": 0.13318534961154274,
"grad_norm": 0.07120273045521322,
"learning_rate": 1.9932383577419432e-05,
"loss": 0.819,
"mean_token_accuracy": 0.7587201214083239,
"step": 60
},
{
"epoch": 0.14428412874583796,
"grad_norm": 0.07558536731907865,
"learning_rate": 1.9879898494768093e-05,
"loss": 0.8027,
"mean_token_accuracy": 0.7628218193365146,
"step": 65
},
{
"epoch": 0.15538290788013318,
"grad_norm": 0.0724884828137313,
"learning_rate": 1.9812553106273848e-05,
"loss": 0.8126,
"mean_token_accuracy": 0.7601266253946841,
"step": 70
},
{
"epoch": 0.16648168701442842,
"grad_norm": 0.0721988261195697,
"learning_rate": 1.973044870579824e-05,
"loss": 0.8027,
"mean_token_accuracy": 0.7625601714037885,
"step": 75
},
{
"epoch": 0.17758046614872364,
"grad_norm": 0.07812772834648507,
"learning_rate": 1.9633708786158803e-05,
"loss": 0.803,
"mean_token_accuracy": 0.7625912882794786,
"step": 80
},
{
"epoch": 0.18867924528301888,
"grad_norm": 0.06226943817830889,
"learning_rate": 1.9522478853384154e-05,
"loss": 0.7936,
"mean_token_accuracy": 0.7635993724131902,
"step": 85
},
{
"epoch": 0.1997780244173141,
"grad_norm": 0.0695985028428393,
"learning_rate": 1.9396926207859085e-05,
"loss": 0.7841,
"mean_token_accuracy": 0.7661331851833721,
"step": 90
},
{
"epoch": 0.21087680355160932,
"grad_norm": 0.07119143212066893,
"learning_rate": 1.9257239692688907e-05,
"loss": 0.7904,
"mean_token_accuracy": 0.7651270507722707,
"step": 95
},
{
"epoch": 0.22197558268590456,
"grad_norm": 0.06900642736695052,
"learning_rate": 1.9103629409661468e-05,
"loss": 0.7805,
"mean_token_accuracy": 0.7667522330276506,
"step": 100
},
{
"epoch": 0.22197558268590456,
"eval_loss": 0.8109510540962219,
"eval_mean_token_accuracy": 0.7568694476131612,
"eval_runtime": 2.9489,
"eval_samples_per_second": 43.744,
"eval_steps_per_second": 3.73,
"step": 100
},
{
"epoch": 0.23307436182019978,
"grad_norm": 0.07584475595205237,
"learning_rate": 1.8936326403234125e-05,
"loss": 0.7819,
"mean_token_accuracy": 0.7674415677558553,
"step": 105
},
{
"epoch": 0.244173140954495,
"grad_norm": 0.06583872042093958,
"learning_rate": 1.8755582313020912e-05,
"loss": 0.7819,
"mean_token_accuracy": 0.766467737858396,
"step": 110
},
{
"epoch": 0.25527192008879024,
"grad_norm": 0.07082657098771901,
"learning_rate": 1.8561668995302668e-05,
"loss": 0.7831,
"mean_token_accuracy": 0.76569958171409,
"step": 115
},
{
"epoch": 0.2663706992230855,
"grad_norm": 0.06762014391699019,
"learning_rate": 1.8354878114129368e-05,
"loss": 0.7729,
"mean_token_accuracy": 0.7691614712931135,
"step": 120
},
{
"epoch": 0.27746947835738067,
"grad_norm": 0.06572171584857384,
"learning_rate": 1.8135520702629677e-05,
"loss": 0.7727,
"mean_token_accuracy": 0.7692371318506055,
"step": 125
},
{
"epoch": 0.2885682574916759,
"grad_norm": 0.07387797497484562,
"learning_rate": 1.7903926695187595e-05,
"loss": 0.7748,
"mean_token_accuracy": 0.767360264364967,
"step": 130
},
{
"epoch": 0.29966703662597116,
"grad_norm": 0.0648507090395773,
"learning_rate": 1.766044443118978e-05,
"loss": 0.7876,
"mean_token_accuracy": 0.7646269472458946,
"step": 135
},
{
"epoch": 0.31076581576026635,
"grad_norm": 0.07594530254646713,
"learning_rate": 1.740544013109005e-05,
"loss": 0.763,
"mean_token_accuracy": 0.7709959638413746,
"step": 140
},
{
"epoch": 0.3218645948945616,
"grad_norm": 0.0732533507550857,
"learning_rate": 1.7139297345578992e-05,
"loss": 0.783,
"mean_token_accuracy": 0.7649811510602629,
"step": 145
},
{
"epoch": 0.33296337402885684,
"grad_norm": 0.06649981966991503,
"learning_rate": 1.686241637868734e-05,
"loss": 0.7699,
"mean_token_accuracy": 0.7692172433619623,
"step": 150
},
{
"epoch": 0.34406215316315203,
"grad_norm": 0.0663190385068203,
"learning_rate": 1.657521368569064e-05,
"loss": 0.7701,
"mean_token_accuracy": 0.7685507996235139,
"step": 155
},
{
"epoch": 0.3551609322974473,
"grad_norm": 0.07086248919589423,
"learning_rate": 1.627812124672099e-05,
"loss": 0.7916,
"mean_token_accuracy": 0.762897097306175,
"step": 160
},
{
"epoch": 0.3662597114317425,
"grad_norm": 0.06620650991928366,
"learning_rate": 1.5971585917027864e-05,
"loss": 0.7802,
"mean_token_accuracy": 0.7649210363933718,
"step": 165
},
{
"epoch": 0.37735849056603776,
"grad_norm": 0.06905926805248713,
"learning_rate": 1.5656068754865388e-05,
"loss": 0.7677,
"mean_token_accuracy": 0.7696914957690235,
"step": 170
},
{
"epoch": 0.38845726970033295,
"grad_norm": 0.07914298364560284,
"learning_rate": 1.5332044328016916e-05,
"loss": 0.7672,
"mean_token_accuracy": 0.7682120122871129,
"step": 175
},
{
"epoch": 0.3995560488346282,
"grad_norm": 0.07115670436565744,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.7774,
"mean_token_accuracy": 0.7662370568414045,
"step": 180
},
{
"epoch": 0.41065482796892344,
"grad_norm": 0.06396528657795297,
"learning_rate": 1.4660435197025391e-05,
"loss": 0.7478,
"mean_token_accuracy": 0.7749828723486186,
"step": 185
},
{
"epoch": 0.42175360710321863,
"grad_norm": 0.0628671379282561,
"learning_rate": 1.4313860656812537e-05,
"loss": 0.7527,
"mean_token_accuracy": 0.7723581904090526,
"step": 190
},
{
"epoch": 0.4328523862375139,
"grad_norm": 0.06693399129139697,
"learning_rate": 1.396079766039157e-05,
"loss": 0.7709,
"mean_token_accuracy": 0.768176693477312,
"step": 195
},
{
"epoch": 0.4439511653718091,
"grad_norm": 0.06898843383807368,
"learning_rate": 1.3601777248047105e-05,
"loss": 0.7558,
"mean_token_accuracy": 0.7725129471022092,
"step": 200
},
{
"epoch": 0.4439511653718091,
"eval_loss": 0.7829984426498413,
"eval_mean_token_accuracy": 0.7632673514716071,
"eval_runtime": 2.5178,
"eval_samples_per_second": 51.234,
"eval_steps_per_second": 4.369,
"step": 200
},
{
"epoch": 0.4550499445061043,
"grad_norm": 0.06712291331425221,
"learning_rate": 1.3237339420583213e-05,
"loss": 0.7423,
"mean_token_accuracy": 0.7751750598902079,
"step": 205
},
{
"epoch": 0.46614872364039955,
"grad_norm": 0.07066852759980123,
"learning_rate": 1.2868032327110904e-05,
"loss": 0.7847,
"mean_token_accuracy": 0.7637066112136016,
"step": 210
},
{
"epoch": 0.4772475027746948,
"grad_norm": 0.06797517881945832,
"learning_rate": 1.2494411440579814e-05,
"loss": 0.7627,
"mean_token_accuracy": 0.7701866684088337,
"step": 215
},
{
"epoch": 0.48834628190899,
"grad_norm": 0.0740366450868316,
"learning_rate": 1.211703872229411e-05,
"loss": 0.7554,
"mean_token_accuracy": 0.7726324974593143,
"step": 220
},
{
"epoch": 0.49944506104328523,
"grad_norm": 0.06603564857589732,
"learning_rate": 1.1736481776669307e-05,
"loss": 0.7703,
"mean_token_accuracy": 0.7674938853021173,
"step": 225
},
{
"epoch": 0.5105438401775805,
"grad_norm": 0.06456424170382646,
"learning_rate": 1.1353312997501313e-05,
"loss": 0.7681,
"mean_token_accuracy": 0.7680860839714745,
"step": 230
},
{
"epoch": 0.5216426193118757,
"grad_norm": 0.0680877993193484,
"learning_rate": 1.0968108707031792e-05,
"loss": 0.7568,
"mean_token_accuracy": 0.7713919990696916,
"step": 235
},
{
"epoch": 0.532741398446171,
"grad_norm": 0.06457964510433087,
"learning_rate": 1.0581448289104759e-05,
"loss": 0.7489,
"mean_token_accuracy": 0.7742981274310832,
"step": 240
},
{
"epoch": 0.5438401775804661,
"grad_norm": 0.06309191183030195,
"learning_rate": 1.0193913317718245e-05,
"loss": 0.7518,
"mean_token_accuracy": 0.7737425585967734,
"step": 245
},
{
"epoch": 0.5549389567147613,
"grad_norm": 0.06092764730736236,
"learning_rate": 9.806086682281759e-06,
"loss": 0.761,
"mean_token_accuracy": 0.7695176080139992,
"step": 250
},
{
"epoch": 0.5660377358490566,
"grad_norm": 0.06517875424025751,
"learning_rate": 9.418551710895243e-06,
"loss": 0.7451,
"mean_token_accuracy": 0.7737921205376608,
"step": 255
},
{
"epoch": 0.5771365149833518,
"grad_norm": 0.06586926609081382,
"learning_rate": 9.03189129296821e-06,
"loss": 0.7272,
"mean_token_accuracy": 0.7790233444795842,
"step": 260
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.06455882882155867,
"learning_rate": 8.646687002498692e-06,
"loss": 0.7493,
"mean_token_accuracy": 0.7728544867539606,
"step": 265
},
{
"epoch": 0.5993340732519423,
"grad_norm": 0.06419037506595338,
"learning_rate": 8.263518223330698e-06,
"loss": 0.7447,
"mean_token_accuracy": 0.774466472618278,
"step": 270
},
{
"epoch": 0.6104328523862376,
"grad_norm": 0.06483038923609051,
"learning_rate": 7.882961277705897e-06,
"loss": 0.7467,
"mean_token_accuracy": 0.7736083802309587,
"step": 275
},
{
"epoch": 0.6215316315205327,
"grad_norm": 0.06889145412976934,
"learning_rate": 7.505588559420188e-06,
"loss": 0.7423,
"mean_token_accuracy": 0.7752051151920257,
"step": 280
},
{
"epoch": 0.632630410654828,
"grad_norm": 0.06931221562176564,
"learning_rate": 7.131967672889101e-06,
"loss": 0.7766,
"mean_token_accuracy": 0.7648978880464531,
"step": 285
},
{
"epoch": 0.6437291897891232,
"grad_norm": 0.0645937272519637,
"learning_rate": 6.762660579416791e-06,
"loss": 0.7529,
"mean_token_accuracy": 0.7729185441854456,
"step": 290
},
{
"epoch": 0.6548279689234184,
"grad_norm": 0.06415673551420226,
"learning_rate": 6.3982227519528986e-06,
"loss": 0.7601,
"mean_token_accuracy": 0.7699141088391924,
"step": 295
},
{
"epoch": 0.6659267480577137,
"grad_norm": 0.06845419068799381,
"learning_rate": 6.039202339608432e-06,
"loss": 0.7519,
"mean_token_accuracy": 0.77195855669696,
"step": 300
},
{
"epoch": 0.6659267480577137,
"eval_loss": 0.7694035172462463,
"eval_mean_token_accuracy": 0.7656926514001757,
"eval_runtime": 2.5109,
"eval_samples_per_second": 51.376,
"eval_steps_per_second": 4.381,
"step": 300
},
{
"epoch": 0.6770255271920089,
"grad_norm": 0.06481669897887246,
"learning_rate": 5.686139343187468e-06,
"loss": 0.7368,
"mean_token_accuracy": 0.7768982703775535,
"step": 305
},
{
"epoch": 0.6881243063263041,
"grad_norm": 0.06555414493127953,
"learning_rate": 5.339564802974615e-06,
"loss": 0.7496,
"mean_token_accuracy": 0.7741114122022307,
"step": 310
},
{
"epoch": 0.6992230854605993,
"grad_norm": 0.06321637577126818,
"learning_rate": 5.000000000000003e-06,
"loss": 0.7379,
"mean_token_accuracy": 0.7763521164662632,
"step": 315
},
{
"epoch": 0.7103218645948945,
"grad_norm": 0.06534174478047594,
"learning_rate": 4.66795567198309e-06,
"loss": 0.7186,
"mean_token_accuracy": 0.7821842581348311,
"step": 320
},
{
"epoch": 0.7214206437291898,
"grad_norm": 0.0656418642194091,
"learning_rate": 4.343931245134616e-06,
"loss": 0.7429,
"mean_token_accuracy": 0.7750543137352807,
"step": 325
},
{
"epoch": 0.732519422863485,
"grad_norm": 0.06142434056090795,
"learning_rate": 4.028414082972141e-06,
"loss": 0.7432,
"mean_token_accuracy": 0.775647557164172,
"step": 330
},
{
"epoch": 0.7436182019977803,
"grad_norm": 0.06566009724006543,
"learning_rate": 3.7218787532790167e-06,
"loss": 0.7527,
"mean_token_accuracy": 0.7717515416461624,
"step": 335
},
{
"epoch": 0.7547169811320755,
"grad_norm": 0.06813787133901092,
"learning_rate": 3.424786314309365e-06,
"loss": 0.7397,
"mean_token_accuracy": 0.7761784463020833,
"step": 340
},
{
"epoch": 0.7658157602663707,
"grad_norm": 0.061296004900329354,
"learning_rate": 3.1375836213126653e-06,
"loss": 0.7547,
"mean_token_accuracy": 0.7708365795261976,
"step": 345
},
{
"epoch": 0.7769145394006659,
"grad_norm": 0.06086151158973008,
"learning_rate": 2.8607026544210115e-06,
"loss": 0.7449,
"mean_token_accuracy": 0.7737567810246656,
"step": 350
},
{
"epoch": 0.7880133185349611,
"grad_norm": 0.06066361422328344,
"learning_rate": 2.594559868909956e-06,
"loss": 0.7526,
"mean_token_accuracy": 0.7728921789426538,
"step": 355
},
{
"epoch": 0.7991120976692564,
"grad_norm": 0.061244946934632546,
"learning_rate": 2.339555568810221e-06,
"loss": 0.7428,
"mean_token_accuracy": 0.7745905285585644,
"step": 360
},
{
"epoch": 0.8102108768035516,
"grad_norm": 0.059950100330008936,
"learning_rate": 2.0960733048124082e-06,
"loss": 0.735,
"mean_token_accuracy": 0.7771190685426068,
"step": 365
},
{
"epoch": 0.8213096559378469,
"grad_norm": 0.062267646898023575,
"learning_rate": 1.8644792973703252e-06,
"loss": 0.757,
"mean_token_accuracy": 0.7711234095884432,
"step": 370
},
{
"epoch": 0.832408435072142,
"grad_norm": 0.05795250590930739,
"learning_rate": 1.6451218858706374e-06,
"loss": 0.7396,
"mean_token_accuracy": 0.7762826225755755,
"step": 375
},
{
"epoch": 0.8435072142064373,
"grad_norm": 0.05646366375927419,
"learning_rate": 1.4383310046973365e-06,
"loss": 0.742,
"mean_token_accuracy": 0.7754256081182773,
"step": 380
},
{
"epoch": 0.8546059933407325,
"grad_norm": 0.05933272693822518,
"learning_rate": 1.2444176869790925e-06,
"loss": 0.7457,
"mean_token_accuracy": 0.7742744235316502,
"step": 385
},
{
"epoch": 0.8657047724750278,
"grad_norm": 0.057540816299949595,
"learning_rate": 1.0636735967658785e-06,
"loss": 0.736,
"mean_token_accuracy": 0.7765978708392828,
"step": 390
},
{
"epoch": 0.876803551609323,
"grad_norm": 0.05844756012864216,
"learning_rate": 8.963705903385344e-07,
"loss": 0.7183,
"mean_token_accuracy": 0.7822006045277151,
"step": 395
},
{
"epoch": 0.8879023307436182,
"grad_norm": 0.05810320914079056,
"learning_rate": 7.427603073110967e-07,
"loss": 0.7442,
"mean_token_accuracy": 0.7742584918108283,
"step": 400
},
{
"epoch": 0.8879023307436182,
"eval_loss": 0.7638587951660156,
"eval_mean_token_accuracy": 0.7671856273651642,
"eval_runtime": 2.5128,
"eval_samples_per_second": 51.338,
"eval_steps_per_second": 4.378,
"step": 400
},
{
"epoch": 0.8990011098779135,
"grad_norm": 0.05687291272451405,
"learning_rate": 6.030737921409169e-07,
"loss": 0.7355,
"mean_token_accuracy": 0.7774604937569412,
"step": 405
},
{
"epoch": 0.9100998890122086,
"grad_norm": 0.05899588715439542,
"learning_rate": 4.775211466158469e-07,
"loss": 0.7319,
"mean_token_accuracy": 0.7774400412989902,
"step": 410
},
{
"epoch": 0.9211986681465039,
"grad_norm": 0.05577638815550462,
"learning_rate": 3.662912138411967e-07,
"loss": 0.7178,
"mean_token_accuracy": 0.7821916392303806,
"step": 415
},
{
"epoch": 0.9322974472807991,
"grad_norm": 0.05865365285983047,
"learning_rate": 2.6955129420176193e-07,
"loss": 0.7309,
"mean_token_accuracy": 0.778973121325068,
"step": 420
},
{
"epoch": 0.9433962264150944,
"grad_norm": 0.06038263301804733,
"learning_rate": 1.874468937261531e-07,
"loss": 0.7372,
"mean_token_accuracy": 0.7765551528106018,
"step": 425
},
{
"epoch": 0.9544950055493896,
"grad_norm": 0.059800638962906945,
"learning_rate": 1.201015052319099e-07,
"loss": 0.7291,
"mean_token_accuracy": 0.7794756294446461,
"step": 430
},
{
"epoch": 0.9655937846836848,
"grad_norm": 0.056012625483331906,
"learning_rate": 6.761642258056977e-08,
"loss": 0.7255,
"mean_token_accuracy": 0.7798846256811043,
"step": 435
},
{
"epoch": 0.97669256381798,
"grad_norm": 0.055552002158992475,
"learning_rate": 3.0070588322079765e-08,
"loss": 0.7351,
"mean_token_accuracy": 0.7766328434320331,
"step": 440
},
{
"epoch": 0.9877913429522752,
"grad_norm": 0.05677914033692145,
"learning_rate": 7.520474957699586e-09,
"loss": 0.7351,
"mean_token_accuracy": 0.7767306519256947,
"step": 445
},
{
"epoch": 0.9988901220865705,
"grad_norm": 0.06170289905752787,
"learning_rate": 0.0,
"loss": 0.7528,
"mean_token_accuracy": 0.7717083040396344,
"step": 450
},
{
"epoch": 0.9988901220865705,
"step": 450,
"total_flos": 6.086450124265882e+17,
"train_loss": 0.7795855527453952,
"train_runtime": 1563.698,
"train_samples_per_second": 13.828,
"train_steps_per_second": 0.288
}
],
"logging_steps": 5,
"max_steps": 450,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.086450124265882e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}