{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.12053638692180202,
  "eval_steps": 13,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0024107277384360403,
      "grad_norm": 0.18300533294677734,
      "learning_rate": 1e-05,
      "loss": 11.7954,
      "step": 1
    },
    {
      "epoch": 0.0024107277384360403,
      "eval_loss": 11.799297332763672,
      "eval_runtime": 14.7876,
      "eval_samples_per_second": 47.269,
      "eval_steps_per_second": 23.668,
      "step": 1
    },
    {
      "epoch": 0.0048214554768720806,
      "grad_norm": 0.1799505054950714,
      "learning_rate": 2e-05,
      "loss": 11.788,
      "step": 2
    },
    {
      "epoch": 0.007232183215308121,
      "grad_norm": 0.16323117911815643,
      "learning_rate": 3e-05,
      "loss": 11.8032,
      "step": 3
    },
    {
      "epoch": 0.009642910953744161,
      "grad_norm": 0.14314371347427368,
      "learning_rate": 4e-05,
      "loss": 11.7996,
      "step": 4
    },
    {
      "epoch": 0.012053638692180202,
      "grad_norm": 0.14851737022399902,
      "learning_rate": 5e-05,
      "loss": 11.7834,
      "step": 5
    },
    {
      "epoch": 0.014464366430616242,
      "grad_norm": 0.15303704142570496,
      "learning_rate": 6e-05,
      "loss": 11.7991,
      "step": 6
    },
    {
      "epoch": 0.016875094169052283,
      "grad_norm": 0.18720045685768127,
      "learning_rate": 7e-05,
      "loss": 11.8015,
      "step": 7
    },
    {
      "epoch": 0.019285821907488322,
      "grad_norm": 0.20559322834014893,
      "learning_rate": 8e-05,
      "loss": 11.7867,
      "step": 8
    },
    {
      "epoch": 0.021696549645924365,
      "grad_norm": 0.16245993971824646,
      "learning_rate": 9e-05,
      "loss": 11.7981,
      "step": 9
    },
    {
      "epoch": 0.024107277384360404,
      "grad_norm": 0.15587429702281952,
      "learning_rate": 0.0001,
      "loss": 11.7978,
      "step": 10
    },
    {
      "epoch": 0.026518005122796444,
      "grad_norm": 0.15051642060279846,
      "learning_rate": 9.98458666866564e-05,
      "loss": 11.7957,
      "step": 11
    },
    {
      "epoch": 0.028928732861232483,
      "grad_norm": 0.1579117774963379,
      "learning_rate": 9.938441702975689e-05,
      "loss": 11.7856,
      "step": 12
    },
    {
      "epoch": 0.03133946059966852,
      "grad_norm": 0.15979625284671783,
      "learning_rate": 9.861849601988383e-05,
      "loss": 11.7926,
      "step": 13
    },
    {
      "epoch": 0.03133946059966852,
      "eval_loss": 11.796106338500977,
      "eval_runtime": 0.9529,
      "eval_samples_per_second": 733.577,
      "eval_steps_per_second": 367.313,
      "step": 13
    },
    {
      "epoch": 0.033750188338104566,
      "grad_norm": 0.18402183055877686,
      "learning_rate": 9.755282581475769e-05,
      "loss": 11.7933,
      "step": 14
    },
    {
      "epoch": 0.03616091607654061,
      "grad_norm": 0.15201744437217712,
      "learning_rate": 9.619397662556435e-05,
      "loss": 11.7894,
      "step": 15
    },
    {
      "epoch": 0.038571643814976644,
      "grad_norm": 0.178207665681839,
      "learning_rate": 9.45503262094184e-05,
      "loss": 11.7895,
      "step": 16
    },
    {
      "epoch": 0.04098237155341269,
      "grad_norm": 0.17396970093250275,
      "learning_rate": 9.263200821770461e-05,
      "loss": 11.7901,
      "step": 17
    },
    {
      "epoch": 0.04339309929184873,
      "grad_norm": 0.18107326328754425,
      "learning_rate": 9.045084971874738e-05,
      "loss": 11.7946,
      "step": 18
    },
    {
      "epoch": 0.045803827030284766,
      "grad_norm": 0.1765798181295395,
      "learning_rate": 8.802029828000156e-05,
      "loss": 11.8012,
      "step": 19
    },
    {
      "epoch": 0.04821455476872081,
      "grad_norm": 0.23805269598960876,
      "learning_rate": 8.535533905932738e-05,
      "loss": 11.7868,
      "step": 20
    },
    {
      "epoch": 0.050625282507156845,
      "grad_norm": 0.2204178422689438,
      "learning_rate": 8.247240241650918e-05,
      "loss": 11.7883,
      "step": 21
    },
    {
      "epoch": 0.05303601024559289,
      "grad_norm": 0.21025092899799347,
      "learning_rate": 7.938926261462366e-05,
      "loss": 11.7956,
      "step": 22
    },
    {
      "epoch": 0.05544673798402893,
      "grad_norm": 0.18470381200313568,
      "learning_rate": 7.612492823579745e-05,
      "loss": 11.7915,
      "step": 23
    },
    {
      "epoch": 0.05785746572246497,
      "grad_norm": 0.1800222247838974,
      "learning_rate": 7.269952498697734e-05,
      "loss": 11.7856,
      "step": 24
    },
    {
      "epoch": 0.06026819346090101,
      "grad_norm": 0.22917437553405762,
      "learning_rate": 6.91341716182545e-05,
      "loss": 11.7965,
      "step": 25
    },
    {
      "epoch": 0.06267892119933705,
      "grad_norm": 0.2602176070213318,
      "learning_rate": 6.545084971874738e-05,
      "loss": 11.7896,
      "step": 26
    },
    {
      "epoch": 0.06267892119933705,
      "eval_loss": 11.790661811828613,
      "eval_runtime": 0.9583,
      "eval_samples_per_second": 729.402,
      "eval_steps_per_second": 365.223,
      "step": 26
    },
    {
      "epoch": 0.0650896489377731,
      "grad_norm": 0.2239828109741211,
      "learning_rate": 6.167226819279528e-05,
      "loss": 11.7856,
      "step": 27
    },
    {
      "epoch": 0.06750037667620913,
      "grad_norm": 0.24870462715625763,
      "learning_rate": 5.782172325201155e-05,
      "loss": 11.7983,
      "step": 28
    },
    {
      "epoch": 0.06991110441464517,
      "grad_norm": 0.22321942448616028,
      "learning_rate": 5.392295478639225e-05,
      "loss": 11.779,
      "step": 29
    },
    {
      "epoch": 0.07232183215308122,
      "grad_norm": 0.21131961047649384,
      "learning_rate": 5e-05,
      "loss": 11.7853,
      "step": 30
    },
    {
      "epoch": 0.07473255989151725,
      "grad_norm": 0.18241849541664124,
      "learning_rate": 4.607704521360776e-05,
      "loss": 11.7763,
      "step": 31
    },
    {
      "epoch": 0.07714328762995329,
      "grad_norm": 0.19740787148475647,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 11.7796,
      "step": 32
    },
    {
      "epoch": 0.07955401536838934,
      "grad_norm": 0.21648883819580078,
      "learning_rate": 3.832773180720475e-05,
      "loss": 11.7823,
      "step": 33
    },
    {
      "epoch": 0.08196474310682537,
      "grad_norm": 0.17037546634674072,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 11.7975,
      "step": 34
    },
    {
      "epoch": 0.08437547084526141,
      "grad_norm": 0.21887211501598358,
      "learning_rate": 3.086582838174551e-05,
      "loss": 11.7907,
      "step": 35
    },
    {
      "epoch": 0.08678619858369746,
      "grad_norm": 0.23544444143772125,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 11.7838,
      "step": 36
    },
    {
      "epoch": 0.0891969263221335,
      "grad_norm": 0.22000566124916077,
      "learning_rate": 2.3875071764202563e-05,
      "loss": 11.7847,
      "step": 37
    },
    {
      "epoch": 0.09160765406056953,
      "grad_norm": 0.28442928194999695,
      "learning_rate": 2.061073738537635e-05,
      "loss": 11.7825,
      "step": 38
    },
    {
      "epoch": 0.09401838179900557,
      "grad_norm": 0.25764262676239014,
      "learning_rate": 1.7527597583490822e-05,
      "loss": 11.7722,
      "step": 39
    },
    {
      "epoch": 0.09401838179900557,
      "eval_loss": 11.787515640258789,
      "eval_runtime": 0.9775,
      "eval_samples_per_second": 715.095,
      "eval_steps_per_second": 358.059,
      "step": 39
    },
    {
      "epoch": 0.09642910953744162,
      "grad_norm": 0.23126229643821716,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 11.7885,
      "step": 40
    },
    {
      "epoch": 0.09883983727587765,
      "grad_norm": 0.21654988825321198,
      "learning_rate": 1.1979701719998453e-05,
      "loss": 11.7704,
      "step": 41
    },
    {
      "epoch": 0.10125056501431369,
      "grad_norm": 0.2214740812778473,
      "learning_rate": 9.549150281252633e-06,
      "loss": 11.7851,
      "step": 42
    },
    {
      "epoch": 0.10366129275274974,
      "grad_norm": 0.24273362755775452,
      "learning_rate": 7.367991782295391e-06,
      "loss": 11.7811,
      "step": 43
    },
    {
      "epoch": 0.10607202049118578,
      "grad_norm": 0.2188863307237625,
      "learning_rate": 5.449673790581611e-06,
      "loss": 11.7789,
      "step": 44
    },
    {
      "epoch": 0.10848274822962181,
      "grad_norm": 0.20905157923698425,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 11.7927,
      "step": 45
    },
    {
      "epoch": 0.11089347596805786,
      "grad_norm": 0.2186051905155182,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 11.7862,
      "step": 46
    },
    {
      "epoch": 0.1133042037064939,
      "grad_norm": 0.2207147777080536,
      "learning_rate": 1.3815039801161721e-06,
      "loss": 11.7808,
      "step": 47
    },
    {
      "epoch": 0.11571493144492993,
      "grad_norm": 0.260870099067688,
      "learning_rate": 6.15582970243117e-07,
      "loss": 11.7878,
      "step": 48
    },
    {
      "epoch": 0.11812565918336598,
      "grad_norm": 0.1919865906238556,
      "learning_rate": 1.5413331334360182e-07,
      "loss": 11.7874,
      "step": 49
    },
    {
      "epoch": 0.12053638692180202,
      "grad_norm": 0.2674182653427124,
      "learning_rate": 0.0,
      "loss": 11.7881,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 10295707336704.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}