{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.998784511199862,
  "global_step": 7190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.14, "learning_rate": 9.860917941585536e-05, "loss": 1.7018, "step": 100 },
    { "epoch": 0.28, "learning_rate": 9.721835883171071e-05, "loss": 1.5678, "step": 200 },
    { "epoch": 0.42, "learning_rate": 9.582753824756607e-05, "loss": 1.5333, "step": 300 },
    { "epoch": 0.56, "learning_rate": 9.443671766342142e-05, "loss": 1.5235, "step": 400 },
    { "epoch": 0.69, "learning_rate": 9.304589707927678e-05, "loss": 1.4892, "step": 500 },
    { "epoch": 0.83, "learning_rate": 9.165507649513213e-05, "loss": 1.4781, "step": 600 },
    { "epoch": 0.97, "learning_rate": 9.026425591098748e-05, "loss": 1.4319, "step": 700 },
    { "epoch": 1.11, "learning_rate": 8.887343532684285e-05, "loss": 1.3011, "step": 800 },
    { "epoch": 1.25, "learning_rate": 8.74826147426982e-05, "loss": 1.2885, "step": 900 },
    { "epoch": 1.39, "learning_rate": 8.609179415855355e-05, "loss": 1.2524, "step": 1000 },
    { "epoch": 1.53, "learning_rate": 8.47009735744089e-05, "loss": 1.2674, "step": 1100 },
    { "epoch": 1.67, "learning_rate": 8.331015299026426e-05, "loss": 1.2533, "step": 1200 },
    { "epoch": 1.81, "learning_rate": 8.191933240611962e-05, "loss": 1.268, "step": 1300 },
    { "epoch": 1.95, "learning_rate": 8.052851182197498e-05, "loss": 1.2499, "step": 1400 },
    { "epoch": 2.09, "learning_rate": 7.913769123783032e-05, "loss": 1.1782, "step": 1500 },
    { "epoch": 2.23, "learning_rate": 7.774687065368567e-05, "loss": 1.1113, "step": 1600 },
    { "epoch": 2.36, "learning_rate": 7.635605006954103e-05, "loss": 1.1167, "step": 1700 },
    { "epoch": 2.5, "learning_rate": 7.496522948539638e-05, "loss": 1.1422, "step": 1800 },
    { "epoch": 2.64, "learning_rate": 7.357440890125175e-05, "loss": 1.118, "step": 1900 },
    { "epoch": 2.78, "learning_rate": 7.21835883171071e-05, "loss": 1.1133, "step": 2000 },
    { "epoch": 2.92, "learning_rate": 7.079276773296244e-05, "loss": 1.1058, "step": 2100 },
    { "epoch": 3.06, "learning_rate": 6.94019471488178e-05, "loss": 1.0747, "step": 2200 },
    { "epoch": 3.2, "learning_rate": 6.801112656467315e-05, "loss": 1.0059, "step": 2300 },
    { "epoch": 3.34, "learning_rate": 6.662030598052852e-05, "loss": 0.9988, "step": 2400 },
    { "epoch": 3.48, "learning_rate": 6.522948539638388e-05, "loss": 1.0111, "step": 2500 },
    { "epoch": 3.62, "learning_rate": 6.383866481223923e-05, "loss": 1.0237, "step": 2600 },
    { "epoch": 3.75, "learning_rate": 6.244784422809457e-05, "loss": 1.0011, "step": 2700 },
    { "epoch": 3.89, "learning_rate": 6.105702364394992e-05, "loss": 1.0133, "step": 2800 },
    { "epoch": 4.03, "learning_rate": 5.966620305980529e-05, "loss": 0.9724, "step": 2900 },
    { "epoch": 4.17, "learning_rate": 5.827538247566065e-05, "loss": 0.9005, "step": 3000 },
    { "epoch": 4.31, "learning_rate": 5.6884561891515995e-05, "loss": 0.9264, "step": 3100 },
    { "epoch": 4.45, "learning_rate": 5.549374130737135e-05, "loss": 0.933, "step": 3200 },
    { "epoch": 4.59, "learning_rate": 5.41029207232267e-05, "loss": 0.9283, "step": 3300 },
    { "epoch": 4.73, "learning_rate": 5.2712100139082064e-05, "loss": 0.9033, "step": 3400 },
    { "epoch": 4.87, "learning_rate": 5.132127955493742e-05, "loss": 0.9139, "step": 3500 },
    { "epoch": 5.01, "learning_rate": 4.993045897079277e-05, "loss": 0.9383, "step": 3600 },
    { "epoch": 5.15, "learning_rate": 4.853963838664812e-05, "loss": 0.8268, "step": 3700 },
    { "epoch": 5.28, "learning_rate": 4.714881780250348e-05, "loss": 0.841, "step": 3800 },
    { "epoch": 5.42, "learning_rate": 4.5757997218358836e-05, "loss": 0.8594, "step": 3900 },
    { "epoch": 5.56, "learning_rate": 4.436717663421418e-05, "loss": 0.8346, "step": 4000 },
    { "epoch": 5.7, "learning_rate": 4.2976356050069544e-05, "loss": 0.8626, "step": 4100 },
    { "epoch": 5.84, "learning_rate": 4.15855354659249e-05, "loss": 0.8691, "step": 4200 },
    { "epoch": 5.98, "learning_rate": 4.019471488178025e-05, "loss": 0.8424, "step": 4300 },
    { "epoch": 6.12, "learning_rate": 3.880389429763561e-05, "loss": 0.793, "step": 4400 },
    { "epoch": 6.26, "learning_rate": 3.741307371349096e-05, "loss": 0.8025, "step": 4500 },
    { "epoch": 6.4, "learning_rate": 3.6022253129346316e-05, "loss": 0.8245, "step": 4600 },
    { "epoch": 6.54, "learning_rate": 3.463143254520167e-05, "loss": 0.797, "step": 4700 },
    { "epoch": 6.68, "learning_rate": 3.3240611961057024e-05, "loss": 0.8063, "step": 4800 },
    { "epoch": 6.81, "learning_rate": 3.184979137691238e-05, "loss": 0.8042, "step": 4900 },
    { "epoch": 6.95, "learning_rate": 3.0458970792767733e-05, "loss": 0.8068, "step": 5000 },
    { "epoch": 7.09, "learning_rate": 2.906815020862309e-05, "loss": 0.7873, "step": 5100 },
    { "epoch": 7.23, "learning_rate": 2.767732962447844e-05, "loss": 0.7495, "step": 5200 },
    { "epoch": 7.37, "learning_rate": 2.6286509040333796e-05, "loss": 0.7538, "step": 5300 },
    { "epoch": 7.51, "learning_rate": 2.4895688456189153e-05, "loss": 0.7631, "step": 5400 },
    { "epoch": 7.65, "learning_rate": 2.3504867872044508e-05, "loss": 0.7532, "step": 5500 },
    { "epoch": 7.79, "learning_rate": 2.2114047287899862e-05, "loss": 0.7772, "step": 5600 },
    { "epoch": 7.93, "learning_rate": 2.0723226703755216e-05, "loss": 0.7691, "step": 5700 },
    { "epoch": 8.07, "learning_rate": 1.933240611961057e-05, "loss": 0.7544, "step": 5800 },
    { "epoch": 8.21, "learning_rate": 1.7941585535465928e-05, "loss": 0.7243, "step": 5900 },
    { "epoch": 8.34, "learning_rate": 1.655076495132128e-05, "loss": 0.7308, "step": 6000 },
    { "epoch": 8.48, "learning_rate": 1.5159944367176635e-05, "loss": 0.7115, "step": 6100 },
    { "epoch": 8.62, "learning_rate": 1.376912378303199e-05, "loss": 0.7275, "step": 6200 },
    { "epoch": 8.76, "learning_rate": 1.2378303198887344e-05, "loss": 0.7279, "step": 6300 },
    { "epoch": 8.9, "learning_rate": 1.0987482614742698e-05, "loss": 0.7497, "step": 6400 },
    { "epoch": 9.04, "learning_rate": 9.596662030598054e-06, "loss": 0.7163, "step": 6500 },
    { "epoch": 9.18, "learning_rate": 8.205841446453408e-06, "loss": 0.7047, "step": 6600 },
    { "epoch": 9.32, "learning_rate": 6.815020862308763e-06, "loss": 0.7089, "step": 6700 },
    { "epoch": 9.46, "learning_rate": 5.424200278164117e-06, "loss": 0.7078, "step": 6800 },
    { "epoch": 9.6, "learning_rate": 4.033379694019471e-06, "loss": 0.7065, "step": 6900 },
    { "epoch": 9.73, "learning_rate": 2.6425591098748263e-06, "loss": 0.7017, "step": 7000 },
    { "epoch": 9.87, "learning_rate": 1.2517385257301808e-06, "loss": 0.6855, "step": 7100 },
    {
      "epoch": 10.0,
      "step": 7190,
      "total_flos": 6.20936877252096e+16,
      "train_loss": 0.9689414988638463,
      "train_runtime": 6282.5026,
      "train_samples_per_second": 18.333,
      "train_steps_per_second": 1.144
    }
  ],
  "max_steps": 7190,
  "num_train_epochs": 10,
  "total_flos": 6.20936877252096e+16,
  "trial_name": null,
  "trial_params": null
}