|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 10, |
|
"global_step": 54, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0, |
|
"loss": 0.9219, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001292029674220179, |
|
"loss": 0.8431, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00020478185834579558, |
|
"loss": 0.8259, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002584059348440358, |
|
"loss": 0.8513, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8788, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8157, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8119, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8002, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0003, |
|
"loss": 0.743, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0003, |
|
"loss": 0.793, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.22686079972512133, |
|
"eval_loss": 0.7526410222053528, |
|
"eval_runtime": 229.5348, |
|
"eval_samples_per_second": 0.157, |
|
"eval_steps_per_second": 0.009, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7927, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7881, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7832, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7478, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7615, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0003, |
|
"loss": 0.741, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7564, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7315, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7531, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7549, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.22750504660052398, |
|
"eval_loss": 0.7324042320251465, |
|
"eval_runtime": 224.0786, |
|
"eval_samples_per_second": 0.161, |
|
"eval_steps_per_second": 0.009, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7742, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7763, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6916, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6979, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7256, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7626, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7329, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6927, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6781, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0003, |
|
"loss": 0.672, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.2278271700382253, |
|
"eval_loss": 0.7340984344482422, |
|
"eval_runtime": 224.2193, |
|
"eval_samples_per_second": 0.161, |
|
"eval_steps_per_second": 0.009, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6875, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7242, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7121, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6614, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6649, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6056, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0003, |
|
"loss": 0.656, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6624, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6428, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6425, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.22737619722544344, |
|
"eval_loss": 0.7448343634605408, |
|
"eval_runtime": 223.9913, |
|
"eval_samples_per_second": 0.161, |
|
"eval_steps_per_second": 0.009, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6096, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6172, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5967, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5958, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5921, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6026, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5941, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0003, |
|
"loss": 0.612, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6084, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5446, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.22646351415195637, |
|
"eval_loss": 0.7739428281784058, |
|
"eval_runtime": 223.8688, |
|
"eval_samples_per_second": 0.161, |
|
"eval_steps_per_second": 0.009, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5375, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5548, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6092, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4286, |
|
"step": 54 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 135, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 68912311697408.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|