File size: 2,620 Bytes
a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 9ff5857 a496d21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
{
"best_metric": 3.6455252170562744,
"best_model_checkpoint": "/Users/frapadovani/Desktop/babyLM_controlled/models_trained/convergence_french/random_sentence_french/checkpoint-8000",
"epoch": 0.23687561069493382,
"eval_steps": 2000,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.059218902673733455,
"grad_norm": 1.0812722444534302,
"learning_rate": 0.0001,
"loss": 4.7523,
"step": 2000
},
{
"epoch": 0.059218902673733455,
"eval_loss": 4.0236735343933105,
"eval_runtime": 5.173,
"eval_samples_per_second": 425.477,
"eval_steps_per_second": 26.677,
"step": 2000
},
{
"epoch": 0.11843780534746691,
"grad_norm": 1.2557106018066406,
"learning_rate": 0.0001,
"loss": 3.8958,
"step": 4000
},
{
"epoch": 0.11843780534746691,
"eval_loss": 3.8087611198425293,
"eval_runtime": 5.2666,
"eval_samples_per_second": 417.913,
"eval_steps_per_second": 26.203,
"step": 4000
},
{
"epoch": 0.17765670802120037,
"grad_norm": 1.3326870203018188,
"learning_rate": 0.0001,
"loss": 3.7344,
"step": 6000
},
{
"epoch": 0.17765670802120037,
"eval_loss": 3.704986333847046,
"eval_runtime": 5.2277,
"eval_samples_per_second": 421.023,
"eval_steps_per_second": 26.398,
"step": 6000
},
{
"epoch": 0.23687561069493382,
"grad_norm": 1.38993239402771,
"learning_rate": 0.0001,
"loss": 3.6379,
"step": 8000
},
{
"epoch": 0.23687561069493382,
"eval_loss": 3.6455252170562744,
"eval_runtime": 5.1485,
"eval_samples_per_second": 427.5,
"eval_steps_per_second": 26.804,
"step": 8000
}
],
"logging_steps": 2000,
"max_steps": 33773,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 155952611328000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|