{ "best_metric": 3.555619239807129, "best_model_checkpoint": "/Users/frapadovani/Desktop/babyLM_controlled/models_trained/convergence_french/random_sentence_french/checkpoint-14000", "epoch": 0.41453231871613416, "eval_steps": 2000, "global_step": 14000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.059218902673733455, "grad_norm": 1.0812722444534302, "learning_rate": 0.0001, "loss": 4.7523, "step": 2000 }, { "epoch": 0.059218902673733455, "eval_loss": 4.0236735343933105, "eval_runtime": 5.173, "eval_samples_per_second": 425.477, "eval_steps_per_second": 26.677, "step": 2000 }, { "epoch": 0.11843780534746691, "grad_norm": 1.2557106018066406, "learning_rate": 0.0001, "loss": 3.8958, "step": 4000 }, { "epoch": 0.11843780534746691, "eval_loss": 3.8087611198425293, "eval_runtime": 5.2666, "eval_samples_per_second": 417.913, "eval_steps_per_second": 26.203, "step": 4000 }, { "epoch": 0.17765670802120037, "grad_norm": 1.3326870203018188, "learning_rate": 0.0001, "loss": 3.7344, "step": 6000 }, { "epoch": 0.17765670802120037, "eval_loss": 3.704986333847046, "eval_runtime": 5.2277, "eval_samples_per_second": 421.023, "eval_steps_per_second": 26.398, "step": 6000 }, { "epoch": 0.23687561069493382, "grad_norm": 1.38993239402771, "learning_rate": 0.0001, "loss": 3.6379, "step": 8000 }, { "epoch": 0.23687561069493382, "eval_loss": 3.6455252170562744, "eval_runtime": 5.1485, "eval_samples_per_second": 427.5, "eval_steps_per_second": 26.804, "step": 8000 }, { "epoch": 0.29609451336866727, "grad_norm": 1.3884963989257812, "learning_rate": 0.0001, "loss": 3.5687, "step": 10000 }, { "epoch": 0.29609451336866727, "eval_loss": 3.6062421798706055, "eval_runtime": 5.1466, "eval_samples_per_second": 427.661, "eval_steps_per_second": 26.814, "step": 10000 }, { "epoch": 0.35531341604240074, "grad_norm": 1.435062289237976, "learning_rate": 0.0001, "loss": 3.5165, "step": 12000 }, { "epoch": 0.35531341604240074, "eval_loss": 3.5764389038085938, "eval_runtime": 5.132, "eval_samples_per_second": 428.881, "eval_steps_per_second": 26.89, "step": 12000 }, { "epoch": 0.41453231871613416, "grad_norm": 1.3539327383041382, "learning_rate": 0.0001, "loss": 3.4738, "step": 14000 }, { "epoch": 0.41453231871613416, "eval_loss": 3.555619239807129, "eval_runtime": 5.1586, "eval_samples_per_second": 426.667, "eval_steps_per_second": 26.752, "step": 14000 } ], "logging_steps": 2000, "max_steps": 33773, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 272917069824000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }