{ "best_metric": 5.878845691680908, "best_model_checkpoint": "./Qwen1-5-4B-Chat-hindi-sft/checkpoint-50", "epoch": 0.007214486689272059, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.076544761657715, "learning_rate": 4.807692307692308e-06, "loss": 6.0722, "step": 25 }, { "epoch": 0.0, "eval_loss": 6.239687919616699, "eval_runtime": 258.5805, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "step": 25 }, { "epoch": 0.01, "grad_norm": 2.1901891231536865, "learning_rate": 9.615384615384616e-06, "loss": 6.1105, "step": 50 }, { "epoch": 0.01, "eval_loss": 5.878845691680908, "eval_runtime": 259.0078, "eval_samples_per_second": 1.085, "eval_steps_per_second": 1.085, "step": 50 } ], "logging_steps": 25, "max_steps": 34650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 25, "total_flos": 1263381335884800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }