---
# Training-run configuration: batch/step settings, data paths, LoRA and
# optimizer hyper-parameters, and experiment-tracking options.
#
# NOTE(review): the key set looks like a mistral-finetune training config;
# confirm against the tool that actually consumes this file.
# NOTE(review): this file was restored from a single-line dump. Nesting was
# inferred from the alphabetical key order at each level and from the repeated
# `data` / `shuffle` keys; verify against the consumer's schema.

batch_size: 1
checkpoint: true
# NOTE(review): ckpt_freq equals max_steps below, so presumably only one
# checkpoint is written during the run; confirm this is intended.
ckpt_freq: 100

data:
  data: /content/data/HansardSequences_250k.big.txt
  eval_instruct_data: ''
  instruct:
    dynamic_chunk_fn_call: true
    shuffle: true
  instruct_data: ''
  shuffle: false

# NOTE(review): no_eval is true further down and eval_instruct_data is empty,
# so eval_freq presumably has no effect; confirm.
eval_freq: 100
log_freq: 1

lora:
  dropout: 0.0
  enable: true
  rank: 64
  scaling: 2.0

max_norm: 1.0
max_steps: 100

mlflow:
  experiment_name: null
  tracking_uri: null

model_id_or_path: /content/mistral_models/7B-v0.3
no_ckpt: false
no_eval: true
num_ckpt_keep: 3
num_microbatches: 8

optim:
  lr: 0.0001
  pct_start: 0.05
  weight_decay: 0.1

run_dir: /content/debategpt
save_adapters: true
seed: 0
seq_len: 8192

wandb:
  key: null
  offline: false
  project: null
  run_name: null

world_size: 1