{ "model_name_or_path": "Qwen/Qwen2-7B-Instruct", "stage": "sft", "do_train": true, "finetuning_type": "lora", "lora_target": "all", "quantization_bit": 4, "loraplus_lr_ratio": 16.0, "dataset": "mgtv_train", "template": "alpaca", "cutoff_len": 1024, "max_samples": 50, "overwrite_cache": true, "preprocessing_num_workers": 16, "output_dir": "/content/qwen2-7b", "logging_steps": 10, "save_steps": 10, "plot_loss": true, "overwrite_output_dir": true, "resume_from_checkpoint": true, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 8, "learning_rate": 0.0001, "num_train_epochs": 6.0, "lr_scheduler_type": "cosine", "warmup_ratio": 0.1, "bf16": true, "ddp_timeout": 180000000, "val_size": 0.02, "per_device_eval_batch_size": 1, "eval_strategy": "steps", "eval_steps": 10, "report_to": "wandb" }