model_name: "google/gemma-2-2b-it"
new_model_name: "gemma-2-2b-ft"

# LoRA parameters
lora_r: 64
lora_alpha: 16
lora_dropout: 0.1

# bitsandbytes parameters
use_4bit: True
bnb_4bit_compute_dtype: "float16"
bnb_4bit_quant_type: "nf4"
use_nested_quant: False

# Training arguments
num_train_epochs: 1
fp16: False
bf16: False
per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 2
gradient_checkpointing: True
eval_strategy: "steps"
eval_steps: 0.2
max_grad_norm: 0.3
learning_rate: 2e-4
weight_decay: 0.001
optimizer: "paged_adamw_32bit"
lr_scheduler_type: "constant"
max_steps: -1
warmup_steps: 5
group_by_length: True
save_steps: 50
logging_steps: 50
logging_strategy: "steps"

# SFT arguments
max_seq_length: 128
packing: True
device_map: "auto"
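
A minimal sketch of how a training script might consume this file, assuming it is saved as `config.yaml` and the script is built on `transformers` and `peft`. The filename and the key-to-argument mapping shown here are assumptions, not part of the config itself; only the quantization and LoRA sections are wired up, and the training/SFT keys would be passed to a trainer in the same way.

```python
import yaml
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig

# Load the YAML config (assumed filename: config.yaml)
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# 4-bit quantization settings from the bitsandbytes section
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["use_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],
    bnb_4bit_compute_dtype=getattr(torch, cfg["bnb_4bit_compute_dtype"]),
    bnb_4bit_use_double_quant=cfg["use_nested_quant"],
)

# LoRA adapter settings; bias and task_type are assumed defaults for causal LM tuning
peft_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Load the quantized base model and its tokenizer
tokenizer = AutoTokenizer.from_pretrained(cfg["model_name"])
model = AutoModelForCausalLM.from_pretrained(
    cfg["model_name"],
    quantization_config=bnb_config,
    device_map=cfg["device_map"],
)

# The remaining keys (learning_rate, optimizer, eval_steps, max_seq_length,
# packing, ...) would map onto the trainer's arguments, e.g. trl's SFTTrainer;
# that mapping depends on the consuming script and is not shown here.
```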