# Prompt Format

The system preamble is Turkish for "You are a doctor. Answer the questions accordingly." The two reserved special tokens delimit the instruction and response fields of the Alpaca-style template:

```
alpaca_prompt = """Sen bir doktorsun. Soruları buna göre cevapla.

### <|reserved_special_token_0|>:
{}

### <|reserved_special_token_1|>:
{}"""
```

# Training args

```
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

batch_size = 128
gradient_accumulation_steps = 32
num_train_epochs = 2

# 128 / 32 = 4 samples per device; gradient accumulation keeps the
# effective batch size at 128.
per_device_batch_size = int(batch_size / gradient_accumulation_steps)

training_args = TrainingArguments(
    per_device_train_batch_size = per_device_batch_size,
    per_device_eval_batch_size = per_device_batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    save_total_limit = 1,
    warmup_steps = int(2000 / batch_size),  # 15 optimizer steps
    num_train_epochs = num_train_epochs,
    learning_rate = 1e-4,
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = output_dir,  # defined elsewhere
    save_strategy = "steps",
    eval_strategy = "steps",
    logging_strategy = "steps",
    save_steps = int(5000 / batch_size * num_train_epochs),     # every 78 steps
    eval_steps = int(28900 / batch_size * num_train_epochs),    # every 451 steps
    logging_steps = int(28900 / batch_size * num_train_epochs), # every 451 steps
)
```

# Trainer args

```
from trl import SFTTrainer

max_seq_length = 8192

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,
    packing = False,  # Packing can make training 5x faster for short sequences.
    args = training_args,
)
```

# From pretrained args

```
from unsloth import FastLanguageModel

dtype = None         # None auto-detects: bfloat16 if supported, else float16
load_in_4bit = False

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = output_dir,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
```

# Peft model args

```
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,  # LoRA rank
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,  # scaling factor; alpha / r = 2
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)
```
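
# Formatting the "text" column (sketch)

`SFTTrainer` reads the fully formatted prompt from the dataset's `text` column (`dataset_text_field = "text"`). Below is a minimal sketch of how that column could be built from `alpaca_prompt`; the `instruction`/`output` column names are hypothetical assumptions, and the EOS token is appended so the model learns to stop generating:

```
EOS_TOKEN = tokenizer.eos_token  # appended so generation terminates

def formatting_prompts_func(examples):
    # Hypothetical column names; adapt to the actual dataset schema.
    texts = [
        alpaca_prompt.format(instruction, output) + EOS_TOKEN
        for instruction, output in zip(examples["instruction"], examples["output"])
    ]
    return {"text": texts}

train_dataset = train_dataset.map(formatting_prompts_func, batched = True)
eval_dataset = eval_dataset.map(formatting_prompts_func, batched = True)
```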
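
# Inference (sketch)

A minimal sketch of querying the fine-tuned model with the same template, assuming the question fills the first slot and the second slot is left empty for the model to complete. The example question is hypothetical (Turkish for "What do you recommend for a headache?"):

```
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path

inputs = tokenizer(
    [alpaca_prompt.format("Baş ağrısı için ne önerirsiniz?", "")],
    return_tensors = "pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens = 256)
print(tokenizer.batch_decode(outputs, skip_special_tokens = True)[0])
```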