Kukedlc committed on
Commit
c0a43e5
1 Parent(s): 7ef7b80

Update README.md

Files changed (1)
  1. README.md +56 -0
README.md CHANGED
@@ -6,6 +6,62 @@ license: apache-2.0
  
  ## Fine-tuning + length (choose)
  
+ - Training Args:
+
+ ```python
+ import torch
+ from peft import LoraConfig
+ from transformers import AutoModelForCausalLM, TrainingArguments
+ from trl import DPOTrainer
+
+ # LoRA configuration
+ peft_config = LoraConfig(
+     r=16,
+     lora_alpha=16,
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM",
+     target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
+ )
+
+ # Model to fine-tune (4-bit quantized weights, fp16 compute dtype)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16,
+     load_in_4bit=True
+ )
+ model.config.use_cache = False
+
+ # Training arguments
+ training_args = TrainingArguments(
+     per_device_train_batch_size=4,
+     gradient_accumulation_steps=4,
+     gradient_checkpointing=True,
+     learning_rate=5e-5,
+     lr_scheduler_type="cosine",
+     max_steps=120,
+     save_strategy="no",
+     logging_steps=1,
+     output_dir=new_model,
+     optim="paged_adamw_32bit",
+     warmup_steps=50,
+     bf16=True,
+     report_to="wandb",
+ )
+
+ # Create DPO trainer
+ dpo_trainer = DPOTrainer(
+     model,
+     args=training_args,
+     train_dataset=dataset,
+     tokenizer=tokenizer,
+     peft_config=peft_config,
+     beta=0.1,
+     max_prompt_length=1024,
+     max_length=1536,
+ )
+
+ # Fine-tune model with DPO
+ dpo_trainer.train()
+ ```
+
  
  
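The block above stops at `dpo_trainer.train()`. As a minimal follow-up sketch (assuming the same `dpo_trainer`, `tokenizer`, and `new_model` variables from that block and a recent `peft` release), the DPO-trained LoRA adapter could be saved and then merged back into the base model for standalone use:

```python
import torch
from peft import AutoPeftModelForCausalLM

# Save the LoRA adapter produced by DPO training, plus the tokenizer
dpo_trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# Reload the adapter on top of its base model in fp16, fold the LoRA
# weights into the base weights, and save a standalone merged model
# ("{new_model}-merged" is a hypothetical output path)
merged = AutoPeftModelForCausalLM.from_pretrained(new_model, torch_dtype=torch.float16)
merged = merged.merge_and_unload()
merged.save_pretrained(f"{new_model}-merged")
```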