sehyun66 committed on
Commit
0539e24
1 Parent(s): bb6d0fd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +28 -1
README.md CHANGED
@@ -11,4 +11,31 @@ tags:
11
  - HH-RLHF
12
  - PPO
13
  - lama-1.3B
14
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  - HH-RLHF
12
  - PPO
13
  - lama-1.3B
14
+ ---
15
+
16
+ # RLHF with PPOTrainer and LoRA
17
+
18
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/piXU-OqDgrBKs7qR7fICw.png)
19
+
20
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/OXD2TqlQQY9NuC7JTiv_H.png)
21
+
22
+ # Hyperparameters
23
+
24
+ ## PPO
25
+ learning_rate=5e-6,
26
+ batch_size=32,
27
+ mini_batch_size=1,
28
+ horizon=10000,
29
+ cliprange=0.2,
30
+ cliprange_value=0.2,
31
+ lam=0.95,
32
+ target_kl=2,
33
+ use_score_scaling=True,
34
+ log_with='wandb'
35
+
36
+ ## LoRA
37
+ r=16,
38
+ lora_alpha=32,
39
+ lora_dropout=0.05,
40
+ bias="none",
41
+ task_type="CAUSAL_LM",