sehyun66 committed on
Commit
da9c395
1 Parent(s): 5bc5370

Create README.md

Browse files

Trained with ppo_trainer

Parameters:
adap_kl_ctrl :true
backward_batch_size :1
batch_size :32
cliprange :0.2
cliprange_value :0.2
compare_steps :1
early_stopping :false
exp_name :"example_1_3b"
forward_batch_size :null
gamma :1
global_backward_batch_size :1
global_batch_size :32
gradient_accumulation_steps :1
horizon :10,000
init_kl_coef :0.2
is_encoder_decoder :false
is_peft_model :true
kl_penalty :"kl"
lam :0.95
learning_rate :0.000005
log_with :"wandb"
max_grad_norm :null
mini_batch_size :1
model_name :null
optimize_cuda_cache :null
optimize_device_cache :false
ppo_epochs :4
query_dataset :null
ratio_threshold :10
remove_unused_columns :true
reward_model :null
score_clip :null
seed :0
steps :20,000
target :6
target_kl :2
task_name :null
total_ppo_epochs :3
tracker_project_name :"trl"
use_score_norm :false
use_score_scaling :true
vf_coef :0.1
whiten_rewards :false
world_size :1

![W&B Chart 1_13_2024, 7_31_37 PM.png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/IFNEMnDS0B0pSCSQGpHcJ.png)


![regplot-0.png](https://cdn-uploads.huggingface.co/production/uploads/64c0be34e175dd56a57151ca/u1ebamFLrhvMRnY1pDF6P.png)

Files changed (1) hide show
  1. README.md +14 -0
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - HuggingFaceH4/ultrafeedback_binarized
5
+ language:
6
+ - en
7
+ library_name: transformers
8
+ pipeline_tag: question-answering
9
+ tags:
10
+ - human feedback
11
+ - HH-RLHF
12
+ - PPO
13
+ - llama-1.3B
14
+ ---