philschmid HF staff committed on
Commit
23a4ca8
1 Parent(s): f6bdb40

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +78 -0
README.md CHANGED
@@ -97,6 +97,84 @@ print(helpsteer_rewards_pred)
97
  # [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
98
  ```
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  ## Citation
101
 
102
  If you find this work useful for your research, please consider citing:
 
97
  # [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
98
  ```
99
 
100
+ ## Easy to use Pipeline
101
+
102
+ ```python
103
+ from typing import Dict, List
104
+ import torch
105
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
106
+
107
+
108
+ class ArmoRMPipeline:
109
+ def __init__(self, model_id, device_map="auto", torch_dtype=torch.bfloat16, truncation=True, trust_remote_code=False, max_length=4096):
110
+ self.model = AutoModelForSequenceClassification.from_pretrained(
111
+ model_id,
112
+ device_map=device_map,
113
+ trust_remote_code=trust_remote_code,
114
+ torch_dtype=torch_dtype,
115
+ )
116
+ self.tokenizer = AutoTokenizer.from_pretrained(
117
+ model_id,
118
+ use_fast=True,
119
+ )
120
+ self.truncation = truncation
121
+ self.device = self.model.device
122
+ self.max_length = max_length
123
+
124
+ def __call__(self, messages: List[Dict[str, str]]) -> Dict[str, float]:
125
+ """
126
+ messages: OpenAI chat messages to be scored
127
+ Note: no batching, because inputs of different lengths would have to be padded to the max length, which is not efficient
128
+ Returns: a dictionary with the score between 0 and 1
129
+ """
130
+ input_ids = self.tokenizer.apply_chat_template(
131
+ messages,
132
+ return_tensors="pt",
133
+ padding=True,
134
+ truncation=self.truncation,
135
+ max_length=self.max_length,
136
+ ).to(self.device)
137
+ with torch.no_grad():
138
+ output = self.model(input_ids)
139
+ score = output.score.float().item()
140
+ return {"score": score}
141
+
142
+ # Create Reward Model Pipeline
143
+ prompt = 'What are some synonyms for the word "beautiful"?'
144
+ rm = ArmoRMPipeline("RLHFlow/ArmoRM-Llama3-8B-v0.1", trust_remote_code=True)
145
+ # score the messages
146
+ response1 = 'Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant'
147
+ score1 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}])
148
+ print(score1)
149
+
150
+ response2 = '''Certainly! Here are some synonyms for the word "beautiful":
151
+
152
+ 1. Gorgeous
153
+ 2. Lovely
154
+ 3. Stunning
155
+ 4. Attractive
156
+ 5. Pretty
157
+ 6. Elegant
158
+ 7. Exquisite
159
+ 8. Handsome
160
+ 9. Charming
161
+ 10. Alluring
162
+ 11. Radiant
163
+ 12. Magnificent
164
+ 13. Graceful
165
+ 14. Enchanting
166
+ 15. Dazzling
167
+
168
+ These synonyms can be used in various contexts to convey the idea of beauty.'''
169
+ score2 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}])
170
+ print(score2)
171
+
172
+ response3 = 'Sorry i cannot answer this.'
173
+ score3 = rm([{"role": "user", "content": prompt}, {"role": "assistant", "content": response3}])
174
+ print(score3)
175
+
176
+ ```
177
+
178
  ## Citation
179
 
180
  If you find this work useful for your research, please consider citing: