Update README.md
Browse files
README.md
CHANGED
@@ -37,11 +37,12 @@ We evaluate GRM 2B on the [reward model benchmark](https://huggingface.co/spaces
|
|
37 |
import torch
|
38 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
39 |
|
|
|
40 |
# load model and tokenizer
|
41 |
tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-Gemma-2B-sftreg')
|
42 |
reward_model = AutoModelForSequenceClassification.from_pretrained(
|
43 |
'Ray2333/GRM-Gemma-2B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
|
44 |
-
device_map=
|
45 |
)
|
46 |
message = [
|
47 |
{'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
|
@@ -54,7 +55,7 @@ kwargs = {"padding": 'max_length', "truncation": True, "return_tensors": "pt"}
|
|
54 |
tokens = tokenizer.encode_plus(message_template, **kwargs)
|
55 |
|
56 |
with torch.no_grad():
|
57 |
-
_, _, reward_tensor =
|
58 |
reward = reward_tensor.cpu().detach().item()
|
59 |
```
|
60 |
|
|
|
37 |
import torch
|
38 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
39 |
|
40 |
+
device = 'cuda:2'
|
41 |
# load model and tokenizer
|
42 |
tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-Gemma-2B-sftreg')
|
43 |
reward_model = AutoModelForSequenceClassification.from_pretrained(
|
44 |
'Ray2333/GRM-Gemma-2B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
|
45 |
+
device_map=device,
|
46 |
)
|
47 |
message = [
|
48 |
{'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
|
|
|
55 |
tokens = tokenizer.encode_plus(message_template, **kwargs)
|
56 |
|
57 |
with torch.no_grad():
|
58 |
+
_, _, reward_tensor = reward_model(tokens["input_ids"][0].view(1,-1).to(device), attention_mask=tokens["attention_mask"][0].view(1,-1).to(device))
|
59 |
reward = reward_tensor.cpu().detach().item()
|
60 |
```
|
61 |
|