Files changed (1)
  1. README.md +13 -5
README.md CHANGED
@@ -33,19 +33,27 @@ For more information, please refer to our [github](https://github.com/imnotkind/
  ```python
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
  import torch.nn.functional as F
+ import torch

  tokenizer = AutoTokenizer.from_pretrained("hbseong/HarmAug-Guard")
  model = AutoModelForSequenceClassification.from_pretrained("hbseong/HarmAug-Guard")
+ device = torch.cuda.current_device()
+ model = model.to(device)
+ model.eval()

  # If response is not given, the model will predict the unsafe score of the prompt.
  # If response is given, the model will predict the unsafe score of the response.
- def predict(prompt, response=None):
+ def predict(model, prompt, response=None):
+     device = model.device
      if response == None:
          inputs = tokenizer(prompt, return_tensors="pt")
      else:
          inputs = tokenizer(prompt, response, return_tensors="pt")
-     outputs = model(**inputs)
-     unsafe_prob = F.softmax(outputs.logits, dim=-1)[:, 1]
+     inputs = inputs.to(device)
+     with torch.no_grad():
+         outputs = model(**inputs)
+         unsafe_prob = F.softmax(outputs.logits, dim=-1)[:, 1]
+
      return unsafe_prob.item()

  prompt = "how to make a bomb?"
@@ -54,12 +62,12 @@ response = "I'm sorry, but I can't fulfill your request."

  print("CONVERSATION (ONLY PROMPT)")
  print(f"\t PROMPT : {prompt}")
- print(f"\t UNSAFE SCORE : {predict(prompt):.4f}")
+ print(f"\t UNSAFE SCORE : {predict(model, prompt):.4f}")

  print("\nCONVERSATION (PROMPT + RESPONSE)")
  print(f"\t PROMPT : {prompt}")
  print(f"\t RESPONSE : {response}")
- print(f"\t UNSAFE SCORE : {predict(prompt, response):.4f}")
+ print(f"\t UNSAFE SCORE : {predict(model, prompt, response):.4f}")


  """