Spaces:
Running
Running
Commit
·
8a881cc
1
Parent(s):
bb6b840
Update app.py
Browse files
app.py
CHANGED
@@ -9,8 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequen
|
|
9 |
|
10 |
|
11 |
# download the instruct-aira-dataset
|
12 |
-
|
13 |
-
dataset = load_dataset("parquet", data_files="instruct-aira-dataset.parquet")
|
14 |
|
15 |
# convert the dataset to a pandas dataframe
|
16 |
df = dataset['train'].to_pandas()
|
@@ -184,33 +183,33 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
|
|
184 |
toxicities = list()
|
185 |
|
186 |
for text in decoded_text:
|
187 |
-
|
188 |
truncation=True,
|
189 |
max_length=512,
|
190 |
return_token_type_ids=False,
|
191 |
return_tensors="pt",
|
192 |
return_attention_mask=True)
|
|
|
|
|
|
|
|
|
|
|
193 |
|
194 |
-
|
195 |
|
196 |
-
|
197 |
-
rewards.append(reward)
|
198 |
-
|
199 |
-
if safety == "On":
|
200 |
-
|
201 |
-
toxicity_tokens = toxiciyTokenizer(user_msg + " " + text,
|
202 |
truncation=True,
|
203 |
max_length=512,
|
204 |
return_token_type_ids=False,
|
205 |
return_tensors="pt",
|
206 |
return_attention_mask=True)
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
|
215 |
ordered_generations = sorted(zip(decoded_text, rewards, toxicities), key=lambda x: x[1], reverse=True)
|
216 |
|
|
|
9 |
|
10 |
|
11 |
# download the instruct-aira-dataset
|
12 |
+
dataset = load_dataset("nicholasKluge/instruct-aira-dataset", split='english')
|
|
|
13 |
|
14 |
# convert the dataset to a pandas dataframe
|
15 |
# `split='english'` makes load_dataset return a single Dataset (not a DatasetDict),
# so it is converted directly; indexing it with 'train' would be a column lookup and fail.
df = dataset.to_pandas()
|
|
|
183 |
toxicities = list()
|
184 |
|
185 |
for text in decoded_text:
|
186 |
+
reward_tokens = rewardTokenizer(user_msg, text,
|
187 |
truncation=True,
|
188 |
max_length=512,
|
189 |
return_token_type_ids=False,
|
190 |
return_tensors="pt",
|
191 |
return_attention_mask=True)
|
192 |
+
|
193 |
+
reward_tokens.to(rewardModel.device)
|
194 |
+
|
195 |
+
reward = rewardModel(**reward_tokens)[0].item()
|
196 |
+
rewards.append(reward)
|
197 |
|
198 |
+
if safety == "On":
|
199 |
|
200 |
+
toxicity_tokens = toxiciyTokenizer(user_msg + " " + text,
|
|
|
|
|
|
|
|
|
|
|
201 |
truncation=True,
|
202 |
max_length=512,
|
203 |
return_token_type_ids=False,
|
204 |
return_tensors="pt",
|
205 |
return_attention_mask=True)
|
206 |
+
|
207 |
+
toxicity_tokens.to(toxicityModel.device)
|
208 |
+
|
209 |
+
toxicity = toxicityModel(**toxicity_tokens)[0].item()
|
210 |
+
toxicities.append(toxicity)
|
211 |
+
|
212 |
+
toxicity_threshold = 5
|
213 |
|
214 |
ordered_generations = sorted(zip(decoded_text, rewards, toxicities), key=lambda x: x[1], reverse=True)
|
215 |
|