Spaces:

nicholasKluge
/

Aira-Demo

Running

App Files Community

nicholasKluge committed on Dec 1, 2023

Commit

8a881cc

1 Parent(s): bb6b840

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -17

app.py CHANGED Viewed

@@ -9,8 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequen
 # download the instruct-aira-dataset
-#dataset = load_dataset("nicholasKluge/instruct-aira-dataset", split='english')
-dataset = load_dataset("parquet", data_files="instruct-aira-dataset.parquet")
 # convert the dataset to a pandas dataframe
 df = dataset['train'].to_pandas()
@@ -184,33 +183,33 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped') as demo:
             toxicities = list()
         for text in decoded_text:
-          reward_tokens = rewardTokenizer(user_msg, text,
                         truncation=True,
                         max_length=512,
                         return_token_type_ids=False,
                         return_tensors="pt",
                         return_attention_mask=True)
-          reward_tokens.to(rewardModel.device)
-          reward = rewardModel(**reward_tokens)[0].item()
-          rewards.append(reward)
-          if safety == "On":
-              toxicity_tokens = toxiciyTokenizer(user_msg + " " + text,
                             truncation=True,
                             max_length=512,
                             return_token_type_ids=False,
                             return_tensors="pt",
                             return_attention_mask=True)
-              toxicity_tokens.to(toxicityModel.device)
-              toxicity = toxicityModel(**toxicity_tokens)[0].item()
-              toxicities.append(toxicity)
-              toxicity_threshold = 5
         ordered_generations = sorted(zip(decoded_text, rewards, toxicities), key=lambda x: x[1], reverse=True)

 # download the instruct-aira-dataset
+dataset = load_dataset("nicholasKluge/instruct-aira-dataset", split='english')
 # convert the dataset to a pandas dataframe
 df = dataset['train'].to_pandas()
             toxicities = list()
         for text in decoded_text:
+            reward_tokens = rewardTokenizer(user_msg, text,
                         truncation=True,
                         max_length=512,
                         return_token_type_ids=False,
                         return_tensors="pt",
                         return_attention_mask=True)
+            reward_tokens.to(rewardModel.device)
+            reward = rewardModel(**reward_tokens)[0].item()
+            rewards.append(reward)
+            if safety == "On":
+                toxicity_tokens = toxiciyTokenizer(user_msg + " " + text,
                             truncation=True,
                             max_length=512,
                             return_token_type_ids=False,
                             return_tensors="pt",
                             return_attention_mask=True)
+                toxicity_tokens.to(toxicityModel.device)
+                toxicity = toxicityModel(**toxicity_tokens)[0].item()
+                toxicities.append(toxicity)
+                toxicity_threshold = 5
         ordered_generations = sorted(zip(decoded_text, rewards, toxicities), key=lambda x: x[1], reverse=True)