add modularity to model selection + new 1on1 models
- app.py +18 -18
- predictors.py +78 -29
app.py
CHANGED
@@ -6,18 +6,21 @@ from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date
 from utils import extract_text_from_pdf, len_validator
+import yaml
 
 np.set_printoptions(suppress=True)
 
+with open("config.yaml", "r") as file:
+    params = yaml.safe_load(file)
 
-def ai_generated_test(option, input):
+model_list = params["MC_OUTPUT_LABELS"]
+
+
+def ai_generated_test(option, input, models):
     if option == "Human vs AI":
         return predict_bc_scores(input), None
     elif option == "Human vs AI Source Models":
-        return predict_bc_scores(input), predict_mc_scores(input)
-        # elif option == "Human vs AI Source Models (1 on 1)":
-        #     return predict_bc_scores(input), None, predict_1on1_scores(input)
-
+        return predict_bc_scores(input), predict_1on1_scores(input, models)
     return None, None
 
 
@@ -26,7 +29,7 @@ def main(
     ai_option,
     plag_option,
     input,
-
+    models,
     year_from,
     month_from,
     day_from,
@@ -49,14 +52,12 @@ def main(
     )
     depth_analysis_plot = depth_analysis(input)
     bc_score = predict_bc_scores(input)
-    mc_score = predict_mc_scores(input)
-    mc_1on1_score = predict_1on1_scores(input)
+    mc_score = predict_1on1_scores(input, models)
     quilscore = predict_quillbot(input)
 
     return (
         bc_score,
         mc_score,
-        mc_1on1_score,
         formatted_tokens,
         depth_analysis_plot,
         quilscore,
@@ -88,7 +89,6 @@ with gr.Blocks() as demo:
     d1 = today.strftime("%d/%B/%Y")
     d1 = d1.split("/")
 
-    model_list = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "LLAMA2"]
     domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]
     gr.Markdown(
         """
@@ -142,12 +142,12 @@ with gr.Blocks() as demo:
         """
     )
 
-
-
-
-
-
-
+    models = gr.Dropdown(
+        model_list,
+        value=model_list,
+        multiselect=True,
+        label="Models to test against",
+    )
 
     with gr.Row():
         with gr.Column():
@@ -210,7 +210,7 @@ with gr.Blocks() as demo:
             ai_option,
             plag_option,
             input_text,
-
+            models,
             year_from,
             month_from,
             day_from,
@@ -232,7 +232,7 @@ with gr.Blocks() as demo:
 
     only_ai_btn.click(
         fn=ai_generated_test,
-        inputs=[ai_option, input_text],
+        inputs=[ai_option, input_text, models],
         # outputs=[bcLabel, mcLabel, mc1on1Label],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
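The hard-coded model_list in app.py is replaced by a list read from config.yaml (key MC_OUTPUT_LABELS), and the new multiselect Dropdown passes the currently selected labels into ai_generated_test and main as the models argument. The config file itself is not part of this diff, so the sketch below assumes its contents; only the MC_OUTPUT_LABELS key is actually read, and the label values are reused from the hard-coded list this commit removes.

# Sketch only: config.yaml is not shown in this diff, so its contents are assumed.
import yaml

assumed_config = """
MC_OUTPUT_LABELS:
  - OpenAI GPT
  - Mistral
  - CLAUDE
  - Gemini
  - LLAMA2
"""

params = yaml.safe_load(assumed_config)
model_list = params["MC_OUTPUT_LABELS"]
print(model_list)  # ['OpenAI GPT', 'Mistral', 'CLAUDE', 'Gemini', 'LLAMA2']

Because the Dropdown is created with multiselect=True and value=model_list, Gradio hands the handler a plain Python list of the selected label strings, so models defaults to every label and shrinks to whatever subset the user keeps selected.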
predictors.py
CHANGED
@@ -46,13 +46,13 @@ quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path)
 quillbot_model = AutoModelForSequenceClassification.from_pretrained(
     text_quillbot_model_path
 ).to(device)
-
-
-
-
-
-
-
+tokenizers_1on1 = {}
+models_1on1 = {}
+for model_name, model in zip(mc_label_map, text_1on1_models):
+    tokenizers_1on1[model_name] = AutoTokenizer.from_pretrained(model)
+    models_1on1[model_name] = (
+        AutoModelForSequenceClassification.from_pretrained(model).to(device)
+    )
 
 
 def split_text_allow_complete_sentences_nltk(
@@ -225,7 +225,6 @@ def predict_mc_scores(input):
 
 def predict_bc_scores(input):
     bc_scores = []
-    mc_scores = []
     samples_len_bc = len(
         split_text_allow_complete_sentences_nltk(input, type_det="bc")
     )
@@ -265,7 +264,16 @@ def predict_1on1_combined(input):
     return predictions
 
 
-def predict_1on1_scores(input):
+def predict_1on1_single(input, model):
+    predictions = predict_1on1(
+        models_1on1[model], tokenizers_1on1[model], input
+    )[1]
+    return predictions
+
+
+def predict_1on1_scores(input, models):
+
+    print(f"Models to Test: {models}")
     # BC SCORE
     bc_scores = []
     samples_len_bc = len(
@@ -282,27 +290,68 @@ def predict_1on1_scores(input):
     bc_score = {"AI": bc_score_list[1], "HUMAN": bc_score_list[0]}
 
     # MC SCORE
-
-
-
-        split_text_allow_complete_sentences_nltk(
-
-
-
-
-
-
-
-
-
-
-
-
+    if len(models) > 1:
+        print("Starting MC")
+        mc_scores = []
+        segments_mc = split_text_allow_complete_sentences_nltk(
+            input, type_det="mc"
+        )
+        samples_len_mc = len(
+            split_text_allow_complete_sentences_nltk(input, type_det="mc")
+        )
+        for i in range(samples_len_mc):
+            cleaned_text_mc = remove_special_characters(segments_mc[i])
+            mc_score = predict_mc(
+                text_mc_model, text_mc_tokenizer, cleaned_text_mc
+            )
+            mc_scores.append(mc_score)
+        mc_scores_array = np.array(mc_scores)
+        average_mc_scores = np.mean(mc_scores_array, axis=0)
+        mc_score_list = average_mc_scores.tolist()
+        mc_score = {}
+        for score, label in zip(mc_score_list, mc_label_map):
+            mc_score[label.upper()] = score
 
-
-
-
-
+        mc_score = {
+            key: mc_score[key.upper()]
+            for key in models
+            if key.upper() in mc_score
+        }
+        total = sum(mc_score.values())
+        # Normalize each value by dividing it by the total
+        mc_score = {key: value / total for key, value in mc_score.items()}
+
+        sum_prob = 1 - bc_score["HUMAN"]
+        for key, value in mc_score.items():
+            mc_score[key] = value * sum_prob
+        if sum_prob < 0.01:
+            mc_score = {}
+
+    elif len(models) == 1:
+        print("Starting 1on1")
+        mc_scores = []
+        segments_mc = split_text_allow_complete_sentences_nltk(
+            input, type_det="mc"
+        )
+        samples_len_mc = len(
+            split_text_allow_complete_sentences_nltk(input, type_det="mc")
+        )
+        for i in range(samples_len_mc):
+            cleaned_text_mc = remove_special_characters(segments_mc[i])
+            mc_score = predict_1on1_single(cleaned_text_mc, models[0])
+            mc_scores.append(mc_score)
+        mc_scores_array = np.array(mc_scores)
+        average_mc_scores = np.mean(mc_scores_array, axis=0)
+        print(average_mc_scores)
+        mc_score_list = average_mc_scores.tolist()
         mc_score = {}
+        mc_score[models[0].upper()] = mc_score_list
+        mc_score["OTHER"] = 1 - mc_score_list
+
+        sum_prob = 1 - bc_score["HUMAN"]
+        for key, value in mc_score.items():
+            mc_score[key] = value * sum_prob
+        if sum_prob < 0.01:
+            mc_score = {}
 
     return mc_score
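In the multi-model branch, the per-segment source-model probabilities are averaged, filtered down to the models selected in the UI, renormalized to sum to 1, and then scaled by the binary classifier's overall AI probability (sum_prob = 1 - P(HUMAN)); if that AI probability is under 1%, the source-model breakdown is dropped entirely. A standalone sketch of just that post-processing step follows; the function name combine_scores and the example numbers are illustrative, not part of the repo.

def combine_scores(avg_mc_score, bc_human_prob, selected_models):
    # Mirrors the post-processing in predict_1on1_scores: keep only the
    # selected models, renormalize their averaged probabilities to sum to 1,
    # then scale by the binary classifier's AI probability (1 - P(HUMAN)).
    mc_score = {
        key: avg_mc_score[key.upper()]
        for key in selected_models
        if key.upper() in avg_mc_score
    }
    total = sum(mc_score.values())
    mc_score = {key: value / total for key, value in mc_score.items()}

    sum_prob = 1 - bc_human_prob
    mc_score = {key: value * sum_prob for key, value in mc_score.items()}
    if sum_prob < 0.01:
        # Text is almost certainly human: no source-model breakdown is shown.
        mc_score = {}
    return mc_score


# Hypothetical averaged scores for three selected models and P(HUMAN) = 0.30.
print(
    combine_scores(
        {"OPENAI GPT": 0.5, "MISTRAL": 0.3, "CLAUDE": 0.2},
        bc_human_prob=0.30,
        selected_models=["OpenAI GPT", "Mistral", "CLAUDE"],
    )
)
# Approximately {'OpenAI GPT': 0.35, 'Mistral': 0.21, 'CLAUDE': 0.14}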