Spaces:

polygraf-ai
/

copyright_checker

Sleeping

App Files Files Community

aliasgerovs committed on Feb 27, 2024

Commit

d53b62d

1 Parent(s): 3d16af9

Updated

Browse files

Files changed (1) hide show

app.py +28 -7

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ import fitz
 from transformers import GPT2LMHeadModel, GPT2TokenizerFast
 import nltk, spacy, subprocess, torch
 import plotly.graph_objects as go
 import nltk
 from unidecode import unidecode
@@ -104,7 +105,6 @@ def plagiarism_check(
     # print("New Score Array:\n")
     # print2D(ScoreArray)
     # Gradio formatting section
     sentencePlag = [False] * len(sentences)
     sentenceToMaxURL = [-1] * len(sentences)
@@ -192,9 +192,11 @@ text_mc_model_path = "polygraf-ai/ai-text-detection-mc-robert-open-ai-detector-v
 text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
 text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
 def remove_accents(input_str):
-    # nfkd_form = unicodedata.normalize('NFKD', input_str)
-    # return "".join([char for char in nfkd_form if not unicodedata.combining(char)])
     text_no_accents = unidecode(input_str)
     return text_no_accents
@@ -266,12 +268,17 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=30,
         decoded_segments.append(decoded_segment)
     return decoded_segments
 def predict_bc(model, tokenizer, text):
     tokens = text_bc_tokenizer(
         text, padding='max_length', truncation=True, max_length=256, return_tensors="pt"
     ).to(device)["input_ids"]
     output = model(tokens)
     output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
     print("BC Score: ", output_norm)
@@ -360,12 +367,14 @@ def main(
     )
     depth_analysis_plot = depth_analysis(input)
     bc_score, mc_score = ai_generated_test(ai_option,input)
     return (
     bc_score,
     mc_score,
     formatted_tokens,
     depth_analysis_plot,
             )
@@ -520,8 +529,11 @@ with gr.Blocks() as demo:
             only_ai_btn = gr.Button("AI Check")
         with gr.Column():
-            only_plagiarism_btn = gr.Button("Source Detection")
     with gr.Row():
         depth_analysis_btn = gr.Button("Detailed Writing Analysis")
@@ -546,7 +558,8 @@ with gr.Blocks() as demo:
             bcLabel = gr.Label(label="Source")
         with gr.Column():
             mcLabel = gr.Label(label="Creator")
     with gr.Group():
         with gr.Row():
             month_from = gr.Dropdown(
@@ -615,6 +628,7 @@ with gr.Blocks() as demo:
             mcLabel,
             sentenceBreakdown,
             writing_analysis_plot,
         ],
         api_name="main",
     )
@@ -629,6 +643,13 @@ with gr.Blocks() as demo:
         api_name="ai_check",
     )
     only_plagiarism_btn.click(
         fn=plagiarism_check,
         inputs=[

 from transformers import GPT2LMHeadModel, GPT2TokenizerFast
 import nltk, spacy, subprocess, torch
 import plotly.graph_objects as go
+import torch.nn.functional as F
 import nltk
 from unidecode import unidecode
     # print("New Score Array:\n")
     # print2D(ScoreArray)
     # Gradio formatting section
     sentencePlag = [False] * len(sentences)
     sentenceToMaxURL = [-1] * len(sentences)
 text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
 text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
+# QuillBot ("humanized" text) detector: a two-class sequence classifier.
+# Label names; predict_quillbot reports logit index 0 as "Original" and index 1 as "QuillBot".
+quillbot_labels = ["Original", "QuillBot"]
+# NOTE(review): the tokenizer is loaded from the base RoBERTa repo, not from the
+# detector's own repo — presumably the fine-tune kept the base vocabulary; confirm.
+quillbot_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
+# Weights are moved to `device` once at import time and reused by every request.
+quillbot_model = AutoModelForSequenceClassification.from_pretrained("polygraf-ai/quillbot-detector-roberta-base-28K").to(device)
 def remove_accents(input_str):
     text_no_accents = unidecode(input_str)
     return text_no_accents
         decoded_segments.append(decoded_segment)
     return decoded_segments
+def predict_quillbot(text):
+    tokenized_text = quillbot_tokenizer(text, padding="max_length", truncation=True, max_length=256, return_tensors="pt").to(device)["input_ids"]
+    output = quillbot_model(tokenized_text)
+    output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
+    q_score = {"QuillBot": output_norm[1].item(), "Original": output_norm[0].item()}
+    return q_score
 def predict_bc(model, tokenizer, text):
     tokens = text_bc_tokenizer(
         text, padding='max_length', truncation=True, max_length=256, return_tensors="pt"
     ).to(device)["input_ids"]
     output = model(tokens)
     output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
     print("BC Score: ", output_norm)
     )
     depth_analysis_plot = depth_analysis(input)
     bc_score, mc_score = ai_generated_test(ai_option,input)
+    quilscore = predict_quillbot(input)
     return (
     bc_score,
     mc_score,
     formatted_tokens,
     depth_analysis_plot,
+    quilscore
             )
             only_ai_btn = gr.Button("AI Check")
         with gr.Column():
+            only_plagiarism_btn = gr.Button("Source Check")
+    with gr.Row():
+        quillbot_check = gr.Button("Humanized Text Check (Quillbot)")
     with gr.Row():
         depth_analysis_btn = gr.Button("Detailed Writing Analysis")
             bcLabel = gr.Label(label="Source")
         with gr.Column():
             mcLabel = gr.Label(label="Creator")
+    with gr.Row():
+        QLabel = gr.Label(label="Humanized")
     with gr.Group():
         with gr.Row():
             month_from = gr.Dropdown(
             mcLabel,
             sentenceBreakdown,
             writing_analysis_plot,
+            QLabel
         ],
         api_name="main",
     )
         api_name="ai_check",
     )
+    quillbot_check.click(
+        fn=predict_quillbot,
+        inputs=[input_text],
+        outputs=[QLabel],
+        api_name="quillbot_check",
+    )
     only_plagiarism_btn.click(
         fn=plagiarism_check,
         inputs=[