Spaces:
Running
Running
aliasgerovs
committed on
Commit
•
d53b62d
1
Parent(s):
3d16af9
Updated
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ import fitz
|
|
17 |
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
|
18 |
import nltk, spacy, subprocess, torch
|
19 |
import plotly.graph_objects as go
|
|
|
20 |
import nltk
|
21 |
from unidecode import unidecode
|
22 |
|
@@ -104,7 +105,6 @@ def plagiarism_check(
|
|
104 |
# print("New Score Array:\n")
|
105 |
# print2D(ScoreArray)
|
106 |
|
107 |
-
|
108 |
# Gradio formatting section
|
109 |
sentencePlag = [False] * len(sentences)
|
110 |
sentenceToMaxURL = [-1] * len(sentences)
|
@@ -192,9 +192,11 @@ text_mc_model_path = "polygraf-ai/ai-text-detection-mc-robert-open-ai-detector-v
|
|
192 |
text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
|
193 |
text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
|
194 |
|
|
|
|
|
|
|
|
|
195 |
def remove_accents(input_str):
|
196 |
-
# nfkd_form = unicodedata.normalize('NFKD', input_str)
|
197 |
-
# return "".join([char for char in nfkd_form if not unicodedata.combining(char)])
|
198 |
text_no_accents = unidecode(input_str)
|
199 |
return text_no_accents
|
200 |
|
@@ -266,12 +268,17 @@ def split_text_allow_complete_sentences_nltk(text, max_length=256, tolerance=30,
|
|
266 |
decoded_segments.append(decoded_segment)
|
267 |
return decoded_segments
|
268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
def predict_bc(model, tokenizer, text):
|
271 |
tokens = text_bc_tokenizer(
|
272 |
text, padding='max_length', truncation=True, max_length=256, return_tensors="pt"
|
273 |
).to(device)["input_ids"]
|
274 |
-
|
275 |
output = model(tokens)
|
276 |
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
|
277 |
print("BC Score: ", output_norm)
|
@@ -360,12 +367,14 @@ def main(
|
|
360 |
)
|
361 |
depth_analysis_plot = depth_analysis(input)
|
362 |
bc_score, mc_score = ai_generated_test(ai_option,input)
|
|
|
363 |
|
364 |
return (
|
365 |
bc_score,
|
366 |
mc_score,
|
367 |
formatted_tokens,
|
368 |
depth_analysis_plot,
|
|
|
369 |
)
|
370 |
|
371 |
|
@@ -520,8 +529,11 @@ with gr.Blocks() as demo:
|
|
520 |
only_ai_btn = gr.Button("AI Check")
|
521 |
|
522 |
with gr.Column():
|
523 |
-
only_plagiarism_btn = gr.Button("Source
|
524 |
-
|
|
|
|
|
|
|
525 |
with gr.Row():
|
526 |
depth_analysis_btn = gr.Button("Detailed Writing Analysis")
|
527 |
|
@@ -546,7 +558,8 @@ with gr.Blocks() as demo:
|
|
546 |
bcLabel = gr.Label(label="Source")
|
547 |
with gr.Column():
|
548 |
mcLabel = gr.Label(label="Creator")
|
549 |
-
|
|
|
550 |
with gr.Group():
|
551 |
with gr.Row():
|
552 |
month_from = gr.Dropdown(
|
@@ -615,6 +628,7 @@ with gr.Blocks() as demo:
|
|
615 |
mcLabel,
|
616 |
sentenceBreakdown,
|
617 |
writing_analysis_plot,
|
|
|
618 |
],
|
619 |
api_name="main",
|
620 |
)
|
@@ -629,6 +643,13 @@ with gr.Blocks() as demo:
|
|
629 |
api_name="ai_check",
|
630 |
)
|
631 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
632 |
only_plagiarism_btn.click(
|
633 |
fn=plagiarism_check,
|
634 |
inputs=[
|
|
|
17 |
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
|
18 |
import nltk, spacy, subprocess, torch
|
19 |
import plotly.graph_objects as go
|
20 |
+
import torch.nn.functional as F
|
21 |
import nltk
|
22 |
from unidecode import unidecode
|
23 |
|
|
|
105 |
# print("New Score Array:\n")
|
106 |
# print2D(ScoreArray)
|
107 |
|
|
|
108 |
# Gradio formatting section
|
109 |
sentencePlag = [False] * len(sentences)
|
110 |
sentenceToMaxURL = [-1] * len(sentences)
|
|
|
192 |
text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
|
193 |
text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
|
194 |
|
195 |
+
quillbot_labels = ["Original", "QuillBot"]
|
196 |
+
quillbot_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")
|
197 |
+
quillbot_model = AutoModelForSequenceClassification.from_pretrained("polygraf-ai/quillbot-detector-roberta-base-28K").to(device)
|
198 |
+
|
199 |
def remove_accents(input_str):
    """Return ``input_str`` after passing it through ``unidecode``.

    Args:
        input_str: Text to convert.

    Returns:
        Whatever ``unidecode`` produces for the given text.
    """
    return unidecode(input_str)
|
202 |
|
|
|
268 |
decoded_segments.append(decoded_segment)
|
269 |
return decoded_segments
|
270 |
|
271 |
+
def predict_quillbot(text):
    """Score ``text`` with the QuillBot detector model.

    The text is tokenized with ``quillbot_tokenizer`` (padded/truncated to
    256 tokens), the resulting ``input_ids`` are fed to ``quillbot_model``,
    and the logits are softmax-normalized along axis 1.

    Args:
        text: Input string to classify.

    Returns:
        dict: ``{"QuillBot": score_at_index_1, "Original": score_at_index_0}``
        taken from the first row of the normalized output.
    """
    tokenized_text = quillbot_tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=256,
        return_tensors="pt",
    ).to(device)["input_ids"]
    # Inference only: disable autograd so no computation graph is built or
    # kept alive for the forward pass. The resulting scores are unchanged.
    with torch.no_grad():
        output = quillbot_model(tokenized_text)
    output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
    q_score = {
        "QuillBot": output_norm[1].item(),
        "Original": output_norm[0].item(),
    }
    return q_score
|
277 |
|
278 |
def predict_bc(model, tokenizer, text):
|
279 |
tokens = text_bc_tokenizer(
|
280 |
text, padding='max_length', truncation=True, max_length=256, return_tensors="pt"
|
281 |
).to(device)["input_ids"]
|
|
|
282 |
output = model(tokens)
|
283 |
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
|
284 |
print("BC Score: ", output_norm)
|
|
|
367 |
)
|
368 |
depth_analysis_plot = depth_analysis(input)
|
369 |
bc_score, mc_score = ai_generated_test(ai_option,input)
|
370 |
+
quilscore = predict_quillbot(input)
|
371 |
|
372 |
return (
|
373 |
bc_score,
|
374 |
mc_score,
|
375 |
formatted_tokens,
|
376 |
depth_analysis_plot,
|
377 |
+
quilscore
|
378 |
)
|
379 |
|
380 |
|
|
|
529 |
only_ai_btn = gr.Button("AI Check")
|
530 |
|
531 |
with gr.Column():
|
532 |
+
only_plagiarism_btn = gr.Button("Source Check")
|
533 |
+
|
534 |
+
with gr.Row():
|
535 |
+
quillbot_check = gr.Button("Humanized Text Check (Quillbot)")
|
536 |
+
|
537 |
with gr.Row():
|
538 |
depth_analysis_btn = gr.Button("Detailed Writing Analysis")
|
539 |
|
|
|
558 |
bcLabel = gr.Label(label="Source")
|
559 |
with gr.Column():
|
560 |
mcLabel = gr.Label(label="Creator")
|
561 |
+
with gr.Row():
|
562 |
+
QLabel = gr.Label(label="Humanized")
|
563 |
with gr.Group():
|
564 |
with gr.Row():
|
565 |
month_from = gr.Dropdown(
|
|
|
628 |
mcLabel,
|
629 |
sentenceBreakdown,
|
630 |
writing_analysis_plot,
|
631 |
+
QLabel
|
632 |
],
|
633 |
api_name="main",
|
634 |
)
|
|
|
643 |
api_name="ai_check",
|
644 |
)
|
645 |
|
646 |
+
quillbot_check.click(
|
647 |
+
fn=predict_quillbot,
|
648 |
+
inputs=[input_text],
|
649 |
+
outputs=[QLabel],
|
650 |
+
api_name="quillbot_check",
|
651 |
+
)
|
652 |
+
|
653 |
only_plagiarism_btn.click(
|
654 |
fn=plagiarism_check,
|
655 |
inputs=[
|