Spaces:
Build error
Build error
Wintersmith
committed on
Commit
•
b7b3296
1
Parent(s):
2e96147
second commit
Browse files
- app.py +44 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import tensorflow as tf
|
3 |
+
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
|
4 |
+
import gradio as gr
|
5 |
+
import re
|
6 |
+
|
7 |
+
# Hugging Face Hub repository ID passed to both from_pretrained() calls below,
# so the tokenizer and the classifier always come from the same checkpoint.
MODEL_ID = "Wintersmith/LLM_generated_text_detector"

# Loaded once at import time and shared by every request handled by the app.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = TFAutoModelForSequenceClassification.from_pretrained(MODEL_ID)
|
9 |
+
|
10 |
+
def clean_text(text):
    """Normalize raw text before it is tokenized.

    Three steps are applied in order:

    1. Remove every character that is not an ASCII letter, an ASCII digit,
       or whitespace (punctuation and accented letters are dropped).
    2. Collapse each run of whitespace into a single space.
    3. Lowercase the result.

    Leading and trailing whitespace is collapsed to one space, not stripped.

    Args:
        text: The input string.

    Returns:
        The normalized string.
    """
    without_symbols = re.sub(r"[^A-Za-z0-9\s]", "", text)
    single_spaced = re.sub(r"\s+", " ", without_symbols)
    return single_spaced.lower()
|
15 |
+
|
16 |
+
def get_probabilities(input_text):
    """Score a piece of text and return one confidence value per class.

    The text is normalized with ``clean_text``, tokenized (truncated to at
    most 512 tokens), run through the module-level ``model``, and the
    resulting logits are squashed with a sigmoid.

    Args:
        input_text: The raw text entered by the user.

    Returns:
        A dict mapping the labels "Written by student" and "AI generated"
        to built-in ``float`` scores.
    """
    cleaned_text = clean_text(input_text)

    # return_tensors='tf' makes the tokenizer emit TensorFlow tensors; dict()
    # turns the tokenizer output into a plain mapping for model.predict().
    model_input = dict(
        tokenizer(
            cleaned_text,
            max_length=512,
            padding=True,
            truncation=True,
            return_tensors='tf',
        )
    )
    logits_pred = model.predict(model_input)['logits']

    # NOTE(review): sigmoid scores each logit independently, so the two values
    # need not sum to 1. Confirm against how the checkpoint was trained before
    # switching to softmax.
    probs = tf.nn.sigmoid(logits_pred)

    class_names = ["Written by student", "AI generated"]
    # Only one text is scored, so take the first row. Convert NumPy scalars to
    # built-in floats so the returned mapping holds plain Python values.
    return {
        label: float(score)
        for label, score in zip(class_names, probs[0].numpy())
    }
|
25 |
+
|
26 |
+
# Example inputs shown beneath the text box so visitors can try the app with
# one click.
# NOTE(review): "eassay" is a typo for "essay"; the name is kept because the
# gr.Interface(...) call elsewhere in this file refers to it.
student_written_eassay = "Most schools allow students to have cell phones for safety, which seems unlikely to change as long as school shootings remain a common occurrence. But phones aren't just tools for emergencies; they can also be valuable tools in the classroom. If there's a word or concept a student doesn't understand, the student can find information instantly. Phones have calculators as well as spelling and grammar checks. Most importantly, phones allow students to communicate with one another and with experts in fields of interest. The question remains, however, whether the use of cell phones in school outweighs the distraction they cause."

ai_generated_essay = "In today’s digital age, the presence of smartphones in schools has sparked heated debates among educators, parents, and policymakers. While some argue that banning phones is essential for maintaining classroom focus, others emphasize the need to consider students’ sense of connection and adapt to the changing educational landscape."

# Text (with an inline HTML image) passed as the interface description.
description = """This is a Distilbert model fine-tuned on thousands of essays of various themes like 'Phones in school', 'Car-free cities' etc. <img src="https://image.khaleejtimes.com/?uuid=99d0d917-5420-4344-84d8-4fabd2578882&function=cropresize&type=preview&source=false&q=75&crop_w=0.99999&crop_h=0.75&x=0&y=0&width=1500&height=844" width=300px>"""

# Markdown passed as the interface article.
article = "This app is based on a Kaggle competition, learn more about it [here](https://www.kaggle.com/competitions/llm-detect-ai-generated-text)."
|
33 |
+
|
34 |
+
# Gradio UI: a single text input is passed to get_probabilities, and the
# label-to-score dict it returns is rendered by a Label component.
iface = gr.Interface(
    fn=get_probabilities,
    inputs="text",
    outputs=gr.Label(),
    title="Detect AI Generated Text - Identify which essay was written by a LLM",
    examples=[student_written_eassay, ai_generated_essay],
    description=description,
    article=article,
)

# NOTE(review): share=True asks Gradio for a public share link. This is
# presumably unnecessary when the app is hosted on Hugging Face Spaces;
# confirm before removing.
iface.launch(share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
tensorflow
|
3 |
+
gradio
|
4 |
+
numpy
|