add modularity to model selection + new 1on1 models
- app.py +18 -18
- predictors.py +78 -29
app.py
CHANGED
@@ -6,18 +6,21 @@ from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date
 from utils import extract_text_from_pdf, len_validator
+import yaml
 
 np.set_printoptions(suppress=True)
 
+with open("config.yaml", "r") as file:
+    params = yaml.safe_load(file)
 
-def ai_generated_test(option, input):
+model_list = params["MC_OUTPUT_LABELS"]
+
+
+def ai_generated_test(option, input, models):
     if option == "Human vs AI":
         return predict_bc_scores(input), None
     elif option == "Human vs AI Source Models":
-        return predict_bc_scores(input), predict_mc_scores(input)
-        # elif option == "Human vs AI Source Models (1 on 1)":
-        #     return predict_bc_scores(input), None, predict_1on1_scores(input)
-
+        return predict_bc_scores(input), predict_1on1_scores(input, models)
     return None, None
 
 
@@ -26,7 +29,7 @@ def main(
     ai_option,
     plag_option,
     input,
-
+    models,
     year_from,
     month_from,
     day_from,
@@ -49,14 +52,12 @@ def main(
     )
     depth_analysis_plot = depth_analysis(input)
     bc_score = predict_bc_scores(input)
-    mc_score = predict_mc_scores(input)
-    mc_1on1_score = predict_1on1_scores(input)
+    mc_score = predict_1on1_scores(input, models)
     quilscore = predict_quillbot(input)
 
     return (
         bc_score,
         mc_score,
-        mc_1on1_score,
         formatted_tokens,
         depth_analysis_plot,
         quilscore,
@@ -88,7 +89,6 @@ with gr.Blocks() as demo:
     d1 = today.strftime("%d/%B/%Y")
     d1 = d1.split("/")
 
-    model_list = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "LLAMA2"]
     domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]
     gr.Markdown(
         """
@@ -142,12 +142,12 @@ with gr.Blocks() as demo:
         """
     )
 
-
-
-
-
-
-
+    models = gr.Dropdown(
+        model_list,
+        value=model_list,
+        multiselect=True,
+        label="Models to test against",
+    )
 
     with gr.Row():
         with gr.Column():
@@ -210,7 +210,7 @@ with gr.Blocks() as demo:
             ai_option,
             plag_option,
             input_text,
-
+            models,
             year_from,
             month_from,
             day_from,
@@ -232,7 +232,7 @@ with gr.Blocks() as demo:
 
     only_ai_btn.click(
         fn=ai_generated_test,
-        inputs=[ai_option, input_text],
+        inputs=[ai_option, input_text, models],
         # outputs=[bcLabel, mcLabel, mc1on1Label],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
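The hard-coded model_list in app.py is replaced by a list read from config.yaml (key MC_OUTPUT_LABELS), and the new multiselect Dropdown passes the currently selected labels into ai_generated_test and main as the models argument. The config file itself is not part of this diff, so the sketch below assumes its contents; only the MC_OUTPUT_LABELS key is actually read, and the label values are reused from the hard-coded list this commit removes.

# Sketch only: config.yaml is not shown in this diff, so its contents are assumed.
import yaml

assumed_config = """
MC_OUTPUT_LABELS:
  - OpenAI GPT
  - Mistral
  - CLAUDE
  - Gemini
  - LLAMA2
"""

params = yaml.safe_load(assumed_config)
model_list = params["MC_OUTPUT_LABELS"]
print(model_list)  # ['OpenAI GPT', 'Mistral', 'CLAUDE', 'Gemini', 'LLAMA2']

Because the Dropdown is created with multiselect=True and value=model_list, Gradio hands the handler a plain Python list of the selected label strings, so models defaults to every label and shrinks to whatever subset the user keeps selected.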
predictors.py
CHANGED
@@ -46,13 +46,13 @@ quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path)
 quillbot_model = AutoModelForSequenceClassification.from_pretrained(
     text_quillbot_model_path
 ).to(device)
-
-
-
-
-
-
-
+tokenizers_1on1 = {}
+models_1on1 = {}
+for model_name, model in zip(mc_label_map, text_1on1_models):
+    tokenizers_1on1[model_name] = AutoTokenizer.from_pretrained(model)
+    models_1on1[model_name] = (
+        AutoModelForSequenceClassification.from_pretrained(model).to(device)
+    )
 
 
 def split_text_allow_complete_sentences_nltk(
@@ -225,7 +225,6 @@ def predict_mc_scores(input):
 
 def predict_bc_scores(input):
     bc_scores = []
-    mc_scores = []
     samples_len_bc = len(
         split_text_allow_complete_sentences_nltk(input, type_det="bc")
     )
@@ -265,7 +264,16 @@ def predict_1on1_combined(input):
     return predictions
 
 
-def predict_1on1_scores(input):
+def predict_1on1_single(input, model):
+    predictions = predict_1on1(
+        models_1on1[model], tokenizers_1on1[model], input
+    )[1]
+    return predictions
+
+
+def predict_1on1_scores(input, models):
+
+    print(f"Models to Test: {models}")
     # BC SCORE
     bc_scores = []
     samples_len_bc = len(
@@ -282,27 +290,68 @@ def predict_1on1_scores(input):
     bc_score = {"AI": bc_score_list[1], "HUMAN": bc_score_list[0]}
 
     # MC SCORE
-
-
-
-        split_text_allow_complete_sentences_nltk(
-
-
-
-
-
-
-
-
-
-
-
-
+    if len(models) > 1:
+        print("Starting MC")
+        mc_scores = []
+        segments_mc = split_text_allow_complete_sentences_nltk(
+            input, type_det="mc"
+        )
+        samples_len_mc = len(
+            split_text_allow_complete_sentences_nltk(input, type_det="mc")
+        )
+        for i in range(samples_len_mc):
+            cleaned_text_mc = remove_special_characters(segments_mc[i])
+            mc_score = predict_mc(
+                text_mc_model, text_mc_tokenizer, cleaned_text_mc
+            )
+            mc_scores.append(mc_score)
+        mc_scores_array = np.array(mc_scores)
+        average_mc_scores = np.mean(mc_scores_array, axis=0)
+        mc_score_list = average_mc_scores.tolist()
+        mc_score = {}
+        for score, label in zip(mc_score_list, mc_label_map):
+            mc_score[label.upper()] = score
 
-
-
-
-
+        mc_score = {
+            key: mc_score[key.upper()]
+            for key in models
+            if key.upper() in mc_score
+        }
+        total = sum(mc_score.values())
+        # Normalize each value by dividing it by the total
+        mc_score = {key: value / total for key, value in mc_score.items()}
+
+        sum_prob = 1 - bc_score["HUMAN"]
+        for key, value in mc_score.items():
+            mc_score[key] = value * sum_prob
+        if sum_prob < 0.01:
+            mc_score = {}
+
+    elif len(models) == 1:
+        print("Starting 1on1")
+        mc_scores = []
+        segments_mc = split_text_allow_complete_sentences_nltk(
+            input, type_det="mc"
+        )
+        samples_len_mc = len(
+            split_text_allow_complete_sentences_nltk(input, type_det="mc")
+        )
+        for i in range(samples_len_mc):
+            cleaned_text_mc = remove_special_characters(segments_mc[i])
+            mc_score = predict_1on1_single(cleaned_text_mc, models[0])
+            mc_scores.append(mc_score)
+        mc_scores_array = np.array(mc_scores)
+        average_mc_scores = np.mean(mc_scores_array, axis=0)
+        print(average_mc_scores)
+        mc_score_list = average_mc_scores.tolist()
         mc_score = {}
+        mc_score[models[0].upper()] = mc_score_list
+        mc_score["OTHER"] = 1 - mc_score_list
+
+        sum_prob = 1 - bc_score["HUMAN"]
+        for key, value in mc_score.items():
+            mc_score[key] = value * sum_prob
+        if sum_prob < 0.01:
+            mc_score = {}
 
     return mc_score
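In the multi-model branch, the per-segment source-model probabilities are averaged, filtered down to the models selected in the UI, renormalized to sum to 1, and then scaled by the binary classifier's overall AI probability (sum_prob = 1 - P(HUMAN)); if that AI probability is under 1%, the source-model breakdown is dropped entirely. A standalone sketch of just that post-processing step follows; the function name combine_scores and the example numbers are illustrative, not part of the repo.

def combine_scores(avg_mc_score, bc_human_prob, selected_models):
    # Mirrors the post-processing in predict_1on1_scores: keep only the
    # selected models, renormalize their averaged probabilities to sum to 1,
    # then scale by the binary classifier's AI probability (1 - P(HUMAN)).
    mc_score = {
        key: avg_mc_score[key.upper()]
        for key in selected_models
        if key.upper() in avg_mc_score
    }
    total = sum(mc_score.values())
    mc_score = {key: value / total for key, value in mc_score.items()}

    sum_prob = 1 - bc_human_prob
    mc_score = {key: value * sum_prob for key, value in mc_score.items()}
    if sum_prob < 0.01:
        # Text is almost certainly human: no source-model breakdown is shown.
        mc_score = {}
    return mc_score


# Hypothetical averaged scores for three selected models and P(HUMAN) = 0.30.
print(
    combine_scores(
        {"OPENAI GPT": 0.5, "MISTRAL": 0.3, "CLAUDE": 0.2},
        bc_human_prob=0.30,
        selected_models=["OpenAI GPT", "Mistral", "CLAUDE"],
    )
)
# Approximately {'OpenAI GPT': 0.35, 'Mistral': 0.21, 'CLAUDE': 0.14}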