minko186 committed on
Commit 2333c59
1 Parent(s): 09f0b85

add modularity to model selection + new 1on1 models

Files changed (2)
  1. app.py +18 -18
  2. predictors.py +78 -29
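
Note: the new app.py reads the list of detectable source models from a config.yaml key (MC_OUTPUT_LABELS) instead of the hard-coded model_list it removes. The config file itself is not part of this commit, so the sketch below is only an assumption about its shape, with the label values borrowed from the old hard-coded list.

# Minimal sketch of the config.yaml structure app.py now expects (assumed).
# The key name MC_OUTPUT_LABELS comes from the diff; the label values are
# taken from the model_list this commit deletes and may differ in the repo.
import yaml

config_text = """
MC_OUTPUT_LABELS:
  - OpenAI GPT
  - Mistral
  - CLAUDE
  - Gemini
  - LLAMA2
"""

params = yaml.safe_load(config_text)
model_list = params["MC_OUTPUT_LABELS"]
print(model_list)  # ['OpenAI GPT', 'Mistral', 'CLAUDE', 'Gemini', 'LLAMA2']

Keeping the labels in config is what makes the selection "modular": adding or dropping a source model becomes a config edit rather than a code change.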
app.py CHANGED
@@ -6,18 +6,21 @@ from analysis import depth_analysis
 from predictors import predict_quillbot
 from plagiarism import plagiarism_check, build_date
 from utils import extract_text_from_pdf, len_validator
+import yaml
 
 np.set_printoptions(suppress=True)
 
+with open("config.yaml", "r") as file:
+    params = yaml.safe_load(file)
 
-def ai_generated_test(option, input):
+model_list = params["MC_OUTPUT_LABELS"]
+
+
+def ai_generated_test(option, input, models):
     if option == "Human vs AI":
         return predict_bc_scores(input), None
     elif option == "Human vs AI Source Models":
-        return predict_bc_scores(input), predict_mc_scores(input)
-    # elif option == "Human vs AI Source Models (1 on 1)":
-    #     return predict_bc_scores(input), None, predict_1on1_scores(input)
-
+        return predict_bc_scores(input), predict_1on1_scores(input, models)
     return None, None
 
 
@@ -26,7 +29,7 @@ def main(
     ai_option,
     plag_option,
     input,
-    # models,
+    models,
     year_from,
     month_from,
     day_from,
@@ -49,14 +52,12 @@ def main(
     )
     depth_analysis_plot = depth_analysis(input)
     bc_score = predict_bc_scores(input)
-    mc_score = predict_mc_scores(input)
-    mc_1on1_score = predict_1on1_scores(input)
+    mc_score = predict_1on1_scores(input, models)
     quilscore = predict_quillbot(input)
 
     return (
         bc_score,
         mc_score,
-        mc_1on1_score,
         formatted_tokens,
         depth_analysis_plot,
         quilscore,
@@ -88,7 +89,6 @@ with gr.Blocks() as demo:
     d1 = today.strftime("%d/%B/%Y")
     d1 = d1.split("/")
 
-    model_list = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "LLAMA2"]
     domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]
     gr.Markdown(
         """
@@ -142,12 +142,12 @@ with gr.Blocks() as demo:
         """
     )
 
-    # models = gr.Dropdown(
-    #     model_list,
-    #     value=model_list,
-    #     multiselect=True,
-    #     label="Models to test against",
-    # )
+    models = gr.Dropdown(
+        model_list,
+        value=model_list,
+        multiselect=True,
+        label="Models to test against",
+    )
 
     with gr.Row():
         with gr.Column():
@@ -210,7 +210,7 @@ with gr.Blocks() as demo:
             ai_option,
             plag_option,
             input_text,
-            # models,
+            models,
             year_from,
             month_from,
             day_from,
@@ -232,7 +232,7 @@ with gr.Blocks() as demo:
 
     only_ai_btn.click(
         fn=ai_generated_test,
-        inputs=[ai_option, input_text],
+        inputs=[ai_option, input_text, models],
        # outputs=[bcLabel, mcLabel, mc1on1Label],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
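
Note: because the restored dropdown is built with multiselect=True, Gradio hands the callback a Python list of the selected labels, which is what lets ai_generated_test forward it and predict_1on1_scores branch on len(models). A standalone sketch of that behavior (not code from the repo, just an illustration of the component's contract):

# Demonstrates that a multiselect gr.Dropdown passes a list of strings
# to its callback; the dropdown arguments mirror the ones added in app.py.
import gradio as gr

model_list = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "LLAMA2"]

def show_selection(models):
    # `models` arrives as a list, e.g. ["OpenAI GPT", "CLAUDE"]
    return f"Selected {len(models)} model(s): {models}"

with gr.Blocks() as demo:
    models = gr.Dropdown(
        model_list,
        value=model_list,
        multiselect=True,
        label="Models to test against",
    )
    out = gr.Textbox()
    models.change(fn=show_selection, inputs=models, outputs=out)

# demo.launch()  # uncomment to try it locally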
predictors.py CHANGED
@@ -46,13 +46,13 @@ quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path)
 quillbot_model = AutoModelForSequenceClassification.from_pretrained(
     text_quillbot_model_path
 ).to(device)
-# tokenizers_1on1 = {}
-# models_1on1 = {}
-# for model in text_1on1_models:
-#     tokenizers_1on1[model] = AutoTokenizer.from_pretrained(model)
-#     models_1on1[model] = AutoModelForSequenceClassification.from_pretrained(
-#         model
-#     ).to(device)
+tokenizers_1on1 = {}
+models_1on1 = {}
+for model_name, model in zip(mc_label_map, text_1on1_models):
+    tokenizers_1on1[model_name] = AutoTokenizer.from_pretrained(model)
+    models_1on1[model_name] = (
+        AutoModelForSequenceClassification.from_pretrained(model).to(device)
+    )
 
 
 def split_text_allow_complete_sentences_nltk(
@@ -225,7 +225,6 @@ def predict_mc_scores(input):
 
 def predict_bc_scores(input):
     bc_scores = []
-    mc_scores = []
     samples_len_bc = len(
         split_text_allow_complete_sentences_nltk(input, type_det="bc")
     )
@@ -265,7 +264,16 @@ def predict_1on1_combined(input):
     return predictions
 
 
-def predict_1on1_scores(input):
+def predict_1on1_single(input, model):
+    predictions = predict_1on1(
+        models_1on1[model], tokenizers_1on1[model], input
+    )[1]
+    return predictions
+
+
+def predict_1on1_scores(input, models):
+
+    print(f"Models to Test: {models}")
     # BC SCORE
     bc_scores = []
     samples_len_bc = len(
@@ -282,27 +290,68 @@ def predict_1on1_scores(input):
     bc_score = {"AI": bc_score_list[1], "HUMAN": bc_score_list[0]}
 
     # MC SCORE
-    mc_scores = []
-    segments_mc = split_text_allow_complete_sentences_nltk(input, type_det="mc")
-    samples_len_mc = len(
-        split_text_allow_complete_sentences_nltk(input, type_det="mc")
-    )
-    for i in range(samples_len_mc):
-        cleaned_text_mc = remove_special_characters(segments_mc[i])
-        mc_score = predict_1on1_combined(cleaned_text_mc)
-        mc_scores.append(mc_score)
-    mc_scores_array = np.array(mc_scores)
-    average_mc_scores = np.mean(mc_scores_array, axis=0)
-    normalized_mc_scores = average_mc_scores / np.sum(average_mc_scores)
-    mc_score_list = normalized_mc_scores.tolist()
-    mc_score = {}
-    for score, label in zip(mc_score_list, text_1on1_label_map):
-        mc_score[label.upper()] = score
+    if len(models) > 1:
+        print("Starting MC")
+        mc_scores = []
+        segments_mc = split_text_allow_complete_sentences_nltk(
+            input, type_det="mc"
+        )
+        samples_len_mc = len(
+            split_text_allow_complete_sentences_nltk(input, type_det="mc")
+        )
+        for i in range(samples_len_mc):
+            cleaned_text_mc = remove_special_characters(segments_mc[i])
+            mc_score = predict_mc(
+                text_mc_model, text_mc_tokenizer, cleaned_text_mc
+            )
+            mc_scores.append(mc_score)
+        mc_scores_array = np.array(mc_scores)
+        average_mc_scores = np.mean(mc_scores_array, axis=0)
+        mc_score_list = average_mc_scores.tolist()
+        mc_score = {}
+        for score, label in zip(mc_score_list, mc_label_map):
+            mc_score[label.upper()] = score
 
-    sum_prob = 1 - bc_score["HUMAN"]
-    for key, value in mc_score.items():
-        mc_score[key] = value * sum_prob
-    if sum_prob < 0.01:
+        mc_score = {
+            key: mc_score[key.upper()]
+            for key in models
+            if key.upper() in mc_score
+        }
+        total = sum(mc_score.values())
+        # Normalize each value by dividing it by the total
+        mc_score = {key: value / total for key, value in mc_score.items()}
+
+        sum_prob = 1 - bc_score["HUMAN"]
+        for key, value in mc_score.items():
+            mc_score[key] = value * sum_prob
+        if sum_prob < 0.01:
+            mc_score = {}
+
+    elif len(models) == 1:
+        print("Starting 1on1")
+        mc_scores = []
+        segments_mc = split_text_allow_complete_sentences_nltk(
+            input, type_det="mc"
+        )
+        samples_len_mc = len(
+            split_text_allow_complete_sentences_nltk(input, type_det="mc")
+        )
+        for i in range(samples_len_mc):
+            cleaned_text_mc = remove_special_characters(segments_mc[i])
+            mc_score = predict_1on1_single(cleaned_text_mc, models[0])
+            mc_scores.append(mc_score)
+        mc_scores_array = np.array(mc_scores)
+        average_mc_scores = np.mean(mc_scores_array, axis=0)
+        print(average_mc_scores)
+        mc_score_list = average_mc_scores.tolist()
         mc_score = {}
+        mc_score[models[0].upper()] = mc_score_list
+        mc_score["OTHER"] = 1 - mc_score_list
+
+        sum_prob = 1 - bc_score["HUMAN"]
+        for key, value in mc_score.items():
+            mc_score[key] = value * sum_prob
+        if sum_prob < 0.01:
+            mc_score = {}
 
     return mc_score
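
Note: the post-processing added to predict_1on1_scores for the multi-model branch can be read on its own: keep only the scores of the user-selected models, renormalize the kept scores so they sum to 1, then scale everything by the AI probability 1 - P(HUMAN) from the BC step, dropping the breakdown entirely when that probability is below 0.01. A self-contained sketch with illustrative numbers; filter_and_scale is a hypothetical helper, not a function in predictors.py:

# Hypothetical helper mirroring the filtering/normalization logic in the diff.
def filter_and_scale(mc_score, selected_models, human_prob):
    # Keep only the models chosen in the dropdown (labels are stored uppercased).
    kept = {
        key: mc_score[key.upper()]
        for key in selected_models
        if key.upper() in mc_score
    }
    # Renormalize the kept scores so they sum to 1.
    total = sum(kept.values())
    kept = {key: value / total for key, value in kept.items()}
    # Scale by the probability that the text is AI-generated at all.
    sum_prob = 1 - human_prob
    kept = {key: value * sum_prob for key, value in kept.items()}
    return {} if sum_prob < 0.01 else kept

raw = {"OPENAI GPT": 0.5, "MISTRAL": 0.2, "CLAUDE": 0.3}
print(filter_and_scale(raw, ["OpenAI GPT", "CLAUDE"], human_prob=0.5))
# {'OpenAI GPT': 0.3125, 'CLAUDE': 0.1875}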