aliasgerovs committed on
Commit
8fb8d86
·
1 Parent(s): 0eaca07

Added fix on plagiarism

Browse files
Files changed (2) hide show
  1. plagiarism.py +6 -5
  2. predictors.py +1 -0
plagiarism.py CHANGED
@@ -290,9 +290,8 @@ def plagiarism_check(
290
  # api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
291
  # api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
292
  # api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
293
- # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
294
  # api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
295
- api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
296
  cse_id = "851813e81162b4ed4"
297
 
298
  url_scores = []
@@ -374,7 +373,7 @@ def plagiarism_check(
374
  print("SNIPPETS: ", snippets)
375
  snippets = [[item for item in sublist if item] for sublist in snippets]
376
  for ind in index_descending:
377
- if url_source[ind] > 0.35:
378
  matched_sentence_array = [
379
  [item for item in sublist if item]
380
  for sublist in matched_sentence_array
@@ -470,15 +469,16 @@ def html_highlight(
470
  combined_sentence = ""
471
  total_score = 0
472
  total_count = 0
473
- category_scores = defaultdict(list)
474
  for sentence, score, url, idx in sentence_scores:
475
  category = check_url_category(url)
476
  if score is None:
477
  total_score += 0
478
  else:
479
  total_score += score
 
480
  total_count += 1
481
- category_scores[category].append(score)
482
  if idx != prev_idx and prev_idx is not None:
483
  color = color_map[prev_idx - 1]
484
  index_part = f"<span>[{prev_idx}]</span>"
@@ -488,6 +488,7 @@ def html_highlight(
488
  combined_sentence += " " + sentence
489
  prev_idx = idx
490
 
 
491
  total_average_score = round(total_score / total_count, 2)
492
  category_averages = {
493
  category: round((sum(scores) / len(scores)), 2)
 
290
  # api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
291
  # api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
292
  # api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
293
+ api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
294
  # api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
 
295
  cse_id = "851813e81162b4ed4"
296
 
297
  url_scores = []
 
373
  print("SNIPPETS: ", snippets)
374
  snippets = [[item for item in sublist if item] for sublist in snippets]
375
  for ind in index_descending:
376
+ if url_source[ind] > 0.1:
377
  matched_sentence_array = [
378
  [item for item in sublist if item]
379
  for sublist in matched_sentence_array
 
469
  combined_sentence = ""
470
  total_score = 0
471
  total_count = 0
472
+ category_scores = defaultdict(set)
473
  for sentence, score, url, idx in sentence_scores:
474
  category = check_url_category(url)
475
  if score is None:
476
  total_score += 0
477
  else:
478
  total_score += score
479
+ category_scores[category].add(score)
480
  total_count += 1
481
+
482
  if idx != prev_idx and prev_idx is not None:
483
  color = color_map[prev_idx - 1]
484
  index_part = f"<span>[{prev_idx}]</span>"
 
488
  combined_sentence += " " + sentence
489
  prev_idx = idx
490
 
491
+ print(category_scores)
492
  total_average_score = round(total_score / total_count, 2)
493
  category_averages = {
494
  category: round((sum(scores) / len(scores)), 2)
predictors.py CHANGED
@@ -26,6 +26,7 @@ nltk.download("punkt")
26
  nltk.download("stopwords")
27
  device_needed = "cuda" if torch.cuda.is_available() else "cpu"
28
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
29
 
30
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
31
  text_mc_model_path = params["TEXT_MC_MODEL_PATH"]
 
26
  nltk.download("stopwords")
27
  device_needed = "cuda" if torch.cuda.is_available() else "cpu"
28
  device = "cuda" if torch.cuda.is_available() else "cpu"
29
+ print('DEVICE IS :' , device)
30
 
31
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
32
  text_mc_model_path = params["TEXT_MC_MODEL_PATH"]