eljanmahammadli committed on
Commit
038d754
1 Parent(s): 61b027b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -22,6 +22,7 @@ import torch.nn.functional as F
22
  import nltk
23
  from unidecode import unidecode
24
  import time
 
25
 
26
  nltk.download('punkt')
27
 
@@ -56,6 +57,8 @@ def plagiarism_check(
56
  api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
57
  api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
58
  # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
 
 
59
  cse_id = "851813e81162b4ed4"
60
 
61
  time1 = time.perf_counter()
@@ -101,6 +104,14 @@ def plagiarism_check(
101
  + str(len(urlList))
102
  )
103
 
 
 
 
 
 
 
 
 
104
  # Populate matching scores for scrapped pages
105
  for i, soup in enumerate(soups):
106
  print(f"Analyzing {i+1} of {len(soups)} soups........................")
@@ -108,7 +119,8 @@ def plagiarism_check(
108
  page_content = soup.text
109
  for j, sent in enumerate(sentences):
110
  # score = matchingScore(sent, page_content)
111
- score = matchingScoreWithTimeout(sent, page_content)
 
112
  ScoreArray[i][j] = score
113
 
114
  print(f"Time for matching score: {time.perf_counter()-time1}")
@@ -342,7 +354,7 @@ def ai_generated_test(ai_option, input):
342
  bc_score_list = average_bc_scores.tolist()
343
  mc_score_list = average_mc_scores.tolist()
344
 
345
- bc_score = {"AI": bc_score[1].item(), "HUMAN": bc_score[0].item()}
346
  mc_score = {}
347
  label_map = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "LLAMA 2"]
348
 
 
22
  import nltk
23
  from unidecode import unidecode
24
  import time
25
+ from utils import cos_sim_torch, embed_text
26
 
27
  nltk.download('punkt')
28
 
 
57
  api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
58
  api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
59
  # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
60
+ api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
61
+
62
  cse_id = "851813e81162b4ed4"
63
 
64
  time1 = time.perf_counter()
 
104
  + str(len(urlList))
105
  )
106
 
107
+ source_embeddings = []
108
+ for i, soup in enumerate(soups):
109
+ if soup:
110
+ page_content = soup.text
111
+ source_embeddings.append(embed_text(page_content))
112
+ else:
113
+ source_embeddings.append(None)
114
+
115
  # Populate matching scores for scrapped pages
116
  for i, soup in enumerate(soups):
117
  print(f"Analyzing {i+1} of {len(soups)} soups........................")
 
119
  page_content = soup.text
120
  for j, sent in enumerate(sentences):
121
  # score = matchingScore(sent, page_content)
122
+ # score = matchingScoreWithTimeout(sent, page_content)
123
+ score = cos_sim_torch(embed_text(sent), source_embeddings[i])
124
  ScoreArray[i][j] = score
125
 
126
  print(f"Time for matching score: {time.perf_counter()-time1}")
 
354
  bc_score_list = average_bc_scores.tolist()
355
  mc_score_list = average_mc_scores.tolist()
356
 
357
+ bc_score = {"AI": bc_score_list[1], "HUMAN": bc_score_list[0]}
358
  mc_score = {}
359
  label_map = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "LLAMA 2"]
360