aliasgerovs committed on
Commit
6e86f65
·
1 Parent(s): 6897d4d

Updated matching score logic.

Browse files
Files changed (1) hide show
  1. utils.py +25 -16
utils.py CHANGED
@@ -218,29 +218,38 @@ def matchingScoreWithTimeout(sentence, content):
218
  timer = threading.Timer(2, timeout_handler) # Set a timer for 2 seconds
219
  timer.start()
220
  try:
221
- score = matchingScore(sentence, content)
222
  timer.cancel() # Cancel the timer if calculation completes before timeout
223
  return score
224
  except TimeoutError:
225
  return 0
226
 
227
 
228
- async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  content = removePunc(content)
230
  for j, sentence in enumerate(sentences):
231
- sentence = removePunc(sentence)
232
- if sentence in content:
233
- ScoreArray[content_idx][j] = 1
234
- else:
235
- n = 5
236
- ngrams = getQueries(sentence, n)
237
- if len(ngrams) == 0:
238
- return 0
239
- matched = [x for x in ngrams if " ".join(x) in content]
240
- ScoreArray[content_idx][j] = len(matched) / len(ngrams)
241
- print(
242
- f"Analyzed {content_idx+1} of soups (SOUP SUCCEEDED)........................"
243
- )
244
  return ScoreArray
245
 
246
 
@@ -269,7 +278,7 @@ async def parallel_analyze_2(soups, sentences, ScoreArray):
269
  print(
270
  f"Analyzing {i+1} of {len(soups)} soups with {j+1} of {len(sentences)} sentences........................"
271
  )
272
- tasks[i][j] = matchingScore(sent, page_content)
273
  else:
274
  print(
275
  f"Analyzed {i+1} of soups (SOUP FAILED)........................"
 
218
  timer = threading.Timer(2, timeout_handler) # Set a timer for 2 seconds
219
  timer.start()
220
  try:
221
+ score = sentence_similarity(sentence, content)
222
  timer.cancel() # Cancel the timer if calculation completes before timeout
223
  return score
224
  except TimeoutError:
225
  return 0
226
 
227
 
228
+ # async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
229
+ # content = removePunc(content)
230
+ # for j, sentence in enumerate(sentences):
231
+ # sentence = removePunc(sentence)
232
+ # if sentence in content:
233
+ # ScoreArray[content_idx][j] = 1
234
+ # else:
235
+ # n = 5
236
+ # ngrams = getQueries(sentence, n)
237
+ # if len(ngrams) == 0:
238
+ # return 0
239
+ # matched = [x for x in ngrams if " ".join(x) in content]
240
+ # ScoreArray[content_idx][j] = len(matched) / len(ngrams)
241
+ # print(
242
+ # f"Analyzed {content_idx+1} of soups (SOUP SUCCEEDED)........................"
243
+ # )
244
+ # return ScoreArray
245
+
246
+ async def matchingScoreAsync(sentences, content, content_idx, ScoreArray, model, util):
247
  content = removePunc(content)
248
  for j, sentence in enumerate(sentences):
249
+ sentence = removePunc(sentence)
250
+ similarity_score = sentence_similarity(sentence, content, model, util)
251
+ ScoreArray[content_idx][j] = similarity_score
252
+ print(f"Analyzed {content_idx+1} of contents (CONTENT ANALYZED)........................")
 
 
 
 
 
 
 
 
 
253
  return ScoreArray
254
 
255
 
 
278
  print(
279
  f"Analyzing {i+1} of {len(soups)} soups with {j+1} of {len(sentences)} sentences........................"
280
  )
281
+ tasks[i][j] = sentence_similarity(sent, page_content)
282
  else:
283
  print(
284
  f"Analyzed {i+1} of soups (SOUP FAILED)........................"