aliasgerovs committed on
Commit
6e86f65
·
1 Parent(s): 6897d4d

Updated matching score logic.

Browse files
Files changed (1) hide show
  1. utils.py +25 -16
utils.py CHANGED
@@ -218,29 +218,38 @@ def matchingScoreWithTimeout(sentence, content):
218
  timer = threading.Timer(2, timeout_handler) # Set a timer for 2 seconds
219
  timer.start()
220
  try:
221
- score = matchingScore(sentence, content)
222
  timer.cancel() # Cancel the timer if calculation completes before timeout
223
  return score
224
  except TimeoutError:
225
  return 0
226
 
227
 
228
- async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  content = removePunc(content)
230
  for j, sentence in enumerate(sentences):
231
- sentence = removePunc(sentence)
232
- if sentence in content:
233
- ScoreArray[content_idx][j] = 1
234
- else:
235
- n = 5
236
- ngrams = getQueries(sentence, n)
237
- if len(ngrams) == 0:
238
- return 0
239
- matched = [x for x in ngrams if " ".join(x) in content]
240
- ScoreArray[content_idx][j] = len(matched) / len(ngrams)
241
- print(
242
- f"Analyzed {content_idx+1} of soups (SOUP SUCCEEDED)........................"
243
- )
244
  return ScoreArray
245
 
246
 
@@ -269,7 +278,7 @@ async def parallel_analyze_2(soups, sentences, ScoreArray):
269
  print(
270
  f"Analyzing {i+1} of {len(soups)} soups with {j+1} of {len(sentences)} sentences........................"
271
  )
272
- tasks[i][j] = matchingScore(sent, page_content)
273
  else:
274
  print(
275
  f"Analyzed {i+1} of soups (SOUP FAILED)........................"
 
218
  timer = threading.Timer(2, timeout_handler) # Set a timer for 2 seconds
219
  timer.start()
220
  try:
221
+ score = sentence_similarity(sentence, content)
222
  timer.cancel() # Cancel the timer if calculation completes before timeout
223
  return score
224
  except TimeoutError:
225
  return 0
226
 
227
 
228
+ # async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
229
+ # content = removePunc(content)
230
+ # for j, sentence in enumerate(sentences):
231
+ # sentence = removePunc(sentence)
232
+ # if sentence in content:
233
+ # ScoreArray[content_idx][j] = 1
234
+ # else:
235
+ # n = 5
236
+ # ngrams = getQueries(sentence, n)
237
+ # if len(ngrams) == 0:
238
+ # return 0
239
+ # matched = [x for x in ngrams if " ".join(x) in content]
240
+ # ScoreArray[content_idx][j] = len(matched) / len(ngrams)
241
+ # print(
242
+ # f"Analyzed {content_idx+1} of soups (SOUP SUCCEEDED)........................"
243
+ # )
244
+ # return ScoreArray
245
+
246
+ async def matchingScoreAsync(sentences, content, content_idx, ScoreArray, model, util):
247
  content = removePunc(content)
248
  for j, sentence in enumerate(sentences):
249
+ sentence = removePunc(sentence)
250
+ similarity_score = sentence_similarity(sentence, content, model, util)
251
+ ScoreArray[content_idx][j] = similarity_score
252
+ print(f"Analyzed {content_idx+1} of contents (CONTENT ANALYZED)........................")
 
 
 
 
 
 
 
 
 
253
  return ScoreArray
254
 
255
 
 
278
  print(
279
  f"Analyzing {i+1} of {len(soups)} soups with {j+1} of {len(sentences)} sentences........................"
280
  )
281
+ tasks[i][j] = sentence_similarity(sent, page_content)
282
  else:
283
  print(
284
  f"Analyzed {i+1} of soups (SOUP FAILED)........................"