Spaces:
Running
Running
Update utils.py
Browse files
utils.py
CHANGED
@@ -10,7 +10,7 @@ import numpy as np
|
|
10 |
import asyncio
|
11 |
import nltk
|
12 |
from sentence_transformers import SentenceTransformer, util
|
13 |
-
import
|
14 |
|
15 |
nltk.download('punkt')
|
16 |
|
@@ -193,33 +193,38 @@ async def parallel_scrap(urls):
|
|
193 |
class TimeoutError(Exception):
|
194 |
pass
|
195 |
|
196 |
-
|
197 |
-
def signal_handler(signum, frame):
|
198 |
-
raise TimeoutError("Function timed out")
|
199 |
|
200 |
|
201 |
def matchingScore(sentence, content):
|
202 |
-
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
|
205 |
try:
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
content = removePunc(content)
|
210 |
-
if sentence in content:
|
211 |
-
return 1
|
212 |
-
else:
|
213 |
-
n = 5
|
214 |
-
ngrams = getQueries(sentence, n)
|
215 |
-
print("ngrams done.......")
|
216 |
-
if len(ngrams) == 0:
|
217 |
-
return 0
|
218 |
-
matched = [x for x in ngrams if " ".join(x) in content]
|
219 |
-
signal.alarm(0) # Cancel the alarm if calculation completes before timeout
|
220 |
-
return len(matched) / len(ngrams)
|
221 |
except TimeoutError:
|
222 |
-
return 0
|
223 |
|
224 |
|
225 |
async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
|
|
|
10 |
import asyncio
|
11 |
import nltk
|
12 |
from sentence_transformers import SentenceTransformer, util
|
13 |
+
import threading
|
14 |
|
15 |
nltk.download('punkt')
|
16 |
|
|
|
193 |
class TimeoutError(Exception):
    """Signal that a function call ran past its time limit.

    Note: inside this module the name shadows Python's built-in
    ``TimeoutError``; this class derives directly from ``Exception``.
    """
|
195 |
|
|
|
|
|
|
|
196 |
|
197 |
|
198 |
def matchingScore(sentence, content):
    """Score how much of ``sentence`` can be found inside ``content``.

    Returns 1 when the sentence occurs verbatim in the content, either as
    given or after both strings have been passed through ``removePunc``.
    Otherwise the cleaned sentence is handed to ``getQueries`` with n=5
    (presumably producing word n-grams -- confirm against ``getQueries``)
    and the result is the fraction of returned items whose space-joined
    text occurs in the cleaned content. Returns 0 when ``getQueries``
    yields nothing.
    """
    # Cheapest check first: an exact hit on the raw text.
    if sentence in content:
        return 1

    cleaned_sentence = removePunc(sentence)
    cleaned_content = removePunc(content)
    if cleaned_sentence in cleaned_content:
        return 1

    ngram_size = 5
    queries = getQueries(cleaned_sentence, ngram_size)
    print("ngrams done.......")
    if len(queries) == 0:
        return 0

    # Count the queries whose joined text appears in the cleaned content.
    hit_count = 0
    for query in queries:
        if " ".join(query) in cleaned_content:
            hit_count += 1
    return hit_count / len(queries)
|
213 |
+
|
214 |
+
|
215 |
+
def matchingScoreWithTimeout(sentence, content, timeout=10):
    """Run ``matchingScore`` but give up after ``timeout`` seconds.

    The scoring runs in a daemon worker thread and the caller waits on it
    with ``Thread.join(timeout)``. The earlier approach used a
    ``threading.Timer`` whose callback raised ``TimeoutError``; that
    exception was raised in the timer's own thread and never reached the
    caller, so the time limit was never enforced.

    Args:
        sentence: Text to look for.
        content: Text to search in.
        timeout: Maximum number of seconds to wait for the score
            (default 10, the previously hard-coded value). ``None`` waits
            indefinitely.

    Returns:
        The value returned by ``matchingScore``, or 0 if the computation
        did not finish in time or raised ``TimeoutError``.

    Raises:
        Any other exception raised by ``matchingScore`` is re-raised in the
        calling thread.
    """
    outcome = {}

    def run_scoring():
        # Capture either the score or the failure so the caller can act on it.
        try:
            outcome["score"] = matchingScore(sentence, content)
        except Exception as exc:
            outcome["error"] = exc

    # daemon=True: an overrunning computation must not block interpreter exit.
    worker = threading.Thread(target=run_scoring, daemon=True)
    worker.start()
    worker.join(timeout)

    if worker.is_alive():
        # Python threads cannot be killed; the worker keeps running in the
        # background, but the caller is released with the fallback score.
        return 0

    error = outcome.get("error")
    if error is not None:
        if isinstance(error, TimeoutError):
            return 0
        raise error
    return outcome["score"]
|
228 |
|
229 |
|
230 |
async def matchingScoreAsync(sentences, content, content_idx, ScoreArray):
|