Spaces:
Runtime error
Runtime error
as-cle-bert
committed on
Commit
•
24c6451
1
Parent(s):
dfbb358
Create websearching.py
Browse files- websearching.py +25 -0
websearching.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from googlesearch import search
|
2 |
+
from rake_nltk import Rake
|
3 |
+
from boilerpy3 import extractors
|
4 |
+
import json
|
5 |
+
|
6 |
+
# Shared boilerpy3 article extractor; web_search() uses it to fetch the
# content of each result URL.
extractor = extractors.ArticleExtractor()
|
7 |
+
# Shared RAKE keyword extractor; web_search() feeds it each page's content
# and reads back the ranked phrases.
r = Rake()
|
8 |
+
|
9 |
+
# Function to perform web search
|
10 |
+
def web_search(query, num_results=5):
    """Search the web for *query* and return per-URL keywords as JSON.

    Each distinct URL yielded by the search is fetched with the module-level
    ``extractor``, its content is passed to the module-level ``r`` (RAKE), and
    the first 20 phrases from ``r.get_ranked_phrases()`` are recorded.

    Args:
        query: Search string passed to ``search``.
        num_results: Value forwarded to ``search`` as ``num_results``.

    Returns:
        A JSON object string mapping each successfully processed URL to
        ``{"keywords": [...]}``. URLs whose fetch or keyword extraction
        raised are omitted; the result is ``"{}"`` if none succeeded.
    """
    # Local import so this function stays self-contained without touching
    # the file's import block.
    import logging

    logger = logging.getLogger(__name__)

    # dict.fromkeys drops duplicate URLs while keeping the order in which the
    # search yielded them; iterating a set() would scramble that order and
    # make the output key order vary between runs.
    urls = list(dict.fromkeys(search(query, num_results=num_results)))

    jsonlike = {}
    for url in urls:
        try:
            content = extractor.get_content_from_url(url)
            r.extract_keywords_from_text(content)
            keywords = r.get_ranked_phrases()[:20]
        except Exception:
            # Best-effort by design: one unreachable or unparsable page must
            # not abort the whole search, but record which URL was skipped.
            logger.warning(
                "Skipping %s: content/keyword extraction failed",
                url,
                exc_info=True,
            )
            continue
        jsonlike[url] = {"keywords": keywords}

    return json.dumps(jsonlike)
|