File size: 815 Bytes
24c6451
 
78319c1
24c6451
 
 
78319c1
 
 
 
24c6451
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import logging

import nltk
from boilerpy3 import extractors
from googlesearch import search
from rake_nltk import Rake


# Fetch the NLTK "punkt" and "stopwords" resources at import time.
# NOTE(review): presumably these back Rake's tokenization and stopword
# filtering, so they should be available before Rake() is built below — confirm.
nltk.download("punkt")
nltk.download("stopwords")

# Module-level instances reused by every web_search() call:
# `extractor` pulls the article text from a URL, `r` ranks key phrases in it.
extractor = extractors.ArticleExtractor()
r = Rake()

# Function to perform web search
def web_search(query: str, num_results: int = 5) -> str:
    """Search the web for *query* and return ranked key phrases per result.

    Each distinct URL yielded by ``search`` is fetched with the module-level
    ``extractor``; the extracted text is run through the module-level Rake
    instance ``r`` and the first 20 ranked phrases are kept.

    Args:
        query: Search string passed to ``search``.
        num_results: Forwarded to ``search`` as ``num_results``.

    Returns:
        A JSON string encoding an object of the form
        ``{url: {"keywords": [phrase, ...]}}``. URLs appear in the order the
        search yielded them; URLs whose fetch or keyword extraction raised
        are omitted.

    Exceptions raised by the ``search`` call itself are not caught here.
    """
    logger = logging.getLogger(__name__)

    # dict.fromkeys de-duplicates while keeping first-seen (ranking) order;
    # a set would shuffle the results differently from run to run.
    urls = list(dict.fromkeys(search(query, num_results=num_results)))

    results = {}
    for url in urls:
        try:
            content = extractor.get_content_from_url(url)
            r.extract_keywords_from_text(content)
            keywords = r.get_ranked_phrases()[:20]
        except Exception as exc:
            # Best-effort: one unreachable or unparsable page must not abort
            # the whole search, but leave a trace of what was skipped.
            logger.warning("Skipping %s: %s", url, exc)
            continue
        results[url] = {"keywords": keywords}

    return json.dumps(results)