NCTCMumbai committed on
Commit
adc862d
1 Parent(s): 62008fe

Update middlewares/search_client.py

Browse files
Files changed (1) hide show
  1. middlewares/search_client.py +9 -20
middlewares/search_client.py CHANGED
@@ -2,7 +2,7 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import re
4
  import concurrent.futures
5
- from GoogleNews import GoogleNews
6
 
7
 
8
  class SearchClient:
@@ -43,25 +43,13 @@ class SearchClient:
43
  print(f"Error fetching data from {link}: {e}")
44
  return results
45
 
46
- def _google_search(self, start_date,end_date, query, n_crawl):
47
- keywords_lst=query.split(',')
48
- for keyword in keywords_lst:
49
- googlenews = GoogleNews(start=start_date,end=end_date)
50
- googlenews = GoogleNews(lang='en', region='IN')
51
- googlenews.get_news(keyword)
52
- results=googlenews.results
53
- texts = googlenews.get_texts() # List of news texts
54
- links = googlenews.get_links() # List of news links
55
 
56
- if len(texts)<n_crawl:
57
- data = {'Keyword': [keyword]*len(texts), 'Links': links, 'Text': texts}
58
- else:
59
- data = {'Keyword': [keyword]*(n_crawl), 'Links': links[:n_crawl], 'Text': texts[:n_crawl]}
60
 
61
- results=data['Links']
62
- corrected_urls = ["https://" + url for url in results]
63
-
64
- text_results = self._fetch_text_from_links(corrected_urls)
65
  return text_results
66
 
67
  def _bing_search(self, query, n_crawl):
@@ -87,8 +75,9 @@ class SearchClient:
87
  else:
88
  return "Invalid vendor"
89
 
90
- def search_google(self,start_date,end_date, query, n_crawl):
91
  if self.vendor == "google":
92
- return self._google_search(start_date,end_date,query, n_crawl)
93
  else:
94
  return "Invalid vendor"
 
 
2
  from bs4 import BeautifulSoup
3
  import re
4
  import concurrent.futures
5
+ from googlesearch import search
6
 
7
 
8
  class SearchClient:
 
43
  print(f"Error fetching data from {link}: {e}")
44
  return results
45
 
46
+ def _google_search(self,query, n_crawl):
47
+ search_results = search(query, stop=n_crawl, lang='en', country='IN')
 
 
 
 
 
 
 
48
 
49
+ # Convert the search results to a list
50
+ links = list(search_results)
 
 
51
 
52
+ text_results = self._fetch_text_from_links(links)
 
 
 
53
  return text_results
54
 
55
  def _bing_search(self, query, n_crawl):
 
75
  else:
76
  return "Invalid vendor"
77
 
78
+ def search_google(self, query, n_crawl):
79
  if self.vendor == "google":
80
+ return self._google_search(query, n_crawl)
81
  else:
82
  return "Invalid vendor"
83
+