Hansimov committed on
Commit
f150f6b
1 Parent(s): ef3de03

:gem: [Feature] SearchResultsExtractor: related questions

Browse files
documents/search_results_extractor.py CHANGED
@@ -27,17 +27,21 @@ class SearchResultsExtractor:
27
  print(
28
  f"{title}\n" f" - {site}\n" f" - {link}\n" f" - {abstract}\n" f"\n"
29
  )
 
30
 
31
  def extract_related_questions(self):
32
- related_questions = self.soup.find_all("div", class_="related-question-pair")
33
- for question in related_questions:
 
 
 
34
  print(question)
35
- # print(question.find("a")["href"])
36
- # print(question.find("a").text)
37
 
38
  def extract(self, html_path):
39
  self.load_html(html_path)
40
  self.extract_search_results()
 
41
 
42
 
43
  if __name__ == "__main__":
 
27
  print(
28
  f"{title}\n" f" - {site}\n" f" - {link}\n" f" - {abstract}\n" f"\n"
29
  )
30
+ print(len(search_result_elements))
31
 
32
  def extract_related_questions(self):
33
+ related_question_elements = self.soup.find_all(
34
+ "div", class_="related-question-pair"
35
+ )
36
+ for question_element in related_question_elements:
37
+ question = question_element.find("span").text.strip()
38
  print(question)
39
+ print(len(related_question_elements))
 
40
 
41
  def extract(self, html_path):
42
  self.load_html(html_path)
43
  self.extract_search_results()
44
+ self.extract_related_questions()
45
 
46
 
47
  if __name__ == "__main__":