neon_arch committed on
Commit
e1e426c
1 Parent(s): f11d35f

✨ feat(engine): provide startpage search engine (#314)

Browse files
src/engines/mod.rs CHANGED
@@ -7,3 +7,4 @@ pub mod brave;
7
  pub mod duckduckgo;
8
  pub mod search_result_parser;
9
  pub mod searx;
 
 
7
  pub mod duckduckgo;
8
  pub mod search_result_parser;
9
  pub mod searx;
10
+ pub mod startpage;
src/engines/startpage.rs ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! The `startpage` module handles the scraping of results from the startpage search engine
2
+ //! by querying the upstream startpage search engine with user provided query and with a page
3
+ //! number if provided.
4
+
5
+ use std::collections::HashMap;
6
+
7
+ use reqwest::header::HeaderMap;
8
+ use reqwest::Client;
9
+ use scraper::Html;
10
+
11
+ use crate::models::aggregation_models::SearchResult;
12
+
13
+ use crate::models::engine_models::{EngineError, SearchEngine};
14
+
15
+ use error_stack::{Report, Result, ResultExt};
16
+
17
+ use super::search_result_parser::SearchResultParser;
18
+
19
+ /// A new Startpage engine type defined in-order to implement the `SearchEngine` trait which allows to
20
+ /// reduce code duplication as well as allows to create vector of different search engines easily.
21
+ pub struct Startpage {
22
+ /// The parser, used to interpret the search result.
23
+ parser: SearchResultParser,
24
+ }
25
+
26
+ impl Startpage {
27
+ /// Creates the Startpage parser.
28
+ pub fn new() -> Result<Self, EngineError> {
29
+ Ok(Self {
30
+ parser: SearchResultParser::new(
31
+ ".no-results",
32
+ ".w-gl__result__main",
33
+ ".w-gl__result-second-line-container>.w-gl__result-title>h3",
34
+ ".w-gl__result-url",
35
+ ".w-gl__description",
36
+ )?,
37
+ })
38
+ }
39
+ }
40
+
41
+ #[async_trait::async_trait]
42
+ impl SearchEngine for Startpage {
43
+ async fn results(
44
+ &self,
45
+ query: &str,
46
+ page: u32,
47
+ user_agent: &str,
48
+ client: &Client,
49
+ _safe_search: u8,
50
+ ) -> Result<HashMap<String, SearchResult>, EngineError> {
51
+ // Page number can be missing or empty string and so appropriate handling is required
52
+ // so that upstream server recieves valid page number.
53
+ let url: String = match page {
54
+ 1 | 0 => {
55
+ format!("https://startpage.com/do/dsearch?q={query}&num=10&start=0")
56
+ }
57
+ _ => {
58
+ format!(
59
+ "https://startpage.com/do/dsearch?q={query}&num=10&start={}",
60
+ page * 10,
61
+ )
62
+ }
63
+ };
64
+
65
+ // initializing HeaderMap and adding appropriate headers.
66
+ let header_map = HeaderMap::try_from(&HashMap::from([
67
+ ("USER_AGENT".to_string(), user_agent.to_string()),
68
+ ("REFERER".to_string(), "https://google.com/".to_string()),
69
+ (
70
+ "CONTENT_TYPE".to_string(),
71
+ "application/x-www-form-urlencoded".to_string(),
72
+ ),
73
+ ("COOKIE".to_string(), "preferences=connect_to_serverEEE0N1Ndate_timeEEEworldN1Ndisable_family_filterEEE0N1Ndisable_open_in_new_windowEEE0N1Nenable_post_methodEEE1N1Nenable_proxy_safety_suggestEEE1N1Nenable_stay_controlEEE0N1Ninstant_answersEEE1N1Nlang_homepageEEEs%2Fnight%2FenN1NlanguageEEEenglishN1Nlanguage_uiEEEenglishN1Nnum_of_resultsEEE10N1Nsearch_results_regionEEEallN1NsuggestionsEEE1N1Nwt_unitEEEcelsius".to_string()),
74
+ ]))
75
+ .change_context(EngineError::UnexpectedError)?;
76
+
77
+ let document: Html = Html::parse_document(
78
+ &Startpage::fetch_html_from_upstream(self, &url, header_map, client).await?,
79
+ );
80
+
81
+ if self.parser.parse_for_no_results(&document).next().is_some() {
82
+ return Err(Report::new(EngineError::EmptyResultSet));
83
+ }
84
+
85
+ // scrape all the results from the html
86
+ self.parser
87
+ .parse_for_results(&document, |title, url, desc| {
88
+ Some(SearchResult::new(
89
+ title.inner_html().trim(),
90
+ &format!("{}", url.inner_html().trim()),
91
+ desc.inner_html().trim(),
92
+ &["startpage"],
93
+ ))
94
+ })
95
+ }
96
+ }
src/models/engine_models.rs CHANGED
@@ -154,6 +154,10 @@ impl EngineHandler {
154
  let engine = crate::engines::brave::Brave::new()?;
155
  ("brave", Box::new(engine))
156
  }
 
 
 
 
157
  _ => {
158
  return Err(Report::from(EngineError::NoSuchEngineFound(
159
  engine_name.to_string(),
 
154
  let engine = crate::engines::brave::Brave::new()?;
155
  ("brave", Box::new(engine))
156
  }
157
+ "startpage" => {
158
+ let engine = crate::engines::startpage::Startpage::new()?;
159
+ ("startpage", Box::new(engine))
160
+ }
161
  _ => {
162
  return Err(Report::from(EngineError::NoSuchEngineFound(
163
  engine_name.to_string(),