neon_arch committed on
Commit
42686b9
·
unverified ·
2 Parent(s): e8935bc ebb9e9e

Merge pull request #104 from xffxff/error_stack

Browse files

improve error handling by using `error-stack` crate

Cargo.lock CHANGED
@@ -268,6 +268,12 @@ dependencies = [
268
  "alloc-no-stdlib",
269
  ]
270
 
 
 
 
 
 
 
271
  [[package]]
272
  name = "askama_escape"
273
  version = "0.10.3"
@@ -733,6 +739,16 @@ dependencies = [
733
  "libc",
734
  ]
735
 
 
 
 
 
 
 
 
 
 
 
736
  [[package]]
737
  name = "failure"
738
  version = "0.1.8"
@@ -3373,6 +3389,7 @@ dependencies = [
3373
  "actix-files",
3374
  "actix-web",
3375
  "env_logger",
 
3376
  "fake-useragent",
3377
  "handlebars",
3378
  "log",
 
268
  "alloc-no-stdlib",
269
  ]
270
 
271
+ [[package]]
272
+ name = "anyhow"
273
+ version = "1.0.71"
274
+ source = "registry+https://github.com/rust-lang/crates.io-index"
275
+ checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
276
+
277
  [[package]]
278
  name = "askama_escape"
279
  version = "0.10.3"
 
739
  "libc",
740
  ]
741
 
742
+ [[package]]
743
+ name = "error-stack"
744
+ version = "0.3.1"
745
+ source = "registry+https://github.com/rust-lang/crates.io-index"
746
+ checksum = "5f00447f331c7f726db5b8532ebc9163519eed03c6d7c8b73c90b3ff5646ac85"
747
+ dependencies = [
748
+ "anyhow",
749
+ "rustc_version 0.4.0",
750
+ ]
751
+
752
  [[package]]
753
  name = "failure"
754
  version = "0.1.8"
 
3389
  "actix-files",
3390
  "actix-web",
3391
  "env_logger",
3392
+ "error-stack",
3393
  "fake-useragent",
3394
  "handlebars",
3395
  "log",
Cargo.toml CHANGED
@@ -23,6 +23,7 @@ redis = {version="*"}
23
  md5 = {version="*"}
24
  rand={version="*"}
25
  once_cell = {version="*"}
 
26
 
27
  [dev-dependencies]
28
  rusty-hook = "^0.11.2"
 
23
  md5 = {version="*"}
24
  rand={version="*"}
25
  once_cell = {version="*"}
26
+ error-stack = "0.3.1"
27
 
28
  [dev-dependencies]
29
  rusty-hook = "^0.11.2"
src/engines/duckduckgo.rs CHANGED
@@ -9,7 +9,9 @@ use scraper::{Html, Selector};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
11
 
12
- use super::engine_models::EngineErrorKind;
 
 
13
 
14
  /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
15
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
@@ -32,7 +34,7 @@ pub async fn results(
32
  query: &str,
33
  page: u32,
34
  user_agent: &str,
35
- ) -> Result<HashMap<String, RawSearchResult>, EngineErrorKind> {
36
  // Page number can be missing or empty string and so appropriate handling is required
37
  // so that upstream server receives valid page number.
38
  let url: String = match page {
@@ -51,33 +53,71 @@ pub async fn results(
51
 
52
  // initializing HeaderMap and adding appropriate headers.
53
  let mut header_map = HeaderMap::new();
54
- header_map.insert(USER_AGENT, user_agent.parse()?);
55
- header_map.insert(REFERER, "https://google.com/".parse()?);
56
- header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
57
- header_map.insert(COOKIE, "kl=wt-wt".parse()?);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  // fetch the html from upstream duckduckgo engine
60
  let results: String = reqwest::Client::new()
61
  .get(url)
62
- .timeout(Duration::from_secs(30))
63
  .headers(header_map) // add spoofed headers to emulate human behaviour
64
  .send()
65
- .await?
 
 
66
  .text()
67
- .await?;
 
 
68
 
69
  let document: Html = Html::parse_document(&results);
70
 
71
- let no_result: Selector = Selector::parse(".no-results")?;
 
 
72
 
73
  if document.select(&no_result).next().is_some() {
74
- return Err(EngineErrorKind::EmptyResultSet);
75
  }
76
 
77
- let results: Selector = Selector::parse(".result")?;
78
- let result_title: Selector = Selector::parse(".result__a")?;
79
- let result_url: Selector = Selector::parse(".result__url")?;
80
- let result_desc: Selector = Selector::parse(".result__snippet")?;
 
 
 
 
 
 
 
 
81
 
82
  // scrape all the results from the html
83
  Ok(document
 
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
11
 
12
+ use super::engine_models::EngineError;
13
+
14
+ use error_stack::{IntoReport, Report, Result, ResultExt};
15
 
16
  /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
17
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
 
34
  query: &str,
35
  page: u32,
36
  user_agent: &str,
37
+ ) -> Result<HashMap<String, RawSearchResult>, EngineError> {
38
  // Page number can be missing or empty string and so appropriate handling is required
39
  // so that upstream server receives valid page number.
40
  let url: String = match page {
 
53
 
54
  // initializing HeaderMap and adding appropriate headers.
55
  let mut header_map = HeaderMap::new();
56
+ header_map.insert(
57
+ USER_AGENT,
58
+ user_agent
59
+ .parse()
60
+ .into_report()
61
+ .change_context(EngineError::UnexpectedError)?,
62
+ );
63
+ header_map.insert(
64
+ REFERER,
65
+ "https://google.com/"
66
+ .parse()
67
+ .into_report()
68
+ .change_context(EngineError::UnexpectedError)?,
69
+ );
70
+ header_map.insert(
71
+ CONTENT_TYPE,
72
+ "application/x-www-form-urlencoded"
73
+ .parse()
74
+ .into_report()
75
+ .change_context(EngineError::UnexpectedError)?,
76
+ );
77
+ header_map.insert(
78
+ COOKIE,
79
+ "kl=wt-wt"
80
+ .parse()
81
+ .into_report()
82
+ .change_context(EngineError::UnexpectedError)?,
83
+ );
84
 
85
  // fetch the html from upstream duckduckgo engine
86
  let results: String = reqwest::Client::new()
87
  .get(url)
88
+ .timeout(Duration::from_secs(5))
89
  .headers(header_map) // add spoofed headers to emulate human behaviour
90
  .send()
91
+ .await
92
+ .into_report()
93
+ .change_context(EngineError::RequestError)?
94
  .text()
95
+ .await
96
+ .into_report()
97
+ .change_context(EngineError::RequestError)?;
98
 
99
  let document: Html = Html::parse_document(&results);
100
 
101
+ let no_result: Selector = Selector::parse(".no-results")
102
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
103
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".no-results"))?;
104
 
105
  if document.select(&no_result).next().is_some() {
106
+ return Err(Report::new(EngineError::EmptyResultSet));
107
  }
108
 
109
+ let results: Selector = Selector::parse(".result")
110
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
111
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
112
+ let result_title: Selector = Selector::parse(".result__a")
113
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
114
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__a"))?;
115
+ let result_url: Selector = Selector::parse(".result__url")
116
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
117
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__url"))?;
118
+ let result_desc: Selector = Selector::parse(".result__snippet")
119
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
120
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__snippet"))?;
121
 
122
  // scrape all the results from the html
123
  Ok(document
src/engines/engine_models.rs CHANGED
@@ -1,8 +1,8 @@
1
  //! This module provides the error enum to handle different errors associated while requesting data from
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
- use reqwest::header::InvalidHeaderValue;
5
- use scraper::error::SelectorErrorKind;
6
 
7
  /// A custom error type used for handling engine-associated errors.
8
  ///
@@ -15,73 +15,29 @@ use scraper::error::SelectorErrorKind;
15
  /// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
16
  /// all other errors occurring within the code handling the `upstream search engines`.
17
  #[derive(Debug)]
18
- pub enum EngineErrorKind {
19
- RequestError(reqwest::Error),
20
  EmptyResultSet,
21
- UnexpectedError {
22
- message: String,
23
- source: Option<Box<dyn std::error::Error>>,
24
- },
25
  }
26
 
27
- /// Implementing `Display` trait to make errors writable on the stdout and also providing/passing the
28
- /// appropriate errors that should be written to the stdout when this error is raised/encountered.
29
- impl std::fmt::Display for EngineErrorKind {
30
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31
  match self {
32
- EngineErrorKind::RequestError(request_error) => {
33
- write!(f, "Request error: {}", request_error)
34
- }
35
- EngineErrorKind::EmptyResultSet => {
36
  write!(f, "The upstream search engine returned an empty result set")
37
  }
38
- EngineErrorKind::UnexpectedError { message, source } => {
39
- write!(f, "Unexpected error: {}", message)?;
40
- if let Some(source) = source {
41
- write!(f, "\nCaused by: {}", source)?;
42
- }
43
- Ok(())
 
 
44
  }
45
  }
46
  }
47
  }
48
 
49
- /// Implementing `Error` trait to make the the `EngineErrorKind` enum an error type and
50
- /// mapping `ReqwestErrors` to `RequestError` and `UnexpectedError` errors to all other unexpected
51
- /// errors ocurring within the code handling the upstream search engines.
52
- impl std::error::Error for EngineErrorKind {
53
- fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
54
- match self {
55
- EngineErrorKind::RequestError(request_error) => Some(request_error),
56
- EngineErrorKind::UnexpectedError { source, .. } => source.as_deref().map(|s| s),
57
- _ => None,
58
- }
59
- }
60
- }
61
-
62
- /// Implementing `From` trait to map the `SelectorErrorKind` to `UnexpectedError` variant.
63
- impl From<SelectorErrorKind<'_>> for EngineErrorKind {
64
- fn from(err: SelectorErrorKind<'_>) -> Self {
65
- Self::UnexpectedError {
66
- message: err.to_string(),
67
- source: None,
68
- }
69
- }
70
- }
71
-
72
- /// Implementing `From` trait to map the `InvalidHeaderValue` to `UnexpectedError` variant.
73
- impl From<InvalidHeaderValue> for EngineErrorKind {
74
- fn from(err: InvalidHeaderValue) -> Self {
75
- Self::UnexpectedError {
76
- message: err.to_string(),
77
- source: Some(Box::new(err)),
78
- }
79
- }
80
- }
81
-
82
- /// Implementing `From` trait to map all `reqwest::Error` to `UnexpectedError` variant.
83
- impl From<reqwest::Error> for EngineErrorKind {
84
- fn from(err: reqwest::Error) -> Self {
85
- Self::RequestError(err)
86
- }
87
- }
 
1
  //! This module provides the error enum to handle different errors associated while requesting data from
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
+ use error_stack::Context;
5
+ use std::fmt;
6
 
7
  /// A custom error type used for handling engine-associated errors.
8
  ///
 
15
  /// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
16
  /// all other errors occurring within the code handling the `upstream search engines`.
17
  #[derive(Debug)]
18
+ pub enum EngineError {
 
19
  EmptyResultSet,
20
+ RequestError,
21
+ UnexpectedError,
 
 
22
  }
23
 
24
+ impl fmt::Display for EngineError {
25
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 
 
26
  match self {
27
+ EngineError::EmptyResultSet => {
 
 
 
28
  write!(f, "The upstream search engine returned an empty result set")
29
  }
30
+ EngineError::RequestError => {
31
+ write!(
32
+ f,
33
+ "Error occurred while requesting data from upstream search engine"
34
+ )
35
+ }
36
+ EngineError::UnexpectedError => {
37
+ write!(f, "An unexpected error occurred while processing the data")
38
  }
39
  }
40
  }
41
  }
42
 
43
+ impl Context for EngineError {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/engines/searx.rs CHANGED
@@ -8,7 +8,8 @@ use std::collections::HashMap;
8
 
9
  use crate::search_results_handler::aggregation_models::RawSearchResult;
10
 
11
- use super::engine_models::EngineErrorKind;
 
12
 
13
  /// This function scrapes results from the upstream engine searx and puts all the scraped
14
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
@@ -31,43 +32,76 @@ pub async fn results(
31
  query: &str,
32
  page: u32,
33
  user_agent: &str,
34
- ) -> Result<HashMap<String, RawSearchResult>, EngineErrorKind> {
35
  // Page number can be missing or empty string and so appropriate handling is required
36
  // so that upstream server receives valid page number.
37
  let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
38
 
39
  // initializing headers and adding appropriate headers.
40
  let mut header_map = HeaderMap::new();
41
- header_map.insert(USER_AGENT, user_agent.parse()?);
42
- header_map.insert(REFERER, "https://google.com/".parse()?);
43
- header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
44
- header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  // fetch the html from upstream searx instance engine
47
  let results: String = reqwest::Client::new()
48
  .get(url)
49
  .headers(header_map) // add spoofed headers to emulate human behaviours.
50
  .send()
51
- .await?
 
 
52
  .text()
53
- .await?;
 
 
54
 
55
  let document: Html = Html::parse_document(&results);
56
 
57
- let no_result: Selector = Selector::parse("#urls>.dialog-error>p")?;
 
 
58
 
59
  if let Some(no_result_msg) = document.select(&no_result).nth(1) {
60
  if no_result_msg.inner_html()
61
  == "we didn't find any results. Please use another query or search in more categories"
62
  {
63
- return Err(EngineErrorKind::EmptyResultSet);
64
  }
65
  }
66
 
67
- let results: Selector = Selector::parse(".result")?;
68
- let result_title: Selector = Selector::parse("h3>a")?;
69
- let result_url: Selector = Selector::parse("h3>a")?;
70
- let result_desc: Selector = Selector::parse(".content")?;
 
 
 
 
 
 
 
 
 
71
 
72
  // scrape all the results from the html
73
  Ok(document
 
8
 
9
  use crate::search_results_handler::aggregation_models::RawSearchResult;
10
 
11
+ use super::engine_models::EngineError;
12
+ use error_stack::{IntoReport, Report, Result, ResultExt};
13
 
14
  /// This function scrapes results from the upstream engine searx and puts all the scraped
15
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
 
32
  query: &str,
33
  page: u32,
34
  user_agent: &str,
35
+ ) -> Result<HashMap<String, RawSearchResult>, EngineError> {
36
  // Page number can be missing or empty string and so appropriate handling is required
37
  // so that upstream server receives valid page number.
38
  let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
39
 
40
  // initializing headers and adding appropriate headers.
41
  let mut header_map = HeaderMap::new();
42
+ header_map.insert(
43
+ USER_AGENT,
44
+ user_agent
45
+ .parse()
46
+ .into_report()
47
+ .change_context(EngineError::UnexpectedError)?,
48
+ );
49
+ header_map.insert(
50
+ REFERER,
51
+ "https://google.com/"
52
+ .parse()
53
+ .into_report()
54
+ .change_context(EngineError::UnexpectedError)?,
55
+ );
56
+ header_map.insert(
57
+ CONTENT_TYPE,
58
+ "application/x-www-form-urlencoded"
59
+ .parse()
60
+ .into_report()
61
+ .change_context(EngineError::UnexpectedError)?,
62
+ );
63
+ header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
64
 
65
  // fetch the html from upstream searx instance engine
66
  let results: String = reqwest::Client::new()
67
  .get(url)
68
  .headers(header_map) // add spoofed headers to emulate human behaviours.
69
  .send()
70
+ .await
71
+ .into_report()
72
+ .change_context(EngineError::RequestError)?
73
  .text()
74
+ .await
75
+ .into_report()
76
+ .change_context(EngineError::RequestError)?;
77
 
78
  let document: Html = Html::parse_document(&results);
79
 
80
+ let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
81
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
82
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", "#urls>.dialog-error>p"))?;
83
 
84
  if let Some(no_result_msg) = document.select(&no_result).nth(1) {
85
  if no_result_msg.inner_html()
86
  == "we didn't find any results. Please use another query or search in more categories"
87
  {
88
+ return Err(Report::new(EngineError::EmptyResultSet));
89
  }
90
  }
91
 
92
+ let results: Selector = Selector::parse(".result")
93
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
94
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
95
+ let result_title: Selector = Selector::parse("h3>a")
96
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
97
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
98
+ let result_url: Selector = Selector::parse("h3>a")
99
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
100
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
101
+
102
+ let result_desc: Selector = Selector::parse(".content")
103
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
104
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".content"))?;
105
 
106
  // scrape all the results from the html
107
  Ok(document
src/search_results_handler/aggregator.rs CHANGED
@@ -58,8 +58,19 @@ pub async fn aggregate(
58
  searx::results(query, page, &user_agent)
59
  );
60
 
61
- let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?;
62
- let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?;
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  result_map.extend(ddg_map_results);
65
 
 
58
  searx::results(query, page, &user_agent)
59
  );
60
 
61
+ let ddg_map_results = ddg_map_results.unwrap_or_else(|e| {
62
+ if debug {
63
+ log::error!("Error fetching results from DuckDuckGo: {:?}", e);
64
+ }
65
+ HashMap::new()
66
+ });
67
+
68
+ let searx_map_results = searx_map_results.unwrap_or_else(|e| {
69
+ if debug {
70
+ log::error!("Error fetching results from Searx: {:?}", e);
71
+ }
72
+ HashMap::new()
73
+ });
74
 
75
  result_map.extend(ddg_map_results);
76