alamin655 committed on
Commit
b3b914d
·
unverified ·
2 Parent(s): 9cded97 d14a5d4

Merge pull request #195 from neon-mmd/optimize-and-make-code-idiomatic-3

Browse files
.gitignore CHANGED
@@ -4,3 +4,4 @@ package-lock.json
4
  dump.rdb
5
  .vscode
6
  megalinter-reports/
 
 
4
  dump.rdb
5
  .vscode
6
  megalinter-reports/
7
+ dhat-heap.json
Cargo.lock CHANGED
@@ -830,6 +830,22 @@ dependencies = [
830
  "syn 1.0.109",
831
  ]
832
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
  [[package]]
834
  name = "digest"
835
  version = "0.10.7"
@@ -1738,6 +1754,16 @@ dependencies = [
1738
  "adler",
1739
  ]
1740
 
 
 
 
 
 
 
 
 
 
 
1741
  [[package]]
1742
  name = "mio"
1743
  version = "0.6.23"
@@ -2891,6 +2917,9 @@ name = "smallvec"
2891
  version = "1.11.0"
2892
  source = "registry+https://github.com/rust-lang/crates.io-index"
2893
  checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
 
 
 
2894
 
2895
  [[package]]
2896
  name = "socket2"
@@ -3032,6 +3061,16 @@ dependencies = [
3032
  "unicode-xid 0.2.4",
3033
  ]
3034
 
 
 
 
 
 
 
 
 
 
 
3035
  [[package]]
3036
  name = "tempfile"
3037
  version = "3.8.0"
@@ -3085,6 +3124,12 @@ dependencies = [
3085
  "syn 2.0.29",
3086
  ]
3087
 
 
 
 
 
 
 
3088
  [[package]]
3089
  name = "time"
3090
  version = "0.1.45"
@@ -3634,6 +3679,7 @@ dependencies = [
3634
  "actix-web",
3635
  "async-trait",
3636
  "criterion",
 
3637
  "env_logger",
3638
  "error-stack",
3639
  "fake-useragent",
@@ -3651,6 +3697,7 @@ dependencies = [
3651
  "scraper",
3652
  "serde",
3653
  "serde_json",
 
3654
  "tempfile",
3655
  "tokio 1.32.0",
3656
  ]
 
830
  "syn 1.0.109",
831
  ]
832
 
833
+ [[package]]
834
+ name = "dhat"
835
+ version = "0.3.2"
836
+ source = "registry+https://github.com/rust-lang/crates.io-index"
837
+ checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3"
838
+ dependencies = [
839
+ "backtrace",
840
+ "lazy_static",
841
+ "mintex",
842
+ "parking_lot 0.12.1",
843
+ "rustc-hash",
844
+ "serde",
845
+ "serde_json",
846
+ "thousands",
847
+ ]
848
+
849
  [[package]]
850
  name = "digest"
851
  version = "0.10.7"
 
1754
  "adler",
1755
  ]
1756
 
1757
+ [[package]]
1758
+ name = "mintex"
1759
+ version = "0.1.2"
1760
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1761
+ checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb"
1762
+ dependencies = [
1763
+ "once_cell",
1764
+ "sys-info",
1765
+ ]
1766
+
1767
  [[package]]
1768
  name = "mio"
1769
  version = "0.6.23"
 
2917
  version = "1.11.0"
2918
  source = "registry+https://github.com/rust-lang/crates.io-index"
2919
  checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
2920
+ dependencies = [
2921
+ "serde",
2922
+ ]
2923
 
2924
  [[package]]
2925
  name = "socket2"
 
3061
  "unicode-xid 0.2.4",
3062
  ]
3063
 
3064
+ [[package]]
3065
+ name = "sys-info"
3066
+ version = "0.9.1"
3067
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3068
+ checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
3069
+ dependencies = [
3070
+ "cc",
3071
+ "libc",
3072
+ ]
3073
+
3074
  [[package]]
3075
  name = "tempfile"
3076
  version = "3.8.0"
 
3124
  "syn 2.0.29",
3125
  ]
3126
 
3127
+ [[package]]
3128
+ name = "thousands"
3129
+ version = "0.2.0"
3130
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3131
+ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
3132
+
3133
  [[package]]
3134
  name = "time"
3135
  version = "0.1.45"
 
3679
  "actix-web",
3680
  "async-trait",
3681
  "criterion",
3682
+ "dhat",
3683
  "env_logger",
3684
  "error-stack",
3685
  "fake-useragent",
 
3697
  "scraper",
3698
  "serde",
3699
  "serde_json",
3700
+ "smallvec 1.11.0",
3701
  "tempfile",
3702
  "tokio 1.32.0",
3703
  ]
Cargo.toml CHANGED
@@ -8,7 +8,7 @@ license = "AGPL-3.0"
8
 
9
  [dependencies]
10
  reqwest = {version="0.11.20",features=["json"]}
11
- tokio = {version="1.32.0",features=["full"]}
12
  serde = {version="1.0.188",features=["derive"]}
13
  handlebars = { version = "4.3.7", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
@@ -28,6 +28,8 @@ error-stack = {version="0.4.0"}
28
  async-trait = {version="0.1.73"}
29
  regex = {version="1.9.4", features=["perf"]}
30
  futures = {version="0.3.28"}
 
 
31
 
32
  [dev-dependencies]
33
  rusty-hook = "^0.11.2"
@@ -48,7 +50,8 @@ rpath = false
48
 
49
  [profile.release]
50
  opt-level = 3
51
- debug = false
 
52
  split-debuginfo = '...'
53
  debug-assertions = false
54
  overflow-checks = false
@@ -58,3 +61,6 @@ incremental = false
58
  codegen-units = 16
59
  rpath = false
60
  strip = "debuginfo"
 
 
 
 
8
 
9
  [dependencies]
10
  reqwest = {version="0.11.20",features=["json"]}
11
+ tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
12
  serde = {version="1.0.188",features=["derive"]}
13
  handlebars = { version = "4.3.7", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
 
28
  async-trait = {version="0.1.73"}
29
  regex = {version="1.9.4", features=["perf"]}
30
  futures = {version="0.3.28"}
31
+ dhat = {version="0.3.2", optional = true}
32
+ smallvec = {version="1.11.0", features=["union", "serde"]}
33
 
34
  [dev-dependencies]
35
  rusty-hook = "^0.11.2"
 
50
 
51
  [profile.release]
52
  opt-level = 3
53
+ debug = false # This should only be commented out when testing with the dhat profiler
54
+ # debug = 1 # This should only be uncommented when testing with the dhat profiler
55
  split-debuginfo = '...'
56
  debug-assertions = false
57
  overflow-checks = false
 
61
  codegen-units = 16
62
  rpath = false
63
  strip = "debuginfo"
64
+
65
+ [features]
66
+ dhat-heap = ["dep:dhat"]
src/engines/duckduckgo.rs CHANGED
@@ -4,14 +4,14 @@
4
 
5
  use std::collections::HashMap;
6
 
7
- use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
8
  use scraper::{Html, Selector};
9
 
10
  use crate::results::aggregation_models::SearchResult;
11
 
12
  use super::engine_models::{EngineError, SearchEngine};
13
 
14
- use error_stack::{IntoReport, Report, Result, ResultExt};
15
 
16
  /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
17
  /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
39
  /// or HeaderMap fails to initialize.
40
  async fn results(
41
  &self,
42
- query: String,
43
  page: u32,
44
- user_agent: String,
45
  request_timeout: u8,
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
@@ -61,38 +61,19 @@ impl SearchEngine for DuckDuckGo {
61
  };
62
 
63
  // initializing HeaderMap and adding appropriate headers.
64
- let mut header_map = HeaderMap::new();
65
- header_map.insert(
66
- USER_AGENT,
67
- user_agent
68
- .parse()
69
- .into_report()
70
- .change_context(EngineError::UnexpectedError)?,
71
- );
72
- header_map.insert(
73
- REFERER,
74
- "https://google.com/"
75
- .parse()
76
- .into_report()
77
- .change_context(EngineError::UnexpectedError)?,
78
- );
79
- header_map.insert(
80
- CONTENT_TYPE,
81
- "application/x-www-form-urlencoded"
82
- .parse()
83
- .into_report()
84
- .change_context(EngineError::UnexpectedError)?,
85
- );
86
- header_map.insert(
87
- COOKIE,
88
- "kl=wt-wt"
89
- .parse()
90
- .into_report()
91
- .change_context(EngineError::UnexpectedError)?,
92
- );
93
 
94
  let document: Html = Html::parse_document(
95
- &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
96
  );
97
 
98
  let no_result: Selector = Selector::parse(".no-results")
@@ -126,8 +107,7 @@ impl SearchEngine for DuckDuckGo {
126
  .next()
127
  .unwrap()
128
  .inner_html()
129
- .trim()
130
- .to_string(),
131
  format!(
132
  "https://{}",
133
  result
@@ -136,15 +116,15 @@ impl SearchEngine for DuckDuckGo {
136
  .unwrap()
137
  .inner_html()
138
  .trim()
139
- ),
 
140
  result
141
  .select(&result_desc)
142
  .next()
143
  .unwrap()
144
  .inner_html()
145
- .trim()
146
- .to_string(),
147
- vec!["duckduckgo".to_string()],
148
  )
149
  })
150
  .map(|search_result| (search_result.url.clone(), search_result))
 
4
 
5
  use std::collections::HashMap;
6
 
7
+ use reqwest::header::HeaderMap;
8
  use scraper::{Html, Selector};
9
 
10
  use crate::results::aggregation_models::SearchResult;
11
 
12
  use super::engine_models::{EngineError, SearchEngine};
13
 
14
+ use error_stack::{Report, Result, ResultExt};
15
 
16
  /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
17
  /// reduce code duplication as well as allows to create vector of different search engines easily.
 
39
  /// or HeaderMap fails to initialize.
40
  async fn results(
41
  &self,
42
+ query: &str,
43
  page: u32,
44
+ user_agent: &str,
45
  request_timeout: u8,
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
 
61
  };
62
 
63
  // initializing HeaderMap and adding appropriate headers.
64
+ let header_map = HeaderMap::try_from(&HashMap::from([
65
+ ("USER_AGENT".to_string(), user_agent.to_string()),
66
+ ("REFERER".to_string(), "https://google.com/".to_string()),
67
+ (
68
+ "CONTENT_TYPE".to_string(),
69
+ "application/x-www-form-urlencoded".to_string(),
70
+ ),
71
+ ("COOKIE".to_string(), "kl=wt-wt".to_string()),
72
+ ]))
73
+ .change_context(EngineError::UnexpectedError)?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  let document: Html = Html::parse_document(
76
+ &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
77
  );
78
 
79
  let no_result: Selector = Selector::parse(".no-results")
 
107
  .next()
108
  .unwrap()
109
  .inner_html()
110
+ .trim(),
 
111
  format!(
112
  "https://{}",
113
  result
 
116
  .unwrap()
117
  .inner_html()
118
  .trim()
119
+ )
120
+ .as_str(),
121
  result
122
  .select(&result_desc)
123
  .next()
124
  .unwrap()
125
  .inner_html()
126
+ .trim(),
127
+ &["duckduckgo"],
 
128
  )
129
  })
130
  .map(|search_result| (search_result.url.clone(), search_result))
src/engines/engine_models.rs CHANGED
@@ -2,7 +2,7 @@
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
  use crate::results::aggregation_models::SearchResult;
5
- use error_stack::{IntoReport, Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used to handle engine-associated errors.
@@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
48
  pub trait SearchEngine: Sync + Send {
49
  async fn fetch_html_from_upstream(
50
  &self,
51
- url: String,
52
  header_map: reqwest::header::HeaderMap,
53
  request_timeout: u8,
54
  ) -> Result<String, EngineError> {
@@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
59
  .headers(header_map) // add spoofed headers to emulate human behavior
60
  .send()
61
  .await
62
- .into_report()
63
  .change_context(EngineError::RequestError)?
64
  .text()
65
  .await
66
- .into_report()
67
  .change_context(EngineError::RequestError)?)
68
  }
69
 
70
  async fn results(
71
  &self,
72
- query: String,
73
  page: u32,
74
- user_agent: String,
75
  request_timeout: u8,
76
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
77
  }
 
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
  use crate::results::aggregation_models::SearchResult;
5
+ use error_stack::{Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used to handle engine-associated errors.
 
48
  pub trait SearchEngine: Sync + Send {
49
  async fn fetch_html_from_upstream(
50
  &self,
51
+ url: &str,
52
  header_map: reqwest::header::HeaderMap,
53
  request_timeout: u8,
54
  ) -> Result<String, EngineError> {
 
59
  .headers(header_map) // add spoofed headers to emulate human behavior
60
  .send()
61
  .await
 
62
  .change_context(EngineError::RequestError)?
63
  .text()
64
  .await
 
65
  .change_context(EngineError::RequestError)?)
66
  }
67
 
68
  async fn results(
69
  &self,
70
+ query: &str,
71
  page: u32,
72
+ user_agent: &str,
73
  request_timeout: u8,
74
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
75
  }
src/engines/searx.rs CHANGED
@@ -2,14 +2,14 @@
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
- use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
6
  use scraper::{Html, Selector};
7
  use std::collections::HashMap;
8
 
9
  use crate::results::aggregation_models::SearchResult;
10
 
11
  use super::engine_models::{EngineError, SearchEngine};
12
- use error_stack::{IntoReport, Report, Result, ResultExt};
13
 
14
  /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
15
  /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -38,9 +38,9 @@ impl SearchEngine for Searx {
38
 
39
  async fn results(
40
  &self,
41
- query: String,
42
  page: u32,
43
- user_agent: String,
44
  request_timeout: u8,
45
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
46
  // Page number can be missing or empty string and so appropriate handling is required
@@ -51,32 +51,16 @@ impl SearchEngine for Searx {
51
  };
52
 
53
  // initializing headers and adding appropriate headers.
54
- let mut header_map = HeaderMap::new();
55
- header_map.insert(
56
- USER_AGENT,
57
- user_agent
58
- .parse()
59
- .into_report()
60
- .change_context(EngineError::UnexpectedError)?,
61
- );
62
- header_map.insert(
63
- REFERER,
64
- "https://google.com/"
65
- .parse()
66
- .into_report()
67
- .change_context(EngineError::UnexpectedError)?,
68
- );
69
- header_map.insert(
70
- CONTENT_TYPE,
71
- "application/x-www-form-urlencoded"
72
- .parse()
73
- .into_report()
74
- .change_context(EngineError::UnexpectedError)?,
75
- );
76
- header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
77
 
78
  let document: Html = Html::parse_document(
79
- &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
80
  );
81
 
82
  let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
@@ -117,24 +101,21 @@ impl SearchEngine for Searx {
117
  .next()
118
  .unwrap()
119
  .inner_html()
120
- .trim()
121
- .to_string(),
122
  result
123
  .select(&result_url)
124
  .next()
125
  .unwrap()
126
  .value()
127
  .attr("href")
128
- .unwrap()
129
- .to_string(),
130
  result
131
  .select(&result_desc)
132
  .next()
133
  .unwrap()
134
  .inner_html()
135
- .trim()
136
- .to_string(),
137
- vec!["searx".to_string()],
138
  )
139
  })
140
  .map(|search_result| (search_result.url.clone(), search_result))
 
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
+ use reqwest::header::HeaderMap;
6
  use scraper::{Html, Selector};
7
  use std::collections::HashMap;
8
 
9
  use crate::results::aggregation_models::SearchResult;
10
 
11
  use super::engine_models::{EngineError, SearchEngine};
12
+ use error_stack::{Report, Result, ResultExt};
13
 
14
  /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
15
  /// reduce code duplication as well as allows to create vector of different search engines easily.
 
38
 
39
  async fn results(
40
  &self,
41
+ query: &str,
42
  page: u32,
43
+ user_agent: &str,
44
  request_timeout: u8,
45
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
46
  // Page number can be missing or empty string and so appropriate handling is required
 
51
  };
52
 
53
  // initializing headers and adding appropriate headers.
54
+ let header_map = HeaderMap::try_from(&HashMap::from([
55
+ ("USER_AGENT".to_string(), user_agent.to_string()),
56
+ ("REFERER".to_string(), "https://google.com/".to_string()),
57
+ ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
58
+ ("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
59
+ ]))
60
+ .change_context(EngineError::UnexpectedError)?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  let document: Html = Html::parse_document(
63
+ &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
64
  );
65
 
66
  let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
 
101
  .next()
102
  .unwrap()
103
  .inner_html()
104
+ .trim(),
 
105
  result
106
  .select(&result_url)
107
  .next()
108
  .unwrap()
109
  .value()
110
  .attr("href")
111
+ .unwrap(),
 
112
  result
113
  .select(&result_desc)
114
  .next()
115
  .unwrap()
116
  .inner_html()
117
+ .trim(),
118
+ &["searx"],
 
119
  )
120
  })
121
  .map(|search_result| (search_result.url.clone(), search_result))
src/results/aggregation_models.rs CHANGED
@@ -2,6 +2,7 @@
2
  //! data scraped from the upstream search engines.
3
 
4
  use serde::{Deserialize, Serialize};
 
5
 
6
  use crate::{config::parser_models::Style, engines::engine_models::EngineError};
7
 
@@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
16
  /// (href url in html in simple words).
17
  /// * `description` - The description of the search result.
18
  /// * `engine` - The names of the upstream engines from which this result was provided.
19
- #[derive(Clone, Serialize, Deserialize)]
20
  #[serde(rename_all = "camelCase")]
21
  pub struct SearchResult {
22
  pub title: String,
23
  pub url: String,
24
  pub description: String,
25
- pub engine: Vec<String>,
26
  }
27
 
28
  impl SearchResult {
@@ -35,12 +36,12 @@ impl SearchResult {
35
  /// (href url in html in simple words).
36
  /// * `description` - The description of the search result.
37
  /// * `engine` - The names of the upstream engines from which this result was provided.
38
- pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
39
  SearchResult {
40
- title,
41
- url,
42
- description,
43
- engine,
44
  }
45
  }
46
 
@@ -49,8 +50,8 @@ impl SearchResult {
49
  /// # Arguments
50
  ///
51
  /// * `engine` - Takes an engine name provided as a String.
52
- pub fn add_engines(&mut self, engine: String) {
53
- self.engine.push(engine)
54
  }
55
 
56
  /// A function which returns the engine name stored from the struct as a string.
@@ -58,13 +59,12 @@ impl SearchResult {
58
  /// # Returns
59
  ///
60
  /// An engine name stored as a string from the struct.
61
- pub fn engine(self) -> String {
62
- self.engine.get(0).unwrap().to_string()
63
  }
64
  }
65
 
66
- ///
67
- #[derive(Serialize, Deserialize)]
68
  pub struct EngineErrorInfo {
69
  pub error: String,
70
  pub engine: String,
@@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
72
  }
73
 
74
  impl EngineErrorInfo {
75
- pub fn new(error: &EngineError, engine: String) -> Self {
76
  Self {
77
  error: match error {
78
- EngineError::RequestError => String::from("RequestError"),
79
- EngineError::EmptyResultSet => String::from("EmptyResultSet"),
80
- EngineError::UnexpectedError => String::from("UnexpectedError"),
81
  },
82
- engine,
83
  severity_color: match error {
84
- EngineError::RequestError => String::from("green"),
85
- EngineError::EmptyResultSet => String::from("blue"),
86
- EngineError::UnexpectedError => String::from("red"),
87
  },
88
  }
89
  }
@@ -108,7 +108,7 @@ pub struct SearchResults {
108
  pub results: Vec<SearchResult>,
109
  pub page_query: String,
110
  pub style: Style,
111
- pub engine_errors_info: Vec<EngineErrorInfo>,
112
  }
113
 
114
  impl SearchResults {
@@ -124,19 +124,19 @@ impl SearchResults {
124
  /// given search query.
125
  pub fn new(
126
  results: Vec<SearchResult>,
127
- page_query: String,
128
- engine_errors_info: Vec<EngineErrorInfo>,
129
  ) -> Self {
130
- SearchResults {
131
  results,
132
- page_query,
133
- style: Style::new("".to_string(), "".to_string()),
134
- engine_errors_info,
135
  }
136
  }
137
 
138
  /// A setter function to add website style to the return search results.
139
- pub fn add_style(&mut self, style: Style) {
140
- self.style = style;
141
  }
142
  }
 
2
  //! data scraped from the upstream search engines.
3
 
4
  use serde::{Deserialize, Serialize};
5
+ use smallvec::SmallVec;
6
 
7
  use crate::{config::parser_models::Style, engines::engine_models::EngineError};
8
 
 
17
  /// (href url in html in simple words).
18
  /// * `description` - The description of the search result.
19
  /// * `engine` - The names of the upstream engines from which this result was provided.
20
+ #[derive(Clone, Serialize, Deserialize, Debug)]
21
  #[serde(rename_all = "camelCase")]
22
  pub struct SearchResult {
23
  pub title: String,
24
  pub url: String,
25
  pub description: String,
26
+ pub engine: SmallVec<[String; 0]>,
27
  }
28
 
29
  impl SearchResult {
 
36
  /// (href url in html in simple words).
37
  /// * `description` - The description of the search result.
38
  /// * `engine` - The names of the upstream engines from which this result was provided.
39
+ pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
40
  SearchResult {
41
+ title: title.to_owned(),
42
+ url: url.to_owned(),
43
+ description: description.to_owned(),
44
+ engine: engine.iter().map(|name| name.to_string()).collect(),
45
  }
46
  }
47
 
 
50
  /// # Arguments
51
  ///
52
  /// * `engine` - Takes an engine name provided as a string slice.
53
+ pub fn add_engines(&mut self, engine: &str) {
54
+ self.engine.push(engine.to_owned())
55
  }
56
 
57
  /// A function which returns the engine name stored from the struct as a string.
 
59
  /// # Returns
60
  ///
61
  /// An engine name stored as a string from the struct.
62
+ pub fn engine(&mut self) -> String {
63
+ std::mem::take(&mut self.engine[0])
64
  }
65
  }
66
 
67
+ #[derive(Serialize, Deserialize, Clone)]
 
68
  pub struct EngineErrorInfo {
69
  pub error: String,
70
  pub engine: String,
 
72
  }
73
 
74
  impl EngineErrorInfo {
75
+ pub fn new(error: &EngineError, engine: &str) -> Self {
76
  Self {
77
  error: match error {
78
+ EngineError::RequestError => "RequestError".to_owned(),
79
+ EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
80
+ EngineError::UnexpectedError => "UnexpectedError".to_owned(),
81
  },
82
+ engine: engine.to_owned(),
83
  severity_color: match error {
84
+ EngineError::RequestError => "green".to_owned(),
85
+ EngineError::EmptyResultSet => "blue".to_owned(),
86
+ EngineError::UnexpectedError => "red".to_owned(),
87
  },
88
  }
89
  }
 
108
  pub results: Vec<SearchResult>,
109
  pub page_query: String,
110
  pub style: Style,
111
+ pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
112
  }
113
 
114
  impl SearchResults {
 
124
  /// given search query.
125
  pub fn new(
126
  results: Vec<SearchResult>,
127
+ page_query: &str,
128
+ engine_errors_info: &[EngineErrorInfo],
129
  ) -> Self {
130
+ Self {
131
  results,
132
+ page_query: page_query.to_owned(),
133
+ style: Style::default(),
134
+ engine_errors_info: SmallVec::from(engine_errors_info),
135
  }
136
  }
137
 
138
  /// A setter function to add website style to the return search results.
139
+ pub fn add_style(&mut self, style: &Style) {
140
+ self.style = style.to_owned();
141
  }
142
  }
src/results/aggregator.rs CHANGED
@@ -64,11 +64,11 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
64
  /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
65
  /// containing appropriate values.
66
  pub async fn aggregate(
67
- query: String,
68
  page: u32,
69
  random_delay: bool,
70
  debug: bool,
71
- upstream_search_engines: Vec<EngineHandler>,
72
  request_timeout: u8,
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
  let user_agent: &str = random_user_agent();
@@ -80,18 +80,18 @@ pub async fn aggregate(
80
  tokio::time::sleep(Duration::from_secs(delay_secs)).await;
81
  }
82
 
83
- let mut names: Vec<&str> = vec![];
84
 
85
  // create tasks for upstream result fetching
86
  let mut tasks: FutureVec = FutureVec::new();
87
 
88
  for engine_handler in upstream_search_engines {
89
- let (name, search_engine) = engine_handler.into_name_engine();
90
  names.push(name);
91
- let query: String = query.clone();
92
  tasks.push(tokio::spawn(async move {
93
  search_engine
94
- .results(query, page, user_agent.to_owned(), request_timeout)
95
  .await
96
  }));
97
  }
@@ -109,7 +109,7 @@ pub async fn aggregate(
109
  let mut result_map: HashMap<String, SearchResult> = HashMap::new();
110
  let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
111
 
112
- let mut handle_error = |error: Report<EngineError>, engine_name: String| {
113
  log::error!("Engine Error: {:?}", error);
114
  engine_errors_info.push(EngineErrorInfo::new(
115
  error.downcast_ref::<EngineError>().unwrap(),
@@ -119,7 +119,7 @@ pub async fn aggregate(
119
 
120
  for _ in 0..responses.len() {
121
  let response = responses.pop().unwrap();
122
- let engine = names.pop().unwrap().to_string();
123
 
124
  if result_map.is_empty() {
125
  match response {
@@ -127,7 +127,7 @@ pub async fn aggregate(
127
  result_map = results.clone();
128
  }
129
  Err(error) => {
130
- handle_error(error, engine);
131
  }
132
  }
133
  continue;
@@ -139,13 +139,13 @@ pub async fn aggregate(
139
  result_map
140
  .entry(key)
141
  .and_modify(|result| {
142
- result.add_engines(engine.clone());
143
  })
144
  .or_insert_with(|| -> SearchResult { value });
145
  });
146
  }
147
  Err(error) => {
148
- handle_error(error, engine);
149
  }
150
  }
151
  }
@@ -167,11 +167,7 @@ pub async fn aggregate(
167
 
168
  let results: Vec<SearchResult> = result_map.into_values().collect();
169
 
170
- Ok(SearchResults::new(
171
- results,
172
- query.to_string(),
173
- engine_errors_info,
174
- ))
175
  }
176
 
177
  /// Filters a map of search results using a list of regex patterns.
@@ -202,7 +198,10 @@ pub fn filter_with_lists(
202
  || re.is_match(&search_result.description.to_lowercase())
203
  {
204
  // If the search result matches the regex pattern, move it from the original map to the resultant map
205
- resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
 
 
 
206
  }
207
  }
208
  }
@@ -213,6 +212,7 @@ pub fn filter_with_lists(
213
  #[cfg(test)]
214
  mod tests {
215
  use super::*;
 
216
  use std::collections::HashMap;
217
  use std::io::Write;
218
  use tempfile::NamedTempFile;
@@ -222,22 +222,22 @@ mod tests {
222
  // Create a map of search results to filter
223
  let mut map_to_be_filtered = HashMap::new();
224
  map_to_be_filtered.insert(
225
- "https://www.example.com".to_string(),
226
  SearchResult {
227
- title: "Example Domain".to_string(),
228
- url: "https://www.example.com".to_string(),
229
  description: "This domain is for use in illustrative examples in documents."
230
- .to_string(),
231
- engine: vec!["Google".to_string(), "Bing".to_string()],
232
  },
233
  );
234
  map_to_be_filtered.insert(
235
- "https://www.rust-lang.org/".to_string(),
236
  SearchResult {
237
- title: "Rust Programming Language".to_string(),
238
- url: "https://www.rust-lang.org/".to_string(),
239
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
240
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
241
  },
242
  );
243
 
@@ -266,22 +266,22 @@ mod tests {
266
  fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
267
  let mut map_to_be_filtered = HashMap::new();
268
  map_to_be_filtered.insert(
269
- "https://www.example.com".to_string(),
270
  SearchResult {
271
- title: "Example Domain".to_string(),
272
- url: "https://www.example.com".to_string(),
273
  description: "This domain is for use in illustrative examples in documents."
274
- .to_string(),
275
- engine: vec!["Google".to_string(), "Bing".to_string()],
276
  },
277
  );
278
  map_to_be_filtered.insert(
279
- "https://www.rust-lang.org/".to_string(),
280
  SearchResult {
281
- title: "Rust Programming Language".to_string(),
282
- url: "https://www.rust-lang.org/".to_string(),
283
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
284
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
285
  },
286
  );
287
 
@@ -326,13 +326,13 @@ mod tests {
326
  fn test_filter_with_lists_invalid_regex() {
327
  let mut map_to_be_filtered = HashMap::new();
328
  map_to_be_filtered.insert(
329
- "https://www.example.com".to_string(),
330
  SearchResult {
331
- title: "Example Domain".to_string(),
332
- url: "https://www.example.com".to_string(),
333
  description: "This domain is for use in illustrative examples in documents."
334
- .to_string(),
335
- engine: vec!["Google".to_string(), "Bing".to_string()],
336
  },
337
  );
338
 
 
64
  /// function in either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
65
  /// containing appropriate values.
66
  pub async fn aggregate(
67
+ query: &str,
68
  page: u32,
69
  random_delay: bool,
70
  debug: bool,
71
+ upstream_search_engines: &[EngineHandler],
72
  request_timeout: u8,
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
  let user_agent: &str = random_user_agent();
 
80
  tokio::time::sleep(Duration::from_secs(delay_secs)).await;
81
  }
82
 
83
+ let mut names: Vec<&str> = Vec::with_capacity(0);
84
 
85
  // create tasks for upstream result fetching
86
  let mut tasks: FutureVec = FutureVec::new();
87
 
88
  for engine_handler in upstream_search_engines {
89
+ let (name, search_engine) = engine_handler.to_owned().into_name_engine();
90
  names.push(name);
91
+ let query: String = query.to_owned();
92
  tasks.push(tokio::spawn(async move {
93
  search_engine
94
+ .results(&query, page, user_agent, request_timeout)
95
  .await
96
  }));
97
  }
 
109
  let mut result_map: HashMap<String, SearchResult> = HashMap::new();
110
  let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
111
 
112
+ let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
113
  log::error!("Engine Error: {:?}", error);
114
  engine_errors_info.push(EngineErrorInfo::new(
115
  error.downcast_ref::<EngineError>().unwrap(),
 
119
 
120
  for _ in 0..responses.len() {
121
  let response = responses.pop().unwrap();
122
+ let engine = names.pop().unwrap();
123
 
124
  if result_map.is_empty() {
125
  match response {
 
127
  result_map = results.clone();
128
  }
129
  Err(error) => {
130
+ handle_error(&error, engine);
131
  }
132
  }
133
  continue;
 
139
  result_map
140
  .entry(key)
141
  .and_modify(|result| {
142
+ result.add_engines(engine);
143
  })
144
  .or_insert_with(|| -> SearchResult { value });
145
  });
146
  }
147
  Err(error) => {
148
+ handle_error(&error, engine);
149
  }
150
  }
151
  }
 
167
 
168
  let results: Vec<SearchResult> = result_map.into_values().collect();
169
 
170
+ Ok(SearchResults::new(results, query, &engine_errors_info))
 
 
 
 
171
  }
172
 
173
  /// Filters a map of search results using a list of regex patterns.
 
198
  || re.is_match(&search_result.description.to_lowercase())
199
  {
200
  // If the search result matches the regex pattern, move it from the original map to the resultant map
201
+ resultant_map.insert(
202
+ url.to_owned(),
203
+ map_to_be_filtered.remove(&url.to_owned()).unwrap(),
204
+ );
205
  }
206
  }
207
  }
 
212
  #[cfg(test)]
213
  mod tests {
214
  use super::*;
215
+ use smallvec::smallvec;
216
  use std::collections::HashMap;
217
  use std::io::Write;
218
  use tempfile::NamedTempFile;
 
222
  // Create a map of search results to filter
223
  let mut map_to_be_filtered = HashMap::new();
224
  map_to_be_filtered.insert(
225
+ "https://www.example.com".to_owned(),
226
  SearchResult {
227
+ title: "Example Domain".to_owned(),
228
+ url: "https://www.example.com".to_owned(),
229
  description: "This domain is for use in illustrative examples in documents."
230
+ .to_owned(),
231
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
232
  },
233
  );
234
  map_to_be_filtered.insert(
235
+ "https://www.rust-lang.org/".to_owned(),
236
  SearchResult {
237
+ title: "Rust Programming Language".to_owned(),
238
+ url: "https://www.rust-lang.org/".to_owned(),
239
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
240
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
241
  },
242
  );
243
 
 
266
  fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
267
  let mut map_to_be_filtered = HashMap::new();
268
  map_to_be_filtered.insert(
269
+ "https://www.example.com".to_owned(),
270
  SearchResult {
271
+ title: "Example Domain".to_owned(),
272
+ url: "https://www.example.com".to_owned(),
273
  description: "This domain is for use in illustrative examples in documents."
274
+ .to_owned(),
275
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
276
  },
277
  );
278
  map_to_be_filtered.insert(
279
+ "https://www.rust-lang.org/".to_owned(),
280
  SearchResult {
281
+ title: "Rust Programming Language".to_owned(),
282
+ url: "https://www.rust-lang.org/".to_owned(),
283
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
284
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
285
  },
286
  );
287
 
 
326
  fn test_filter_with_lists_invalid_regex() {
327
  let mut map_to_be_filtered = HashMap::new();
328
  map_to_be_filtered.insert(
329
+ "https://www.example.com".to_owned(),
330
  SearchResult {
331
+ title: "Example Domain".to_owned(),
332
+ url: "https://www.example.com".to_owned(),
333
  description: "This domain is for use in illustrative examples in documents."
334
+ .to_owned(),
335
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
336
  },
337
  );
338
 
src/server/routes.rs CHANGED
@@ -62,10 +62,10 @@ pub async fn not_found(
62
  /// * `engines` - It stores the upstream search engines selected by the user from the UI.
63
  #[allow(dead_code)]
64
  #[derive(Deserialize)]
65
- struct Cookie {
66
- theme: String,
67
- colorscheme: String,
68
- engines: Vec<String>,
69
  }
70
 
71
  /// Handles the route of search page of the `websurfx` meta search engine website and it takes
@@ -111,9 +111,9 @@ pub async fn search(
111
  page - 1
112
  ),
113
  &config,
114
- query.to_string(),
115
  page - 1,
116
- req.clone(),
117
  ),
118
  results(
119
  format!(
@@ -121,9 +121,9 @@ pub async fn search(
121
  config.binding_ip, config.port, query, page
122
  ),
123
  &config,
124
- query.to_string(),
125
  page,
126
- req.clone(),
127
  ),
128
  results(
129
  format!(
@@ -134,9 +134,9 @@ pub async fn search(
134
  page + 1
135
  ),
136
  &config,
137
- query.to_string(),
138
  page + 1,
139
- req.clone(),
140
  )
141
  );
142
 
@@ -154,9 +154,9 @@ pub async fn search(
154
  async fn results(
155
  url: String,
156
  config: &Config,
157
- query: String,
158
  page: u32,
159
- req: HttpRequest,
160
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
161
  //Initialize redis cache connection struct
162
  let mut redis_cache = RedisCache::new(&config.redis_url, 5).await?;
@@ -165,19 +165,17 @@ async fn results(
165
  // check whether the cached results were fetched successfully or an error occurred, and
166
  // handle the data accordingly.
167
  match cached_results_json {
168
- Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
169
  Err(_) => {
170
  // check if the cookie value is empty or not; if it is empty then use the
171
  // default selected upstream search engines from the config file, otherwise
172
  // parse the non-empty cookie and grab the user-selected engines from the
173
  // UI and use those.
174
- let mut results: crate::results::aggregation_models::SearchResults = match req
175
- .cookie("appCookie")
176
- {
177
  Some(cookie_value) => {
178
  let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
179
 
180
- let engines = cookie_value
181
  .engines
182
  .iter()
183
  .filter_map(|name| EngineHandler::new(name))
@@ -188,7 +186,7 @@ async fn results(
188
  page,
189
  config.aggregator.random_delay,
190
  config.debug,
191
- engines,
192
  config.request_timeout,
193
  )
194
  .await?
@@ -199,13 +197,14 @@ async fn results(
199
  page,
200
  config.aggregator.random_delay,
201
  config.debug,
202
- config.upstream_search_engines.clone(),
203
  config.request_timeout,
204
  )
205
  .await?
206
  }
207
  };
208
- results.add_style(config.style.clone());
 
209
  redis_cache
210
  .cache_results(&serde_json::to_string(&results)?, &url)
211
  .await?;
 
62
  /// * `engines` - It stores the upstream search engines selected by the user from the UI.
63
  #[allow(dead_code)]
64
  #[derive(Deserialize)]
65
+ struct Cookie<'a> {
66
+ theme: &'a str,
67
+ colorscheme: &'a str,
68
+ engines: Vec<&'a str>,
69
  }
70
 
71
  /// Handles the route of search page of the `websurfx` meta search engine website and it takes
 
111
  page - 1
112
  ),
113
  &config,
114
+ query,
115
  page - 1,
116
+ &req,
117
  ),
118
  results(
119
  format!(
 
121
  config.binding_ip, config.port, query, page
122
  ),
123
  &config,
124
+ query,
125
  page,
126
+ &req,
127
  ),
128
  results(
129
  format!(
 
134
  page + 1
135
  ),
136
  &config,
137
+ query,
138
  page + 1,
139
+ &req,
140
  )
141
  );
142
 
 
154
  async fn results(
155
  url: String,
156
  config: &Config,
157
+ query: &str,
158
  page: u32,
159
+ req: &HttpRequest,
160
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
161
  //Initialize redis cache connection struct
162
  let mut redis_cache = RedisCache::new(&config.redis_url, 5).await?;
 
165
  // check whether the cached results were fetched successfully or an error occurred, and
166
  // handle the data accordingly.
167
  match cached_results_json {
168
+ Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
169
  Err(_) => {
170
  // check if the cookie value is empty or not; if it is empty then use the
171
  // default selected upstream search engines from the config file, otherwise
172
  // parse the non-empty cookie and grab the user-selected engines from the
173
  // UI and use those.
174
+ let mut results: SearchResults = match req.cookie("appCookie") {
 
 
175
  Some(cookie_value) => {
176
  let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
177
 
178
+ let engines: Vec<EngineHandler> = cookie_value
179
  .engines
180
  .iter()
181
  .filter_map(|name| EngineHandler::new(name))
 
186
  page,
187
  config.aggregator.random_delay,
188
  config.debug,
189
+ &engines,
190
  config.request_timeout,
191
  )
192
  .await?
 
197
  page,
198
  config.aggregator.random_delay,
199
  config.debug,
200
+ &config.upstream_search_engines,
201
  config.request_timeout,
202
  )
203
  .await?
204
  }
205
  };
206
+
207
+ results.add_style(&config.style);
208
  redis_cache
209
  .cache_results(&serde_json::to_string(&results)?, &url)
210
  .await?;