neon_arch commited on
Commit
2b7e28c
2 Parent(s): 8384560 c37a9b4

Merge branch 'rolling' into improve-and-fix-settings-page

Browse files
Cargo.lock CHANGED
@@ -268,6 +268,12 @@ dependencies = [
268
  "alloc-no-stdlib",
269
  ]
270
 
 
 
 
 
 
 
271
  [[package]]
272
  name = "askama_escape"
273
  version = "0.10.3"
@@ -739,6 +745,16 @@ dependencies = [
739
  "libc",
740
  ]
741
 
 
 
 
 
 
 
 
 
 
 
742
  [[package]]
743
  name = "failure"
744
  version = "0.1.8"
@@ -3370,6 +3386,7 @@ dependencies = [
3370
  "actix-files",
3371
  "actix-web",
3372
  "env_logger",
 
3373
  "fake-useragent",
3374
  "handlebars",
3375
  "log",
 
268
  "alloc-no-stdlib",
269
  ]
270
 
271
+ [[package]]
272
+ name = "anyhow"
273
+ version = "1.0.71"
274
+ source = "registry+https://github.com/rust-lang/crates.io-index"
275
+ checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
276
+
277
  [[package]]
278
  name = "askama_escape"
279
  version = "0.10.3"
 
745
  "libc",
746
  ]
747
 
748
+ [[package]]
749
+ name = "error-stack"
750
+ version = "0.3.1"
751
+ source = "registry+https://github.com/rust-lang/crates.io-index"
752
+ checksum = "5f00447f331c7f726db5b8532ebc9163519eed03c6d7c8b73c90b3ff5646ac85"
753
+ dependencies = [
754
+ "anyhow",
755
+ "rustc_version 0.4.0",
756
+ ]
757
+
758
  [[package]]
759
  name = "failure"
760
  version = "0.1.8"
 
3386
  "actix-files",
3387
  "actix-web",
3388
  "env_logger",
3389
+ "error-stack",
3390
  "fake-useragent",
3391
  "handlebars",
3392
  "log",
Cargo.toml CHANGED
@@ -2,8 +2,9 @@
2
  name = "websurfx"
3
  version = "0.13.0"
4
  edition = "2021"
5
-
6
- # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
7
 
8
  [dependencies]
9
  reqwest = {version="*",features=["json"]}
@@ -22,6 +23,32 @@ redis = {version="*"}
22
  md5 = {version="*"}
23
  rand={version="*"}
24
  once_cell = {version="*"}
 
25
 
26
  [dev-dependencies]
27
  rusty-hook = "^0.11.2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  name = "websurfx"
3
  version = "0.13.0"
4
  edition = "2021"
5
+ description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
+ repository = "https://github.com/neon-mmd/websurfx"
7
+ license = "AGPL-3.0"
8
 
9
  [dependencies]
10
  reqwest = {version="*",features=["json"]}
 
23
  md5 = {version="*"}
24
  rand={version="*"}
25
  once_cell = {version="*"}
26
+ error-stack = {version="0.3.1"}
27
 
28
  [dev-dependencies]
29
  rusty-hook = "^0.11.2"
30
+
31
+ [profile.dev]
32
+ opt-level = 0
33
+ debug = true
34
+ split-debuginfo = '...'
35
+ debug-assertions = true
36
+ overflow-checks = true
37
+ lto = false
38
+ panic = 'unwind'
39
+ incremental = true
40
+ codegen-units = 256
41
+ rpath = false
42
+
43
+ [profile.release]
44
+ opt-level = 3
45
+ debug = false
46
+ split-debuginfo = '...'
47
+ debug-assertions = false
48
+ overflow-checks = false
49
+ lto = 'thin'
50
+ panic = 'unwind'
51
+ incremental = false
52
+ codegen-units = 16
53
+ rpath = false
54
+ strip = "debuginfo"
README.md CHANGED
@@ -1,4 +1,4 @@
1
- <h1 align="center">
2
  <img src="./images/websurfx_logo.png" alt="websurfx logo" align="center" />
3
  </h1>
4
  <p align="center">
@@ -39,7 +39,7 @@
39
  >meta search engine</a
40
  >
41
  (pronounced as websurface or web-surface /wɛbˈsɜːrfəs/.) written in Rust. It
42
- provides a quick and secure search experience while maintaining user
43
  privacy.</i
44
  >
45
  </p>
@@ -72,7 +72,7 @@
72
 
73
  # Preview 🔭
74
 
75
- ## Main Page
76
 
77
  <img align="center" src="./images/main_page.png" />
78
 
@@ -88,7 +88,7 @@
88
 
89
  # Features 🚀
90
 
91
- - 🎨 High level of customizability with nine color schemes provided by default with a simple theme, also supporting the creation of your custom themes and colorschemes very quickly and easily
92
  - 🔐 Fast, private, and secure
93
  - 🆓 100% free and open source
94
  - 💨 Ad-free and clean results
@@ -116,7 +116,7 @@ redis-server --port 8082 &
116
  Once you have started the server, open your preferred web browser and navigate to <http://127.0.0.1:8080> to start using Websurfx.
117
 
118
  > **Warning**
119
- > Please be aware that the project is still in the testing phase and is not ready for production use.
120
 
121
  **[⬆️ Back to Top](#--)**
122
 
@@ -132,14 +132,14 @@ Websurfx is configured through the config.lua file, located at `websurfx/config.
132
 
133
  > For full theming and customization instructions, see: [**Theming**](./docs/theming.md)
134
 
135
- Websurfx comes with several themes and color schemes by default, which you can apply and edit through the config file. Supports custom themes and color schemes using CSS, allowing you to develop your own unique-looking website.
136
 
137
  **[⬆️ Back to Top](#--)**
138
 
139
  # Multi-Language Support 🌍
140
 
141
  > **Note**
142
- > Currently, we do not support other languages, but in the future, we will start accepting contributions regarding language support because we believe that language should not be a barrier to entry.
143
 
144
  **[⬆️ Back to Top](#--)**
145
 
@@ -153,15 +153,15 @@ At present, we only support x86_64 architecture systems, but we would love to ha
153
 
154
  ## Why Websurfx?
155
 
156
- The primary purpose of the Websurfx project is to create a fast, secure, and privacy-focused meta-search engine. While there are numerous meta-search engines available, not all of them guarantee the security of their search engine, which is critical for maintaining privacy. Memory flaws, for example, can expose private or sensitive information, which is never a good thing. Also, there is the added problem of Spam, ads, and unorganic results which most engines don't have the full-proof answer to it till now but with Websurfx I finally put a full stop to this problem, also, Rust is used to write Websurfx, which ensures memory safety and removes such issues. Many meta-search engines also lack important features like advanced picture search, which is required by many graphic designers, content providers, and others. Websurfx attempts to improve the user experience by providing these and other features, such as proper NSFW blocking and Micro-apps or Quick results (like providing a calculator, currency exchanges, etc in the search results).
157
 
158
  ## Why AGPLv3?
159
 
160
- Websurfx is distributed under the **AGPLv3** license to keep the source code open and transparent. This helps to keep malware, telemetry, and other dangerous programs out of the project. **AGPLv3** is a strong copyleft license that ensures the software's source code, including any modifications or improvements made to the code, remains open and available to everyone.
161
 
162
  ## Why Rust?
163
 
164
- Rust was chosen as the programming language for Websurfx because of its memory safety features, which can help prevent vulnerabilities and make the codebase more secure. Rust is also faster than C++, which contributes to Websurfx's speed and responsiveness. Furthermore, the Rust ownership and borrowing system enables secure concurrency and thread safety in the program.
165
 
166
  **[⬆️ Back to Top](#--)**
167
 
@@ -175,14 +175,14 @@ We are looking for more willing contributors to help grow this project. For more
175
 
176
  > For full details and other ways you can help out, see: [**Contributing**]()
177
 
178
- If you use Websurfx and would like to contribute to its development, that would be fantastic! Contributions of any size or type are always welcome, and we will properly acknowledge your efforts.
179
 
180
  Several areas that we need a bit of help with at the moment are:
181
  - **Better and more color schemes**: Help fix color schemes and add other famous color schemes.
182
  - **Improve evasion code for bot detection** - Help improve code related to evading IP blocking and emulating human behaviors located in everyone's engine file.
183
  - **Logo** - Help create a logo for the project and website.
184
  - **Docker Support** - Help write a Docker Compose file for the project.
185
- - Submit a PR to add a new feature, fix a bug, update the docs, add a theme, widget, or something else.
186
  - Star Websurfx on GitHub.
187
 
188
  **[⬆️ Back to Top](#--)**
@@ -196,13 +196,13 @@ Several areas that we need a bit of help with at the moment are:
196
 
197
  # Roadmap 🛣️
198
 
199
- > Coming soon!! 🙂.
200
 
201
  **[⬆️ Back to Top](#--)**
202
 
203
  # Contributing 🙋
204
 
205
- Contributions are welcome from anyone. It doesn\'t matter who you are; you can still contribute to the project in your own way.
206
 
207
  ## Not a developer but still want to contribute?
208
 
 
1
+ <h1 align="center">
2
  <img src="./images/websurfx_logo.png" alt="websurfx logo" align="center" />
3
  </h1>
4
  <p align="center">
 
39
  >meta search engine</a
40
  >
41
  (pronounced as websurface or web-surface /wɛbˈsɜːrfəs/.) written in Rust. It
42
+ provides a quick and secure search experience while completely respecting user
43
  privacy.</i
44
  >
45
  </p>
 
72
 
73
  # Preview 🔭
74
 
75
+ ## Home Page
76
 
77
  <img align="center" src="./images/main_page.png" />
78
 
 
88
 
89
  # Features 🚀
90
 
91
+ - 🎨 Make Websurfx uniquely yours with nine color schemes provided by default. It also supports creation of custom themes and color schemes in a quick and easy way, so unleash your creativity!
92
  - 🔐 Fast, private, and secure
93
  - 🆓 100% free and open source
94
  - 💨 Ad-free and clean results
 
116
  Once you have started the server, open your preferred web browser and navigate to <http://127.0.0.1:8080> to start using Websurfx.
117
 
118
  > **Warning**
119
+ > This project is still in the testing phase and is **not** ready for production use.
120
 
121
  **[⬆️ Back to Top](#--)**
122
 
 
132
 
133
  > For full theming and customization instructions, see: [**Theming**](./docs/theming.md)
134
 
135
+ Websurfx comes loaded with several themes and color schemes, which you can apply and edit through the config file. It also supports custom themes and color schemes using CSS, allowing you to make it truly yours.
136
 
137
  **[⬆️ Back to Top](#--)**
138
 
139
  # Multi-Language Support 🌍
140
 
141
  > **Note**
142
+ > Currently, we do not support other languages but we will start accepting contributions regarding language support in the future. We believe language should never be a barrier to entry.
143
 
144
  **[⬆️ Back to Top](#--)**
145
 
 
153
 
154
  ## Why Websurfx?
155
 
156
+ The primary purpose of the Websurfx project is to create a fast, secure, and privacy-focused meta-search engine. There are numerous meta-search engines available, but not all guarantee the security of their search engine, which is critical for maintaining privacy. Memory flaws, for example, can expose private or sensitive information, which is understandably bad. There is also the added problem of spam, ads, and inorganic results which most engines don't have a fool-proof answer to. Until now. With Websurfx I finally put a full stop to this problem. Websurfx is based on Rust, which ensures memory safety and removes such issues. Many meta-search engines also lack important features like advanced picture search, required by graphic designers, content providers, and others. Websurfx improves the user experience by providing these and other features, such as proper NSFW blocking and Micro-apps or Quick Results (providing a calculator, currency exchanges, etc. in the search results).
157
 
158
  ## Why AGPLv3?
159
 
160
+ Websurfx is distributed under the **AGPLv3** license to keep the source code open and transparent. This helps keep malware, telemetry, and other dangers out of the project. **AGPLv3** is a strong copyleft license that ensures the software's source code, including any modifications or improvements made to the code, remains open and available to everyone.
161
 
162
  ## Why Rust?
163
 
164
+ Websurfx is based on Rust due to its memory safety features, which prevent vulnerabilities and make the codebase more secure. Rust is also faster than C++, contributing to Websurfx's speed and responsiveness. Finally, the Rust ownership and borrowing system enables secure concurrency and thread safety in the program.
165
 
166
  **[⬆️ Back to Top](#--)**
167
 
 
175
 
176
  > For full details and other ways you can help out, see: [**Contributing**]()
177
 
178
+ If you use Websurfx and would like to contribute to its development, we're glad to have you on board! Contributions of any size or type are always welcome, and we will always acknowledge your efforts.
179
 
180
  Several areas that we need a bit of help with at the moment are:
181
  - **Better and more color schemes**: Help fix color schemes and add other famous color schemes.
182
  - **Improve evasion code for bot detection** - Help improve code related to evading IP blocking and emulating human behaviors located in everyone's engine file.
183
  - **Logo** - Help create a logo for the project and website.
184
  - **Docker Support** - Help write a Docker Compose file for the project.
185
+ - Submit a PR to add a new feature, fix a bug, update the docs, add a theme, widget, or anything else.
186
  - Star Websurfx on GitHub.
187
 
188
  **[⬆️ Back to Top](#--)**
 
196
 
197
  # Roadmap 🛣️
198
 
199
+ > Coming soon! 🙂.
200
 
201
  **[⬆️ Back to Top](#--)**
202
 
203
  # Contributing 🙋
204
 
205
+ Contributions are welcome from anyone. It doesn't matter who you are; you can still contribute to the project in your own way.
206
 
207
  ## Not a developer but still want to contribute?
208
 
public/static/index.js CHANGED
@@ -1,10 +1,25 @@
1
- let search_box = document.querySelector('input')
2
- function search_web() {
3
- window.location = `search?q=${search_box.value}`
 
 
 
 
 
 
 
 
 
 
 
4
  }
5
 
6
- search_box.addEventListener('keyup', (e) => {
7
- if (e.keyCode === 13) {
8
- search_web()
 
 
 
 
9
  }
10
- })
 
1
+ /**
2
+ * Selects the input element for the search box
3
+ * @type {HTMLInputElement}
4
+ */
5
+ const searchBox = document.querySelector('input');
6
+
7
+ /**
8
+ * Redirects the user to the search results page with the query parameter
9
+ */
10
+ function searchWeb() {
11
+ const query = searchBox.value.trim();
12
+ if (query) {
13
+ window.location.href = `search?q=${encodeURIComponent(query)}`;
14
+ }
15
  }
16
 
17
+ /**
18
+ * Listens for the 'Enter' key press event on the search box and calls the searchWeb function
19
+ * @param {KeyboardEvent} e - The keyboard event object
20
+ */
21
+ searchBox.addEventListener('keyup', (e) => {
22
+ if (e.key === 'Enter') {
23
+ searchWeb();
24
  }
25
+ });
public/static/pagination.js CHANGED
@@ -1,26 +1,39 @@
 
 
 
 
1
  function navigate_forward() {
2
- const url = new URL(window.location)
3
- const searchParams = url.searchParams
4
 
5
- let q = searchParams.get('q')
6
- let page = searchParams.get('page')
7
 
8
- if (page === null) {
9
- page = 2
10
- window.location = `${url.origin}${url.pathname}?q=${q}&page=${page}`
11
  } else {
12
- window.location = `${url.origin}${url.pathname}?q=${q}&page=${++page}`
13
  }
 
 
14
  }
15
 
 
 
 
 
16
  function navigate_backward() {
17
- const url = new URL(window.location)
18
- const searchParams = url.searchParams
19
 
20
- let q = searchParams.get('q')
21
- let page = searchParams.get('page')
22
 
23
- if (page !== null && page > 1) {
24
- window.location = `${url.origin}${url.pathname}?q=${q}&page=${--page}`
 
 
25
  }
 
 
26
  }
 
1
+ /**
2
+ * Navigates to the next page by incrementing the current page number in the URL query parameters.
3
+ * @returns {void}
4
+ */
5
  function navigate_forward() {
6
+ const url = new URL(window.location);
7
+ const searchParams = url.searchParams;
8
 
9
+ let q = searchParams.get('q');
10
+ let page = parseInt(searchParams.get('page'));
11
 
12
+ if (isNaN(page)) {
13
+ page = 1;
 
14
  } else {
15
+ page++;
16
  }
17
+
18
+ window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
19
  }
20
 
21
+ /**
22
+ * Navigates to the previous page by decrementing the current page number in the URL query parameters.
23
+ * @returns {void}
24
+ */
25
  function navigate_backward() {
26
+ const url = new URL(window.location);
27
+ const searchParams = url.searchParams;
28
 
29
+ let q = searchParams.get('q');
30
+ let page = parseInt(searchParams.get('page'));
31
 
32
+ if (isNaN(page)) {
33
+ page = 1;
34
+ } else if (page > 1) {
35
+ page--;
36
  }
37
+
38
+ window.location.href = `${url.origin}${url.pathname}?q=${encodeURIComponent(q)}&page=${page}`;
39
  }
src/config_parser/parser.rs CHANGED
@@ -118,7 +118,7 @@ impl Config {
118
  {
119
  Ok("./websurfx/config.lua".to_string())
120
  } else {
121
- Err(format!("Config file not found!!").into())
122
  }
123
  }
124
  }
 
118
  {
119
  Ok("./websurfx/config.lua".to_string())
120
  } else {
121
+ Err("Config file not found!!".to_string().into())
122
  }
123
  }
124
  }
src/engines/duckduckgo.rs CHANGED
@@ -2,13 +2,17 @@
2
  //! by querying the upstream duckduckgo search engine with user provided query and with a page
3
  //! number if provided.
4
 
5
- use std::collections::HashMap;
6
 
7
  use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
8
  use scraper::{Html, Selector};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
11
 
 
 
 
 
12
  /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
13
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
14
  /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
@@ -22,14 +26,15 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
22
  ///
23
  /// # Errors
24
  ///
25
- /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
26
- /// reach the above `upstream search engine` page and also returns error if the scraping
27
- /// selector fails to initialize"
 
28
  pub async fn results(
29
  query: &str,
30
  page: u32,
31
  user_agent: &str,
32
- ) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
33
  // Page number can be missing or empty string and so appropriate handling is required
34
  // so that the upstream server receives a valid page number.
35
  let url: String = match page {
@@ -48,26 +53,71 @@ pub async fn results(
48
 
49
  // initializing HeaderMap and adding appropriate headers.
50
  let mut header_map = HeaderMap::new();
51
- header_map.insert(USER_AGENT, user_agent.parse()?);
52
- header_map.insert(REFERER, "https://google.com/".parse()?);
53
- header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
54
- header_map.insert(COOKIE, "kl=wt-wt".parse()?);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  // fetch the html from upstream duckduckgo engine
57
- // TODO: Write better error handling code to handle no results case.
58
  let results: String = reqwest::Client::new()
59
  .get(url)
 
60
  .headers(header_map) // add spoofed headers to emulate human behaviour
61
  .send()
62
- .await?
 
 
63
  .text()
64
- .await?;
 
 
65
 
66
  let document: Html = Html::parse_document(&results);
67
- let results: Selector = Selector::parse(".result")?;
68
- let result_title: Selector = Selector::parse(".result__a")?;
69
- let result_url: Selector = Selector::parse(".result__url")?;
70
- let result_desc: Selector = Selector::parse(".result__snippet")?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  // scrape all the results from the html
73
  Ok(document
 
2
  //! by querying the upstream duckduckgo search engine with user provided query and with a page
3
  //! number if provided.
4
 
5
+ use std::{collections::HashMap, time::Duration};
6
 
7
  use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
8
  use scraper::{Html, Selector};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
11
 
12
+ use super::engine_models::EngineError;
13
+
14
+ use error_stack::{IntoReport, Report, Result, ResultExt};
15
+
16
  /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
17
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
18
  /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
 
26
  ///
27
  /// # Errors
28
  ///
29
+ /// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
30
+ /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
31
+ /// provide results for the requested search query and also returns error if the scraping selector
32
+ /// or HeaderMap fails to initialize.
33
  pub async fn results(
34
  query: &str,
35
  page: u32,
36
  user_agent: &str,
37
+ ) -> Result<HashMap<String, RawSearchResult>, EngineError> {
38
  // Page number can be missing or empty string and so appropriate handling is required
39
  // so that the upstream server receives a valid page number.
40
  let url: String = match page {
 
53
 
54
  // initializing HeaderMap and adding appropriate headers.
55
  let mut header_map = HeaderMap::new();
56
+ header_map.insert(
57
+ USER_AGENT,
58
+ user_agent
59
+ .parse()
60
+ .into_report()
61
+ .change_context(EngineError::UnexpectedError)?,
62
+ );
63
+ header_map.insert(
64
+ REFERER,
65
+ "https://google.com/"
66
+ .parse()
67
+ .into_report()
68
+ .change_context(EngineError::UnexpectedError)?,
69
+ );
70
+ header_map.insert(
71
+ CONTENT_TYPE,
72
+ "application/x-www-form-urlencoded"
73
+ .parse()
74
+ .into_report()
75
+ .change_context(EngineError::UnexpectedError)?,
76
+ );
77
+ header_map.insert(
78
+ COOKIE,
79
+ "kl=wt-wt"
80
+ .parse()
81
+ .into_report()
82
+ .change_context(EngineError::UnexpectedError)?,
83
+ );
84
 
85
  // fetch the html from upstream duckduckgo engine
 
86
  let results: String = reqwest::Client::new()
87
  .get(url)
88
+ .timeout(Duration::from_secs(5))
89
  .headers(header_map) // add spoofed headers to emulate human behaviour
90
  .send()
91
+ .await
92
+ .into_report()
93
+ .change_context(EngineError::RequestError)?
94
  .text()
95
+ .await
96
+ .into_report()
97
+ .change_context(EngineError::RequestError)?;
98
 
99
  let document: Html = Html::parse_document(&results);
100
+
101
+ let no_result: Selector = Selector::parse(".no-results")
102
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
103
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".no-results"))?;
104
+
105
+ if document.select(&no_result).next().is_some() {
106
+ return Err(Report::new(EngineError::EmptyResultSet));
107
+ }
108
+
109
+ let results: Selector = Selector::parse(".result")
110
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
111
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
112
+ let result_title: Selector = Selector::parse(".result__a")
113
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
114
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__a"))?;
115
+ let result_url: Selector = Selector::parse(".result__url")
116
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
117
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__url"))?;
118
+ let result_desc: Selector = Selector::parse(".result__snippet")
119
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
120
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result__snippet"))?;
121
 
122
  // scrape all the results from the html
123
  Ok(document
src/engines/engine_models.rs ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! This module provides the error enum to handle the different errors that can occur while requesting data from
2
+ //! the upstream search engines with the search query provided by the user.
3
+
4
+ use error_stack::Context;
5
+ use std::fmt;
6
+
7
+ /// A custom error type used for handling engine-associated errors.
8
+ ///
9
+ /// This enum provides variants for three different categories of errors:
10
+ /// * `RequestError` - This variant handles all request related errors like forbidden, not found,
11
+ /// etc.
12
+ /// * `EmptyResultSet` - This variant handles the no results found error provided by the upstream
13
+ /// search engines.
14
+ /// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
15
+ /// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
16
+ /// all other errors occurring within the code handling the `upstream search engines`.
17
+ #[derive(Debug)]
18
+ pub enum EngineError {
19
+ EmptyResultSet,
20
+ RequestError,
21
+ UnexpectedError,
22
+ }
23
+
24
+ impl fmt::Display for EngineError {
25
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26
+ match self {
27
+ EngineError::EmptyResultSet => {
28
+ write!(f, "The upstream search engine returned an empty result set")
29
+ }
30
+ EngineError::RequestError => {
31
+ write!(
32
+ f,
33
+ "Error occurred while requesting data from upstream search engine"
34
+ )
35
+ }
36
+ EngineError::UnexpectedError => {
37
+ write!(f, "An unexpected error occurred while processing the data")
38
+ }
39
+ }
40
+ }
41
+ }
42
+
43
+ impl Context for EngineError {}
src/engines/mod.rs CHANGED
@@ -1,2 +1,3 @@
1
  pub mod duckduckgo;
 
2
  pub mod searx;
 
1
  pub mod duckduckgo;
2
+ pub mod engine_models;
3
  pub mod searx;
src/engines/searx.rs CHANGED
@@ -8,6 +8,9 @@ use std::collections::HashMap;
8
 
9
  use crate::search_results_handler::aggregation_models::RawSearchResult;
10
 
 
 
 
11
  /// This function scrapes results from the upstream engine searx and puts all the scraped
12
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
13
  /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
@@ -21,40 +24,84 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
21
  ///
22
  /// # Errors
23
  ///
24
- /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
25
- /// reach the above `upstream search engine` page and also returns error if the scraping
26
- /// selector fails to initialize"
 
27
  pub async fn results(
28
  query: &str,
29
  page: u32,
30
  user_agent: &str,
31
- ) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
32
  // Page number can be missing or empty string and so appropriate handling is required
33
  // so that the upstream server receives a valid page number.
34
  let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
35
 
36
  // initializing headers and adding appropriate headers.
37
  let mut header_map = HeaderMap::new();
38
- header_map.insert(USER_AGENT, user_agent.parse()?);
39
- header_map.insert(REFERER, "https://google.com/".parse()?);
40
- header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
41
- header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  // fetch the html from upstream searx instance engine
44
- // TODO: Write better error handling code to handle no results case.
45
  let results: String = reqwest::Client::new()
46
  .get(url)
47
  .headers(header_map) // add spoofed headers to emulate human behaviours.
48
  .send()
49
- .await?
 
 
50
  .text()
51
- .await?;
 
 
52
 
53
  let document: Html = Html::parse_document(&results);
54
- let results: Selector = Selector::parse(".result")?;
55
- let result_title: Selector = Selector::parse("h3>a")?;
56
- let result_url: Selector = Selector::parse("h3>a")?;
57
- let result_desc: Selector = Selector::parse(".content")?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  // scrape all the results from the html
60
  Ok(document
 
8
 
9
  use crate::search_results_handler::aggregation_models::RawSearchResult;
10
 
11
+ use super::engine_models::EngineError;
12
+ use error_stack::{IntoReport, Report, Result, ResultExt};
13
+
14
  /// This function scrapes results from the upstream engine searx and puts all the scraped
15
  /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
16
  /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
 
24
  ///
25
  /// # Errors
26
  ///
27
+ /// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
28
+ /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
29
+ /// provide results for the requested search query and also returns error if the scraping selector
30
+ /// or HeaderMap fails to initialize.
31
  pub async fn results(
32
  query: &str,
33
  page: u32,
34
  user_agent: &str,
35
+ ) -> Result<HashMap<String, RawSearchResult>, EngineError> {
36
  // Page number can be missing or empty string and so appropriate handling is required
37
  // so that the upstream server receives a valid page number.
38
  let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
39
 
40
  // initializing headers and adding appropriate headers.
41
  let mut header_map = HeaderMap::new();
42
+ header_map.insert(
43
+ USER_AGENT,
44
+ user_agent
45
+ .parse()
46
+ .into_report()
47
+ .change_context(EngineError::UnexpectedError)?,
48
+ );
49
+ header_map.insert(
50
+ REFERER,
51
+ "https://google.com/"
52
+ .parse()
53
+ .into_report()
54
+ .change_context(EngineError::UnexpectedError)?,
55
+ );
56
+ header_map.insert(
57
+ CONTENT_TYPE,
58
+ "application/x-www-form-urlencoded"
59
+ .parse()
60
+ .into_report()
61
+ .change_context(EngineError::UnexpectedError)?,
62
+ );
63
+ header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
64
 
65
  // fetch the html from upstream searx instance engine
 
66
  let results: String = reqwest::Client::new()
67
  .get(url)
68
  .headers(header_map) // add spoofed headers to emulate human behaviours.
69
  .send()
70
+ .await
71
+ .into_report()
72
+ .change_context(EngineError::RequestError)?
73
  .text()
74
+ .await
75
+ .into_report()
76
+ .change_context(EngineError::RequestError)?;
77
 
78
  let document: Html = Html::parse_document(&results);
79
+
80
+ let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
81
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
82
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", "#urls>.dialog-error>p"))?;
83
+
84
+ if let Some(no_result_msg) = document.select(&no_result).nth(1) {
85
+ if no_result_msg.inner_html()
86
+ == "we didn't find any results. Please use another query or search in more categories"
87
+ {
88
+ return Err(Report::new(EngineError::EmptyResultSet));
89
+ }
90
+ }
91
+
92
+ let results: Selector = Selector::parse(".result")
93
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
94
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".result"))?;
95
+ let result_title: Selector = Selector::parse("h3>a")
96
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
97
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
98
+ let result_url: Selector = Selector::parse("h3>a")
99
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
100
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", "h3>a"))?;
101
+
102
+ let result_desc: Selector = Selector::parse(".content")
103
+ .map_err(|_| Report::new(EngineError::UnexpectedError))
104
+ .attach_printable_lazy(|| format!("invalid CSS selector: {}", ".content"))?;
105
 
106
  // scrape all the results from the html
107
  Ok(document
src/search_results_handler/aggregator.rs CHANGED
@@ -58,8 +58,19 @@ pub async fn aggregate(
58
  searx::results(query, page, &user_agent)
59
  );
60
 
61
- let ddg_map_results: HashMap<String, RawSearchResult> = ddg_map_results?;
62
- let searx_map_results: HashMap<String, RawSearchResult> = searx_map_results?;
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  result_map.extend(ddg_map_results);
65
 
 
58
  searx::results(query, page, &user_agent)
59
  );
60
 
61
+ let ddg_map_results = ddg_map_results.unwrap_or_else(|e| {
62
+ if debug {
63
+ log::error!("Error fetching results from DuckDuckGo: {:?}", e);
64
+ }
65
+ HashMap::new()
66
+ });
67
+
68
+ let searx_map_results = searx_map_results.unwrap_or_else(|e| {
69
+ if debug {
70
+ log::error!("Error fetching results from Searx: {:?}", e);
71
+ }
72
+ HashMap::new()
73
+ });
74
 
75
  result_map.extend(ddg_map_results);
76