alamin655 committed
Commit 2790eef
2 Parent(s): 86991a2 867753a

Merge branch 'rolling' into feat-rate-limiter-for-websurfx

Cargo.lock CHANGED
@@ -544,18 +544,18 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.4.1"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27"
+checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
 dependencies = [
  "clap_builder",
 ]
 
 [[package]]
 name = "clap_builder"
-version = "4.4.1"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d"
+checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
 dependencies = [
  "anstyle",
  "clap_lex",
@@ -2764,9 +2764,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.10"
+version = "0.38.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964"
+checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
Cargo.toml CHANGED
@@ -20,8 +20,8 @@ serde_json = {version="1.0.105"}
 fake-useragent = {version="0.1.3"}
 env_logger = {version="0.10.0"}
 log = {version="0.4.20"}
-rlua = {version="0.19.7"}
-redis = {version="0.23.3"}
+mlua = {version="0.8.10", features=["luajit"]}
+redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
 md5 = {version="0.7.0"}
 rand={version="0.8.5"}
 once_cell = {version="1.18.0"}
README.md CHANGED
@@ -51,7 +51,7 @@
 - **Getting Started**
 - [🔭 Preview](#preview-)
 - [🚀 Features](#features-)
-- [🛠️ Installation and Testing](#installation-and-testing-)
+- [🛠️ Installation and Testing](#installation-and-testing-%EF%B8%8F)
 - [🔧 Configuration](#configuration-)
 - **Feature Overview**
 - [🎨 Theming](#theming-)
public/images/barricade.png ADDED
public/images/filter.png ADDED
public/static/themes/simple.css CHANGED
@@ -132,6 +132,35 @@ body {
   width: 1.2rem;
   height: 1.2rem;
 }
+.results .result_disallowed,
+.results .result_filtered {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 10rem;
+  font-size: 2rem;
+  color: var(--foreground-color);
+  margin: 0rem 7rem;
+}
+
+.results .result_disallowed .user_query,
+.results .result_filtered .user_query {
+  color: var(--background-color);
+  font-weight: 300;
+}
+
+.results .result_disallowed img,
+.results .result_filtered img {
+  width: 30rem;
+}
+
+.results .result_disallowed div,
+.results .result_filtered div {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  line-break: strict;
+}
 
 /* styles for the footer and header */
 
public/templates/search.html CHANGED
@@ -1,37 +1,69 @@
 {{>header this.style}}
 <main class="results">
-  {{>search_bar this}}
-  <div class="results_aggregated">
-    {{#if results}} {{#each results}}
-    <div class="result">
-      <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
-      <small>{{{this.url}}}</small>
-      <p>{{{this.description}}}</p>
-      <div class="upstream_engines">
-        {{#each engine}}
-        <span>{{{this}}}</span>
-        {{/each}}
-      </div>
+  {{>search_bar this}}
+  <div class="results_aggregated">
+    {{#if results}} {{#each results}}
+    <div class="result">
+      <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
+      <small>{{{this.url}}}</small>
+      <p>{{{this.description}}}</p>
+      <div class="upstream_engines">
+        {{#each engine}}
+        <span>{{{this}}}</span>
+        {{/each}}
+      </div>
+    </div>
+    {{/each}} {{else}} {{#if disallowed}}
+    <div class="result_disallowed">
+      <div class="description">
+        <p>
+          Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+          has been disallowed.
+        </p>
+        <p class="description_paragraph">Dear user,</p>
+        <p class="description_paragraph">
+          The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
+          been blacklisted via server configuration and hence disallowed by the
+          server. Henceforth no results could be displayed for your query.
+        </p>
+      </div>
+      <img src="./images/barricade.png" alt="Image of a Barricade" />
+    </div>
+    {{else}} {{#if filtered}}
+    <div class="result_filtered">
+      <div class="description">
+        <p>
+          Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+          has been filtered.
+        </p>
+        <p class="description_paragraph">Dear user,</p>
+        <p class="description_paragraph">
+          All the search results contain results that has been configured to be
+          filtered out via server configuration and henceforth has been
+          completely filtered out.
+        </p>
+      </div>
+      <img src="./images/filter.png" alt="Image of a paper inside a funnel" />
+    </div>
+    {{else}}
+    <div class="result_not_found">
+      <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
+      <p class="suggestions">Suggestions:</p>
+      <ul>
+        <li>Make sure that all words are spelled correctly.</li>
+        <li>Try different keywords.</li>
+        <li>Try more general keywords.</li>
+      </ul>
+      <img src="./images/no_results.gif" alt="Man fishing gif" />
+    </div>
+    {{/if}} {{/if}} {{/if}}
   </div>
-    {{/each}} {{else}}
-    <div class="result_not_found">
-      <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
-      <p class="suggestions">Suggestions:</p>
-      <ul>
-        <li>Make sure that all words are spelled correctly.</li>
-        <li>Try different keywords.</li>
-        <li>Try more general keywords.</li>
-      </ul>
-      <img src="./images/no_results.gif" alt="Man fishing gif" />
+  <div class="page_navigation">
+    <button type="button" onclick="navigate_backward()">
+      &#8592; previous
+    </button>
+    <button type="button" onclick="navigate_forward()">next &#8594;</button>
   </div>
-    {{/if}}
-  </div>
-  <div class="page_navigation">
-    <button type="button" onclick="navigate_backward()">
-      &#8592; previous
-    </button>
-    <button type="button" onclick="navigate_forward()">next &#8594;</button>
-  </div>
 </main>
 <script src="static/index.js"></script>
 <script src="static/pagination.js"></script>
src/engines/duckduckgo.rs CHANGED
@@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        _safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that upstream server recieves valid page number.
src/engines/engine_models.rs CHANGED
@@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }
 
src/engines/searx.rs CHANGED
@@ -42,12 +42,21 @@ impl SearchEngine for Searx {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        mut safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that upstream server recieves valid page number.
+        if safe_search == 3 {
+            safe_search = 2;
+        };
+
         let url: String = match page {
-            0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"),
-            _ => format!("https://searx.work/search?q={query}&pageno={page}"),
+            0 | 1 => {
+                format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
+            }
+            _ => format!(
+                "https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
+            ),
         };
 
         // initializing headers and adding appropriate headers.
src/results/aggregation_models.rs CHANGED
@@ -102,13 +102,15 @@ impl EngineErrorInfo {
 /// and the type of error that caused it.
 /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
 /// given search query.
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
     pub style: Style,
-    pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
+    pub engine_errors_info: Vec<EngineErrorInfo>,
+    pub disallowed: bool,
+    pub filtered: bool,
 }
 
 impl SearchResults {
@@ -122,6 +124,7 @@ impl SearchResults {
     /// the search url.
     /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
     /// given search query.
+    /// * ``
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
@@ -131,12 +134,38 @@ impl SearchResults {
             results,
             page_query: page_query.to_owned(),
             style: Style::default(),
-            engine_errors_info: SmallVec::from(engine_errors_info),
+            engine_errors_info: engine_errors_info.to_owned(),
+            disallowed: Default::default(),
+            filtered: Default::default(),
         }
     }
 
     /// A setter function to add website style to the return search results.
     pub fn add_style(&mut self, style: &Style) {
-        self.style = style.to_owned();
+        self.style = style.clone();
+    }
+
+    /// A setter function that sets disallowed to true.
+    pub fn set_disallowed(&mut self) {
+        self.disallowed = true;
+    }
+
+    /// A setter function to set the current page search query.
+    pub fn set_page_query(&mut self, page: &str) {
+        self.page_query = page.to_owned();
+    }
+
+    /// A setter function that sets the filtered to true.
+    pub fn set_filtered(&mut self) {
+        self.filtered = true;
+    }
+
+    /// A getter function that gets the value of `engine_errors_info`.
+    pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
+        std::mem::take(&mut self.engine_errors_info)
+    }
+    /// A getter function that gets the value of `results`.
+    pub fn results(&mut self) -> Vec<SearchResult> {
+        self.results.clone()
     }
 }
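
Note: the new `disallowed` and `filtered` booleans are what the `{{#if disallowed}}` / `{{#if filtered}}` branches in search.html test, because the struct is serialized to camelCase JSON before being cached and rendered. A trimmed, hypothetical mirror of the struct (illustration only, not the project code; the real struct also carries `results`, `style` and `engine_errors_info`) shows the shape the template sees:

use serde::Serialize;

// Illustrative stand-in for the real `SearchResults`.
#[derive(Serialize, Default)]
#[serde(rename_all = "camelCase")]
struct SearchResultsLite {
    page_query: String,
    disallowed: bool,
    filtered: bool,
}

fn main() {
    let mut results = SearchResultsLite::default();
    results.disallowed = true;
    results.page_query = "example query".to_owned();
    // Prints: {"pageQuery":"example query","disallowed":true,"filtered":false}
    println!("{}", serde_json::to_string(&results).unwrap());
}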
src/results/aggregator.rs CHANGED
@@ -70,6 +70,7 @@ pub async fn aggregate(
     debug: bool,
     upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: &str = random_user_agent();
 
@@ -91,7 +92,13 @@
         let query: String = query.to_owned();
         tasks.push(tokio::spawn(async move {
             search_engine
-                .results(&query, page, user_agent, request_timeout)
+                .results(
+                    &query,
+                    page,
+                    user_agent.clone(),
+                    request_timeout,
+                    safe_search,
+                )
                 .await
         }));
     }
@@ -150,20 +157,22 @@
         }
     }
 
-    let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
-    filter_with_lists(
-        &mut result_map,
-        &mut blacklist_map,
-        file_path(FileType::BlockList)?,
-    )?;
+    if safe_search >= 3 {
+        let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
+        filter_with_lists(
+            &mut result_map,
+            &mut blacklist_map,
+            file_path(FileType::BlockList)?,
+        )?;
 
-    filter_with_lists(
-        &mut blacklist_map,
-        &mut result_map,
-        file_path(FileType::AllowList)?,
-    )?;
+        filter_with_lists(
+            &mut blacklist_map,
+            &mut result_map,
+            file_path(FileType::AllowList)?,
+        )?;
 
-    drop(blacklist_map);
+        drop(blacklist_map);
+    }
 
     let results: Vec<SearchResult> = result_map.into_values().collect();
 
@@ -189,7 +198,7 @@ pub fn filter_with_lists(
     let mut reader = BufReader::new(File::open(file_path)?);
 
     for line in reader.by_ref().lines() {
-        let re = Regex::new(&line?)?;
+        let re = Regex::new(line?.trim())?;
 
         // Iterate over each search result in the map and check if it matches the regex pattern
         for (url, search_result) in map_to_be_filtered.clone().into_iter() {
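
For context, the block/allow-list pass that `aggregate` now runs only when `safe_search >= 3` boils down to treating each line of a list file as a regex and moving matching results between two maps. A minimal standalone sketch of that idea (simplified types, illustrative names, matching only on the URL for brevity):

use regex::Regex;
use std::collections::HashMap;

// Move every entry whose URL matches one of the patterns from `source` to `sink`.
fn filter_with_patterns(
    source: &mut HashMap<String, String>,
    sink: &mut HashMap<String, String>,
    patterns: &[&str],
) -> Result<(), regex::Error> {
    for pattern in patterns {
        let re = Regex::new(pattern.trim())?;
        for (url, title) in source.clone() {
            if re.is_match(&url.to_lowercase()) {
                sink.insert(url.clone(), title);
                source.remove(&url);
            }
        }
    }
    Ok(())
}

fn main() -> Result<(), regex::Error> {
    let safe_search: u8 = 3;
    let mut results: HashMap<String, String> =
        HashMap::from([("https://example.org/ads".to_owned(), "Ads".to_owned())]);
    let mut blocked: HashMap<String, String> = HashMap::new();
    if safe_search >= 3 {
        filter_with_patterns(&mut results, &mut blocked, &["ads"])?; // block list pass
        filter_with_patterns(&mut blocked, &mut results, &["allowed"])?; // allow list pass
    }
    println!("kept: {:?}, blocked: {:?}", results.keys(), blocked.keys());
    Ok(())
}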
src/server/routes.rs CHANGED
@@ -2,7 +2,10 @@
 //! meta search engine website and provide appropriate response to each route/page
 //! when requested.
 
-use std::fs::read_to_string;
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 
 use crate::{
     cache::cacher::RedisCache,
@@ -13,12 +16,13 @@ use crate::{
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
+use regex::Regex;
 use serde::Deserialize;
 use tokio::join;
 
 // ---- Constants ----
 /// Initialize redis cache connection once and store it on the heap.
-const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
+static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 
 /// A named struct which deserializes all the user provided search parameters and stores them.
 ///
@@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
+    safesearch: Option<u8>,
 }
 
 /// Handles the route of index page or main page of the `websurfx` meta search engine website.
@@ -105,42 +110,58 @@ pub async fn search(
         None => 1,
     };
 
+    let safe_search: u8 = match config.safe_search {
+        3..=4 => config.safe_search,
+        _ => match &params.safesearch {
+            Some(safesearch) => match safesearch {
+                0..=2 => *safesearch,
+                _ => 1,
+            },
+            None => config.safe_search,
+        },
+    };
+
     let (_, results, _) = join!(
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page - 1
+                page - 1,
+                safe_search
             ),
             &config,
             query,
            page - 1,
-            &req,
+            req.clone(),
+            safe_search
        ),
        results(
            format!(
-                "http://{}:{}/search?q={}&page={}",
-                config.binding_ip, config.port, query, page
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
+                config.binding_ip, config.port, query, page, safe_search
            ),
            &config,
            query,
            page,
-            &req,
+            req.clone(),
+            safe_search
        ),
        results(
            format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                config.binding_ip,
                config.port,
                query,
-                page + 1
+                page + 1,
+                safe_search
            ),
            &config,
            query,
            page + 1,
-            &req,
+            req.clone(),
+            safe_search
        )
    );
 
@@ -160,9 +181,10 @@ async fn results(
     config: &Config,
     query: &str,
     page: u32,
-    req: &HttpRequest,
+    req: HttpRequest,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    let redis_cache: RedisCache = REDIS_CACHE
+    let mut redis_cache: RedisCache = REDIS_CACHE
         .get_or_init(async {
             // Initialize redis cache connection pool only one and store it in the heap.
             RedisCache::new(&config.redis_url, 5).await.unwrap()
@@ -178,6 +200,23 @@
     match cached_results_json {
         Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
+            if safe_search == 4 {
+                let mut results: SearchResults = SearchResults::default();
+                let mut _flag: bool =
+                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
+                _flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
+
+                if _flag {
+                    results.set_disallowed();
+                    results.add_style(&config.style);
+                    results.set_page_query(query);
+                    redis_cache
+                        .cache_results(&serde_json::to_string(&results)?, &url)
+                        .await?;
+                    return Ok(results);
+                }
+            }
+
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
@@ -199,6 +238,7 @@ async fn results(
                     config.debug,
                     &engines,
                     config.request_timeout,
+                    safe_search,
                 )
                 .await?
             }
@@ -210,14 +250,16 @@ async fn results(
                     config.debug,
                     &config.upstream_search_engines,
                     config.request_timeout,
+                    safe_search,
                 )
                 .await?
             }
         };
-
+            if results.engine_errors_info().is_empty() && results.results().is_empty() {
+                results.set_filtered();
+            }
             results.add_style(&config.style);
             redis_cache
-                .clone()
                 .cache_results(&serde_json::to_string(&results)?, &url)
                 .await?;
             Ok(results)
@@ -225,6 +267,22 @@ async fn results(
     }
 }
 
+fn is_match_from_filter_list(
+    file_path: &str,
+    query: &str,
+) -> Result<bool, Box<dyn std::error::Error>> {
+    let mut flag = false;
+    let mut reader = BufReader::new(File::open(file_path)?);
+    for line in reader.by_ref().lines() {
+        let re = Regex::new(&line?)?;
+        if re.is_match(query) {
+            flag = true;
+            break;
+        }
+    }
+    Ok(flag)
+}
+
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
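
The precedence the new `search` handler applies when picking the effective safe-search level can be summarized in a few lines. This is a standalone sketch with an illustrative function name, not code from the repository: a server-configured level of 3 or 4 always wins, an explicit `?safesearch=` value is honoured only in the 0..=2 range, any other value falls back to 1, and a missing parameter falls back to the configured level.

// Standalone sketch of the safe-search precedence used in routes.rs (names illustrative).
fn resolve_safe_search(config_level: u8, param: Option<u8>) -> u8 {
    match config_level {
        // Levels 3 and 4 are server-enforced and cannot be lowered per request.
        3..=4 => config_level,
        _ => match param {
            Some(level @ 0..=2) => level, // user-selected level within the allowed range
            Some(_) => 1,                 // out-of-range values fall back to "Low"
            None => config_level,         // no query parameter: use the configured default
        },
    }
}

fn main() {
    assert_eq!(resolve_safe_search(4, Some(0)), 4);
    assert_eq!(resolve_safe_search(2, Some(1)), 1);
    assert_eq!(resolve_safe_search(2, Some(9)), 1);
    assert_eq!(resolve_safe_search(2, None), 2);
    println!("all safe-search precedence checks passed");
}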
websurfx/config.lua CHANGED
@@ -15,6 +15,17 @@ rate_limiter = {
     time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
 }
 
+-- ### Search ###
+-- Filter results based on different levels. The levels provided are:
+-- {{
+-- 0 - None
+-- 1 - Low
+-- 2 - Moderate
+-- 3 - High
+-- 4 - Aggressive
+-- }}
+safe_search = 2
+
 -- ### Website ###
 -- The different colorschemes provided are:
 -- {{
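
With this setting, levels 0-2 can still be selected per request through the `safesearch` query parameter (for example `/search?q=rust&page=1&safesearch=2` on whatever host and port the server binds to), level 3 additionally runs the block/allow-list filtering in the aggregator, and level 4 also rejects blacklisted queries before any upstream engine is contacted; a configured level of 3 or 4 cannot be lowered by the client.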