neon_arch committed on
Commit
05c3e6c
1 Parent(s): 410257c

✨ feat: implement new search parameter `safe_search` & code to disallow

Browse files
Files changed (1) hide show
  1. src/server/routes.rs +68 -12
src/server/routes.rs CHANGED
@@ -2,7 +2,10 @@
2
  //! meta search engine website and provide appropriate response to each route/page
3
  //! when requested.
4
 
5
- use std::fs::read_to_string;
 
 
 
6
 
7
  use crate::{
8
  cache::cacher::RedisCache,
@@ -13,6 +16,7 @@ use crate::{
13
  };
14
  use actix_web::{get, web, HttpRequest, HttpResponse};
15
  use handlebars::Handlebars;
 
16
  use serde::Deserialize;
17
  use tokio::join;
18
 
@@ -28,6 +32,7 @@ use tokio::join;
28
  struct SearchParams {
29
  q: Option<String>,
30
  page: Option<u32>,
 
31
  }
32
 
33
  /// Handles the route of index page or main page of the `websurfx` meta search engine website.
@@ -101,42 +106,58 @@ pub async fn search(
101
  None => 1,
102
  };
103
 
 
 
 
 
 
 
 
 
 
 
 
104
  let (_, results, _) = join!(
105
  results(
106
  format!(
107
- "http://{}:{}/search?q={}&page={}",
108
  config.binding_ip,
109
  config.port,
110
  query,
111
- page - 1
 
112
  ),
113
  &config,
114
  query.to_string(),
115
  page - 1,
116
  req.clone(),
 
117
  ),
118
  results(
119
  format!(
120
- "http://{}:{}/search?q={}&page={}",
121
- config.binding_ip, config.port, query, page
122
  ),
123
  &config,
124
  query.to_string(),
125
  page,
126
  req.clone(),
 
127
  ),
128
  results(
129
  format!(
130
- "http://{}:{}/search?q={}&page={}",
131
  config.binding_ip,
132
  config.port,
133
  query,
134
- page + 1
 
135
  ),
136
  &config,
137
  query.to_string(),
138
  page + 1,
139
  req.clone(),
 
140
  )
141
  );
142
 
@@ -157,6 +178,7 @@ async fn results(
157
  query: String,
158
  page: u32,
159
  req: HttpRequest,
 
160
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
161
  //Initialize redis cache connection struct
162
  let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
@@ -165,15 +187,28 @@ async fn results(
165
  // check if fetched cache results was indeed fetched or it was an error and if so
166
  // handle the data accordingly.
167
  match cached_results_json {
168
- Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
169
  Err(_) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  // check if the cookie value is empty or not if it is empty then use the
171
  // default selected upstream search engines from the config file otherwise
172
  // parse the non-empty cookie and grab the user selected engines from the
173
  // UI and use that.
174
- let mut results: crate::results::aggregation_models::SearchResults = match req
175
- .cookie("appCookie")
176
- {
177
  Some(cookie_value) => {
178
  let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
179
 
@@ -190,6 +225,7 @@ async fn results(
190
  config.debug,
191
  engines,
192
  config.request_timeout,
 
193
  )
194
  .await?
195
  }
@@ -201,17 +237,37 @@ async fn results(
201
  config.debug,
202
  config.upstream_search_engines.clone(),
203
  config.request_timeout,
 
204
  )
205
  .await?
206
  }
207
  };
208
- results.add_style(config.style.clone());
 
 
 
209
  redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
210
  Ok(results)
211
  }
212
  }
213
  }
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
216
  #[get("/robots.txt")]
217
  pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
 
2
  //! meta search engine website and provide appropriate response to each route/page
3
  //! when requested.
4
 
5
+ use std::{
6
+ fs::{read_to_string, File},
7
+ io::{BufRead, BufReader, Read},
8
+ };
9
 
10
  use crate::{
11
  cache::cacher::RedisCache,
 
16
  };
17
  use actix_web::{get, web, HttpRequest, HttpResponse};
18
  use handlebars::Handlebars;
19
+ use regex::Regex;
20
  use serde::Deserialize;
21
  use tokio::join;
22
 
 
32
  struct SearchParams {
33
  q: Option<String>,
34
  page: Option<u32>,
35
+ safesearch: Option<u8>,
36
  }
37
 
38
  /// Handles the route of index page or main page of the `websurfx` meta search engine website.
 
106
  None => 1,
107
  };
108
 
109
+ let safe_search: u8 = match config.safe_search {
110
+ 3..=4 => config.safe_search,
111
+ _ => match &params.safesearch {
112
+ Some(safesearch) => match safesearch {
113
+ 0..=2 => *safesearch,
114
+ _ => 1,
115
+ },
116
+ None => config.safe_search,
117
+ },
118
+ };
119
+
120
  let (_, results, _) = join!(
121
  results(
122
  format!(
123
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
124
  config.binding_ip,
125
  config.port,
126
  query,
127
+ page - 1,
128
+ safe_search
129
  ),
130
  &config,
131
  query.to_string(),
132
  page - 1,
133
  req.clone(),
134
+ safe_search
135
  ),
136
  results(
137
  format!(
138
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
139
+ config.binding_ip, config.port, query, page, safe_search
140
  ),
141
  &config,
142
  query.to_string(),
143
  page,
144
  req.clone(),
145
+ safe_search
146
  ),
147
  results(
148
  format!(
149
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
150
  config.binding_ip,
151
  config.port,
152
  query,
153
+ page + 1,
154
+ safe_search
155
  ),
156
  &config,
157
  query.to_string(),
158
  page + 1,
159
  req.clone(),
160
+ safe_search
161
  )
162
  );
163
 
 
178
  query: String,
179
  page: u32,
180
  req: HttpRequest,
181
+ safe_search: u8,
182
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
183
  //Initialize redis cache connection struct
184
  let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
 
187
  // check if fetched cache results was indeed fetched or it was an error and if so
188
  // handle the data accordingly.
189
  match cached_results_json {
190
+ Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
191
  Err(_) => {
192
+ if safe_search == 4 {
193
+ let mut results: SearchResults = SearchResults::default();
194
+ let mut _flag: bool =
195
+ is_match_from_filter_list(&file_path(FileType::BlockList)?, &query)?;
196
+ _flag = !is_match_from_filter_list(&file_path(FileType::AllowList)?, &query)?;
197
+
198
+ if _flag {
199
+ results.set_disallowed();
200
+ results.add_style(&config.style);
201
+ results.set_page_query(&query);
202
+ redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
203
+ return Ok(results);
204
+ }
205
+ }
206
+
207
  // check if the cookie value is empty or not if it is empty then use the
208
  // default selected upstream search engines from the config file otherwise
209
  // parse the non-empty cookie and grab the user selected engines from the
210
  // UI and use that.
211
+ let mut results: SearchResults = match req.cookie("appCookie") {
 
 
212
  Some(cookie_value) => {
213
  let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
214
 
 
225
  config.debug,
226
  engines,
227
  config.request_timeout,
228
+ safe_search,
229
  )
230
  .await?
231
  }
 
237
  config.debug,
238
  config.upstream_search_engines.clone(),
239
  config.request_timeout,
240
+ safe_search,
241
  )
242
  .await?
243
  }
244
  };
245
+ if results.engine_errors_info().is_empty() && results.results().is_empty() {
246
+ results.set_filtered();
247
+ }
248
+ results.add_style(&config.style);
249
  redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
250
  Ok(results)
251
  }
252
  }
253
  }
254
 
255
+ fn is_match_from_filter_list(
256
+ file_path: &str,
257
+ query: &str,
258
+ ) -> Result<bool, Box<dyn std::error::Error>> {
259
+ let mut flag = false;
260
+ let mut reader = BufReader::new(File::open(file_path)?);
261
+ for line in reader.by_ref().lines() {
262
+ let re = Regex::new(&line?)?;
263
+ if re.is_match(query) {
264
+ flag = true;
265
+ break;
266
+ }
267
+ }
268
+ Ok(flag)
269
+ }
270
+
271
  /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
272
  #[get("/robots.txt")]
273
  pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {