neon_arch committed on
Commit
7a8bf02
2 Parent(s): 7f371bf 64948b8

Merge pull request #191 from xffxff/unit_tests

Browse files

🧹 Unit tests for the function `filter_with_lists`

Files changed (3) hide show
  1. Cargo.lock +2 -1
  2. Cargo.toml +2 -1
  3. src/results/aggregator.rs +160 -1
Cargo.lock CHANGED
@@ -3543,7 +3543,7 @@ dependencies = [
3543
 
3544
  [[package]]
3545
  name = "websurfx"
3546
- version = "0.18.0"
3547
  dependencies = [
3548
  "actix-cors",
3549
  "actix-files",
@@ -3566,6 +3566,7 @@ dependencies = [
3566
  "scraper",
3567
  "serde",
3568
  "serde_json",
 
3569
  "tokio 1.32.0",
3570
  ]
3571
 
 
3543
 
3544
  [[package]]
3545
  name = "websurfx"
3546
+ version = "0.18.1"
3547
  dependencies = [
3548
  "actix-cors",
3549
  "actix-files",
 
3566
  "scraper",
3567
  "serde",
3568
  "serde_json",
3569
+ "tempfile",
3570
  "tokio 1.32.0",
3571
  ]
3572
 
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
  [package]
2
  name = "websurfx"
3
- version = "0.18.0"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
@@ -31,6 +31,7 @@ regex = {version="1.9.3", features=["perf"]}
31
  [dev-dependencies]
32
  rusty-hook = "^0.11.2"
33
  criterion = "0.5.1"
 
34
 
35
  [profile.dev]
36
  opt-level = 0
 
1
  [package]
2
  name = "websurfx"
3
+ version = "0.18.1"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
 
31
  [dev-dependencies]
32
  rusty-hook = "^0.11.2"
33
  criterion = "0.5.1"
34
+ tempfile = "3.8.0"
35
 
36
  [profile.dev]
37
  opt-level = 0
src/results/aggregator.rs CHANGED
@@ -175,22 +175,181 @@ pub async fn aggregate(
175
  ))
176
  }
177
 
178
- fn filter_with_lists(
 
 
 
 
 
 
 
 
 
 
 
179
  map_to_be_filtered: &mut HashMap<String, SearchResult>,
180
  resultant_map: &mut HashMap<String, SearchResult>,
181
  file_path: &str,
182
  ) -> Result<(), Box<dyn std::error::Error>> {
183
  let mut reader = BufReader::new(File::open(file_path)?);
 
184
  for line in reader.by_ref().lines() {
185
  let re = Regex::new(&line?)?;
 
 
186
  for (url, search_result) in map_to_be_filtered.clone().into_iter() {
187
  if re.is_match(&url.to_lowercase())
188
  || re.is_match(&search_result.title.to_lowercase())
189
  || re.is_match(&search_result.description.to_lowercase())
190
  {
 
191
  resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
192
  }
193
  }
194
  }
 
195
  Ok(())
196
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  ))
176
  }
177
 
178
+ /// Filters a map of search results using a list of regex patterns.
179
+ ///
180
+ /// # Arguments
181
+ ///
182
+ /// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
183
+ /// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
184
+ /// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
185
+ ///
186
+ /// # Errors
187
+ ///
188
+ /// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
189
+ pub fn filter_with_lists(
190
  map_to_be_filtered: &mut HashMap<String, SearchResult>,
191
  resultant_map: &mut HashMap<String, SearchResult>,
192
  file_path: &str,
193
  ) -> Result<(), Box<dyn std::error::Error>> {
194
  let mut reader = BufReader::new(File::open(file_path)?);
195
+
196
  for line in reader.by_ref().lines() {
197
  let re = Regex::new(&line?)?;
198
+
199
+ // Iterate over each search result in the map and check if it matches the regex pattern
200
  for (url, search_result) in map_to_be_filtered.clone().into_iter() {
201
  if re.is_match(&url.to_lowercase())
202
  || re.is_match(&search_result.title.to_lowercase())
203
  || re.is_match(&search_result.description.to_lowercase())
204
  {
205
+ // If the search result matches the regex pattern, move it from the original map to the resultant map
206
  resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
207
  }
208
  }
209
  }
210
+
211
  Ok(())
212
  }
213
+
214
+ #[cfg(test)]
215
+ mod tests {
216
+ use super::*;
217
+ use std::collections::HashMap;
218
+ use std::io::Write;
219
+ use tempfile::NamedTempFile;
220
+
221
+ #[test]
222
+ fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
223
+ // Create a map of search results to filter
224
+ let mut map_to_be_filtered = HashMap::new();
225
+ map_to_be_filtered.insert(
226
+ "https://www.example.com".to_string(),
227
+ SearchResult {
228
+ title: "Example Domain".to_string(),
229
+ url: "https://www.example.com".to_string(),
230
+ description: "This domain is for use in illustrative examples in documents."
231
+ .to_string(),
232
+ engine: vec!["Google".to_string(), "Bing".to_string()],
233
+ },
234
+ );
235
+ map_to_be_filtered.insert(
236
+ "https://www.rust-lang.org/".to_string(),
237
+ SearchResult {
238
+ title: "Rust Programming Language".to_string(),
239
+ url: "https://www.rust-lang.org/".to_string(),
240
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
241
+ engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
242
+ },
243
+ );
244
+
245
+ // Create a temporary file with regex patterns
246
+ let mut file = NamedTempFile::new()?;
247
+ writeln!(file, "example")?;
248
+ writeln!(file, "rust")?;
249
+ file.flush()?;
250
+
251
+ let mut resultant_map = HashMap::new();
252
+ filter_with_lists(
253
+ &mut map_to_be_filtered,
254
+ &mut resultant_map,
255
+ file.path().to_str().unwrap(),
256
+ )?;
257
+
258
+ assert_eq!(resultant_map.len(), 2);
259
+ assert!(resultant_map.contains_key("https://www.example.com"));
260
+ assert!(resultant_map.contains_key("https://www.rust-lang.org/"));
261
+ assert_eq!(map_to_be_filtered.len(), 0);
262
+
263
+ Ok(())
264
+ }
265
+
266
+ #[test]
267
+ fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
268
+ let mut map_to_be_filtered = HashMap::new();
269
+ map_to_be_filtered.insert(
270
+ "https://www.example.com".to_string(),
271
+ SearchResult {
272
+ title: "Example Domain".to_string(),
273
+ url: "https://www.example.com".to_string(),
274
+ description: "This domain is for use in illustrative examples in documents."
275
+ .to_string(),
276
+ engine: vec!["Google".to_string(), "Bing".to_string()],
277
+ },
278
+ );
279
+ map_to_be_filtered.insert(
280
+ "https://www.rust-lang.org/".to_string(),
281
+ SearchResult {
282
+ title: "Rust Programming Language".to_string(),
283
+ url: "https://www.rust-lang.org/".to_string(),
284
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
285
+ engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
286
+ },
287
+ );
288
+
289
+ // Create a temporary file with a regex pattern containing a wildcard
290
+ let mut file = NamedTempFile::new()?;
291
+ writeln!(file, "ex.*le")?;
292
+ file.flush()?;
293
+
294
+ let mut resultant_map = HashMap::new();
295
+
296
+ filter_with_lists(
297
+ &mut map_to_be_filtered,
298
+ &mut resultant_map,
299
+ file.path().to_str().unwrap(),
300
+ )?;
301
+
302
+ assert_eq!(resultant_map.len(), 1);
303
+ assert!(resultant_map.contains_key("https://www.example.com"));
304
+ assert_eq!(map_to_be_filtered.len(), 1);
305
+ assert!(map_to_be_filtered.contains_key("https://www.rust-lang.org/"));
306
+
307
+ Ok(())
308
+ }
309
+
310
+ #[test]
311
+ fn test_filter_with_lists_file_not_found() {
312
+ let mut map_to_be_filtered = HashMap::new();
313
+
314
+ let mut resultant_map = HashMap::new();
315
+
316
+ // Call the `filter_with_lists` function with a non-existent file path
317
+ let result = filter_with_lists(
318
+ &mut map_to_be_filtered,
319
+ &mut resultant_map,
320
+ "non-existent-file.txt",
321
+ );
322
+
323
+ assert!(result.is_err());
324
+ }
325
+
326
+ #[test]
327
+ fn test_filter_with_lists_invalid_regex() {
328
+ let mut map_to_be_filtered = HashMap::new();
329
+ map_to_be_filtered.insert(
330
+ "https://www.example.com".to_string(),
331
+ SearchResult {
332
+ title: "Example Domain".to_string(),
333
+ url: "https://www.example.com".to_string(),
334
+ description: "This domain is for use in illustrative examples in documents."
335
+ .to_string(),
336
+ engine: vec!["Google".to_string(), "Bing".to_string()],
337
+ },
338
+ );
339
+
340
+ let mut resultant_map = HashMap::new();
341
+
342
+ // Create a temporary file with an invalid regex pattern
343
+ let mut file = NamedTempFile::new().unwrap();
344
+ writeln!(file, "example(").unwrap();
345
+ file.flush().unwrap();
346
+
347
+ let result = filter_with_lists(
348
+ &mut map_to_be_filtered,
349
+ &mut resultant_map,
350
+ file.path().to_str().unwrap(),
351
+ );
352
+
353
+ assert!(result.is_err());
354
+ }
355
+ }