Merge pull request #191 from xffxff/unit_tests
Browse files🧹 Unit tests for the function `filter_with_lists`
- Cargo.lock +2 -1
- Cargo.toml +2 -1
- src/results/aggregator.rs +160 -1
Cargo.lock
CHANGED
@@ -3543,7 +3543,7 @@ dependencies = [
|
|
3543 |
|
3544 |
[[package]]
|
3545 |
name = "websurfx"
|
3546 |
-
version = "0.18.0"
|
3547 |
dependencies = [
|
3548 |
"actix-cors",
|
3549 |
"actix-files",
|
@@ -3566,6 +3566,7 @@ dependencies = [
|
|
3566 |
"scraper",
|
3567 |
"serde",
|
3568 |
"serde_json",
|
|
|
3569 |
"tokio 1.32.0",
|
3570 |
]
|
3571 |
|
|
|
3543 |
|
3544 |
[[package]]
|
3545 |
name = "websurfx"
|
3546 |
+
version = "0.18.1"
|
3547 |
dependencies = [
|
3548 |
"actix-cors",
|
3549 |
"actix-files",
|
|
|
3566 |
"scraper",
|
3567 |
"serde",
|
3568 |
"serde_json",
|
3569 |
+
"tempfile",
|
3570 |
"tokio 1.32.0",
|
3571 |
]
|
3572 |
|
Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
-
version = "0.18.0"
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
@@ -31,6 +31,7 @@ regex = {version="1.9.3", features=["perf"]}
|
|
31 |
[dev-dependencies]
|
32 |
rusty-hook = "^0.11.2"
|
33 |
criterion = "0.5.1"
|
|
|
34 |
|
35 |
[profile.dev]
|
36 |
opt-level = 0
|
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
+
version = "0.18.1"
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
|
|
31 |
[dev-dependencies]
|
32 |
rusty-hook = "^0.11.2"
|
33 |
criterion = "0.5.1"
|
34 |
+
tempfile = "3.8.0"
|
35 |
|
36 |
[profile.dev]
|
37 |
opt-level = 0
|
src/results/aggregator.rs
CHANGED
@@ -175,22 +175,181 @@ pub async fn aggregate(
|
|
175 |
))
|
176 |
}
|
177 |
|
178 |
-
fn filter_with_lists(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
180 |
resultant_map: &mut HashMap<String, SearchResult>,
|
181 |
file_path: &str,
|
182 |
) -> Result<(), Box<dyn std::error::Error>> {
|
183 |
let mut reader = BufReader::new(File::open(file_path)?);
|
|
|
184 |
for line in reader.by_ref().lines() {
|
185 |
let re = Regex::new(&line?)?;
|
|
|
|
|
186 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
187 |
if re.is_match(&url.to_lowercase())
|
188 |
|| re.is_match(&search_result.title.to_lowercase())
|
189 |
|| re.is_match(&search_result.description.to_lowercase())
|
190 |
{
|
|
|
191 |
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
192 |
}
|
193 |
}
|
194 |
}
|
|
|
195 |
Ok(())
|
196 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
))
|
176 |
}
|
177 |
|
178 |
+
/// Filters a map of search results using a list of regex patterns.
|
179 |
+
///
|
180 |
+
/// # Arguments
|
181 |
+
///
|
182 |
+
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
|
183 |
+
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
|
184 |
+
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
185 |
+
///
|
186 |
+
/// # Errors
|
187 |
+
///
|
188 |
+
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
189 |
+
pub fn filter_with_lists(
|
190 |
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
191 |
resultant_map: &mut HashMap<String, SearchResult>,
|
192 |
file_path: &str,
|
193 |
) -> Result<(), Box<dyn std::error::Error>> {
|
194 |
let mut reader = BufReader::new(File::open(file_path)?);
|
195 |
+
|
196 |
for line in reader.by_ref().lines() {
|
197 |
let re = Regex::new(&line?)?;
|
198 |
+
|
199 |
+
// Iterate over each search result in the map and check if it matches the regex pattern
|
200 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
201 |
if re.is_match(&url.to_lowercase())
|
202 |
|| re.is_match(&search_result.title.to_lowercase())
|
203 |
|| re.is_match(&search_result.description.to_lowercase())
|
204 |
{
|
205 |
+
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
206 |
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
207 |
}
|
208 |
}
|
209 |
}
|
210 |
+
|
211 |
Ok(())
|
212 |
}
|
213 |
+
|
214 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// URL key of the `example.com` fixture entry.
    const EXAMPLE_URL: &str = "https://www.example.com";
    /// URL key of the `rust-lang.org` fixture entry.
    const RUST_URL: &str = "https://www.rust-lang.org/";

    /// Builds the search result stored under [`EXAMPLE_URL`].
    fn example_result() -> SearchResult {
        SearchResult {
            title: "Example Domain".to_string(),
            url: EXAMPLE_URL.to_string(),
            description: "This domain is for use in illustrative examples in documents."
                .to_string(),
            engine: vec!["Google".to_string(), "Bing".to_string()],
        }
    }

    /// Builds the search result stored under [`RUST_URL`].
    fn rust_result() -> SearchResult {
        SearchResult {
            title: "Rust Programming Language".to_string(),
            url: RUST_URL.to_string(),
            description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
            engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
        }
    }

    /// Builds a map holding both fixture entries, keyed by their URLs.
    fn two_result_map() -> HashMap<String, SearchResult> {
        HashMap::from([
            (EXAMPLE_URL.to_string(), example_result()),
            (RUST_URL.to_string(), rust_result()),
        ])
    }

    /// Writes one pattern per line into a fresh temporary file and flushes it.
    fn pattern_file(patterns: &[&str]) -> std::io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        for pattern in patterns {
            writeln!(file, "{}", pattern)?;
        }
        file.flush()?;
        Ok(file)
    }

    #[test]
    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
        // Both entries are matched, one by each pattern.
        let mut map_to_be_filtered = two_result_map();
        let file = pattern_file(&["example", "rust"])?;
        let mut resultant_map = HashMap::new();

        filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            file.path().to_str().unwrap(),
        )?;

        assert_eq!(resultant_map.len(), 2);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert!(resultant_map.contains_key(RUST_URL));
        assert_eq!(map_to_be_filtered.len(), 0);

        Ok(())
    }

    #[test]
    fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
        // A single pattern containing a wildcard only matches the example.com entry.
        let mut map_to_be_filtered = two_result_map();
        let file = pattern_file(&["ex.*le"])?;
        let mut resultant_map = HashMap::new();

        filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            file.path().to_str().unwrap(),
        )?;

        assert_eq!(resultant_map.len(), 1);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert_eq!(map_to_be_filtered.len(), 1);
        assert!(map_to_be_filtered.contains_key(RUST_URL));

        Ok(())
    }

    #[test]
    fn test_filter_with_lists_file_not_found() {
        let mut map_to_be_filtered = HashMap::new();
        let mut resultant_map = HashMap::new();

        // A path that does not exist must surface as an error instead of a panic.
        let result = filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            "non-existent-file.txt",
        );

        assert!(result.is_err());
    }

    #[test]
    fn test_filter_with_lists_invalid_regex() {
        let mut map_to_be_filtered =
            HashMap::from([(EXAMPLE_URL.to_string(), example_result())]);
        let mut resultant_map = HashMap::new();

        // The unbalanced parenthesis makes the pattern fail to compile.
        let file = pattern_file(&["example("]).unwrap();

        let result = filter_with_lists(
            &mut map_to_be_filtered,
            &mut resultant_map,
            file.path().to_str().unwrap(),
        );

        assert!(result.is_err());
    }
}
|