neon_arch
committed on
Commit
•
13ce420
1
Parent(s):
2885f23
⚙️ refactor: add several minor optimizations (#180)(#178)
Browse files- src/results/aggregator.rs +45 -46
src/results/aggregator.rs
CHANGED
@@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
|
64 |
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
65 |
/// containing appropriate values.
|
66 |
pub async fn aggregate(
|
67 |
-
query:
|
68 |
page: u32,
|
69 |
random_delay: bool,
|
70 |
debug: bool,
|
71 |
-
upstream_search_engines:
|
72 |
request_timeout: u8,
|
73 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
74 |
-
let user_agent:
|
75 |
|
76 |
// Add a random delay before making the request.
|
77 |
if random_delay || !debug {
|
@@ -80,19 +80,18 @@ pub async fn aggregate(
|
|
80 |
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
81 |
}
|
82 |
|
83 |
-
let mut names: Vec<&str> =
|
84 |
|
85 |
// create tasks for upstream result fetching
|
86 |
let mut tasks: FutureVec = FutureVec::new();
|
87 |
|
88 |
for engine_handler in upstream_search_engines {
|
89 |
-
let (name, search_engine) = engine_handler.into_name_engine();
|
90 |
names.push(name);
|
91 |
-
let query: String = query.
|
92 |
-
let user_agent: String = user_agent.clone();
|
93 |
tasks.push(tokio::spawn(async move {
|
94 |
search_engine
|
95 |
-
.results(query, page, user_agent
|
96 |
.await
|
97 |
}));
|
98 |
}
|
@@ -110,7 +109,7 @@ pub async fn aggregate(
|
|
110 |
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
111 |
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
112 |
|
113 |
-
let mut handle_error = |error: Report<EngineError>, engine_name:
|
114 |
log::error!("Engine Error: {:?}", error);
|
115 |
engine_errors_info.push(EngineErrorInfo::new(
|
116 |
error.downcast_ref::<EngineError>().unwrap(),
|
@@ -120,7 +119,7 @@ pub async fn aggregate(
|
|
120 |
|
121 |
for _ in 0..responses.len() {
|
122 |
let response = responses.pop().unwrap();
|
123 |
-
let engine = names.pop().unwrap()
|
124 |
|
125 |
if result_map.is_empty() {
|
126 |
match response {
|
@@ -128,7 +127,7 @@ pub async fn aggregate(
|
|
128 |
result_map = results.clone();
|
129 |
}
|
130 |
Err(error) => {
|
131 |
-
handle_error(error, engine);
|
132 |
}
|
133 |
}
|
134 |
continue;
|
@@ -140,13 +139,13 @@ pub async fn aggregate(
|
|
140 |
result_map
|
141 |
.entry(key)
|
142 |
.and_modify(|result| {
|
143 |
-
result.add_engines(engine
|
144 |
})
|
145 |
.or_insert_with(|| -> SearchResult { value });
|
146 |
});
|
147 |
}
|
148 |
Err(error) => {
|
149 |
-
handle_error(error, engine);
|
150 |
}
|
151 |
}
|
152 |
}
|
@@ -155,24 +154,20 @@ pub async fn aggregate(
|
|
155 |
filter_with_lists(
|
156 |
&mut result_map,
|
157 |
&mut blacklist_map,
|
158 |
-
|
159 |
)?;
|
160 |
|
161 |
filter_with_lists(
|
162 |
&mut blacklist_map,
|
163 |
&mut result_map,
|
164 |
-
|
165 |
)?;
|
166 |
|
167 |
drop(blacklist_map);
|
168 |
|
169 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
170 |
|
171 |
-
Ok(SearchResults::new(
|
172 |
-
results,
|
173 |
-
query.to_string(),
|
174 |
-
engine_errors_info,
|
175 |
-
))
|
176 |
}
|
177 |
|
178 |
/// Filters a map of search results using a list of regex patterns.
|
@@ -203,7 +198,10 @@ pub fn filter_with_lists(
|
|
203 |
|| re.is_match(&search_result.description.to_lowercase())
|
204 |
{
|
205 |
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
206 |
-
resultant_map.insert(
|
|
|
|
|
|
|
207 |
}
|
208 |
}
|
209 |
}
|
@@ -214,6 +212,7 @@ pub fn filter_with_lists(
|
|
214 |
#[cfg(test)]
|
215 |
mod tests {
|
216 |
use super::*;
|
|
|
217 |
use std::collections::HashMap;
|
218 |
use std::io::Write;
|
219 |
use tempfile::NamedTempFile;
|
@@ -223,22 +222,22 @@ mod tests {
|
|
223 |
// Create a map of search results to filter
|
224 |
let mut map_to_be_filtered = HashMap::new();
|
225 |
map_to_be_filtered.insert(
|
226 |
-
"https://www.example.com".
|
227 |
SearchResult {
|
228 |
-
title: "Example Domain".
|
229 |
-
url: "https://www.example.com".
|
230 |
description: "This domain is for use in illustrative examples in documents."
|
231 |
-
.
|
232 |
-
engine:
|
233 |
},
|
234 |
);
|
235 |
map_to_be_filtered.insert(
|
236 |
-
"https://www.rust-lang.org/".
|
237 |
SearchResult {
|
238 |
-
title: "Rust Programming Language".
|
239 |
-
url: "https://www.rust-lang.org/".
|
240 |
-
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".
|
241 |
-
engine:
|
242 |
},
|
243 |
);
|
244 |
|
@@ -267,22 +266,22 @@ mod tests {
|
|
267 |
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
268 |
let mut map_to_be_filtered = HashMap::new();
|
269 |
map_to_be_filtered.insert(
|
270 |
-
"https://www.example.com".
|
271 |
SearchResult {
|
272 |
-
title: "Example Domain".
|
273 |
-
url: "https://www.example.com".
|
274 |
description: "This domain is for use in illustrative examples in documents."
|
275 |
-
.
|
276 |
-
engine:
|
277 |
},
|
278 |
);
|
279 |
map_to_be_filtered.insert(
|
280 |
-
"https://www.rust-lang.org/".
|
281 |
SearchResult {
|
282 |
-
title: "Rust Programming Language".
|
283 |
-
url: "https://www.rust-lang.org/".
|
284 |
-
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".
|
285 |
-
engine:
|
286 |
},
|
287 |
);
|
288 |
|
@@ -327,13 +326,13 @@ mod tests {
|
|
327 |
fn test_filter_with_lists_invalid_regex() {
|
328 |
let mut map_to_be_filtered = HashMap::new();
|
329 |
map_to_be_filtered.insert(
|
330 |
-
"https://www.example.com".
|
331 |
SearchResult {
|
332 |
-
title: "Example Domain".
|
333 |
-
url: "https://www.example.com".
|
334 |
description: "This domain is for use in illustrative examples in documents."
|
335 |
-
.
|
336 |
-
engine:
|
337 |
},
|
338 |
);
|
339 |
|
|
|
64 |
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
65 |
/// containing appropriate values.
|
66 |
pub async fn aggregate(
|
67 |
+
query: &str,
|
68 |
page: u32,
|
69 |
random_delay: bool,
|
70 |
debug: bool,
|
71 |
+
upstream_search_engines: &[EngineHandler],
|
72 |
request_timeout: u8,
|
73 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
74 |
+
let user_agent: &str = random_user_agent();
|
75 |
|
76 |
// Add a random delay before making the request.
|
77 |
if random_delay || !debug {
|
|
|
80 |
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
81 |
}
|
82 |
|
83 |
+
let mut names: Vec<&str> = Vec::with_capacity(0);
|
84 |
|
85 |
// create tasks for upstream result fetching
|
86 |
let mut tasks: FutureVec = FutureVec::new();
|
87 |
|
88 |
for engine_handler in upstream_search_engines {
|
89 |
+
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
90 |
names.push(name);
|
91 |
+
let query: String = query.to_owned();
|
|
|
92 |
tasks.push(tokio::spawn(async move {
|
93 |
search_engine
|
94 |
+
.results(&query, page, user_agent, request_timeout)
|
95 |
.await
|
96 |
}));
|
97 |
}
|
|
|
109 |
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
110 |
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
111 |
|
112 |
+
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
113 |
log::error!("Engine Error: {:?}", error);
|
114 |
engine_errors_info.push(EngineErrorInfo::new(
|
115 |
error.downcast_ref::<EngineError>().unwrap(),
|
|
|
119 |
|
120 |
for _ in 0..responses.len() {
|
121 |
let response = responses.pop().unwrap();
|
122 |
+
let engine = names.pop().unwrap();
|
123 |
|
124 |
if result_map.is_empty() {
|
125 |
match response {
|
|
|
127 |
result_map = results.clone();
|
128 |
}
|
129 |
Err(error) => {
|
130 |
+
handle_error(&error, engine);
|
131 |
}
|
132 |
}
|
133 |
continue;
|
|
|
139 |
result_map
|
140 |
.entry(key)
|
141 |
.and_modify(|result| {
|
142 |
+
result.add_engines(engine);
|
143 |
})
|
144 |
.or_insert_with(|| -> SearchResult { value });
|
145 |
});
|
146 |
}
|
147 |
Err(error) => {
|
148 |
+
handle_error(&error, engine);
|
149 |
}
|
150 |
}
|
151 |
}
|
|
|
154 |
filter_with_lists(
|
155 |
&mut result_map,
|
156 |
&mut blacklist_map,
|
157 |
+
file_path(FileType::BlockList)?,
|
158 |
)?;
|
159 |
|
160 |
filter_with_lists(
|
161 |
&mut blacklist_map,
|
162 |
&mut result_map,
|
163 |
+
file_path(FileType::AllowList)?,
|
164 |
)?;
|
165 |
|
166 |
drop(blacklist_map);
|
167 |
|
168 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
169 |
|
170 |
+
Ok(SearchResults::new(results, query, &engine_errors_info))
|
|
|
|
|
|
|
|
|
171 |
}
|
172 |
|
173 |
/// Filters a map of search results using a list of regex patterns.
|
|
|
198 |
|| re.is_match(&search_result.description.to_lowercase())
|
199 |
{
|
200 |
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
201 |
+
resultant_map.insert(
|
202 |
+
url.to_owned(),
|
203 |
+
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
204 |
+
);
|
205 |
}
|
206 |
}
|
207 |
}
|
|
|
212 |
#[cfg(test)]
|
213 |
mod tests {
|
214 |
use super::*;
|
215 |
+
use smallvec::smallvec;
|
216 |
use std::collections::HashMap;
|
217 |
use std::io::Write;
|
218 |
use tempfile::NamedTempFile;
|
|
|
222 |
// Create a map of search results to filter
|
223 |
let mut map_to_be_filtered = HashMap::new();
|
224 |
map_to_be_filtered.insert(
|
225 |
+
"https://www.example.com".to_owned(),
|
226 |
SearchResult {
|
227 |
+
title: "Example Domain".to_owned(),
|
228 |
+
url: "https://www.example.com".to_owned(),
|
229 |
description: "This domain is for use in illustrative examples in documents."
|
230 |
+
.to_owned(),
|
231 |
+
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
232 |
},
|
233 |
);
|
234 |
map_to_be_filtered.insert(
|
235 |
+
"https://www.rust-lang.org/".to_owned(),
|
236 |
SearchResult {
|
237 |
+
title: "Rust Programming Language".to_owned(),
|
238 |
+
url: "https://www.rust-lang.org/".to_owned(),
|
239 |
+
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
240 |
+
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
241 |
},
|
242 |
);
|
243 |
|
|
|
266 |
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
267 |
let mut map_to_be_filtered = HashMap::new();
|
268 |
map_to_be_filtered.insert(
|
269 |
+
"https://www.example.com".to_owned(),
|
270 |
SearchResult {
|
271 |
+
title: "Example Domain".to_owned(),
|
272 |
+
url: "https://www.example.com".to_owned(),
|
273 |
description: "This domain is for use in illustrative examples in documents."
|
274 |
+
.to_owned(),
|
275 |
+
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
276 |
},
|
277 |
);
|
278 |
map_to_be_filtered.insert(
|
279 |
+
"https://www.rust-lang.org/".to_owned(),
|
280 |
SearchResult {
|
281 |
+
title: "Rust Programming Language".to_owned(),
|
282 |
+
url: "https://www.rust-lang.org/".to_owned(),
|
283 |
+
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
284 |
+
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
285 |
},
|
286 |
);
|
287 |
|
|
|
326 |
fn test_filter_with_lists_invalid_regex() {
|
327 |
let mut map_to_be_filtered = HashMap::new();
|
328 |
map_to_be_filtered.insert(
|
329 |
+
"https://www.example.com".to_owned(),
|
330 |
SearchResult {
|
331 |
+
title: "Example Domain".to_owned(),
|
332 |
+
url: "https://www.example.com".to_owned(),
|
333 |
description: "This domain is for use in illustrative examples in documents."
|
334 |
+
.to_owned(),
|
335 |
+
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
336 |
},
|
337 |
);
|
338 |
|