neon_arch committed on
Commit
13ce420
1 Parent(s): 2885f23

⚙️ refactor: add several minor optimizations (#180)(#178)

Browse files
Files changed (1) hide show
  1. src/results/aggregator.rs +45 -46
src/results/aggregator.rs CHANGED
@@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
64
  /// function in either `searx` or `duckduckgo` or both; otherwise returns a `SearchResults` struct
65
  /// containing appropriate values.
66
  pub async fn aggregate(
67
- query: String,
68
  page: u32,
69
  random_delay: bool,
70
  debug: bool,
71
- upstream_search_engines: Vec<EngineHandler>,
72
  request_timeout: u8,
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
- let user_agent: String = random_user_agent();
75
 
76
  // Add a random delay before making the request.
77
  if random_delay || !debug {
@@ -80,19 +80,18 @@ pub async fn aggregate(
80
  tokio::time::sleep(Duration::from_secs(delay_secs)).await;
81
  }
82
 
83
- let mut names: Vec<&str> = vec![];
84
 
85
  // create tasks for upstream result fetching
86
  let mut tasks: FutureVec = FutureVec::new();
87
 
88
  for engine_handler in upstream_search_engines {
89
- let (name, search_engine) = engine_handler.into_name_engine();
90
  names.push(name);
91
- let query: String = query.clone();
92
- let user_agent: String = user_agent.clone();
93
  tasks.push(tokio::spawn(async move {
94
  search_engine
95
- .results(query, page, user_agent.clone(), request_timeout)
96
  .await
97
  }));
98
  }
@@ -110,7 +109,7 @@ pub async fn aggregate(
110
  let mut result_map: HashMap<String, SearchResult> = HashMap::new();
111
  let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
112
 
113
- let mut handle_error = |error: Report<EngineError>, engine_name: String| {
114
  log::error!("Engine Error: {:?}", error);
115
  engine_errors_info.push(EngineErrorInfo::new(
116
  error.downcast_ref::<EngineError>().unwrap(),
@@ -120,7 +119,7 @@ pub async fn aggregate(
120
 
121
  for _ in 0..responses.len() {
122
  let response = responses.pop().unwrap();
123
- let engine = names.pop().unwrap().to_string();
124
 
125
  if result_map.is_empty() {
126
  match response {
@@ -128,7 +127,7 @@ pub async fn aggregate(
128
  result_map = results.clone();
129
  }
130
  Err(error) => {
131
- handle_error(error, engine);
132
  }
133
  }
134
  continue;
@@ -140,13 +139,13 @@ pub async fn aggregate(
140
  result_map
141
  .entry(key)
142
  .and_modify(|result| {
143
- result.add_engines(engine.clone());
144
  })
145
  .or_insert_with(|| -> SearchResult { value });
146
  });
147
  }
148
  Err(error) => {
149
- handle_error(error, engine);
150
  }
151
  }
152
  }
@@ -155,24 +154,20 @@ pub async fn aggregate(
155
  filter_with_lists(
156
  &mut result_map,
157
  &mut blacklist_map,
158
- &file_path(FileType::BlockList)?,
159
  )?;
160
 
161
  filter_with_lists(
162
  &mut blacklist_map,
163
  &mut result_map,
164
- &file_path(FileType::AllowList)?,
165
  )?;
166
 
167
  drop(blacklist_map);
168
 
169
  let results: Vec<SearchResult> = result_map.into_values().collect();
170
 
171
- Ok(SearchResults::new(
172
- results,
173
- query.to_string(),
174
- engine_errors_info,
175
- ))
176
  }
177
 
178
  /// Filters a map of search results using a list of regex patterns.
@@ -203,7 +198,10 @@ pub fn filter_with_lists(
203
  || re.is_match(&search_result.description.to_lowercase())
204
  {
205
  // If the search result matches the regex pattern, move it from the original map to the resultant map
206
- resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
 
 
 
207
  }
208
  }
209
  }
@@ -214,6 +212,7 @@ pub fn filter_with_lists(
214
  #[cfg(test)]
215
  mod tests {
216
  use super::*;
 
217
  use std::collections::HashMap;
218
  use std::io::Write;
219
  use tempfile::NamedTempFile;
@@ -223,22 +222,22 @@ mod tests {
223
  // Create a map of search results to filter
224
  let mut map_to_be_filtered = HashMap::new();
225
  map_to_be_filtered.insert(
226
- "https://www.example.com".to_string(),
227
  SearchResult {
228
- title: "Example Domain".to_string(),
229
- url: "https://www.example.com".to_string(),
230
  description: "This domain is for use in illustrative examples in documents."
231
- .to_string(),
232
- engine: vec!["Google".to_string(), "Bing".to_string()],
233
  },
234
  );
235
  map_to_be_filtered.insert(
236
- "https://www.rust-lang.org/".to_string(),
237
  SearchResult {
238
- title: "Rust Programming Language".to_string(),
239
- url: "https://www.rust-lang.org/".to_string(),
240
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
241
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
242
  },
243
  );
244
 
@@ -267,22 +266,22 @@ mod tests {
267
  fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
268
  let mut map_to_be_filtered = HashMap::new();
269
  map_to_be_filtered.insert(
270
- "https://www.example.com".to_string(),
271
  SearchResult {
272
- title: "Example Domain".to_string(),
273
- url: "https://www.example.com".to_string(),
274
  description: "This domain is for use in illustrative examples in documents."
275
- .to_string(),
276
- engine: vec!["Google".to_string(), "Bing".to_string()],
277
  },
278
  );
279
  map_to_be_filtered.insert(
280
- "https://www.rust-lang.org/".to_string(),
281
  SearchResult {
282
- title: "Rust Programming Language".to_string(),
283
- url: "https://www.rust-lang.org/".to_string(),
284
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
285
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
286
  },
287
  );
288
 
@@ -327,13 +326,13 @@ mod tests {
327
  fn test_filter_with_lists_invalid_regex() {
328
  let mut map_to_be_filtered = HashMap::new();
329
  map_to_be_filtered.insert(
330
- "https://www.example.com".to_string(),
331
  SearchResult {
332
- title: "Example Domain".to_string(),
333
- url: "https://www.example.com".to_string(),
334
  description: "This domain is for use in illustrative examples in documents."
335
- .to_string(),
336
- engine: vec!["Google".to_string(), "Bing".to_string()],
337
  },
338
  );
339
 
 
64
  /// function in either `searx` or `duckduckgo` or both; otherwise returns a `SearchResults` struct
65
  /// containing appropriate values.
66
  pub async fn aggregate(
67
+ query: &str,
68
  page: u32,
69
  random_delay: bool,
70
  debug: bool,
71
+ upstream_search_engines: &[EngineHandler],
72
  request_timeout: u8,
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
+ let user_agent: &str = random_user_agent();
75
 
76
  // Add a random delay before making the request.
77
  if random_delay || !debug {
 
80
  tokio::time::sleep(Duration::from_secs(delay_secs)).await;
81
  }
82
 
83
+ let mut names: Vec<&str> = Vec::with_capacity(0);
84
 
85
  // create tasks for upstream result fetching
86
  let mut tasks: FutureVec = FutureVec::new();
87
 
88
  for engine_handler in upstream_search_engines {
89
+ let (name, search_engine) = engine_handler.to_owned().into_name_engine();
90
  names.push(name);
91
+ let query: String = query.to_owned();
 
92
  tasks.push(tokio::spawn(async move {
93
  search_engine
94
+ .results(&query, page, user_agent, request_timeout)
95
  .await
96
  }));
97
  }
 
109
  let mut result_map: HashMap<String, SearchResult> = HashMap::new();
110
  let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
111
 
112
+ let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
113
  log::error!("Engine Error: {:?}", error);
114
  engine_errors_info.push(EngineErrorInfo::new(
115
  error.downcast_ref::<EngineError>().unwrap(),
 
119
 
120
  for _ in 0..responses.len() {
121
  let response = responses.pop().unwrap();
122
+ let engine = names.pop().unwrap();
123
 
124
  if result_map.is_empty() {
125
  match response {
 
127
  result_map = results.clone();
128
  }
129
  Err(error) => {
130
+ handle_error(&error, engine);
131
  }
132
  }
133
  continue;
 
139
  result_map
140
  .entry(key)
141
  .and_modify(|result| {
142
+ result.add_engines(engine);
143
  })
144
  .or_insert_with(|| -> SearchResult { value });
145
  });
146
  }
147
  Err(error) => {
148
+ handle_error(&error, engine);
149
  }
150
  }
151
  }
 
154
  filter_with_lists(
155
  &mut result_map,
156
  &mut blacklist_map,
157
+ file_path(FileType::BlockList)?,
158
  )?;
159
 
160
  filter_with_lists(
161
  &mut blacklist_map,
162
  &mut result_map,
163
+ file_path(FileType::AllowList)?,
164
  )?;
165
 
166
  drop(blacklist_map);
167
 
168
  let results: Vec<SearchResult> = result_map.into_values().collect();
169
 
170
+ Ok(SearchResults::new(results, query, &engine_errors_info))
 
 
 
 
171
  }
172
 
173
  /// Filters a map of search results using a list of regex patterns.
 
198
  || re.is_match(&search_result.description.to_lowercase())
199
  {
200
  // If the search result matches the regex pattern, move it from the original map to the resultant map
201
+ resultant_map.insert(
202
+ url.to_owned(),
203
+ map_to_be_filtered.remove(&url.to_owned()).unwrap(),
204
+ );
205
  }
206
  }
207
  }
 
212
  #[cfg(test)]
213
  mod tests {
214
  use super::*;
215
+ use smallvec::smallvec;
216
  use std::collections::HashMap;
217
  use std::io::Write;
218
  use tempfile::NamedTempFile;
 
222
  // Create a map of search results to filter
223
  let mut map_to_be_filtered = HashMap::new();
224
  map_to_be_filtered.insert(
225
+ "https://www.example.com".to_owned(),
226
  SearchResult {
227
+ title: "Example Domain".to_owned(),
228
+ url: "https://www.example.com".to_owned(),
229
  description: "This domain is for use in illustrative examples in documents."
230
+ .to_owned(),
231
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
232
  },
233
  );
234
  map_to_be_filtered.insert(
235
+ "https://www.rust-lang.org/".to_owned(),
236
  SearchResult {
237
+ title: "Rust Programming Language".to_owned(),
238
+ url: "https://www.rust-lang.org/".to_owned(),
239
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
240
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
241
  },
242
  );
243
 
 
266
  fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
267
  let mut map_to_be_filtered = HashMap::new();
268
  map_to_be_filtered.insert(
269
+ "https://www.example.com".to_owned(),
270
  SearchResult {
271
+ title: "Example Domain".to_owned(),
272
+ url: "https://www.example.com".to_owned(),
273
  description: "This domain is for use in illustrative examples in documents."
274
+ .to_owned(),
275
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
276
  },
277
  );
278
  map_to_be_filtered.insert(
279
+ "https://www.rust-lang.org/".to_owned(),
280
  SearchResult {
281
+ title: "Rust Programming Language".to_owned(),
282
+ url: "https://www.rust-lang.org/".to_owned(),
283
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
284
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
285
  },
286
  );
287
 
 
326
  fn test_filter_with_lists_invalid_regex() {
327
  let mut map_to_be_filtered = HashMap::new();
328
  map_to_be_filtered.insert(
329
+ "https://www.example.com".to_owned(),
330
  SearchResult {
331
+ title: "Example Domain".to_owned(),
332
+ url: "https://www.example.com".to_owned(),
333
  description: "This domain is for use in illustrative examples in documents."
334
+ .to_owned(),
335
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
336
  },
337
  );
338