File size: 2,982 Bytes
fc69ace
 
 
15fc415
 
f94ac50
 
 
 
15fc415
 
 
fc69ace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15fc415
 
 
 
f94ac50
15fc415
 
 
 
 
 
 
 
 
f94ac50
 
 
 
28fee6b
f94ac50
 
 
 
 
 
 
 
 
 
15fc415
f94ac50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15fc415
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
//! This module provides the functionality to scrape and gather all the results from the upstream
//! search engines and then remove duplicate results.

use std::collections::HashMap;

use super::{
    aggregation_models::{RawSearchResult, SearchResult, SearchResults},
    user_agent::random_user_agent,
};

use crate::engines::{duckduckgo, searx};

/// A function that aggregates all the scraped results from the above upstream engines and
/// then removes duplicate results and if two results are found to be from two or more engines
/// then puts their names together to show the results are fetched from these upstream engines
/// and then removes all data from the HashMap and puts into a struct of all results aggregated
/// into a vector and also adds the query used into the struct this is neccessory because 
/// otherwise the search bar in search remains empty if searched from the query url
///
/// # Example:
///
/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
/// contain the word huston and not remain empty.
/// 
/// # Arguments
///
/// * `query` - Accepts a string to query with the above upstream search engines.
/// * `page` - Accepts an Option<u32> which could either be a None or a valid page number.
///
/// # Error
///
/// Returns an error a reqwest and scraping selector errors if any error occurs in the results 
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
/// containing appropriate values.
pub async fn aggregate(
    query: &str,
    page: Option<u32>,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
    let user_agent: String = random_user_agent();
    let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();

    let ddg_map_results: HashMap<String, RawSearchResult> =
        duckduckgo::results(query, page, &user_agent).await?;
    let searx_map_results: HashMap<String, RawSearchResult> =
        searx::results(query, page, &user_agent).await?;

    result_map.extend(ddg_map_results);

    searx_map_results.into_iter().for_each(|(key, value)| {
        result_map
            .entry(key)
            .and_modify(|result| {
                result.add_engines(value.clone().engine());
            })
            .or_insert_with(|| -> RawSearchResult {
                RawSearchResult::new(
                    value.title.clone(),
                    value.visiting_url.clone(),
                    value.description.clone(),
                    value.engine.clone(),
                )
            });
    });

    Ok(SearchResults::new(
        result_map
            .into_iter()
            .map(|(key, value)| {
                SearchResult::new(
                    value.title,
                    value.visiting_url,
                    key,
                    value.description,
                    value.engine,
                )
            })
            .collect(),
        query.to_string(),
    ))
}