File size: 3,755 Bytes
fc69ace
 
 
c5c1684
 
 
 
15fc415
f94ac50
 
 
 
15fc415
 
 
fc69ace
 
 
 
c5c1684
fc69ace
 
 
 
 
 
c5c1684
fc69ace
 
 
c170de8
0527288
fc69ace
 
 
c5c1684
fc69ace
 
15fc415
 
c170de8
0527288
13632f1
15fc415
f94ac50
15fc415
 
c5c1684
13632f1
0527288
 
 
 
c5c1684
 
 
 
 
 
 
1ebf888
 
 
 
 
 
 
 
 
 
 
 
 
15fc415
 
 
f94ac50
 
 
 
28fee6b
f94ac50
 
 
 
 
 
 
 
 
 
15fc415
f94ac50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15fc415
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
//! This module provides the functionality to scrape and gather all the results from the upstream
//! search engines and then remove duplicate results.

use std::{collections::HashMap, time::Duration};

use rand::Rng;
use tokio::join;

use super::{
    aggregation_models::{RawSearchResult, SearchResult, SearchResults},
    user_agent::random_user_agent,
};

use crate::engines::{duckduckgo, searx};

/// A function that aggregates all the scraped results from the above upstream engines and
/// then removes duplicate results and if two results are found to be from two or more engines
/// then puts their names together to show the results are fetched from these upstream engines
/// and then removes all data from the HashMap and puts into a struct of all results aggregated
/// into a vector and also adds the query used into the struct this is neccessory because
/// otherwise the search bar in search remains empty if searched from the query url
///
/// # Example:
///
/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
/// contain the word huston and not remain empty.
///
/// # Arguments
///
/// * `query` - Accepts a string to query with the above upstream search engines.
/// * `page` - Accepts an u32 page number.
/// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
///
/// # Error
///
/// Returns an error a reqwest and scraping selector errors if any error occurs in the results
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
/// containing appropriate values.
pub async fn aggregate(
    query: &str,
    page: u32,
    random_delay: bool,
    debug: bool,
) -> Result<SearchResults, Box<dyn std::error::Error>> {
    let user_agent: String = random_user_agent();
    let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();

    // Add a random delay before making the request.
    if random_delay || !debug {
        let mut rng = rand::thread_rng();
        let delay_secs = rng.gen_range(1..10);
        std::thread::sleep(Duration::from_secs(delay_secs));
    }

    // fetch results from upstream search engines simultaneously/concurrently.
    let (ddg_map_results, searx_map_results) = join!(
        duckduckgo::results(query, page, &user_agent),
        searx::results(query, page, &user_agent)
    );

    let ddg_map_results = ddg_map_results.unwrap_or_else(|e| {
        if debug {
            log::error!("Error fetching results from DuckDuckGo: {:?}", e);
        }
        HashMap::new()
    });

    let searx_map_results = searx_map_results.unwrap_or_else(|e| {
        if debug {
            log::error!("Error fetching results from Searx: {:?}", e);
        }
        HashMap::new()
    });

    result_map.extend(ddg_map_results);

    searx_map_results.into_iter().for_each(|(key, value)| {
        result_map
            .entry(key)
            .and_modify(|result| {
                result.add_engines(value.clone().engine());
            })
            .or_insert_with(|| -> RawSearchResult {
                RawSearchResult::new(
                    value.title.clone(),
                    value.visiting_url.clone(),
                    value.description.clone(),
                    value.engine.clone(),
                )
            });
    });

    Ok(SearchResults::new(
        result_map
            .into_iter()
            .map(|(key, value)| {
                SearchResult::new(
                    value.title,
                    value.visiting_url,
                    key,
                    value.description,
                    value.engine,
                )
            })
            .collect(),
        query.to_string(),
    ))
}