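//! This module handles scraping of search results from the upstream searx
//! search engine instance for a user-provided query and an optional page number.
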
use std::collections::HashMap;

use reqwest::header::USER_AGENT;
use scraper::{Html, Selector};

use crate::search_results_handler::aggregation_models::RawSearchResult;

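/// Scrapes results for the given query from the upstream searx instance at
/// `https://searx.work` and returns them as a `HashMap` of `RawSearchResult`s
/// keyed by each result's URL.
///
/// # Arguments
///
/// * `query` - The user-provided search query to send upstream.
/// * `page` - An optional page number; `None` or a value of `1` or less fetches the first page.
/// * `user_agent` - The user agent string to send with the request.
///
/// # Errors
///
/// Returns an error if the request to the upstream instance fails or if one of
/// the CSS selectors cannot be parsed.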
pub async fn results(
    query: &str,
    page: Option<u32>,
    user_agent: &str,
) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
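    // Build the upstream search URL; `None` or a page number of 1 or less
    // maps to the first results page, otherwise the page number is appended
    // as the `pageno` query parameter.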
    let url: String = match page {
        Some(page_number) => {
            if page_number <= 1 {
                format!("https://searx.work/search?q={query}")
            } else {
                format!("https://searx.work/search?q={query}&pageno={page_number}")
            }
        }
        None => format!("https://searx.work/search?q={query}"),
    };

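    // Fetch the results page from the upstream instance, sending the provided
    // user agent header, and read the response body as text.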
    let results: String = reqwest::Client::new()
        .get(url)
        .header(USER_AGENT, user_agent)
        .send()
        .await?
        .text()
        .await?;

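    // Parse the returned HTML and compile the CSS selectors used to pick out
    // each result along with its title, link, and description.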
    let document: Html = Html::parse_document(&results);
    let results: Selector = Selector::parse(".result")?;
    let result_title: Selector = Selector::parse("h3>a")?;
    let result_url: Selector = Selector::parse("h3>a")?;
    let result_desc: Selector = Selector::parse(".content")?;

    let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();

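    // Scrape every matched `.result` element into a `RawSearchResult` and
    // collect the results into the map.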
    for result in document.select(&results) {
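        // Pull the title, href, and description out of the current result.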
        let search_result: RawSearchResult = RawSearchResult {
            title: result
                .select(&result_title)
                .next()
                .unwrap()
                .inner_html()
                .trim()
                .to_string(),
            visiting_url: result
                .select(&result_url)
                .next()
                .unwrap()
                .value()
                .attr("href")
                .unwrap()
                .to_string(),
            description: result
                .select(&result_desc)
                .next()
                .unwrap()
                .inner_html()
                .trim()
                .to_string(),
            engine: vec!["searx".to_string()],
        };
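        // Key the entry by the result's href so that duplicate URLs simply
        // overwrite one another instead of appearing twice.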
        search_results.insert(
            result
                .select(&result_url)
                .next()
                .unwrap()
                .value()
                .attr("href")
                .unwrap()
                .to_string(),
            search_result,
        );
    }

    Ok(search_results)
}