Merge pull request #195 from neon-mmd/optimize-and-make-code-idiomatic-3

Files changed:
- .gitignore (+1 -0)
- Cargo.lock (+47 -0)
- Cargo.toml (+8 -2)
- src/engines/duckduckgo.rs (+20 -40)
- src/engines/engine_models.rs (+4 -6)
- src/engines/searx.rs (+16 -35)
- src/results/aggregation_models.rs (+30 -30)
- src/results/aggregator.rs (+42 -42)
- src/server/routes.rs (+19 -20)
.gitignore
CHANGED
@@ -4,3 +4,4 @@ package-lock.json
 dump.rdb
 .vscode
 megalinter-reports/
+dhat-heap.json
Cargo.lock
CHANGED
@@ -830,6 +830,22 @@ dependencies = [
  "syn 1.0.109",
 ]

+[[package]]
+name = "dhat"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3"
+dependencies = [
+ "backtrace",
+ "lazy_static",
+ "mintex",
+ "parking_lot 0.12.1",
+ "rustc-hash",
+ "serde",
+ "serde_json",
+ "thousands",
+]
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -1738,6 +1754,16 @@ dependencies = [
  "adler",
 ]

+[[package]]
+name = "mintex"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb"
+dependencies = [
+ "once_cell",
+ "sys-info",
+]
+
 [[package]]
 name = "mio"
 version = "0.6.23"
@@ -2891,6 +2917,9 @@ name = "smallvec"
 version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
+dependencies = [
+ "serde",
+]

 [[package]]
 name = "socket2"
@@ -3032,6 +3061,16 @@ dependencies = [
  "unicode-xid 0.2.4",
 ]

+[[package]]
+name = "sys-info"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
+dependencies = [
+ "cc",
+ "libc",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.8.0"
@@ -3085,6 +3124,12 @@ dependencies = [
  "syn 2.0.29",
 ]

+[[package]]
+name = "thousands"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
+
 [[package]]
 name = "time"
 version = "0.1.45"
@@ -3634,6 +3679,7 @@ dependencies = [
  "actix-web",
  "async-trait",
  "criterion",
+ "dhat",
  "env_logger",
  "error-stack",
  "fake-useragent",
@@ -3651,6 +3697,7 @@ dependencies = [
  "scraper",
  "serde",
  "serde_json",
+ "smallvec 1.11.0",
  "tempfile",
  "tokio 1.32.0",
 ]
Cargo.toml
CHANGED
@@ -8,7 +8,7 @@ license = "AGPL-3.0"

 [dependencies]
 reqwest = {version="0.11.20",features=["json"]}
-tokio = {version="1.32.0",features=["…
+tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
 serde = {version="1.0.188",features=["derive"]}
 handlebars = { version = "4.3.7", features = ["dir_source"] }
 scraper = {version="0.17.1"}
@@ -28,6 +28,8 @@ error-stack = {version="0.4.0"}
 async-trait = {version="0.1.73"}
 regex = {version="1.9.4", features=["perf"]}
 futures = {version="0.3.28"}
+dhat = {version="0.3.2", optional = true}
+smallvec = {version="1.11.0", features=["union", "serde"]}

 [dev-dependencies]
 rusty-hook = "^0.11.2"
@@ -48,7 +50,8 @@ rpath = false

 [profile.release]
 opt-level = 3
-debug = false
+debug = false # This should only be commented when testing with dhat profiler
+# debug = 1 # This should only be uncommented when testing with dhat profiler
 split-debuginfo = '...'
 debug-assertions = false
 overflow-checks = false
@@ -58,3 +61,6 @@ incremental = false
 codegen-units = 16
 rpath = false
 strip = "debuginfo"
+
+[features]
+dhat-heap = ["dep:dhat"]
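
Note on the new `dhat-heap` feature: the diff adds `dhat` as an optional dependency but does not include the binary-side wiring. A minimal sketch of the usual setup from the dhat crate's documented API follows; its exact placement in this project's main.rs is an assumption, not part of this diff. Run with `cargo run --features dhat-heap` (with `debug = 1` temporarily enabled in `[profile.release]`, as the comments above describe) and the profile lands in the now-gitignored dhat-heap.json.

    // Hypothetical wiring in main.rs -- not shown in this PR.
    // With --features dhat-heap, dhat replaces the global allocator
    // so every heap allocation is tracked.
    #[cfg(feature = "dhat-heap")]
    #[global_allocator]
    static ALLOC: dhat::Alloc = dhat::Alloc;

    fn main() {
        // The profiler guard must live for the whole run; the
        // dhat-heap.json report is written when it is dropped.
        #[cfg(feature = "dhat-heap")]
        let _profiler = dhat::Profiler::new_heap();

        // ... start the server as usual ...
    }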
src/engines/duckduckgo.rs
CHANGED
@@ -4,14 +4,14 @@

 use std::collections::HashMap;

-use reqwest::header::…
+use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};

 use crate::results::aggregation_models::SearchResult;

 use super::engine_models::{EngineError, SearchEngine};

-use error_stack::{…
+use error_stack::{Report, Result, ResultExt};

 /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
 /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
     /// or HeaderMap fails to initialize.
     async fn results(
         &self,
-        query: …
+        query: &str,
         page: u32,
-        user_agent: …
+        user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
@@ -61,38 +61,19 @@ impl SearchEngine for DuckDuckGo {
         };

         // initializing HeaderMap and adding appropriate headers.
-        let … (per-header `header_map.insert(…)` calls, truncated in source)
-            "https://google.com/"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            CONTENT_TYPE,
-            "application/x-www-form-urlencoded"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            COOKIE,
-            "kl=wt-wt"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("USER_AGENT".to_string(), user_agent.to_string()),
+            ("REFERER".to_string(), "https://google.com/".to_string()),
+            (
+                "CONTENT_TYPE".to_string(),
+                "application/x-www-form-urlencoded".to_string(),
+            ),
+            ("COOKIE".to_string(), "kl=wt-wt".to_string()),
+        ]))
+        .change_context(EngineError::UnexpectedError)?;

         let document: Html = Html::parse_document(
-            &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+            &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
         );

         let no_result: Selector = Selector::parse(".no-results")
@@ -126,8 +107,7 @@ impl SearchEngine for DuckDuckGo {
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-                    .to_string(),
+                    .trim(),
                     format!(
                         "https://{}",
                         result
@@ -136,15 +116,15 @@ impl SearchEngine for DuckDuckGo {
                             .unwrap()
                             .inner_html()
                             .trim()
-                    ),
+                    )
+                    .as_str(),
                     result
                         .select(&result_desc)
                         .next()
                         .unwrap()
                         .inner_html()
-                        .trim()
-                        .to_string(),
-                    vec!["duckduckgo".to_string()],
+                        .trim(),
+                    &["duckduckgo"],
                 )
             })
             .map(|search_result| (search_result.url.clone(), search_result))
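
Note on the HeaderMap change: both engines now build all headers in one fallible conversion. `reqwest::header::HeaderMap` (a re-export of the http crate's type) implements `TryFrom<&HashMap<String, String>>`, so a single `Err` covers any invalid header name or value, and one `.change_context(EngineError::UnexpectedError)` lifts it into the error-stack report. A small standalone sketch of that behavior:

    use reqwest::header::HeaderMap;
    use std::collections::HashMap;

    fn main() {
        // One fallible step builds the whole map, as in the new engine code.
        let ok = HeaderMap::try_from(&HashMap::from([(
            "COOKIE".to_string(),
            "kl=wt-wt".to_string(),
        )]));
        assert!(ok.is_ok());

        // An invalid header name surfaces as an Err to convert into an
        // EngineError, rather than a panic somewhere in manual inserts.
        let bad = HeaderMap::try_from(&HashMap::from([(
            "BAD HEADER".to_string(), // spaces are not valid in header names
            "x".to_string(),
        )]));
        assert!(bad.is_err());
    }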
src/engines/engine_models.rs
CHANGED
@@ -2,7 +2,7 @@
 //! the upstream search engines with the search query provided by the user.

 use crate::results::aggregation_models::SearchResult;
-use error_stack::{…
+use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};

 /// A custom error type used for handle engine associated errors.
@@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
 pub trait SearchEngine: Sync + Send {
     async fn fetch_html_from_upstream(
         &self,
-        url: …
+        url: &str,
         header_map: reqwest::header::HeaderMap,
         request_timeout: u8,
     ) -> Result<String, EngineError> {
@@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
             .headers(header_map) // add spoofed headers to emulate human behavior
             .send()
             .await
-            .into_report()
             .change_context(EngineError::RequestError)?
             .text()
             .await
-            .into_report()
             .change_context(EngineError::RequestError)?)
     }

     async fn results(
         &self,
-        query: …
+        query: &str,
         page: u32,
-        user_agent: …
+        user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }
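
Note on the removed `.into_report()` calls: with error-stack 0.4 (the version pinned in Cargo.toml), `ResultExt::change_context` is available directly on a plain `Result` whose error type implements `Context`, so the reqwest results convert in one step. A minimal sketch of the pattern; the `ParseError` context type here is made up for illustration:

    use error_stack::{Result, ResultExt};

    #[derive(Debug)]
    struct ParseError;

    impl std::fmt::Display for ParseError {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.write_str("failed to parse the input as a number")
        }
    }

    impl error_stack::Context for ParseError {}

    // A std Result lifts straight into an error-stack Report via
    // change_context; no intermediate .into_report() is needed.
    fn parse(input: &str) -> Result<u32, ParseError> {
        input.parse::<u32>().change_context(ParseError)
    }

    fn main() {
        assert!(parse("42").is_ok());
        assert!(parse("forty-two").is_err());
    }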
src/engines/searx.rs
CHANGED
@@ -2,14 +2,14 @@
 //! by querying the upstream searx search engine instance with user provided query and with a page
 //! number if provided.

-use reqwest::header::…
+use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;

 use crate::results::aggregation_models::SearchResult;

 use super::engine_models::{EngineError, SearchEngine};
-use error_stack::{…
+use error_stack::{Report, Result, ResultExt};

 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
 /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -38,9 +38,9 @@ impl SearchEngine for Searx {

     async fn results(
         &self,
-        query: …
+        query: &str,
         page: u32,
-        user_agent: …
+        user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
@@ -51,32 +51,16 @@ impl SearchEngine for Searx {
         };

         // initializing headers and adding appropriate headers.
-        let …
-        header_map.insert(
-            REFERER,
-            "https://google.com/"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            CONTENT_TYPE,
-            "application/x-www-form-urlencoded"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(COOKIE, "categories=general; language=auto; … ; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("USER_AGENT".to_string(), user_agent.to_string()),
+            ("REFERER".to_string(), "https://google.com/".to_string()),
+            ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
+            ("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
+        ]))
+        .change_context(EngineError::UnexpectedError)?;

         let document: Html = Html::parse_document(
-            &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+            &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
         );

         let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
@@ -117,24 +101,21 @@ impl SearchEngine for Searx {
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-                    .to_string(),
+                    .trim(),
                     result
                         .select(&result_url)
                         .next()
                         .unwrap()
                         .value()
                         .attr("href")
-                        .unwrap()
-                        .to_string(),
+                        .unwrap(),
                     result
                         .select(&result_desc)
                         .next()
                         .unwrap()
                         .inner_html()
-                        .trim()
-                        .to_string(),
-                    vec!["searx".to_string()],
+                        .trim(),
+                    &["searx"],
                 )
             })
             .map(|search_result| (search_result.url.clone(), search_result))
src/results/aggregation_models.rs
CHANGED
@@ -2,6 +2,7 @@
 //! data scraped from the upstream search engines.

 use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;

 use crate::{config::parser_models::Style, engines::engine_models::EngineError};

@@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 /// (href url in html in simple words).
 /// * `description` - The description of the search result.
 /// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub title: String,
     pub url: String,
     pub description: String,
-    pub engine: …
+    pub engine: SmallVec<[String; 0]>,
 }

 impl SearchResult {
@@ -35,12 +36,12 @@ impl SearchResult {
     /// (href url in html in simple words).
     /// * `description` - The description of the search result.
     /// * `engine` - The names of the upstream engines from which this results were provided.
-    pub fn new(title: …
+    pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
         SearchResult {
-            title,
-            url,
-            description,
-            engine,
+            title: title.to_owned(),
+            url: url.to_owned(),
+            description: description.to_owned(),
+            engine: engine.iter().map(|name| name.to_string()).collect(),
         }
     }

@@ -49,8 +50,8 @@ impl SearchResult {
     /// # Arguments
     ///
     /// * `engine` - Takes an engine name provided as a String.
-    pub fn add_engines(&mut self, engine: …
-        self.engine.push(engine)
+    pub fn add_engines(&mut self, engine: &str) {
+        self.engine.push(engine.to_owned())
     }

     /// A function which returns the engine name stored from the struct as a string.
@@ -58,13 +59,12 @@ impl SearchResult {
     /// # Returns
     ///
     /// An engine name stored as a string from the struct.
-    pub fn engine(self) -> String {
-        self.engine…
+    pub fn engine(&mut self) -> String {
+        std::mem::take(&mut self.engine[0])
     }
 }

-
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
     pub error: String,
     pub engine: String,
@@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
 }

 impl EngineErrorInfo {
-    pub fn new(error: &EngineError, engine: …
+    pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
             error: match error {
-                EngineError::RequestError => …
-                EngineError::EmptyResultSet => …
-                EngineError::UnexpectedError => …
+                EngineError::RequestError => "RequestError".to_owned(),
+                EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
+                EngineError::UnexpectedError => "UnexpectedError".to_owned(),
             },
-            engine,
+            engine: engine.to_owned(),
             severity_color: match error {
-                EngineError::RequestError => …
-                EngineError::EmptyResultSet => …
-                EngineError::UnexpectedError => …
+                EngineError::RequestError => "green".to_owned(),
+                EngineError::EmptyResultSet => "blue".to_owned(),
+                EngineError::UnexpectedError => "red".to_owned(),
             },
         }
     }
@@ -108,7 +108,7 @@ pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
     pub style: Style,
-    pub engine_errors_info: …
+    pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
 }

 impl SearchResults {
@@ -124,19 +124,19 @@ impl SearchResults {
     /// given search query.
     pub fn new(
         results: Vec<SearchResult>,
-        page_query: …
-        engine_errors_info: …
+        page_query: &str,
+        engine_errors_info: &[EngineErrorInfo],
     ) -> Self {
+        Self {
             results,
-            page_query,
-            style: Style::…
-            engine_errors_info,
+            page_query: page_query.to_owned(),
+            style: Style::default(),
+            engine_errors_info: SmallVec::from(engine_errors_info),
         }
     }

     /// A setter function to add website style to the return search results.
-    pub fn add_style(&mut self, style: Style) {
-        self.style = style;
+    pub fn add_style(&mut self, style: &Style) {
+        self.style = style.to_owned();
     }
 }
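
Note on `SmallVec<[String; 0]>`: with an inline capacity of zero, every pushed element still lands on the heap; the change is about the container type rather than inline storage. The `union` feature enabled in Cargo.toml affects the internal layout, and the `serde` feature keeps the Serialize/Deserialize derives on SearchResult working. Because SmallVec keeps the Vec-like API, call sites such as `add_engines` and the aggregator's merge loop are unchanged, as this small sketch shows:

    use smallvec::{smallvec, SmallVec};

    fn main() {
        // Same shape as the new SearchResult::engine field.
        let mut engines: SmallVec<[String; 0]> = smallvec!["duckduckgo".to_owned()];

        // push/len/iter behave exactly like Vec.
        engines.push("searx".to_owned());
        assert_eq!(engines.len(), 2);
        assert!(engines.iter().any(|e| e == "searx"));
    }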
src/results/aggregator.rs
CHANGED
@@ -64,11 +64,11 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng…
 /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
 /// containing appropriate values.
 pub async fn aggregate(
-    query: …
+    query: &str,
     page: u32,
     random_delay: bool,
     debug: bool,
-    upstream_search_engines: …
+    upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: &str = random_user_agent();
@@ -80,18 +80,18 @@ pub async fn aggregate(
         tokio::time::sleep(Duration::from_secs(delay_secs)).await;
     }

-    let mut names: Vec<&str> = …
+    let mut names: Vec<&str> = Vec::with_capacity(0);

     // create tasks for upstream result fetching
     let mut tasks: FutureVec = FutureVec::new();

     for engine_handler in upstream_search_engines {
-        let (name, search_engine) = engine_handler.into_name_engine();
+        let (name, search_engine) = engine_handler.to_owned().into_name_engine();
         names.push(name);
-        let query: String = query.…
+        let query: String = query.to_owned();
         tasks.push(tokio::spawn(async move {
             search_engine
-                .results(query, page, user_agent…
+                .results(&query, page, user_agent, request_timeout)
                 .await
         }));
     }
@@ -109,7 +109,7 @@ pub async fn aggregate(
     let mut result_map: HashMap<String, SearchResult> = HashMap::new();
     let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();

-    let mut handle_error = |error: Report<EngineError>, engine_name: …
+    let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
         log::error!("Engine Error: {:?}", error);
         engine_errors_info.push(EngineErrorInfo::new(
             error.downcast_ref::<EngineError>().unwrap(),
@@ -119,7 +119,7 @@ pub async fn aggregate(

     for _ in 0..responses.len() {
         let response = responses.pop().unwrap();
-        let engine = names.pop().unwrap()…
+        let engine = names.pop().unwrap();

         if result_map.is_empty() {
             match response {
@@ -127,7 +127,7 @@ pub async fn aggregate(
                     result_map = results.clone();
                 }
                 Err(error) => {
-                    handle_error(error, engine);
+                    handle_error(&error, engine);
                 }
             }
             continue;
@@ -139,13 +139,13 @@ pub async fn aggregate(
                 result_map
                     .entry(key)
                     .and_modify(|result| {
-                        result.add_engines(engine…
+                        result.add_engines(engine);
                     })
                     .or_insert_with(|| -> SearchResult { value });
             });
         }
         Err(error) => {
-            handle_error(error, engine);
+            handle_error(&error, engine);
         }
     }
 }
@@ -167,11 +167,7 @@ pub async fn aggregate(

     let results: Vec<SearchResult> = result_map.into_values().collect();

-    Ok(SearchResults::new(
-        results,
-        query.to_string(),
-        engine_errors_info,
-    ))
+    Ok(SearchResults::new(results, query, &engine_errors_info))
 }

 /// Filters a map of search results using a list of regex patterns.
@@ -202,7 +198,10 @@ pub fn filter_with_lists(
             || re.is_match(&search_result.description.to_lowercase())
         {
             // If the search result matches the regex pattern, move it from the original map to the resultant map
-            resultant_map.insert(…
+            resultant_map.insert(
+                url.to_owned(),
+                map_to_be_filtered.remove(&url.to_owned()).unwrap(),
+            );
         }
     }
 }
@@ -213,6 +212,7 @@ pub fn filter_with_lists(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use smallvec::smallvec;
     use std::collections::HashMap;
     use std::io::Write;
     use tempfile::NamedTempFile;
@@ -222,22 +222,22 @@ mod tests {
         // Create a map of search results to filter
         let mut map_to_be_filtered = HashMap::new();
         map_to_be_filtered.insert(
+            "https://www.example.com".to_owned(),
             SearchResult {
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                 description: "This domain is for use in illustrative examples in documents."
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
             },
         );
         map_to_be_filtered.insert(
+            "https://www.rust-lang.org/".to_owned(),
             SearchResult {
+                title: "Rust Programming Language".to_owned(),
+                url: "https://www.rust-lang.org/".to_owned(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
             },
         );

@@ -266,22 +266,22 @@ mod tests {
     fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
         let mut map_to_be_filtered = HashMap::new();
         map_to_be_filtered.insert(
+            "https://www.example.com".to_owned(),
             SearchResult {
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                 description: "This domain is for use in illustrative examples in documents."
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
             },
         );
         map_to_be_filtered.insert(
+            "https://www.rust-lang.org/".to_owned(),
             SearchResult {
+                title: "Rust Programming Language".to_owned(),
+                url: "https://www.rust-lang.org/".to_owned(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
             },
         );

@@ -326,13 +326,13 @@ mod tests {
     fn test_filter_with_lists_invalid_regex() {
         let mut map_to_be_filtered = HashMap::new();
         map_to_be_filtered.insert(
+            "https://www.example.com".to_owned(),
             SearchResult {
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                 description: "This domain is for use in illustrative examples in documents."
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
             },
         );
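
Note on the borrowed `aggregate` signature: `query` is now a `&str` and `upstream_search_engines` a `&[EngineHandler]`, but `tokio::spawn` still requires a `'static` future, which is why each task moves in its own owned copy of the query (`let query: String = query.to_owned()`). A trimmed-down sketch of that fan-out pattern; the function and values are illustrative, not from the PR:

    use tokio::task::JoinHandle;

    // The caller keeps a &str; each spawned 'static task owns its own copy.
    async fn fan_out(query: &str) -> Vec<JoinHandle<usize>> {
        let mut tasks = Vec::new();
        for _ in 0..2 {
            let query: String = query.to_owned(); // moved into the task below
            tasks.push(tokio::spawn(async move { query.len() }));
        }
        tasks
    }

    #[tokio::main]
    async fn main() {
        for task in fan_out("rust").await {
            assert_eq!(task.await.unwrap(), 4);
        }
    }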
src/server/routes.rs
CHANGED
@@ -62,10 +62,10 @@ pub async fn not_found(
 /// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
-struct Cookie {
-    theme: …
-    colorscheme: …
-    engines: Vec<…
+struct Cookie<'a> {
+    theme: &'a str,
+    colorscheme: &'a str,
+    engines: Vec<&'a str>,
 }

 /// Handles the route of search page of the `websurfx` meta search engine website and it takes
@@ -111,9 +111,9 @@ pub async fn search(
             page - 1
         ),
         &config,
-        query…
+        query,
         page - 1,
-        req…
+        &req,
     ),
     results(
         format!(
@@ -121,9 +121,9 @@ pub async fn search(
             config.binding_ip, config.port, query, page
         ),
         &config,
-        query…
+        query,
         page,
-        req…
+        &req,
     ),
     results(
         format!(
@@ -134,9 +134,9 @@ pub async fn search(
             page + 1
         ),
         &config,
-        query…
+        query,
         page + 1,
-        req…
+        &req,
     )
 );

@@ -154,9 +154,9 @@ pub async fn search(
 async fn results(
     url: String,
     config: &Config,
-    query: …
+    query: &str,
     page: u32,
-    req: HttpRequest,
+    req: &HttpRequest,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     //Initialize redis cache connection struct
     let mut redis_cache = RedisCache::new(&config.redis_url, 5).await?;
@@ -165,19 +165,17 @@ async fn results(
     // check if fetched cache results was indeed fetched or it was an error and if so
     // handle the data accordingly.
     match cached_results_json {
-        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)…
+        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
             // UI and use that.
-            let mut results: …
-                .cookie("appCookie")
-            {
+            let mut results: SearchResults = match req.cookie("appCookie") {
                 Some(cookie_value) => {
                     let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;

-                    let engines = cookie_value
+                    let engines: Vec<EngineHandler> = cookie_value
                         .engines
                         .iter()
                         .filter_map(|name| EngineHandler::new(name))
@@ -188,7 +186,7 @@ async fn results(
                     page,
                     config.aggregator.random_delay,
                     config.debug,
-                    engines,
+                    &engines,
                     config.request_timeout,
                 )
                 .await?
@@ -199,13 +197,14 @@ async fn results(
                     page,
                     config.aggregator.random_delay,
                     config.debug,
-                    config.upstream_search_engines…
+                    &config.upstream_search_engines,
                     config.request_timeout,
                 )
                 .await?
             }
         };

+        results.add_style(&config.style);
         redis_cache
             .cache_results(&serde_json::to_string(&results)?, &url)
             .await?;
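
Note on the reworked Cookie struct: deriving Deserialize over `&'a str` fields makes serde borrow the theme, colorscheme, and engine names straight out of the cookie's JSON for the lifetime of the request, instead of allocating a String per field. This zero-copy form works as long as the input outlives the struct and the borrowed strings contain no JSON escapes. A standalone sketch; the sample cookie values are illustrative:

    use serde::Deserialize;

    // Same shape as the struct in this diff: every field borrows from the input.
    #[derive(Deserialize)]
    struct Cookie<'a> {
        theme: &'a str,
        colorscheme: &'a str,
        engines: Vec<&'a str>,
    }

    fn main() -> Result<(), serde_json::Error> {
        let raw =
            r#"{"theme":"simple","colorscheme":"catppuccin-mocha","engines":["duckduckgo","searx"]}"#;
        // Fields borrow from `raw`; no per-field String allocations.
        let cookie: Cookie = serde_json::from_str(raw)?;
        assert_eq!(cookie.theme, "simple");
        assert_eq!(cookie.colorscheme, "catppuccin-mocha");
        assert_eq!(cookie.engines, ["duckduckgo", "searx"]);
        Ok(())
    }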