Spaces:
Runtime error
Runtime error
neon_arch
committed on
Commit
•
049b1c1
1
Parent(s):
0d2d449
⚙️ refactor: change & add documentation to the code based on the lints (#205)
Browse files- src/cache/cacher.rs +1 -4
- src/cache/mod.rs +3 -0
- src/config/mod.rs +3 -0
- src/config/parser.rs +18 -19
- src/config/parser_models.rs +3 -6
- src/engines/duckduckgo.rs +1 -19
- src/engines/engine_models.rs +58 -10
- src/engines/mod.rs +5 -0
- src/engines/searx.rs +0 -19
- src/handler/mod.rs +4 -0
- src/handler/paths.rs +25 -21
- src/results/aggregation_models.rs +22 -18
- src/results/mod.rs +4 -0
- src/results/user_agent.rs +2 -0
- src/server/mod.rs +5 -0
- src/server/routes.rs +23 -16
src/cache/cacher.rs
CHANGED
@@ -6,11 +6,8 @@ use redis::{Client, Commands, Connection};
|
|
6 |
|
7 |
/// A named struct which stores the redis Connection url address to which the client will
|
8 |
/// connect to.
|
9 |
-
///
|
10 |
-
/// # Fields
|
11 |
-
///
|
12 |
-
/// * `redis_connection_url` - It stores the redis Connection url address.
|
13 |
pub struct RedisCache {
|
|
|
14 |
connection: Connection,
|
15 |
}
|
16 |
|
|
|
6 |
|
7 |
/// A named struct which stores the redis Connection url address to which the client will
|
8 |
/// connect to.
|
|
|
|
|
|
|
|
|
9 |
pub struct RedisCache {
|
10 |
+
/// It stores the redis `Connection`.
|
11 |
connection: Connection,
|
12 |
}
|
13 |
|
src/cache/mod.rs
CHANGED
@@ -1 +1,4 @@
|
|
|
|
|
|
|
|
1 |
pub mod cacher;
|
|
|
1 |
+
//! This module provides the modules which provide the functionality to cache the aggregated
|
2 |
+
//! results fetched and aggregated from the upstream search engines in a json format.
|
3 |
+
|
4 |
pub mod cacher;
|
src/config/mod.rs
CHANGED
@@ -1,2 +1,5 @@
|
|
|
|
|
|
|
|
1 |
pub mod parser;
|
2 |
pub mod parser_models;
|
|
|
1 |
+
//! This module provides the modules which handle the functionality to parse the lua config
|
2 |
+
//! and convert the config options into rust readable form.
|
3 |
+
|
4 |
pub mod parser;
|
5 |
pub mod parser_models;
|
src/config/parser.rs
CHANGED
@@ -9,42 +9,36 @@ use rlua::Lua;
|
|
9 |
use std::{collections::HashMap, fs, thread::available_parallelism};
|
10 |
|
11 |
/// A named struct which stores the parsed config file options.
|
12 |
-
///
|
13 |
-
/// # Fields
|
14 |
-
//
|
15 |
-
/// * `port` - It stores the parsed port number option on which the server should launch.
|
16 |
-
/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
|
17 |
-
/// * `style` - It stores the theming options for the website.
|
18 |
-
/// * `redis_url` - It stores the redis connection url address on which the redis
|
19 |
-
/// client should connect.
|
20 |
-
/// * `aggregator` - It stores the option to whether enable or disable production use.
|
21 |
-
/// * `logging` - It stores the option to whether enable or disable logs.
|
22 |
-
/// * `debug` - It stores the option to whether enable or disable debug mode.
|
23 |
-
/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
|
24 |
-
/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
|
25 |
-
/// * `threads` - It stores the number of threads which controls the app will use to run.
|
26 |
#[derive(Clone)]
|
27 |
pub struct Config {
|
|
|
28 |
pub port: u16,
|
|
|
29 |
pub binding_ip: String,
|
|
|
30 |
pub style: Style,
|
|
|
|
|
31 |
pub redis_url: String,
|
|
|
32 |
pub aggregator: AggregatorConfig,
|
|
|
33 |
pub logging: bool,
|
|
|
34 |
pub debug: bool,
|
|
|
35 |
pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
|
|
|
36 |
pub request_timeout: u8,
|
|
|
37 |
pub threads: u8,
|
38 |
}
|
39 |
|
40 |
/// Configuration options for the aggregator.
|
41 |
-
///
|
42 |
-
/// # Fields
|
43 |
-
///
|
44 |
-
/// * `random_delay` - It stores the option to whether enable or disable random delays between
|
45 |
-
/// requests.
|
46 |
#[derive(Clone)]
|
47 |
pub struct AggregatorConfig {
|
|
|
|
|
48 |
pub random_delay: bool,
|
49 |
}
|
50 |
|
@@ -115,6 +109,11 @@ impl Config {
|
|
115 |
}
|
116 |
|
117 |
/// a helper function that sets the proper logging level
|
|
|
|
|
|
|
|
|
|
|
118 |
fn set_logging_level(debug: bool, logging: bool) {
|
119 |
if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
|
120 |
if pkg_env_var.to_lowercase() == "dev" {
|
|
|
9 |
use std::{collections::HashMap, fs, thread::available_parallelism};
|
10 |
|
11 |
/// A named struct which stores the parsed config file options.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
#[derive(Clone)]
|
13 |
pub struct Config {
|
14 |
+
/// It stores the parsed port number option on which the server should launch.
|
15 |
pub port: u16,
|
16 |
+
/// It stores the parsed ip address option on which the server should launch
|
17 |
pub binding_ip: String,
|
18 |
+
/// It stores the theming options for the website.
|
19 |
pub style: Style,
|
20 |
+
/// It stores the redis connection url address on which the redis
|
21 |
+
/// client should connect.
|
22 |
pub redis_url: String,
|
23 |
+
/// It stores the configuration options for the aggregator.
|
24 |
pub aggregator: AggregatorConfig,
|
25 |
+
/// It stores the option to whether enable or disable logs.
|
26 |
pub logging: bool,
|
27 |
+
/// It stores the option to whether enable or disable debug mode.
|
28 |
pub debug: bool,
|
29 |
+
/// It stores all the engine names that were enabled by the user.
|
30 |
pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
|
31 |
+
/// It stores the time (secs) which controls the server request timeout.
|
32 |
pub request_timeout: u8,
|
33 |
+
/// It stores the number of threads which the app will use to run.
|
34 |
pub threads: u8,
|
35 |
}
|
36 |
|
37 |
/// Configuration options for the aggregator.
|
|
|
|
|
|
|
|
|
|
|
38 |
#[derive(Clone)]
|
39 |
pub struct AggregatorConfig {
|
40 |
+
/// It stores the option to whether enable or disable random delays between
|
41 |
+
/// requests.
|
42 |
pub random_delay: bool,
|
43 |
}
|
44 |
|
|
|
109 |
}
|
110 |
|
111 |
/// a helper function that sets the proper logging level
|
112 |
+
///
|
113 |
+
/// # Arguments
|
114 |
+
///
|
115 |
+
/// * `debug` - It takes the option to whether enable or disable debug mode.
|
116 |
+
/// * `logging` - It takes the option to whether enable or disable logs.
|
117 |
fn set_logging_level(debug: bool, logging: bool) {
|
118 |
if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
|
119 |
if pkg_env_var.to_lowercase() == "dev" {
|
src/config/parser_models.rs
CHANGED
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
|
|
12 |
/// order to allow the deserializing the json back to struct in aggregate function in
|
13 |
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
14 |
/// it to the template files.
|
15 |
-
///
|
16 |
-
/// # Fields
|
17 |
-
//
|
18 |
-
/// * `theme` - It stores the parsed theme option used to set a theme for the website.
|
19 |
-
/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
|
20 |
-
/// theme being used.
|
21 |
#[derive(Serialize, Deserialize, Clone)]
|
22 |
pub struct Style {
|
|
|
23 |
pub theme: String,
|
|
|
|
|
24 |
pub colorscheme: String,
|
25 |
}
|
26 |
|
|
|
12 |
/// order to allow the deserializing the json back to struct in aggregate function in
|
13 |
/// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
|
14 |
/// it to the template files.
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
#[derive(Serialize, Deserialize, Clone)]
|
16 |
pub struct Style {
|
17 |
+
/// It stores the parsed theme option used to set a theme for the website.
|
18 |
pub theme: String,
|
19 |
+
/// It stores the parsed colorscheme option used to set a colorscheme for the
|
20 |
+
/// theme being used.
|
21 |
pub colorscheme: String,
|
22 |
}
|
23 |
|
src/engines/duckduckgo.rs
CHANGED
@@ -19,25 +19,7 @@ pub struct DuckDuckGo;
|
|
19 |
|
20 |
#[async_trait::async_trait]
|
21 |
impl SearchEngine for DuckDuckGo {
|
22 |
-
|
23 |
-
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
24 |
-
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
25 |
-
/// values are RawSearchResult struct and then returns it within a Result enum.
|
26 |
-
///
|
27 |
-
/// # Arguments
|
28 |
-
///
|
29 |
-
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
30 |
-
/// * `page` - Takes an u32 as an argument.
|
31 |
-
/// * `user_agent` - Takes a random user agent string as an argument.
|
32 |
-
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
33 |
-
///
|
34 |
-
/// # Errors
|
35 |
-
///
|
36 |
-
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
37 |
-
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
38 |
-
/// provide results for the requested search query and also returns error if the scraping selector
|
39 |
-
/// or HeaderMap fails to initialize.
|
40 |
-
async fn results(
|
41 |
&self,
|
42 |
query: String,
|
43 |
page: u32,
|
|
|
19 |
|
20 |
#[async_trait::async_trait]
|
21 |
impl SearchEngine for DuckDuckGo {
|
22 |
+
async fn results(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
&self,
|
24 |
query: String,
|
25 |
page: u32,
|
src/engines/engine_models.rs
CHANGED
@@ -6,19 +6,18 @@ use error_stack::{IntoReport, Result, ResultExt};
|
|
6 |
use std::{collections::HashMap, fmt, time::Duration};
|
7 |
|
8 |
/// A custom error type used for handle engine associated errors.
|
9 |
-
///
|
10 |
-
/// This enum provides variants three different categories of errors:
|
11 |
-
/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
|
12 |
-
/// etc.
|
13 |
-
/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
|
14 |
-
/// search engines.
|
15 |
-
/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
|
16 |
-
/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
|
17 |
-
/// all other errors occurring within the code handling the `upstream search engines`.
|
18 |
#[derive(Debug)]
|
19 |
pub enum EngineError {
|
|
|
|
|
20 |
EmptyResultSet,
|
|
|
|
|
21 |
RequestError,
|
|
|
|
|
|
|
|
|
22 |
UnexpectedError,
|
23 |
}
|
24 |
|
@@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
|
|
46 |
/// A trait to define common behavior for all search engines.
|
47 |
#[async_trait::async_trait]
|
48 |
pub trait SearchEngine: Sync + Send {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
async fn fetch_html_from_upstream(
|
50 |
&self,
|
51 |
url: String,
|
@@ -67,6 +83,24 @@ pub trait SearchEngine: Sync + Send {
|
|
67 |
.change_context(EngineError::RequestError)?)
|
68 |
}
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
async fn results(
|
71 |
&self,
|
72 |
query: String,
|
@@ -76,8 +110,12 @@ pub trait SearchEngine: Sync + Send {
|
|
76 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
77 |
}
|
78 |
|
|
|
79 |
pub struct EngineHandler {
|
|
|
|
|
80 |
engine: Box<dyn SearchEngine>,
|
|
|
81 |
name: &'static str,
|
82 |
}
|
83 |
|
@@ -88,7 +126,15 @@ impl Clone for EngineHandler {
|
|
88 |
}
|
89 |
|
90 |
impl EngineHandler {
|
91 |
-
///
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
pub fn new(engine_name: &str) -> Option<Self> {
|
93 |
let engine: (&'static str, Box<dyn SearchEngine>) =
|
94 |
match engine_name.to_lowercase().as_str() {
|
@@ -103,6 +149,8 @@ impl EngineHandler {
|
|
103 |
})
|
104 |
}
|
105 |
|
|
|
|
|
106 |
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
107 |
(self.name, self.engine)
|
108 |
}
|
|
|
6 |
use std::{collections::HashMap, fmt, time::Duration};
|
7 |
|
8 |
/// A custom error type used for handle engine associated errors.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
#[derive(Debug)]
|
10 |
pub enum EngineError {
|
11 |
+
/// This variant handles the no results found error provided by the upstream
|
12 |
+
/// search engines.
|
13 |
EmptyResultSet,
|
14 |
+
/// This variant handles all request related errors like forbidden, not found,
|
15 |
+
/// etc.
|
16 |
RequestError,
|
17 |
+
/// This variant handles all the errors which are unexpected or occur rarely
|
18 |
+
/// and are errors mostly related to failure in initialization of HeaderMap,
|
19 |
+
/// Selector errors and all other errors occurring within the code handling
|
20 |
+
/// the `upstream search engines`.
|
21 |
UnexpectedError,
|
22 |
}
|
23 |
|
|
|
45 |
/// A trait to define common behavior for all search engines.
|
46 |
#[async_trait::async_trait]
|
47 |
pub trait SearchEngine: Sync + Send {
|
48 |
+
/// This helper function fetches/requests the search results from the upstream search engine in
|
49 |
+
/// an html form.
|
50 |
+
///
|
51 |
+
/// # Arguments
|
52 |
+
///
|
53 |
+
/// * `url` - It takes the url of the upstream search engine with the user requested search
|
54 |
+
/// query appended in the search parameters.
|
55 |
+
/// * `header_map` - It takes the http request headers to be sent to the upstream engine in
|
56 |
+
/// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
|
57 |
+
/// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
|
58 |
+
/// the amount of time for each request to remain connected until the results can be provided
|
59 |
+
/// by the upstream engine.
|
60 |
+
///
|
61 |
+
/// # Error
|
62 |
+
///
|
63 |
+
/// It returns the html data as a string if the upstream engine provides the data as expected
|
64 |
+
/// otherwise it returns a custom `EngineError`.
|
65 |
async fn fetch_html_from_upstream(
|
66 |
&self,
|
67 |
url: String,
|
|
|
83 |
.change_context(EngineError::RequestError)?)
|
84 |
}
|
85 |
|
86 |
+
/// This function scrapes results from the upstream engine and puts all the scraped results like
|
87 |
+
/// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
|
88 |
+
/// in a `SearchResult` and then adds that to HashMap whose keys are url and values are `SearchResult`
|
89 |
+
/// struct and then returns it within a Result enum.
|
90 |
+
///
|
91 |
+
/// # Arguments
|
92 |
+
///
|
93 |
+
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
94 |
+
/// * `page` - Takes an u32 as an argument.
|
95 |
+
/// * `user_agent` - Takes a random user agent string as an argument.
|
96 |
+
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
97 |
+
///
|
98 |
+
/// # Errors
|
99 |
+
///
|
100 |
+
/// Returns an `EngineError` if the user is not connected to the internet or if there is a failure to
|
101 |
+
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
102 |
+
/// provide results for the requested search query and also returns error if the scraping selector
|
103 |
+
/// or HeaderMap fails to initialize.
|
104 |
async fn results(
|
105 |
&self,
|
106 |
query: String,
|
|
|
110 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
111 |
}
|
112 |
|
113 |
+
/// A named struct which stores the engine struct with the name of the associated engine.
|
114 |
pub struct EngineHandler {
|
115 |
+
/// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
|
116 |
+
/// the `SearchEngine` trait.
|
117 |
engine: Box<dyn SearchEngine>,
|
118 |
+
/// It stores the name of the engine to which the struct is associated to.
|
119 |
name: &'static str,
|
120 |
}
|
121 |
|
|
|
126 |
}
|
127 |
|
128 |
impl EngineHandler {
|
129 |
+
/// Parses an engine name into an engine handler.
|
130 |
+
///
|
131 |
+
/// # Arguments
|
132 |
+
///
|
133 |
+
/// * `engine_name` - It takes the name of the engine to which the struct was associated to.
|
134 |
+
///
|
135 |
+
/// # Returns
|
136 |
+
///
|
137 |
+
/// It returns an option either containing the value or a none if the engine is unknown
|
138 |
pub fn new(engine_name: &str) -> Option<Self> {
|
139 |
let engine: (&'static str, Box<dyn SearchEngine>) =
|
140 |
match engine_name.to_lowercase().as_str() {
|
|
|
149 |
})
|
150 |
}
|
151 |
|
152 |
+
/// This function converts the EngineHandler type into a tuple containing the engine name and
|
153 |
+
/// the associated engine struct.
|
154 |
pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
|
155 |
(self.name, self.engine)
|
156 |
}
|
src/engines/mod.rs
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
pub mod duckduckgo;
|
2 |
pub mod engine_models;
|
3 |
pub mod searx;
|
|
|
1 |
+
//! This module provides different modules which handle the functionality to fetch results from the
|
2 |
+
//! upstream search engines based on user requested queries. Also provides different models to
|
3 |
+
//! provide a standard functions to be implemented for all the upstream search engine handling
|
4 |
+
//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
|
5 |
+
|
6 |
pub mod duckduckgo;
|
7 |
pub mod engine_models;
|
8 |
pub mod searx;
|
src/engines/searx.rs
CHANGED
@@ -17,25 +17,6 @@ pub struct Searx;
|
|
17 |
|
18 |
#[async_trait::async_trait]
|
19 |
impl SearchEngine for Searx {
|
20 |
-
/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
|
21 |
-
/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
|
22 |
-
/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
|
23 |
-
/// values are RawSearchResult struct and then returns it within a Result enum.
|
24 |
-
///
|
25 |
-
/// # Arguments
|
26 |
-
///
|
27 |
-
/// * `query` - Takes the user provided query to query to the upstream search engine with.
|
28 |
-
/// * `page` - Takes an u32 as an argument.
|
29 |
-
/// * `user_agent` - Takes a random user agent string as an argument.
|
30 |
-
/// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
|
31 |
-
///
|
32 |
-
/// # Errors
|
33 |
-
///
|
34 |
-
/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
|
35 |
-
/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
|
36 |
-
/// provide results for the requested search query and also returns error if the scraping selector
|
37 |
-
/// or HeaderMap fails to initialize.
|
38 |
-
|
39 |
async fn results(
|
40 |
&self,
|
41 |
query: String,
|
|
|
17 |
|
18 |
#[async_trait::async_trait]
|
19 |
impl SearchEngine for Searx {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
async fn results(
|
21 |
&self,
|
22 |
query: String,
|
src/handler/mod.rs
CHANGED
@@ -1 +1,5 @@
|
|
|
|
|
|
|
|
|
|
1 |
pub mod paths;
|
|
|
1 |
+
//! This module provides modules which provide the functionality to handle paths for different
|
2 |
+
//! files present on different paths and provide one appropriate path on which it is present and
|
3 |
+
//! can be used.
|
4 |
+
|
5 |
pub mod paths;
|
src/handler/paths.rs
CHANGED
@@ -6,20 +6,31 @@ use std::io::Error;
|
|
6 |
use std::path::Path;
|
7 |
|
8 |
// ------- Constants --------
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
14 |
|
|
|
15 |
#[derive(Hash, PartialEq, Eq, Debug)]
|
16 |
pub enum FileType {
|
|
|
17 |
Config,
|
|
|
18 |
AllowList,
|
|
|
19 |
BlockList,
|
|
|
20 |
Theme,
|
21 |
}
|
22 |
|
|
|
23 |
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
|
24 |
once_cell::sync::Lazy::new(|| {
|
25 |
HashMap::from([
|
@@ -72,26 +83,19 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, V
|
|
72 |
])
|
73 |
});
|
74 |
|
75 |
-
/// A
|
76 |
-
/// file exists on that path.
|
77 |
///
|
78 |
/// # Error
|
79 |
///
|
80 |
-
/// Returns a
|
81 |
-
///
|
82 |
-
/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
|
83 |
-
/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
|
84 |
-
/// one (3).
|
85 |
-
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
|
86 |
-
/// here then it returns an error as mentioned above.
|
87 |
-
|
88 |
-
/// A function which returns an appropriate theme directory path checking if the theme
|
89 |
-
/// directory exists on that path.
|
90 |
///
|
91 |
-
/// #
|
|
|
|
|
|
|
92 |
///
|
93 |
-
/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
|
94 |
-
/// paths which are:
|
95 |
/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
|
96 |
/// 2. Under project folder ( or codebase in other words) if it is not present
|
97 |
/// here then it returns an error as mentioned above.
|
@@ -106,6 +110,6 @@ pub fn file_path(file_type: FileType) -> Result<String, Error> {
|
|
106 |
// if no of the configs above exist, return error
|
107 |
Err(Error::new(
|
108 |
std::io::ErrorKind::NotFound,
|
109 |
-
format!("{:?} file not found!!", file_type),
|
110 |
))
|
111 |
}
|
|
|
6 |
use std::path::Path;
|
7 |
|
8 |
// ------- Constants --------
|
9 |
+
/// The constant holding the name of the theme folder.
|
10 |
+
const PUBLIC_DIRECTORY_NAME: &str = "public";
|
11 |
+
/// The constant holding the name of the common folder.
|
12 |
+
const COMMON_DIRECTORY_NAME: &str = "websurfx";
|
13 |
+
/// The constant holding the name of the config file.
|
14 |
+
const CONFIG_FILE_NAME: &str = "config.lua";
|
15 |
+
/// The constant holding the name of the AllowList text file.
|
16 |
+
const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
|
17 |
+
/// The constant holding the name of the BlockList text file.
|
18 |
+
const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
|
19 |
|
20 |
+
/// An enum type which provides different variants to handle paths for various files/folders.
|
21 |
#[derive(Hash, PartialEq, Eq, Debug)]
|
22 |
pub enum FileType {
|
23 |
+
/// This variant handles all the paths associated with the config file.
|
24 |
Config,
|
25 |
+
/// This variant handles all the paths associated with the Allowlist text file.
|
26 |
AllowList,
|
27 |
+
/// This variant handles all the paths associated with the BlockList text file.
|
28 |
BlockList,
|
29 |
+
/// This variant handles all the paths associated with the public folder (Theme folder).
|
30 |
Theme,
|
31 |
}
|
32 |
|
33 |
+
/// A static variable which stores the different filesystem paths for various file/folder types.
|
34 |
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
|
35 |
once_cell::sync::Lazy::new(|| {
|
36 |
HashMap::from([
|
|
|
83 |
])
|
84 |
});
|
85 |
|
86 |
+
/// A function which returns an appropriate path for the provided file type by checking if the path
|
87 |
+
/// for the given file type exists on that path.
|
88 |
///
|
89 |
/// # Error
|
90 |
///
|
91 |
+
/// Returns a `<File Type> file/folder not found!!` error if the given file_type folder/file is not
|
92 |
+
/// present on the path on which it is being tested.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
///
|
94 |
+
/// # Example
|
95 |
+
///
|
96 |
+
/// If this function is given the file_type of Theme variant then the theme folder is checked by the
|
97 |
+
/// following steps:
|
98 |
///
|
|
|
|
|
99 |
/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
|
100 |
/// 2. Under project folder ( or codebase in other words) if it is not present
|
101 |
/// here then it returns an error as mentioned above.
|
|
|
110 |
// if no of the configs above exist, return error
|
111 |
Err(Error::new(
|
112 |
std::io::ErrorKind::NotFound,
|
113 |
+
format!("{:?} file/folder not found!!", file_type),
|
114 |
))
|
115 |
}
|
src/results/aggregation_models.rs
CHANGED
@@ -8,20 +8,17 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
|
|
8 |
/// A named struct to store the raw scraped search results scraped search results from the
|
9 |
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
10 |
/// to write idiomatic rust using `Iterators`.
|
11 |
-
///
|
12 |
-
/// # Fields
|
13 |
-
///
|
14 |
-
/// * `title` - The title of the search result.
|
15 |
-
/// * `url` - The url which is accessed when clicked on it
|
16 |
/// (href url in html in simple words).
|
17 |
-
/// * `description` - The description of the search result.
|
18 |
-
/// * `engine` - The names of the upstream engines from which this results were provided.
|
19 |
#[derive(Clone, Serialize, Deserialize)]
|
20 |
#[serde(rename_all = "camelCase")]
|
21 |
pub struct SearchResult {
|
|
|
22 |
pub title: String,
|
|
|
23 |
pub url: String,
|
|
|
24 |
pub description: String,
|
|
|
25 |
pub engine: Vec<String>,
|
26 |
}
|
27 |
|
@@ -63,15 +60,27 @@ impl SearchResult {
|
|
63 |
}
|
64 |
}
|
65 |
|
66 |
-
///
|
67 |
#[derive(Serialize, Deserialize)]
|
68 |
pub struct EngineErrorInfo {
|
|
|
|
|
69 |
pub error: String,
|
|
|
70 |
pub engine: String,
|
|
|
|
|
71 |
pub severity_color: String,
|
72 |
}
|
73 |
|
74 |
impl EngineErrorInfo {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
pub fn new(error: &EngineError, engine: String) -> Self {
|
76 |
Self {
|
77 |
error: match error {
|
@@ -91,23 +100,18 @@ impl EngineErrorInfo {
|
|
91 |
|
92 |
/// A named struct to store, serialize, deserialize the all the search results scraped and
|
93 |
/// aggregated from the upstream search engines.
|
94 |
-
///
|
95 |
-
/// # Fields
|
96 |
-
///
|
97 |
-
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
|
98 |
/// `SearchResult` structs.
|
99 |
-
/// * `page_query` - Stores the current pages search query `q` provided in the search url.
|
100 |
-
/// * `style` - Stores the theming options for the website.
|
101 |
-
/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
|
102 |
-
/// and the type of error that caused it.
|
103 |
-
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
104 |
-
/// given search query.
|
105 |
#[derive(Serialize, Deserialize)]
|
106 |
#[serde(rename_all = "camelCase")]
|
107 |
pub struct SearchResults {
|
|
|
108 |
pub results: Vec<SearchResult>,
|
|
|
109 |
pub page_query: String,
|
|
|
110 |
pub style: Style,
|
|
|
|
|
111 |
pub engine_errors_info: Vec<EngineErrorInfo>,
|
112 |
}
|
113 |
|
|
|
8 |
/// A named struct to store the raw scraped search results scraped search results from the
|
9 |
/// upstream search engines before aggregating it.It derives the Clone trait which is needed
|
10 |
/// to write idiomatic rust using `Iterators`.
|
|
|
|
|
|
|
|
|
|
|
11 |
/// (href url in html in simple words).
|
|
|
|
|
12 |
#[derive(Clone, Serialize, Deserialize)]
|
13 |
#[serde(rename_all = "camelCase")]
|
14 |
pub struct SearchResult {
|
15 |
+
/// The title of the search result.
|
16 |
pub title: String,
|
17 |
+
/// The url which is accessed when clicked on it
|
18 |
pub url: String,
|
19 |
+
/// The description of the search result.
|
20 |
pub description: String,
|
21 |
+
/// The names of the upstream engines from which these results were provided.
|
22 |
pub engine: Vec<String>,
|
23 |
}
|
24 |
|
|
|
60 |
}
|
61 |
}
|
62 |
|
63 |
+
/// A named struct that stores the error info related to the upstream search engines.
|
64 |
#[derive(Serialize, Deserialize)]
|
65 |
pub struct EngineErrorInfo {
|
66 |
+
/// It stores the error type which occurred while fetching the result from a particular search
|
67 |
+
/// engine.
|
68 |
pub error: String,
|
69 |
+
/// It stores the name of the engine that failed to provide the requested search results.
|
70 |
pub engine: String,
|
71 |
+
/// It stores the name of the color to indicate how severe the particular error is (In
|
72 |
+
/// other words it indicates the severity of the error/issue).
|
73 |
pub severity_color: String,
|
74 |
}
|
75 |
|
76 |
impl EngineErrorInfo {
|
77 |
+
/// Constructs a new `EngineErrorInfo` with the given arguments needed for the struct.
|
78 |
+
///
|
79 |
+
/// # Arguments
|
80 |
+
///
|
81 |
+
/// * `error` - It takes the error type which occurred while fetching the result from a particular
|
82 |
+
/// search engine.
|
83 |
+
/// * `engine` - It takes the name of the engine that failed to provide the requested search results.
|
84 |
pub fn new(error: &EngineError, engine: String) -> Self {
|
85 |
Self {
|
86 |
error: match error {
|
|
|
100 |
|
101 |
/// A named struct to store, serialize, deserialize the all the search results scraped and
|
102 |
/// aggregated from the upstream search engines.
|
|
|
|
|
|
|
|
|
103 |
/// `SearchResult` structs.
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
#[derive(Serialize, Deserialize)]
|
105 |
#[serde(rename_all = "camelCase")]
|
106 |
pub struct SearchResults {
|
107 |
+
/// Stores the individual serializable `SearchResult` struct into a vector of `SearchResult` structs.
|
108 |
pub results: Vec<SearchResult>,
|
109 |
+
/// Stores the current pages search query `q` provided in the search url.
|
110 |
pub page_query: String,
|
111 |
+
/// Stores the theming options for the website.
|
112 |
pub style: Style,
|
113 |
+
/// Stores the information on which engines failed with their engine name
|
114 |
+
/// and the type of error that caused it.
|
115 |
pub engine_errors_info: Vec<EngineErrorInfo>,
|
116 |
}
|
117 |
|
src/results/mod.rs
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
1 |
pub mod aggregation_models;
|
2 |
pub mod aggregator;
|
3 |
pub mod user_agent;
|
|
|
1 |
+
//! This module provides modules that handle the functionality to aggregate the fetched search
|
2 |
+
//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
|
3 |
+
//! provides various models to aggregate search results into a standardized form.
|
4 |
+
|
5 |
pub mod aggregation_models;
|
6 |
pub mod aggregator;
|
7 |
pub mod user_agent;
|
src/results/user_agent.rs
CHANGED
@@ -2,6 +2,8 @@
|
|
2 |
|
3 |
use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
|
4 |
|
|
|
|
|
5 |
static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
|
6 |
UserAgentsBuilder::new()
|
7 |
.cache(false)
|
|
|
2 |
|
3 |
use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
|
4 |
|
5 |
+
/// A static variable which stores the initially built `UserAgents` struct. So as it can be reused
|
6 |
+
/// again and again without the need of reinitializing the `UserAgents` struct.
|
7 |
static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
|
8 |
UserAgentsBuilder::new()
|
9 |
.cache(false)
|
src/server/mod.rs
CHANGED
@@ -1 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
pub mod routes;
|
|
|
1 |
+
//! This module provides modules that handle the functionality of handling different routes/paths
|
2 |
+
//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in
|
3 |
+
//! the search route. Also, caches the next, current and previous search results in the search
|
4 |
+
//! routes with the help of the redis server.
|
5 |
+
|
6 |
pub mod routes;
|
src/server/routes.rs
CHANGED
@@ -17,16 +17,13 @@ use serde::Deserialize;
|
|
17 |
use tokio::join;
|
18 |
|
19 |
/// A named struct which deserializes all the user provided search parameters and stores them.
|
20 |
-
///
|
21 |
-
/// # Fields
|
22 |
-
///
|
23 |
-
/// * `q` - It stores the search parameter option `q` (or query in simple words)
|
24 |
-
/// of the search url.
|
25 |
-
/// * `page` - It stores the search parameter `page` (or pageno in simple words)
|
26 |
-
/// of the search url.
|
27 |
#[derive(Deserialize)]
|
28 |
struct SearchParams {
|
|
|
|
|
29 |
q: Option<String>,
|
|
|
|
|
30 |
page: Option<u32>,
|
31 |
}
|
32 |
|
@@ -54,17 +51,14 @@ pub async fn not_found(
|
|
54 |
}
|
55 |
|
56 |
/// A named struct which is used to deserialize the cookies fetched from the client side.
|
57 |
-
///
|
58 |
-
/// # Fields
|
59 |
-
///
|
60 |
-
/// * `theme` - It stores the theme name used in the website.
|
61 |
-
/// * `colorscheme` - It stores the colorscheme name used for the website theme.
|
62 |
-
/// * `engines` - It stores the user selected upstream search engines selected from the UI.
|
63 |
#[allow(dead_code)]
|
64 |
#[derive(Deserialize)]
|
65 |
struct Cookie {
|
|
|
66 |
theme: String,
|
|
|
67 |
colorscheme: String,
|
|
|
68 |
engines: Vec<String>,
|
69 |
}
|
70 |
|
@@ -149,8 +143,21 @@ pub async fn search(
|
|
149 |
}
|
150 |
}
|
151 |
|
152 |
-
/// Fetches the results for a query and page.
|
153 |
-
///
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
async fn results(
|
155 |
url: String,
|
156 |
config: &Config,
|
@@ -158,7 +165,7 @@ async fn results(
|
|
158 |
page: u32,
|
159 |
req: HttpRequest,
|
160 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
161 |
-
//Initialize redis cache connection struct
|
162 |
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
|
163 |
// fetch the cached results json.
|
164 |
let cached_results_json = redis_cache.cached_json(&url);
|
|
|
17 |
use tokio::join;
|
18 |
|
19 |
/// A named struct which deserializes all the user provided search parameters and stores them.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
#[derive(Deserialize)]
|
21 |
struct SearchParams {
|
22 |
+
/// It stores the search parameter option `q` (or query in simple words)
|
23 |
+
/// of the search url.
|
24 |
q: Option<String>,
|
25 |
+
/// It stores the search parameter `page` (or pageno in simple words)
|
26 |
+
/// of the search url.
|
27 |
page: Option<u32>,
|
28 |
}
|
29 |
|
|
|
51 |
}
|
52 |
|
53 |
/// A named struct which is used to deserialize the cookies fetched from the client side.
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
#[allow(dead_code)]
|
55 |
#[derive(Deserialize)]
|
56 |
struct Cookie {
|
57 |
+
/// It stores the theme name used in the website.
|
58 |
theme: String,
|
59 |
+
/// It stores the colorscheme name used for the website theme.
|
60 |
colorscheme: String,
|
61 |
+
/// It stores the user selected upstream search engines selected from the UI.
|
62 |
engines: Vec<String>,
|
63 |
}
|
64 |
|
|
|
143 |
}
|
144 |
}
|
145 |
|
146 |
+
/// Fetches the results for a query and page. It first checks the redis cache, if that
|
147 |
+
/// fails it gets proper results by requesting from the upstream search engines.
|
148 |
+
///
|
149 |
+
/// # Arguments
|
150 |
+
///
|
151 |
+
/// * `url` - It takes the url of the current page that requested the search results for a
|
152 |
+
/// particular search query.
|
153 |
+
/// * `config` - It takes a parsed config struct.
|
154 |
+
/// * `page` - It takes the page number as u32 value.
|
155 |
+
/// * `req` - It takes the `HttpRequest` struct as a value.
|
156 |
+
///
|
157 |
+
/// # Error
|
158 |
+
///
|
159 |
+
/// It returns the `SearchResults` struct if the search results could be successfully fetched from
|
160 |
+
/// the cache or from the upstream search engines otherwise it returns an appropriate error.
|
161 |
async fn results(
|
162 |
url: String,
|
163 |
config: &Config,
|
|
|
165 |
page: u32,
|
166 |
req: HttpRequest,
|
167 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
168 |
+
// Initialize redis cache connection struct
|
169 |
let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
|
170 |
// fetch the cached results json.
|
171 |
let cached_results_json = redis_cache.cached_json(&url);
|