alamin655 committed
Commit 7a64454 · unverified · 2 Parent(s): 61eaa47 c60fdb8

Merge pull request #206 from neon-mmd/change-document-style-with-linter-warnings
Cargo.lock CHANGED
@@ -3797,7 +3797,7 @@ dependencies = [
 
 [[package]]
 name = "websurfx"
-version = "0.20.0"
+version = "0.20.1"
 dependencies = [
  "actix-cors",
  "actix-files",
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.20.0"
+version = "0.20.1"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
src/cache/cacher.rs CHANGED
@@ -10,17 +10,14 @@ use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
-///
-/// # Fields
-///
-/// * `connection_pool` - It stores a pool of connections ready to be used.
-/// * `pool_size` - It stores the size of the connection pool (in other words the number of
-/// connections that should be stored in the pool).
-/// * `current_connection` - It stores the index of which connection is being used at the moment.
 #[derive(Clone)]
 pub struct RedisCache {
+    /// It stores a pool of connections ready to be used.
     connection_pool: Vec<ConnectionManager>,
+    /// It stores the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
     pool_size: u8,
+    /// It stores the index of which connection is being used at the moment.
     current_connection: u8,
 }
 
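The change above is the pattern this PR applies everywhere: the bulleted `# Fields` list in the struct-level comment moves into `///` doc comments attached directly to each field, which is the form rustdoc associates with the field and which the `clippy::missing_docs_in_private_items` lint (denied in src/lib.rs further down) can actually verify. A minimal before/after sketch with a hypothetical struct:

/// Before: field docs live only in the struct-level comment, so the
/// linter cannot tell whether any individual field is documented.
///
/// # Fields
///
/// * `size` - The capacity of the pool.
pub struct PoolBefore {
    size: u8,
}

/// After: each field carries its own doc comment, which both rustdoc
/// and `clippy::missing_docs_in_private_items` understand.
pub struct PoolAfter {
    /// The capacity of the pool.
    size: u8,
}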
src/cache/error.rs CHANGED
@@ -5,15 +5,12 @@ use std::fmt;
 use redis::RedisError;
 
 /// A custom error type used for handling redis async pool associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RedisError` - This variant handles all errors related to `RedisError`,
-/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
-/// which occurs when all the connections in the connection pool return a connection
-/// dropped redis error.
 #[derive(Debug)]
 pub enum PoolError {
+    /// This variant handles all errors related to `RedisError`,
     RedisError(RedisError),
+    /// This variant handles the errors which occurs when all the connections
+    /// in the connection pool return a connection dropped redis error.
     PoolExhaustionWithConnectionDropError,
 }
 
src/cache/mod.rs CHANGED
@@ -1,2 +1,5 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
 pub mod error;
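The `//!` lines added to each mod.rs are inner doc comments: they document the enclosing module (the file itself) rather than the item that follows, which is what satisfies the `missing_docs` lint for modules, and they must appear before any items. A condensed sketch of the pattern on a hypothetical mod.rs:

//! Helpers for caching aggregated search results.
//!
//! Inner doc comments (`//!`) attach to the enclosing module, so this
//! text becomes the module's rustdoc page rather than docs for the
//! first `pub mod` below.

pub mod cacher;
pub mod error;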
src/config/mod.rs CHANGED
@@ -1,2 +1,5 @@
+//! This module provides the modules which handles the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
 pub mod parser_models;
src/config/parser.rs CHANGED
@@ -9,33 +9,33 @@ use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
+    /// It stores all the engine names that were enabled by the user.
     pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
+    /// It stores configuration options for the ratelimiting middleware.
     pub rate_limiter: RateLimiter,
+    /// It stores the level of safe search to be used for restricting content in the
+    /// search results.
     pub safe_search: u8,
 }
 
@@ -123,6 +123,11 @@ impl Config {
     }
 
     /// a helper function that sets the proper logging level
+    ///
+    /// # Arguments
+    ///
+    /// * `debug` - It takes the option to whether enable or disable debug mode.
+    /// * `logging` - It takes the option to whether enable or disable logs.
    fn set_logging_level(debug: bool, logging: bool) {
        if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
            if pkg_env_var.to_lowercase() == "dev" {
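Inside a doc comment, lines beginning with `#` are markdown headings, so the `# Arguments` block added to `set_logging_level` renders as a proper section in rustdoc; and with `clippy::missing_docs_in_private_items` denied, even private helpers need such docs. A self-contained sketch of the convention (the function body here is illustrative, not the project's actual logic):

/// A helper function that sets the proper logging level.
///
/// # Arguments
///
/// * `debug` - It takes the option to whether enable or disable debug mode.
/// * `logging` - It takes the option to whether enable or disable logs.
fn set_logging_level(debug: bool, logging: bool) {
    // Illustrative only: pick the most verbose level that was requested.
    let level = if debug {
        "trace"
    } else if logging {
        "info"
    } else {
        "error"
    };
    println!("log level would be set to: {level}");
}

fn main() {
    set_logging_level(false, true);
}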
src/config/parser_models.rs CHANGED
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
 #[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }
 
@@ -38,24 +35,18 @@ impl Style {
 }
 
 /// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
 #[derive(Clone)]
 pub struct AggregatorConfig {
+    /// It stores the option to whether enable or disable random delays between
+    /// requests.
     pub random_delay: bool,
 }
 
 /// Configuration options for the rate limiter middleware.
-///
-/// # Fields
-///
-/// * `number_of_requests` - The number of request that are allowed within a provided time limit.
-/// * `time_limit` - The time limit in which the quantity of requests that should be accepted.
 #[derive(Clone)]
 pub struct RateLimiter {
+    /// The number of request that are allowed within a provided time limit.
     pub number_of_requests: u8,
+    /// The time limit in which the quantity of requests that should be accepted.
     pub time_limit: u8,
 }
src/engines/duckduckgo.rs CHANGED
@@ -19,24 +19,6 @@ pub struct DuckDuckGo;
 
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: &str,
src/engines/engine_models.rs CHANGED
@@ -6,19 +6,18 @@ use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
 /// A custom error type used for handle engine associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
-/// etc.
-/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
-/// search engines.
-/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
-/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
-/// all other errors occurring within the code handling the `upstream search engines`.
 #[derive(Debug)]
 pub enum EngineError {
+    /// This variant handles all request related errors like forbidden, not found,
+    /// etc.
     EmptyResultSet,
+    /// This variant handles the not results found error provide by the upstream
+    /// search engines.
     RequestError,
+    /// This variant handles all the errors which are unexpected or occur rarely
+    /// and are errors mostly related to failure in initialization of HeaderMap,
+    /// Selector errors and all other errors occurring within the code handling
+    /// the `upstream search engines`.
     UnexpectedError,
 }
 
@@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
 /// A trait to define common behavior for all search engines.
 #[async_trait::async_trait]
 pub trait SearchEngine: Sync + Send {
+    /// This helper function fetches/requests the search results from the upstream search engine in
+    /// an html form.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - It takes the url of the upstream search engine with the user requested search
+    /// query appended in the search parameters.
+    /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
+    /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
+    /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
+    /// the amount of time for each request to remain connected when until the results can be provided
+    /// by the upstream engine.
+    ///
+    /// # Error
+    ///
+    /// It returns the html data as a string if the upstream engine provides the data as expected
+    /// otherwise it returns a custom `EngineError`.
     async fn fetch_html_from_upstream(
         &self,
         url: &str,
@@ -65,6 +81,24 @@ pub trait SearchEngine: Sync + Send {
             .change_context(EngineError::RequestError)?)
     }
 
+    /// This function scrapes results from the upstream engine and puts all the scraped results like
+    /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
+    /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
+    /// struct and then returns it within a Result enum.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - Takes the user provided query to query to the upstream search engine with.
+    /// * `page` - Takes an u32 as an argument.
+    /// * `user_agent` - Takes a random user agent string as an argument.
+    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
+    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
+    /// provide results for the requested search query and also returns error if the scraping selector
+    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: &str,
@@ -75,8 +109,12 @@ @@
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }
 
+/// A named struct which stores the engine struct with the name of the associated engine.
 pub struct EngineHandler {
+    /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
+    /// the `SearchEngine` trait.
     engine: Box<dyn SearchEngine>,
+    /// It stores the name of the engine to which the struct is associated to.
     name: &'static str,
 }
 
@@ -87,7 +125,15 @@ impl Clone for EngineHandler {
 }
 
 impl EngineHandler {
-    /// parses an engine name into an engine handler, returns none if the engine is unknown
+    /// Parses an engine name into an engine handler.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine_name` - It takes the name of the engine to which the struct was associated to.
+    ///
+    /// # Returns
+    ///
+    /// It returns an option either containing the value or a none if the engine is unknown
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
@@ -102,6 +148,8 @@ impl EngineHandler {
             })
     }
 
+    /// This function converts the EngineHandler type into a tuple containing the engine name and
+    /// the associated engine struct.
    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
        (self.name, self.engine)
    }
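The newly documented `EngineHandler` API is small: `new` parses an engine name into a handler, returning `None` for unknown names, and `into_name_engine` consumes the handler to get the parts back. A hedged usage sketch, assuming `websurfx` is available as a library dependency and that "duckduckgo" is one of the recognized engine names:

use websurfx::engines::engine_models::EngineHandler;

fn main() {
    // `new` returns an Option; an unknown engine name yields None.
    let handler = EngineHandler::new("duckduckgo").expect("engine should be known");

    // `into_name_engine` consumes the handler, returning the engine's name
    // and the boxed object implementing the `SearchEngine` trait.
    let (name, _engine) = handler.into_name_engine();
    assert_eq!(name, "duckduckgo");
}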
src/engines/mod.rs CHANGED
@@ -1,3 +1,8 @@
+//! This module provides different modules which handles the functionlity to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide a standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
 pub mod engine_models;
 pub mod searx;
src/engines/searx.rs CHANGED
@@ -17,25 +17,6 @@ pub struct Searx;
 
 #[async_trait::async_trait]
 impl SearchEngine for Searx {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
-
     async fn results(
         &self,
         query: &str,
src/handler/mod.rs CHANGED
@@ -1 +1,5 @@
+//! This module provides modules which provide the functionality to handle paths for different
+//! files present on different paths and provide one appropriate path on which it is present and
+//! can be used.
+
 pub mod paths;
src/handler/paths.rs CHANGED
@@ -7,42 +7,46 @@ use std::path::Path;
 use std::sync::OnceLock;
 
 // ------- Constants --------
-static PUBLIC_DIRECTORY_NAME: &str = "public";
-static COMMON_DIRECTORY_NAME: &str = "websurfx";
-static CONFIG_FILE_NAME: &str = "config.lua";
-static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
-static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
+/// The constant holding the name of the theme folder.
+const PUBLIC_DIRECTORY_NAME: &str = "public";
+/// The constant holding the name of the common folder.
+const COMMON_DIRECTORY_NAME: &str = "websurfx";
+/// The constant holding the name of the config file.
+const CONFIG_FILE_NAME: &str = "config.lua";
+/// The constant holding the name of the AllowList text file.
+const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
+/// The constant holding the name of the BlockList text file.
+const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
 
+/// An enum type which provides different variants to handle paths for various files/folders.
 #[derive(Hash, PartialEq, Eq, Debug)]
 pub enum FileType {
+    /// This variant handles all the paths associated with the config file.
     Config,
+    /// This variant handles all the paths associated with the Allowlist text file.
     AllowList,
+    /// This variant handles all the paths associated with the BlockList text file.
     BlockList,
+    /// This variant handles all the paths associated with the public folder (Theme folder).
     Theme,
 }
 
+/// A static variable which stores the different filesystem paths for various file/folder types.
 static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
-/// A helper function which returns an appropriate config file path checking if the config
-/// file exists on that path.
+/// A function which returns an appropriate path for thr provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a `config file not found!!` error if the config file is not present under following
-/// paths which are:
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<File Name> folder/file not found!!` error if the give file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # Error
+/// # Example
+///
+/// If this function is give the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
@@ -110,6 +114,6 @@ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
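Alongside the new docs, the path constants switch from `static` to `const`. For a plain `&str` the two behave identically at every use site, but `const` declares a compile-time value that is inlined wherever it is used, while a `static` is a single memory location for the whole program; clippy generally prefers `const` for simple values like these. A small sketch of the distinction:

// A `const` is inlined at each use site; a `static` is one shared
// location. For string literals either form works, which is why this
// diff could swap the keyword without touching any caller.
const CONFIG_FILE_NAME: &str = "config.lua";
static LEGACY_NAME: &str = "blocklist.txt";

fn main() {
    println!("{CONFIG_FILE_NAME} / {LEGACY_NAME}");
}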
src/lib.rs CHANGED
@@ -1,6 +1,10 @@
 //! This main library module provides the functionality to provide and handle the Tcp server
 //! and register all the routes for the `websurfx` meta search engine website.
 
+#![forbid(unsafe_code, clippy::panic)]
+#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
+#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
+
 pub mod cache;
 pub mod config;
 pub mod engines;
@@ -41,7 +45,7 @@ use handler::paths::{file_path, FileType};
 /// let server = run(listener,config).expect("Failed to start server");
 /// ```
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
-    let mut handlebars: Handlebars = Handlebars::new();
+    let mut handlebars: Handlebars<'_> = Handlebars::new();
 
     let public_folder_path: &str = file_path(FileType::Theme)?;
 
@@ -49,7 +53,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
         .unwrap();
 
-    let handlebars_ref: web::Data<Handlebars> = web::Data::new(handlebars);
+    let handlebars_ref: web::Data<Handlebars<'_>> = web::Data::new(handlebars);
 
     let cloned_config_threads_opt: u8 = config.threads;
 
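These three crate-level attributes drive every other change in this PR: once `missing_docs` and `clippy::missing_docs_in_private_items` are denied, any undocumented item, public or private, fails the build (the `clippy::` lints fire when compiling under cargo clippy). A trimmed sketch of the behavior using only plain rustc lints:

//! Once `missing_docs` is denied, the crate root itself must carry
//! inner doc comments like this one.

#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![warn(rust_2018_idioms)]

/// Removing this doc comment would turn the lint into a hard build error.
pub fn documented() {}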
src/results/aggregation_models.rs CHANGED
@@ -9,20 +9,17 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
 /// to write idiomatic rust using `Iterators`.
-///
-/// # Fields
-///
-/// * `title` - The title of the search result.
-/// * `url` - The url which is accessed when clicked on it
 /// (href url in html in simple words).
-/// * `description` - The description of the search result.
-/// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize, Debug)]
+#[derive(Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
+    /// The title of the search result.
     pub title: String,
+    /// The url which is accessed when clicked on it
     pub url: String,
+    /// The description of the search result.
     pub description: String,
+    /// The names of the upstream engines from which this results were provided.
     pub engine: SmallVec<[String; 0]>,
 }
 
@@ -64,14 +61,27 @@ impl SearchResult {
     }
 }
 
+/// A named struct that stores the error info related to the upstream search engines.
 #[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
+    /// It stores the error type which occured while fetching the result from a particular search
+    /// engine.
     pub error: String,
+    /// It stores the name of the engine that failed to provide the requested search results.
     pub engine: String,
+    /// It stores the name of the color to indicate whether how severe the particular error is (In
+    /// other words it indicates the severity of the error/issue).
     pub severity_color: String,
 }
 
 impl EngineErrorInfo {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - It takes the error type which occured while fetching the result from a particular
+    /// search engine.
+    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
     pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
             error: match error {
@@ -91,25 +101,26 @@ impl EngineErrorInfo {
 
 /// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
-///
-/// # Fields
-///
-/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
-/// * `page_query` - Stores the current pages search query `q` provided in the search url.
-/// * `style` - Stores the theming options for the website.
-/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
-/// and the type of error that caused it.
-/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
-/// given search query.
 #[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
+    /// Stores the individual serializable `SearchResult` struct into a vector of
     pub results: Vec<SearchResult>,
+    /// Stores the current pages search query `q` provided in the search url.
     pub page_query: String,
+    /// Stores the theming options for the website.
     pub style: Style,
+    /// Stores the information on which engines failed with their engine name
+    /// and the type of error that caused it.
     pub engine_errors_info: Vec<EngineErrorInfo>,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was disallowed when the safe search level set to 4 and it
+    /// was present in the `Blocklist` file.
     pub disallowed: bool,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was filtered when the safe search level set to 3 and it
+    /// was present in the `Blocklist` file.
     pub filtered: bool,
 }
 
@@ -122,9 +133,8 @@ impl SearchResults {
     /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page`s search query `q` provided in
     /// the search url.
-    /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
-    /// given search query.
-    /// * ``
+    /// * `engine_errors_info` - Takes an array of structs which contains information regarding
+    /// which engines failed with their names, reason and their severity color name.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
src/results/mod.rs CHANGED
@@ -1,3 +1,7 @@
+//! This module provides modules that handle the functionality to aggregate the fetched search
+//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
+//! provides various models to aggregate search results into a standardized form.
+
 pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
src/results/user_agent.rs CHANGED
@@ -4,6 +4,8 @@ use std::sync::OnceLock;
 
 use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
 
+/// A static variable which stores the initially build `UserAgents` struct. So as it can be resused
+/// again and again without the need of reinitializing the `UserAgents` struct.
 static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
 
 /// A function to generate random user agent to improve privacy of the user.
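The documented `USER_AGENTS` static uses `std::sync::OnceLock` (stable since Rust 1.70) so the relatively expensive `UserAgents` build happens once and is reused for every later request. A minimal sketch of the same lazy-initialization pattern with a simpler payload:

use std::sync::OnceLock;

// Built on first access, then shared for the life of the program.
static GREETING: OnceLock<String> = OnceLock::new();

fn greeting() -> &'static str {
    // `get_or_init` runs the closure only on the first call; every
    // later call returns the already-initialized value.
    GREETING.get_or_init(|| format!("hello from pid {}", std::process::id()))
}

fn main() {
    assert_eq!(greeting(), greeting());
    println!("{}", greeting());
}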
src/server/mod.rs CHANGED
@@ -1 +1,6 @@
+//! This module provides modules that handle the functionality of handling different routes/paths
+//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in
+//! the search route. Also, caches the next, current and previous search results in the search
+//! routes with the help of the redis server.
+
 pub mod routes;
src/server/routes.rs CHANGED
@@ -25,17 +25,16 @@ use tokio::join;
 static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 
 /// A named struct which deserializes all the user provided search parameters and stores them.
-///
-/// # Fields
-///
-/// * `q` - It stores the search parameter option `q` (or query in simple words)
-/// of the search url.
-/// * `page` - It stores the search parameter `page` (or pageno in simple words)
-/// of the search url.
 #[derive(Deserialize)]
 struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
     q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
     page: Option<u32>,
+    /// It stores the search parameter `safesearch` (or safe search level in simple words) of the
+    /// search url.
     safesearch: Option<u8>,
 }
 
@@ -63,17 +62,14 @@ pub async fn not_found(
 }
 
 /// A named struct which is used to deserialize the cookies fetched from the client side.
-///
-/// # Fields
-///
-/// * `theme` - It stores the theme name used in the website.
-/// * `colorscheme` - It stores the colorscheme name used for the website theme.
-/// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
 struct Cookie<'a> {
+    /// It stores the theme name used in the website.
     theme: &'a str,
+    /// It stores the colorscheme name used for the website theme.
     colorscheme: &'a str,
+    /// It stores the user selected upstream search engines selected from the UI.
     engines: Vec<&'a str>,
 }
 
@@ -174,8 +170,21 @@ pub async fn search(
     }
 }
 
-/// Fetches the results for a query and page.
-/// First checks the redis cache, if that fails it gets proper results
+/// Fetches the results for a query and page. It First checks the redis cache, if that
+/// fails it gets proper results by requesting from the upstream search engines.
+///
+/// # Arguments
+///
+/// * `url` - It takes the url of the current page that requested the search results for a
+/// particular search query.
+/// * `config` - It takes a parsed config struct.
+/// * `query` - It takes the page number as u32 value.
+/// * `req` - It takes the `HttpRequest` struct as a value.
+///
+/// # Error
+///
+/// It returns the `SearchResults` struct if the search results could be successfully fetched from
+/// the cache or from the upstream search engines otherwise it returns an appropriate error.
 async fn results(
     url: String,
     config: &Config,
@@ -184,6 +193,7 @@ async fn results(
     req: HttpRequest,
     safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    // Initialize redis cache connection struct
     let mut redis_cache: RedisCache = REDIS_CACHE
         .get_or_init(async {
             // Initialize redis cache connection pool only one and store it in the heap.
@@ -191,7 +201,6 @@
         })
         .await
         .clone();
-
     // fetch the cached results json.
     let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
         redis_cache.clone().cached_json(&url).await;
@@ -223,7 +232,8 @@
         // UI and use that.
         let mut results: SearchResults = match req.cookie("appCookie") {
             Some(cookie_value) => {
-                let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
+                let cookie_value: Cookie<'_> =
+                    serde_json::from_str(cookie_value.name_value().1)?;
 
                 let engines: Vec<EngineHandler> = cookie_value
                     .engines
@@ -267,6 +277,8 @@
     }
 }
 
+/// A helper function which checks whether the search query contains any keywords which should be
+/// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files.
 fn is_match_from_filter_list(
     file_path: &str,
     query: &str,
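The `Cookie<'_>` annotation in the hunk above exists because `rust_2018_idioms` (warned at the crate level in src/lib.rs) flags hidden, elided lifetime parameters; the struct needs a lifetime at all because its `&'a str` fields borrow directly from the cookie's JSON text instead of allocating. A hedged sketch of the same zero-copy deserialization, assuming serde with the derive feature and serde_json as dependencies:

use serde::Deserialize;

/// Borrows its fields from the input JSON rather than allocating Strings.
#[derive(Deserialize)]
struct Cookie<'a> {
    theme: &'a str,
    colorscheme: &'a str,
}

fn main() -> Result<(), serde_json::Error> {
    let raw = r#"{"theme":"simple","colorscheme":"solarized-dark"}"#;
    // The explicit `<'_>` is what `rust_2018_idioms` asks for in place
    // of a hidden, elided lifetime parameter.
    let cookie: Cookie<'_> = serde_json::from_str(raw)?;
    println!("{} / {}", cookie.theme, cookie.colorscheme);
    Ok(())
}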