XFFXFF committed on
Commit
11ba09a
2 Parent(s): 9d2fb6c 44216e4

Merge pull request #190 from neon-mmd/feat-content-blocking-using-lists

Browse files

✨ Filter user-specified content from search results using a `blocklist` and an `allowlist`.

Cargo.lock CHANGED
@@ -77,7 +77,7 @@ dependencies = [
77
  "encoding_rs",
78
  "flate2",
79
  "futures-core",
80
- "h2 0.3.20",
81
  "http 0.2.9",
82
  "httparse",
83
  "httpdate",
@@ -475,9 +475,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
475
 
476
  [[package]]
477
  name = "cc"
478
- version = "1.0.82"
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
480
- checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01"
481
  dependencies = [
482
  "jobserver",
483
  "libc",
@@ -816,9 +816,9 @@ dependencies = [
816
 
817
  [[package]]
818
  name = "deranged"
819
- version = "0.3.7"
820
  source = "registry+https://github.com/rust-lang/crates.io-index"
821
- checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929"
822
 
823
  [[package]]
824
  name = "derive_more"
@@ -1176,9 +1176,9 @@ dependencies = [
1176
 
1177
  [[package]]
1178
  name = "h2"
1179
- version = "0.3.20"
1180
  source = "registry+https://github.com/rust-lang/crates.io-index"
1181
- checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049"
1182
  dependencies = [
1183
  "bytes 1.4.0",
1184
  "fnv",
@@ -1363,7 +1363,7 @@ dependencies = [
1363
  "futures-channel",
1364
  "futures-core",
1365
  "futures-util",
1366
- "h2 0.3.20",
1367
  "http 0.2.9",
1368
  "http-body 0.4.5",
1369
  "httparse",
@@ -2454,16 +2454,16 @@ dependencies = [
2454
 
2455
  [[package]]
2456
  name = "reqwest"
2457
- version = "0.11.18"
2458
  source = "registry+https://github.com/rust-lang/crates.io-index"
2459
- checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
2460
  dependencies = [
2461
  "base64 0.21.2",
2462
  "bytes 1.4.0",
2463
  "encoding_rs",
2464
  "futures-core",
2465
  "futures-util",
2466
- "h2 0.3.20",
2467
  "http 0.2.9",
2468
  "http-body 0.4.5",
2469
  "hyper 0.14.27",
@@ -2486,7 +2486,7 @@ dependencies = [
2486
  "wasm-bindgen",
2487
  "wasm-bindgen-futures",
2488
  "web-sys",
2489
- "winreg 0.10.1",
2490
  ]
2491
 
2492
  [[package]]
@@ -2684,18 +2684,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
2684
 
2685
  [[package]]
2686
  name = "serde"
2687
- version = "1.0.183"
2688
  source = "registry+https://github.com/rust-lang/crates.io-index"
2689
- checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c"
2690
  dependencies = [
2691
  "serde_derive",
2692
  ]
2693
 
2694
  [[package]]
2695
  name = "serde_derive"
2696
- version = "1.0.183"
2697
  source = "registry+https://github.com/rust-lang/crates.io-index"
2698
- checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816"
2699
  dependencies = [
2700
  "proc-macro2 1.0.66",
2701
  "quote 1.0.33",
@@ -2797,9 +2797,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
2797
 
2798
  [[package]]
2799
  name = "slab"
2800
- version = "0.4.8"
2801
  source = "registry+https://github.com/rust-lang/crates.io-index"
2802
- checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d"
2803
  dependencies = [
2804
  "autocfg 1.1.0",
2805
  ]
@@ -3328,9 +3328,9 @@ checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
3328
 
3329
  [[package]]
3330
  name = "unicase"
3331
- version = "2.6.0"
3332
  source = "registry+https://github.com/rust-lang/crates.io-index"
3333
- checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
3334
  dependencies = [
3335
  "version_check",
3336
  ]
@@ -3543,7 +3543,7 @@ dependencies = [
3543
 
3544
  [[package]]
3545
  name = "websurfx"
3546
- version = "0.17.0"
3547
  dependencies = [
3548
  "actix-cors",
3549
  "actix-files",
@@ -3559,7 +3559,8 @@ dependencies = [
3559
  "once_cell",
3560
  "rand 0.8.5",
3561
  "redis",
3562
- "reqwest 0.11.18",
 
3563
  "rlua",
3564
  "rusty-hook",
3565
  "scraper",
@@ -3688,11 +3689,12 @@ dependencies = [
3688
 
3689
  [[package]]
3690
  name = "winreg"
3691
- version = "0.10.1"
3692
  source = "registry+https://github.com/rust-lang/crates.io-index"
3693
- checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
3694
  dependencies = [
3695
- "winapi 0.3.9",
 
3696
  ]
3697
 
3698
  [[package]]
 
77
  "encoding_rs",
78
  "flate2",
79
  "futures-core",
80
+ "h2 0.3.21",
81
  "http 0.2.9",
82
  "httparse",
83
  "httpdate",
 
475
 
476
  [[package]]
477
  name = "cc"
478
+ version = "1.0.83"
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
480
+ checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
481
  dependencies = [
482
  "jobserver",
483
  "libc",
 
816
 
817
  [[package]]
818
  name = "deranged"
819
+ version = "0.3.8"
820
  source = "registry+https://github.com/rust-lang/crates.io-index"
821
+ checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
822
 
823
  [[package]]
824
  name = "derive_more"
 
1176
 
1177
  [[package]]
1178
  name = "h2"
1179
+ version = "0.3.21"
1180
  source = "registry+https://github.com/rust-lang/crates.io-index"
1181
+ checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
1182
  dependencies = [
1183
  "bytes 1.4.0",
1184
  "fnv",
 
1363
  "futures-channel",
1364
  "futures-core",
1365
  "futures-util",
1366
+ "h2 0.3.21",
1367
  "http 0.2.9",
1368
  "http-body 0.4.5",
1369
  "httparse",
 
2454
 
2455
  [[package]]
2456
  name = "reqwest"
2457
+ version = "0.11.19"
2458
  source = "registry+https://github.com/rust-lang/crates.io-index"
2459
+ checksum = "20b9b67e2ca7dd9e9f9285b759de30ff538aab981abaaf7bc9bd90b84a0126c3"
2460
  dependencies = [
2461
  "base64 0.21.2",
2462
  "bytes 1.4.0",
2463
  "encoding_rs",
2464
  "futures-core",
2465
  "futures-util",
2466
+ "h2 0.3.21",
2467
  "http 0.2.9",
2468
  "http-body 0.4.5",
2469
  "hyper 0.14.27",
 
2486
  "wasm-bindgen",
2487
  "wasm-bindgen-futures",
2488
  "web-sys",
2489
+ "winreg 0.50.0",
2490
  ]
2491
 
2492
  [[package]]
 
2684
 
2685
  [[package]]
2686
  name = "serde"
2687
+ version = "1.0.185"
2688
  source = "registry+https://github.com/rust-lang/crates.io-index"
2689
+ checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31"
2690
  dependencies = [
2691
  "serde_derive",
2692
  ]
2693
 
2694
  [[package]]
2695
  name = "serde_derive"
2696
+ version = "1.0.185"
2697
  source = "registry+https://github.com/rust-lang/crates.io-index"
2698
+ checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec"
2699
  dependencies = [
2700
  "proc-macro2 1.0.66",
2701
  "quote 1.0.33",
 
2797
 
2798
  [[package]]
2799
  name = "slab"
2800
+ version = "0.4.9"
2801
  source = "registry+https://github.com/rust-lang/crates.io-index"
2802
+ checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
2803
  dependencies = [
2804
  "autocfg 1.1.0",
2805
  ]
 
3328
 
3329
  [[package]]
3330
  name = "unicase"
3331
+ version = "2.7.0"
3332
  source = "registry+https://github.com/rust-lang/crates.io-index"
3333
+ checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
3334
  dependencies = [
3335
  "version_check",
3336
  ]
 
3543
 
3544
  [[package]]
3545
  name = "websurfx"
3546
+ version = "0.18.0"
3547
  dependencies = [
3548
  "actix-cors",
3549
  "actix-files",
 
3559
  "once_cell",
3560
  "rand 0.8.5",
3561
  "redis",
3562
+ "regex",
3563
+ "reqwest 0.11.19",
3564
  "rlua",
3565
  "rusty-hook",
3566
  "scraper",
 
3689
 
3690
  [[package]]
3691
  name = "winreg"
3692
+ version = "0.50.0"
3693
  source = "registry+https://github.com/rust-lang/crates.io-index"
3694
+ checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
3695
  dependencies = [
3696
+ "cfg-if 1.0.0",
3697
+ "windows-sys",
3698
  ]
3699
 
3700
  [[package]]
Cargo.toml CHANGED
@@ -1,15 +1,15 @@
1
  [package]
2
  name = "websurfx"
3
- version = "0.17.0"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
7
  license = "AGPL-3.0"
8
 
9
  [dependencies]
10
- reqwest = {version="0.11.18",features=["json"]}
11
  tokio = {version="1.32.0",features=["full"]}
12
- serde = {version="1.0.183",features=["derive"]}
13
  handlebars = { version = "4.3.7", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
15
  actix-web = {version="4.3.1", features = ["cookies"]}
@@ -26,6 +26,7 @@ rand={version="0.8.5"}
26
  once_cell = {version="1.18.0"}
27
  error-stack = {version="0.3.1"}
28
  async-trait = {version="0.1.73"}
 
29
 
30
  [dev-dependencies]
31
  rusty-hook = "^0.11.2"
@@ -50,7 +51,7 @@ split-debuginfo = '...'
50
  debug-assertions = false
51
  overflow-checks = false
52
  lto = 'thin'
53
- panic = 'unwind'
54
  incremental = false
55
  codegen-units = 16
56
  rpath = false
 
1
  [package]
2
  name = "websurfx"
3
+ version = "0.18.0"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
7
  license = "AGPL-3.0"
8
 
9
  [dependencies]
10
+ reqwest = {version="0.11.19",features=["json"]}
11
  tokio = {version="1.32.0",features=["full"]}
12
+ serde = {version="1.0.185",features=["derive"]}
13
  handlebars = { version = "4.3.7", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
15
  actix-web = {version="4.3.1", features = ["cookies"]}
 
26
  once_cell = {version="1.18.0"}
27
  error-stack = {version="0.3.1"}
28
  async-trait = {version="0.1.73"}
29
+ regex = {version="1.9.3", features=["perf"]}
30
 
31
  [dev-dependencies]
32
  rusty-hook = "^0.11.2"
 
51
  debug-assertions = false
52
  overflow-checks = false
53
  lto = 'thin'
54
+ panic = 'abort'
55
  incremental = false
56
  codegen-units = 16
57
  rpath = false
src/config/parser.rs CHANGED
@@ -1,14 +1,12 @@
1
  //! This module provides the functionality to parse the lua config and convert the config options
2
  //! into rust readable form.
3
 
 
 
4
  use super::parser_models::Style;
5
  use log::LevelFilter;
6
  use rlua::Lua;
7
- use std::{collections::HashMap, format, fs, path::Path, thread::available_parallelism};
8
-
9
- // ------- Constants --------
10
- static COMMON_DIRECTORY_NAME: &str = "websurfx";
11
- static CONFIG_FILE_NAME: &str = "config.lua";
12
 
13
  /// A named struct which stores the parsed config file options.
14
  ///
@@ -69,7 +67,7 @@ impl Config {
69
  let globals = context.globals();
70
 
71
  context
72
- .load(&fs::read_to_string(Config::config_path()?)?)
73
  .exec()?;
74
 
75
  let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
@@ -114,52 +112,6 @@ impl Config {
114
  })
115
  })
116
  }
117
-
118
- /// A helper function which returns an appropriate config file path checking if the config
119
- /// file exists on that path.
120
- ///
121
- /// # Error
122
- ///
123
- /// Returns a `config file not found!!` error if the config file is not present under following
124
- /// paths which are:
125
- /// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
126
- /// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
127
- /// one (3).
128
- /// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
129
- /// here then it returns an error as mentioned above.
130
- fn config_path() -> Result<String, Box<dyn std::error::Error>> {
131
- // check user config
132
-
133
- let path = format!(
134
- "{}/.config/{}/config.lua",
135
- std::env::var("HOME").unwrap(),
136
- COMMON_DIRECTORY_NAME
137
- );
138
- if Path::new(path.as_str()).exists() {
139
- return Ok(format!(
140
- "{}/.config/{}/{}",
141
- std::env::var("HOME").unwrap(),
142
- COMMON_DIRECTORY_NAME,
143
- CONFIG_FILE_NAME
144
- ));
145
- }
146
-
147
- // look for config in /etc/xdg
148
- if Path::new(format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
149
- .exists()
150
- {
151
- return Ok("/etc/xdg/websurfx/config.lua".to_string());
152
- }
153
-
154
- // use dev config
155
- if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()).exists()
156
- {
157
- return Ok("./websurfx/config.lua".to_string());
158
- }
159
-
160
- // if no of the configs above exist, return error
161
- Err("Config file not found!!".to_string().into())
162
- }
163
  }
164
 
165
  /// a helper function that sets the proper logging level
 
1
  //! This module provides the functionality to parse the lua config and convert the config options
2
  //! into rust readable form.
3
 
4
+ use crate::handler::paths::{file_path, FileType};
5
+
6
  use super::parser_models::Style;
7
  use log::LevelFilter;
8
  use rlua::Lua;
9
+ use std::{collections::HashMap, fs, thread::available_parallelism};
 
 
 
 
10
 
11
  /// A named struct which stores the parsed config file options.
12
  ///
 
67
  let globals = context.globals();
68
 
69
  context
70
+ .load(&fs::read_to_string(file_path(FileType::Config)?)?)
71
  .exec()?;
72
 
73
  let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
 
112
  })
113
  })
114
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  }
116
 
117
  /// a helper function that sets the proper logging level
src/handler/mod.rs CHANGED
@@ -1 +1 @@
1
- pub mod public_paths;
 
1
+ pub mod paths;
src/handler/paths.rs ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! This module provides the functionality to handle theme folder present on different paths and
2
+ //! provide one appropriate path on which it is present and can be used.
3
+
4
+ use std::collections::HashMap;
5
+ use std::io::Error;
6
+ use std::path::Path;
7
+
8
+ // ------- Constants --------
9
+ static PUBLIC_DIRECTORY_NAME: &str = "public";
10
+ static COMMON_DIRECTORY_NAME: &str = "websurfx";
11
+ static CONFIG_FILE_NAME: &str = "config.lua";
12
+ static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
13
+ static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
14
+
15
/// The kinds of on-disk resources whose location can be resolved with `file_path`.
///
/// Each variant is used as a key into the table of candidate paths, so the type is
/// hashable and comparable; it is a fieldless enum and therefore cheap to copy.
#[derive(Hash, PartialEq, Eq, Debug, Clone, Copy)]
pub enum FileType {
    /// The lua configuration file (`config.lua`).
    Config,
    /// The list of regex patterns for results that must be kept (`allowlist.txt`).
    AllowList,
    /// The list of regex patterns for results that must be removed (`blocklist.txt`).
    BlockList,
    /// The theme (`public`) directory.
    Theme,
}
22
+
23
+ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
24
+ once_cell::sync::Lazy::new(|| {
25
+ HashMap::from([
26
+ (
27
+ FileType::Config,
28
+ vec![
29
+ format!(
30
+ "{}/.config/{}/{}",
31
+ std::env::var("HOME").unwrap(),
32
+ COMMON_DIRECTORY_NAME,
33
+ CONFIG_FILE_NAME
34
+ ),
35
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
36
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
37
+ ],
38
+ ),
39
+ (
40
+ FileType::Theme,
41
+ vec![
42
+ format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
43
+ format!("./{}/", PUBLIC_DIRECTORY_NAME),
44
+ ],
45
+ ),
46
+ (
47
+ FileType::AllowList,
48
+ vec![
49
+ format!(
50
+ "{}/.config/{}/{}",
51
+ std::env::var("HOME").unwrap(),
52
+ COMMON_DIRECTORY_NAME,
53
+ ALLOWLIST_FILE_NAME
54
+ ),
55
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
56
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
57
+ ],
58
+ ),
59
+ (
60
+ FileType::BlockList,
61
+ vec![
62
+ format!(
63
+ "{}/.config/{}/{}",
64
+ std::env::var("HOME").unwrap(),
65
+ COMMON_DIRECTORY_NAME,
66
+ BLOCKLIST_FILE_NAME
67
+ ),
68
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
69
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
70
+ ],
71
+ ),
72
+ ])
73
+ });
74
+
75
+ /// A helper function which returns an appropriate config file path checking if the config
76
+ /// file exists on that path.
77
+ ///
78
+ /// # Error
79
+ ///
80
+ /// Returns a `config file not found!!` error if the config file is not present under following
81
+ /// paths which are:
82
+ /// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
83
+ /// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
84
+ /// one (3).
85
+ /// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
86
+ /// here then it returns an error as mentioned above.
87
+
88
+ /// A function which returns an appropriate theme directory path checking if the theme
89
+ /// directory exists on that path.
90
+ ///
91
+ /// # Error
92
+ ///
93
+ /// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
94
+ /// paths which are:
95
+ /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
96
+ /// 2. Under project folder ( or codebase in other words) if it is not present
97
+ /// here then it returns an error as mentioned above.
98
+ pub fn file_path(file_type: FileType) -> Result<String, Error> {
99
+ let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
100
+ for (idx, _) in file_path.iter().enumerate() {
101
+ if Path::new(file_path[idx].as_str()).exists() {
102
+ return Ok(file_path[idx].clone());
103
+ }
104
+ }
105
+
106
+ // if no of the configs above exist, return error
107
+ Err(Error::new(
108
+ std::io::ErrorKind::NotFound,
109
+ format!("{:?} file not found!!", file_type),
110
+ ))
111
+ }
src/handler/public_paths.rs DELETED
@@ -1,33 +0,0 @@
1
- //! This module provides the functionality to handle theme folder present on different paths and
2
- //! provide one appropriate path on which it is present and can be used.
3
-
4
- use std::io::Error;
5
- use std::path::Path;
6
-
7
- // ------- Constants --------
8
- static PUBLIC_DIRECTORY_NAME: &str = "public";
9
-
10
- /// A function which returns an appropriate theme directory path checking if the theme
11
- /// directory exists on that path.
12
- ///
13
- /// # Error
14
- ///
15
- /// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
16
- /// paths which are:
17
- /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
18
- /// 2. Under project folder ( or codebase in other words) if it is not present
19
- /// here then it returns an error as mentioned above.
20
- pub fn public_path() -> Result<String, Error> {
21
- if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
22
- return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
23
- }
24
-
25
- if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
26
- return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
27
- }
28
-
29
- Err(Error::new(
30
- std::io::ErrorKind::NotFound,
31
- "Themes (public) folder not found!!",
32
- ))
33
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/lib.rs CHANGED
@@ -17,7 +17,7 @@ use actix_files as fs;
17
  use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
18
  use config::parser::Config;
19
  use handlebars::Handlebars;
20
- use handler::public_paths::public_path;
21
 
22
  /// Runs the web server on the provided TCP listener and returns a `Server` instance.
23
  ///
@@ -42,7 +42,7 @@ use handler::public_paths::public_path;
42
  pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
43
  let mut handlebars: Handlebars = Handlebars::new();
44
 
45
- let public_folder_path: String = public_path()?;
46
 
47
  handlebars
48
  .register_templates_directory(".html", format!("{}/templates", public_folder_path))
 
17
  use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
18
  use config::parser::Config;
19
  use handlebars::Handlebars;
20
+ use handler::paths::{file_path, FileType};
21
 
22
  /// Runs the web server on the provided TCP listener and returns a `Server` instance.
23
  ///
 
42
  pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
43
  let mut handlebars: Handlebars = Handlebars::new();
44
 
45
+ let public_folder_path: String = file_path(FileType::Theme)?;
46
 
47
  handlebars
48
  .register_templates_directory(".html", format!("{}/templates", public_folder_path))
src/results/aggregator.rs CHANGED
@@ -1,18 +1,26 @@
1
  //! This module provides the functionality to scrape and gathers all the results from the upstream
2
  //! search engines and then removes duplicate results.
3
 
4
- use std::{collections::HashMap, time::Duration};
5
-
6
- use error_stack::Report;
7
- use rand::Rng;
8
- use tokio::task::JoinHandle;
9
 
10
  use super::{
11
  aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
12
  user_agent::random_user_agent,
13
  };
 
 
 
 
 
14
 
15
- use crate::engines::engine_models::{EngineError, EngineHandler};
 
 
 
16
 
17
  /// Aliases for long type annotations
18
  type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
@@ -106,7 +114,7 @@ pub async fn aggregate(
106
  log::error!("Engine Error: {:?}", error);
107
  engine_errors_info.push(EngineErrorInfo::new(
108
  error.downcast_ref::<EngineError>().unwrap(),
109
- engine_name.to_string(),
110
  ));
111
  };
112
 
@@ -143,7 +151,22 @@ pub async fn aggregate(
143
  }
144
  }
145
 
146
- let results = result_map.into_values().collect();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  Ok(SearchResults::new(
149
  results,
@@ -151,3 +174,23 @@ pub async fn aggregate(
151
  engine_errors_info,
152
  ))
153
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  //! This module provides the functionality to scrape and gathers all the results from the upstream
2
  //! search engines and then removes duplicate results.
3
 
4
+ use std::{
5
+ collections::HashMap,
6
+ io::{BufReader, Read},
7
+ time::Duration,
8
+ };
9
 
10
  use super::{
11
  aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
12
  user_agent::random_user_agent,
13
  };
14
+ use error_stack::Report;
15
+ use rand::Rng;
16
+ use regex::Regex;
17
+ use std::{fs::File, io::BufRead};
18
+ use tokio::task::JoinHandle;
19
 
20
+ use crate::{
21
+ engines::engine_models::{EngineError, EngineHandler},
22
+ handler::paths::{file_path, FileType},
23
+ };
24
 
25
  /// Aliases for long type annotations
26
  type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 
114
  log::error!("Engine Error: {:?}", error);
115
  engine_errors_info.push(EngineErrorInfo::new(
116
  error.downcast_ref::<EngineError>().unwrap(),
117
+ engine_name,
118
  ));
119
  };
120
 
 
151
  }
152
  }
153
 
154
+ let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
155
+ filter_with_lists(
156
+ &mut result_map,
157
+ &mut blacklist_map,
158
+ &file_path(FileType::BlockList)?,
159
+ )?;
160
+
161
+ filter_with_lists(
162
+ &mut blacklist_map,
163
+ &mut result_map,
164
+ &file_path(FileType::AllowList)?,
165
+ )?;
166
+
167
+ drop(blacklist_map);
168
+
169
+ let results: Vec<SearchResult> = result_map.into_values().collect();
170
 
171
  Ok(SearchResults::new(
172
  results,
 
174
  engine_errors_info,
175
  ))
176
  }
177
+
178
+ fn filter_with_lists(
179
+ map_to_be_filtered: &mut HashMap<String, SearchResult>,
180
+ resultant_map: &mut HashMap<String, SearchResult>,
181
+ file_path: &str,
182
+ ) -> Result<(), Box<dyn std::error::Error>> {
183
+ let mut reader = BufReader::new(File::open(file_path)?);
184
+ for line in reader.by_ref().lines() {
185
+ let re = Regex::new(&line?)?;
186
+ for (url, search_result) in map_to_be_filtered.clone().into_iter() {
187
+ if re.is_match(&url.to_lowercase())
188
+ || re.is_match(&search_result.title.to_lowercase())
189
+ || re.is_match(&search_result.description.to_lowercase())
190
+ {
191
+ resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
192
+ }
193
+ }
194
+ }
195
+ Ok(())
196
+ }
src/server/routes.rs CHANGED
@@ -8,7 +8,7 @@ use crate::{
8
  cache::cacher::RedisCache,
9
  config::parser::Config,
10
  engines::engine_models::EngineHandler,
11
- handler::public_paths::public_path,
12
  results::{aggregation_models::SearchResults, aggregator::aggregate},
13
  };
14
  use actix_web::{get, web, HttpRequest, HttpResponse};
@@ -215,7 +215,8 @@ async fn results(
215
  /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
216
  #[get("/robots.txt")]
217
  pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
218
- let page_content: String = read_to_string(format!("{}/robots.txt", public_path()?))?;
 
219
  Ok(HttpResponse::Ok()
220
  .content_type("text/plain; charset=ascii")
221
  .body(page_content))
 
8
  cache::cacher::RedisCache,
9
  config::parser::Config,
10
  engines::engine_models::EngineHandler,
11
+ handler::paths::{file_path, FileType},
12
  results::{aggregation_models::SearchResults, aggregator::aggregate},
13
  };
14
  use actix_web::{get, web, HttpRequest, HttpResponse};
 
215
  /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
216
  #[get("/robots.txt")]
217
  pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
218
+ let page_content: String =
219
+ read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
220
  Ok(HttpResponse::Ok()
221
  .content_type("text/plain; charset=ascii")
222
  .body(page_content))
websurfx/allowlist.txt ADDED
File without changes
websurfx/blocklist.txt ADDED
File without changes