Merge pull request #190 from neon-mmd/feat-content-blocking-using-lists
Browse files ✨ Filter user-specified content from search results using a `blocklist` and an `allowlist`.
- Cargo.lock +27 -25
- Cargo.toml +5 -4
- src/config/parser.rs +4 -52
- src/handler/mod.rs +1 -1
- src/handler/paths.rs +111 -0
- src/handler/public_paths.rs +0 -33
- src/lib.rs +2 -2
- src/results/aggregator.rs +51 -8
- src/server/routes.rs +3 -2
- websurfx/allowlist.txt +0 -0
- websurfx/blocklist.txt +0 -0
Cargo.lock
CHANGED
@@ -77,7 +77,7 @@ dependencies = [
|
|
77 |
"encoding_rs",
|
78 |
"flate2",
|
79 |
"futures-core",
|
80 |
-
"h2 0.3.
|
81 |
"http 0.2.9",
|
82 |
"httparse",
|
83 |
"httpdate",
|
@@ -475,9 +475,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
|
475 |
|
476 |
[[package]]
|
477 |
name = "cc"
|
478 |
-
version = "1.0.
|
479 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
480 |
-
checksum = "
|
481 |
dependencies = [
|
482 |
"jobserver",
|
483 |
"libc",
|
@@ -816,9 +816,9 @@ dependencies = [
|
|
816 |
|
817 |
[[package]]
|
818 |
name = "deranged"
|
819 |
-
version = "0.3.
|
820 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
821 |
-
checksum = "
|
822 |
|
823 |
[[package]]
|
824 |
name = "derive_more"
|
@@ -1176,9 +1176,9 @@ dependencies = [
|
|
1176 |
|
1177 |
[[package]]
|
1178 |
name = "h2"
|
1179 |
-
version = "0.3.
|
1180 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1181 |
-
checksum = "
|
1182 |
dependencies = [
|
1183 |
"bytes 1.4.0",
|
1184 |
"fnv",
|
@@ -1363,7 +1363,7 @@ dependencies = [
|
|
1363 |
"futures-channel",
|
1364 |
"futures-core",
|
1365 |
"futures-util",
|
1366 |
-
"h2 0.3.
|
1367 |
"http 0.2.9",
|
1368 |
"http-body 0.4.5",
|
1369 |
"httparse",
|
@@ -2454,16 +2454,16 @@ dependencies = [
|
|
2454 |
|
2455 |
[[package]]
|
2456 |
name = "reqwest"
|
2457 |
-
version = "0.11.
|
2458 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2459 |
-
checksum = "
|
2460 |
dependencies = [
|
2461 |
"base64 0.21.2",
|
2462 |
"bytes 1.4.0",
|
2463 |
"encoding_rs",
|
2464 |
"futures-core",
|
2465 |
"futures-util",
|
2466 |
-
"h2 0.3.
|
2467 |
"http 0.2.9",
|
2468 |
"http-body 0.4.5",
|
2469 |
"hyper 0.14.27",
|
@@ -2486,7 +2486,7 @@ dependencies = [
|
|
2486 |
"wasm-bindgen",
|
2487 |
"wasm-bindgen-futures",
|
2488 |
"web-sys",
|
2489 |
-
"winreg 0.
|
2490 |
]
|
2491 |
|
2492 |
[[package]]
|
@@ -2684,18 +2684,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
|
2684 |
|
2685 |
[[package]]
|
2686 |
name = "serde"
|
2687 |
-
version = "1.0.
|
2688 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2689 |
-
checksum = "
|
2690 |
dependencies = [
|
2691 |
"serde_derive",
|
2692 |
]
|
2693 |
|
2694 |
[[package]]
|
2695 |
name = "serde_derive"
|
2696 |
-
version = "1.0.
|
2697 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2698 |
-
checksum = "
|
2699 |
dependencies = [
|
2700 |
"proc-macro2 1.0.66",
|
2701 |
"quote 1.0.33",
|
@@ -2797,9 +2797,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
|
2797 |
|
2798 |
[[package]]
|
2799 |
name = "slab"
|
2800 |
-
version = "0.4.
|
2801 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2802 |
-
checksum = "
|
2803 |
dependencies = [
|
2804 |
"autocfg 1.1.0",
|
2805 |
]
|
@@ -3328,9 +3328,9 @@ checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
|
|
3328 |
|
3329 |
[[package]]
|
3330 |
name = "unicase"
|
3331 |
-
version = "2.
|
3332 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3333 |
-
checksum = "
|
3334 |
dependencies = [
|
3335 |
"version_check",
|
3336 |
]
|
@@ -3543,7 +3543,7 @@ dependencies = [
|
|
3543 |
|
3544 |
[[package]]
|
3545 |
name = "websurfx"
|
3546 |
-
version = "0.
|
3547 |
dependencies = [
|
3548 |
"actix-cors",
|
3549 |
"actix-files",
|
@@ -3559,7 +3559,8 @@ dependencies = [
|
|
3559 |
"once_cell",
|
3560 |
"rand 0.8.5",
|
3561 |
"redis",
|
3562 |
-
"
|
|
|
3563 |
"rlua",
|
3564 |
"rusty-hook",
|
3565 |
"scraper",
|
@@ -3688,11 +3689,12 @@ dependencies = [
|
|
3688 |
|
3689 |
[[package]]
|
3690 |
name = "winreg"
|
3691 |
-
version = "0.
|
3692 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3693 |
-
checksum = "
|
3694 |
dependencies = [
|
3695 |
-
"
|
|
|
3696 |
]
|
3697 |
|
3698 |
[[package]]
|
|
|
77 |
"encoding_rs",
|
78 |
"flate2",
|
79 |
"futures-core",
|
80 |
+
"h2 0.3.21",
|
81 |
"http 0.2.9",
|
82 |
"httparse",
|
83 |
"httpdate",
|
|
|
475 |
|
476 |
[[package]]
|
477 |
name = "cc"
|
478 |
+
version = "1.0.83"
|
479 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
480 |
+
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
|
481 |
dependencies = [
|
482 |
"jobserver",
|
483 |
"libc",
|
|
|
816 |
|
817 |
[[package]]
|
818 |
name = "deranged"
|
819 |
+
version = "0.3.8"
|
820 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
821 |
+
checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
|
822 |
|
823 |
[[package]]
|
824 |
name = "derive_more"
|
|
|
1176 |
|
1177 |
[[package]]
|
1178 |
name = "h2"
|
1179 |
+
version = "0.3.21"
|
1180 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1181 |
+
checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
|
1182 |
dependencies = [
|
1183 |
"bytes 1.4.0",
|
1184 |
"fnv",
|
|
|
1363 |
"futures-channel",
|
1364 |
"futures-core",
|
1365 |
"futures-util",
|
1366 |
+
"h2 0.3.21",
|
1367 |
"http 0.2.9",
|
1368 |
"http-body 0.4.5",
|
1369 |
"httparse",
|
|
|
2454 |
|
2455 |
[[package]]
|
2456 |
name = "reqwest"
|
2457 |
+
version = "0.11.19"
|
2458 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2459 |
+
checksum = "20b9b67e2ca7dd9e9f9285b759de30ff538aab981abaaf7bc9bd90b84a0126c3"
|
2460 |
dependencies = [
|
2461 |
"base64 0.21.2",
|
2462 |
"bytes 1.4.0",
|
2463 |
"encoding_rs",
|
2464 |
"futures-core",
|
2465 |
"futures-util",
|
2466 |
+
"h2 0.3.21",
|
2467 |
"http 0.2.9",
|
2468 |
"http-body 0.4.5",
|
2469 |
"hyper 0.14.27",
|
|
|
2486 |
"wasm-bindgen",
|
2487 |
"wasm-bindgen-futures",
|
2488 |
"web-sys",
|
2489 |
+
"winreg 0.50.0",
|
2490 |
]
|
2491 |
|
2492 |
[[package]]
|
|
|
2684 |
|
2685 |
[[package]]
|
2686 |
name = "serde"
|
2687 |
+
version = "1.0.185"
|
2688 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2689 |
+
checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31"
|
2690 |
dependencies = [
|
2691 |
"serde_derive",
|
2692 |
]
|
2693 |
|
2694 |
[[package]]
|
2695 |
name = "serde_derive"
|
2696 |
+
version = "1.0.185"
|
2697 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2698 |
+
checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec"
|
2699 |
dependencies = [
|
2700 |
"proc-macro2 1.0.66",
|
2701 |
"quote 1.0.33",
|
|
|
2797 |
|
2798 |
[[package]]
|
2799 |
name = "slab"
|
2800 |
+
version = "0.4.9"
|
2801 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2802 |
+
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
|
2803 |
dependencies = [
|
2804 |
"autocfg 1.1.0",
|
2805 |
]
|
|
|
3328 |
|
3329 |
[[package]]
|
3330 |
name = "unicase"
|
3331 |
+
version = "2.7.0"
|
3332 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3333 |
+
checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
|
3334 |
dependencies = [
|
3335 |
"version_check",
|
3336 |
]
|
|
|
3543 |
|
3544 |
[[package]]
|
3545 |
name = "websurfx"
|
3546 |
+
version = "0.18.0"
|
3547 |
dependencies = [
|
3548 |
"actix-cors",
|
3549 |
"actix-files",
|
|
|
3559 |
"once_cell",
|
3560 |
"rand 0.8.5",
|
3561 |
"redis",
|
3562 |
+
"regex",
|
3563 |
+
"reqwest 0.11.19",
|
3564 |
"rlua",
|
3565 |
"rusty-hook",
|
3566 |
"scraper",
|
|
|
3689 |
|
3690 |
[[package]]
|
3691 |
name = "winreg"
|
3692 |
+
version = "0.50.0"
|
3693 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3694 |
+
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
|
3695 |
dependencies = [
|
3696 |
+
"cfg-if 1.0.0",
|
3697 |
+
"windows-sys",
|
3698 |
]
|
3699 |
|
3700 |
[[package]]
|
Cargo.toml
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
-
version = "0.
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
7 |
license = "AGPL-3.0"
|
8 |
|
9 |
[dependencies]
|
10 |
-
reqwest = {version="0.11.
|
11 |
tokio = {version="1.32.0",features=["full"]}
|
12 |
-
serde = {version="1.0.
|
13 |
handlebars = { version = "4.3.7", features = ["dir_source"] }
|
14 |
scraper = {version="0.17.1"}
|
15 |
actix-web = {version="4.3.1", features = ["cookies"]}
|
@@ -26,6 +26,7 @@ rand={version="0.8.5"}
|
|
26 |
once_cell = {version="1.18.0"}
|
27 |
error-stack = {version="0.3.1"}
|
28 |
async-trait = {version="0.1.73"}
|
|
|
29 |
|
30 |
[dev-dependencies]
|
31 |
rusty-hook = "^0.11.2"
|
@@ -50,7 +51,7 @@ split-debuginfo = '...'
|
|
50 |
debug-assertions = false
|
51 |
overflow-checks = false
|
52 |
lto = 'thin'
|
53 |
-
panic = '
|
54 |
incremental = false
|
55 |
codegen-units = 16
|
56 |
rpath = false
|
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
+
version = "0.18.0"
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
7 |
license = "AGPL-3.0"
|
8 |
|
9 |
[dependencies]
|
10 |
+
reqwest = {version="0.11.19",features=["json"]}
|
11 |
tokio = {version="1.32.0",features=["full"]}
|
12 |
+
serde = {version="1.0.185",features=["derive"]}
|
13 |
handlebars = { version = "4.3.7", features = ["dir_source"] }
|
14 |
scraper = {version="0.17.1"}
|
15 |
actix-web = {version="4.3.1", features = ["cookies"]}
|
|
|
26 |
once_cell = {version="1.18.0"}
|
27 |
error-stack = {version="0.3.1"}
|
28 |
async-trait = {version="0.1.73"}
|
29 |
+
regex = {version="1.9.3", features=["perf"]}
|
30 |
|
31 |
[dev-dependencies]
|
32 |
rusty-hook = "^0.11.2"
|
|
|
51 |
debug-assertions = false
|
52 |
overflow-checks = false
|
53 |
lto = 'thin'
|
54 |
+
panic = 'abort'
|
55 |
incremental = false
|
56 |
codegen-units = 16
|
57 |
rpath = false
|
src/config/parser.rs
CHANGED
@@ -1,14 +1,12 @@
|
|
1 |
//! This module provides the functionality to parse the lua config and convert the config options
|
2 |
//! into rust readable form.
|
3 |
|
|
|
|
|
4 |
use super::parser_models::Style;
|
5 |
use log::LevelFilter;
|
6 |
use rlua::Lua;
|
7 |
-
use std::{collections::HashMap,
|
8 |
-
|
9 |
-
// ------- Constants --------
|
10 |
-
static COMMON_DIRECTORY_NAME: &str = "websurfx";
|
11 |
-
static CONFIG_FILE_NAME: &str = "config.lua";
|
12 |
|
13 |
/// A named struct which stores the parsed config file options.
|
14 |
///
|
@@ -69,7 +67,7 @@ impl Config {
|
|
69 |
let globals = context.globals();
|
70 |
|
71 |
context
|
72 |
-
.load(&fs::read_to_string(Config
|
73 |
.exec()?;
|
74 |
|
75 |
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
|
@@ -114,52 +112,6 @@ impl Config {
|
|
114 |
})
|
115 |
})
|
116 |
}
|
117 |
-
|
118 |
-
/// A helper function which returns an appropriate config file path checking if the config
|
119 |
-
/// file exists on that path.
|
120 |
-
///
|
121 |
-
/// # Error
|
122 |
-
///
|
123 |
-
/// Returns a `config file not found!!` error if the config file is not present under following
|
124 |
-
/// paths which are:
|
125 |
-
/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
|
126 |
-
/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
|
127 |
-
/// one (3).
|
128 |
-
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
|
129 |
-
/// here then it returns an error as mentioned above.
|
130 |
-
fn config_path() -> Result<String, Box<dyn std::error::Error>> {
|
131 |
-
// check user config
|
132 |
-
|
133 |
-
let path = format!(
|
134 |
-
"{}/.config/{}/config.lua",
|
135 |
-
std::env::var("HOME").unwrap(),
|
136 |
-
COMMON_DIRECTORY_NAME
|
137 |
-
);
|
138 |
-
if Path::new(path.as_str()).exists() {
|
139 |
-
return Ok(format!(
|
140 |
-
"{}/.config/{}/{}",
|
141 |
-
std::env::var("HOME").unwrap(),
|
142 |
-
COMMON_DIRECTORY_NAME,
|
143 |
-
CONFIG_FILE_NAME
|
144 |
-
));
|
145 |
-
}
|
146 |
-
|
147 |
-
// look for config in /etc/xdg
|
148 |
-
if Path::new(format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str())
|
149 |
-
.exists()
|
150 |
-
{
|
151 |
-
return Ok("/etc/xdg/websurfx/config.lua".to_string());
|
152 |
-
}
|
153 |
-
|
154 |
-
// use dev config
|
155 |
-
if Path::new(format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME).as_str()).exists()
|
156 |
-
{
|
157 |
-
return Ok("./websurfx/config.lua".to_string());
|
158 |
-
}
|
159 |
-
|
160 |
-
// if no of the configs above exist, return error
|
161 |
-
Err("Config file not found!!".to_string().into())
|
162 |
-
}
|
163 |
}
|
164 |
|
165 |
/// a helper function that sets the proper logging level
|
|
|
1 |
//! This module provides the functionality to parse the lua config and convert the config options
|
2 |
//! into rust readable form.
|
3 |
|
4 |
+
use crate::handler::paths::{file_path, FileType};
|
5 |
+
|
6 |
use super::parser_models::Style;
|
7 |
use log::LevelFilter;
|
8 |
use rlua::Lua;
|
9 |
+
use std::{collections::HashMap, fs, thread::available_parallelism};
|
|
|
|
|
|
|
|
|
10 |
|
11 |
/// A named struct which stores the parsed config file options.
|
12 |
///
|
|
|
67 |
let globals = context.globals();
|
68 |
|
69 |
context
|
70 |
+
.load(&fs::read_to_string(file_path(FileType::Config)?)?)
|
71 |
.exec()?;
|
72 |
|
73 |
let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
|
|
|
112 |
})
|
113 |
})
|
114 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
}
|
116 |
|
117 |
/// a helper function that sets the proper logging level
|
src/handler/mod.rs
CHANGED
@@ -1 +1 @@
|
|
1 |
-
pub mod
|
|
|
1 |
+
pub mod paths;
|
src/handler/paths.rs
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//! This module provides the functionality to handle the config file, allowlist, blocklist and theme folder present on different paths and
|
2 |
+
//! provide one appropriate path on which it is present and can be used.
|
3 |
+
|
4 |
+
use std::collections::HashMap;
|
5 |
+
use std::io::Error;
|
6 |
+
use std::path::Path;
|
7 |
+
|
8 |
+
// ------- Constants --------
|
9 |
+
static PUBLIC_DIRECTORY_NAME: &str = "public";
|
10 |
+
static COMMON_DIRECTORY_NAME: &str = "websurfx";
|
11 |
+
static CONFIG_FILE_NAME: &str = "config.lua";
|
12 |
+
static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
|
13 |
+
static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
|
14 |
+
|
15 |
+
#[derive(Hash, PartialEq, Eq, Debug)]
|
16 |
+
pub enum FileType {
|
17 |
+
Config,
|
18 |
+
AllowList,
|
19 |
+
BlockList,
|
20 |
+
Theme,
|
21 |
+
}
|
22 |
+
|
23 |
+
static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
|
24 |
+
once_cell::sync::Lazy::new(|| {
|
25 |
+
HashMap::from([
|
26 |
+
(
|
27 |
+
FileType::Config,
|
28 |
+
vec![
|
29 |
+
format!(
|
30 |
+
"{}/.config/{}/{}",
|
31 |
+
std::env::var("HOME").unwrap(),
|
32 |
+
COMMON_DIRECTORY_NAME,
|
33 |
+
CONFIG_FILE_NAME
|
34 |
+
),
|
35 |
+
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
36 |
+
format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
|
37 |
+
],
|
38 |
+
),
|
39 |
+
(
|
40 |
+
FileType::Theme,
|
41 |
+
vec![
|
42 |
+
format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
|
43 |
+
format!("./{}/", PUBLIC_DIRECTORY_NAME),
|
44 |
+
],
|
45 |
+
),
|
46 |
+
(
|
47 |
+
FileType::AllowList,
|
48 |
+
vec![
|
49 |
+
format!(
|
50 |
+
"{}/.config/{}/{}",
|
51 |
+
std::env::var("HOME").unwrap(),
|
52 |
+
COMMON_DIRECTORY_NAME,
|
53 |
+
ALLOWLIST_FILE_NAME
|
54 |
+
),
|
55 |
+
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
56 |
+
format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
|
57 |
+
],
|
58 |
+
),
|
59 |
+
(
|
60 |
+
FileType::BlockList,
|
61 |
+
vec![
|
62 |
+
format!(
|
63 |
+
"{}/.config/{}/{}",
|
64 |
+
std::env::var("HOME").unwrap(),
|
65 |
+
COMMON_DIRECTORY_NAME,
|
66 |
+
BLOCKLIST_FILE_NAME
|
67 |
+
),
|
68 |
+
format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
69 |
+
format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
|
70 |
+
],
|
71 |
+
),
|
72 |
+
])
|
73 |
+
});
|
74 |
+
|
75 |
+
/// A helper function which returns an appropriate config file path checking if the config
|
76 |
+
/// file exists on that path.
|
77 |
+
///
|
78 |
+
/// # Error
|
79 |
+
///
|
80 |
+
/// Returns a `Config file not found!!` error if the config file is not present under the following
|
81 |
+
/// paths which are:
|
82 |
+
/// 1. `~/.config/websurfx/` if it is not present here then it falls back to the next one (2)
|
83 |
+
/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it falls back to the next
|
84 |
+
/// one (3).
|
85 |
+
/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
|
86 |
+
/// here then it returns an error as mentioned above.
|
87 |
+
|
88 |
+
/// A function which returns an appropriate theme directory path checking if the theme
|
89 |
+
/// directory exists on that path.
|
90 |
+
///
|
91 |
+
/// # Error
|
92 |
+
///
|
93 |
+
/// Returns a `Theme file not found!!` error if the theme folder is not present under the following
|
94 |
+
/// paths which are:
|
95 |
+
/// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
|
96 |
+
/// 2. Under project folder ( or codebase in other words) if it is not present
|
97 |
+
/// here then it returns an error as mentioned above.
|
98 |
+
pub fn file_path(file_type: FileType) -> Result<String, Error> {
|
99 |
+
let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
|
100 |
+
for (idx, _) in file_path.iter().enumerate() {
|
101 |
+
if Path::new(file_path[idx].as_str()).exists() {
|
102 |
+
return Ok(file_path[idx].clone());
|
103 |
+
}
|
104 |
+
}
|
105 |
+
|
106 |
+
// if none of the candidate paths above exist, return an error
|
107 |
+
Err(Error::new(
|
108 |
+
std::io::ErrorKind::NotFound,
|
109 |
+
format!("{:?} file not found!!", file_type),
|
110 |
+
))
|
111 |
+
}
|
src/handler/public_paths.rs
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
//! This module provides the functionality to handle theme folder present on different paths and
|
2 |
-
//! provide one appropriate path on which it is present and can be used.
|
3 |
-
|
4 |
-
use std::io::Error;
|
5 |
-
use std::path::Path;
|
6 |
-
|
7 |
-
// ------- Constants --------
|
8 |
-
static PUBLIC_DIRECTORY_NAME: &str = "public";
|
9 |
-
|
10 |
-
/// A function which returns an appropriate theme directory path checking if the theme
|
11 |
-
/// directory exists on that path.
|
12 |
-
///
|
13 |
-
/// # Error
|
14 |
-
///
|
15 |
-
/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
|
16 |
-
/// paths which are:
|
17 |
-
/// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
|
18 |
-
/// 2. Under project folder ( or codebase in other words) if it is not present
|
19 |
-
/// here then it returns an error as mentioned above.
|
20 |
-
pub fn public_path() -> Result<String, Error> {
|
21 |
-
if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
22 |
-
return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
|
23 |
-
}
|
24 |
-
|
25 |
-
if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
|
26 |
-
return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
|
27 |
-
}
|
28 |
-
|
29 |
-
Err(Error::new(
|
30 |
-
std::io::ErrorKind::NotFound,
|
31 |
-
"Themes (public) folder not found!!",
|
32 |
-
))
|
33 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/lib.rs
CHANGED
@@ -17,7 +17,7 @@ use actix_files as fs;
|
|
17 |
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
|
18 |
use config::parser::Config;
|
19 |
use handlebars::Handlebars;
|
20 |
-
use handler::
|
21 |
|
22 |
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
23 |
///
|
@@ -42,7 +42,7 @@ use handler::public_paths::public_path;
|
|
42 |
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
43 |
let mut handlebars: Handlebars = Handlebars::new();
|
44 |
|
45 |
-
let public_folder_path: String =
|
46 |
|
47 |
handlebars
|
48 |
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
|
|
17 |
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
|
18 |
use config::parser::Config;
|
19 |
use handlebars::Handlebars;
|
20 |
+
use handler::paths::{file_path, FileType};
|
21 |
|
22 |
/// Runs the web server on the provided TCP listener and returns a `Server` instance.
|
23 |
///
|
|
|
42 |
pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
|
43 |
let mut handlebars: Handlebars = Handlebars::new();
|
44 |
|
45 |
+
let public_folder_path: String = file_path(FileType::Theme)?;
|
46 |
|
47 |
handlebars
|
48 |
.register_templates_directory(".html", format!("{}/templates", public_folder_path))
|
src/results/aggregator.rs
CHANGED
@@ -1,18 +1,26 @@
|
|
1 |
//! This module provides the functionality to scrape and gather all the results from the upstream
|
2 |
//! search engines and then removes duplicate results.
|
3 |
|
4 |
-
use std::{
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
|
10 |
use super::{
|
11 |
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
|
12 |
user_agent::random_user_agent,
|
13 |
};
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
use crate::
|
|
|
|
|
|
|
16 |
|
17 |
/// Aliases for long type annotations
|
18 |
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
|
@@ -106,7 +114,7 @@ pub async fn aggregate(
|
|
106 |
log::error!("Engine Error: {:?}", error);
|
107 |
engine_errors_info.push(EngineErrorInfo::new(
|
108 |
error.downcast_ref::<EngineError>().unwrap(),
|
109 |
-
engine_name
|
110 |
));
|
111 |
};
|
112 |
|
@@ -143,7 +151,22 @@ pub async fn aggregate(
|
|
143 |
}
|
144 |
}
|
145 |
|
146 |
-
let
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
Ok(SearchResults::new(
|
149 |
results,
|
@@ -151,3 +174,23 @@ pub async fn aggregate(
|
|
151 |
engine_errors_info,
|
152 |
))
|
153 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
//! This module provides the functionality to scrape and gather all the results from the upstream
|
2 |
//! search engines and then removes duplicate results.
|
3 |
|
4 |
+
use std::{
|
5 |
+
collections::HashMap,
|
6 |
+
io::{BufReader, Read},
|
7 |
+
time::Duration,
|
8 |
+
};
|
9 |
|
10 |
use super::{
|
11 |
aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
|
12 |
user_agent::random_user_agent,
|
13 |
};
|
14 |
+
use error_stack::Report;
|
15 |
+
use rand::Rng;
|
16 |
+
use regex::Regex;
|
17 |
+
use std::{fs::File, io::BufRead};
|
18 |
+
use tokio::task::JoinHandle;
|
19 |
|
20 |
+
use crate::{
|
21 |
+
engines::engine_models::{EngineError, EngineHandler},
|
22 |
+
handler::paths::{file_path, FileType},
|
23 |
+
};
|
24 |
|
25 |
/// Aliases for long type annotations
|
26 |
type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
|
|
|
114 |
log::error!("Engine Error: {:?}", error);
|
115 |
engine_errors_info.push(EngineErrorInfo::new(
|
116 |
error.downcast_ref::<EngineError>().unwrap(),
|
117 |
+
engine_name,
|
118 |
));
|
119 |
};
|
120 |
|
|
|
151 |
}
|
152 |
}
|
153 |
|
154 |
+
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
155 |
+
filter_with_lists(
|
156 |
+
&mut result_map,
|
157 |
+
&mut blacklist_map,
|
158 |
+
&file_path(FileType::BlockList)?,
|
159 |
+
)?;
|
160 |
+
|
161 |
+
filter_with_lists(
|
162 |
+
&mut blacklist_map,
|
163 |
+
&mut result_map,
|
164 |
+
&file_path(FileType::AllowList)?,
|
165 |
+
)?;
|
166 |
+
|
167 |
+
drop(blacklist_map);
|
168 |
+
|
169 |
+
let results: Vec<SearchResult> = result_map.into_values().collect();
|
170 |
|
171 |
Ok(SearchResults::new(
|
172 |
results,
|
|
|
174 |
engine_errors_info,
|
175 |
))
|
176 |
}
|
177 |
+
|
178 |
+
fn filter_with_lists(
|
179 |
+
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
180 |
+
resultant_map: &mut HashMap<String, SearchResult>,
|
181 |
+
file_path: &str,
|
182 |
+
) -> Result<(), Box<dyn std::error::Error>> {
|
183 |
+
let mut reader = BufReader::new(File::open(file_path)?);
|
184 |
+
for line in reader.by_ref().lines() {
|
185 |
+
let re = Regex::new(&line?)?;
|
186 |
+
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
187 |
+
if re.is_match(&url.to_lowercase())
|
188 |
+
|| re.is_match(&search_result.title.to_lowercase())
|
189 |
+
|| re.is_match(&search_result.description.to_lowercase())
|
190 |
+
{
|
191 |
+
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
192 |
+
}
|
193 |
+
}
|
194 |
+
}
|
195 |
+
Ok(())
|
196 |
+
}
|
src/server/routes.rs
CHANGED
@@ -8,7 +8,7 @@ use crate::{
|
|
8 |
cache::cacher::RedisCache,
|
9 |
config::parser::Config,
|
10 |
engines::engine_models::EngineHandler,
|
11 |
-
handler::
|
12 |
results::{aggregation_models::SearchResults, aggregator::aggregate},
|
13 |
};
|
14 |
use actix_web::{get, web, HttpRequest, HttpResponse};
|
@@ -215,7 +215,8 @@ async fn results(
|
|
215 |
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
216 |
#[get("/robots.txt")]
|
217 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
218 |
-
let page_content: String =
|
|
|
219 |
Ok(HttpResponse::Ok()
|
220 |
.content_type("text/plain; charset=ascii")
|
221 |
.body(page_content))
|
|
|
8 |
cache::cacher::RedisCache,
|
9 |
config::parser::Config,
|
10 |
engines::engine_models::EngineHandler,
|
11 |
+
handler::paths::{file_path, FileType},
|
12 |
results::{aggregation_models::SearchResults, aggregator::aggregate},
|
13 |
};
|
14 |
use actix_web::{get, web, HttpRequest, HttpResponse};
|
|
|
215 |
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
216 |
#[get("/robots.txt")]
|
217 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
218 |
+
let page_content: String =
|
219 |
+
read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
|
220 |
Ok(HttpResponse::Ok()
|
221 |
.content_type("text/plain; charset=ascii")
|
222 |
.body(page_content))
|
websurfx/allowlist.txt
ADDED
File without changes
|
websurfx/blocklist.txt
ADDED
File without changes
|