Merge branch 'rolling' into ability-to-have-config-on-diff-paths
Files changed:
- .github/workflows/rust_format.yml +10 -1
- public/images/websurfx_logo.png +0 -0
- public/templates/index.html +1 -1
- src/cache/mod.rs +1 -1
- src/config_parser/parser.rs +18 -0
- src/search_results_handler/aggregation_models.rs +1 -1
- src/search_results_handler/aggregator.rs +7 -3
- src/server/routes.rs +6 -6
- websurfx/config.lua +4 -0
.github/workflows/rust_format.yml
CHANGED
@@ -19,7 +19,16 @@ jobs:
           profile: minimal
           toolchain: stable
           components: rustfmt, clippy
-
+      - name: Format
+        uses: actions-rs/cargo@v1
+        with:
+          command: fmt
+          args: -- --check
+      - name: Clippy
+        uses: actions-rs/cargo@v1
+        with:
+          command: clippy
+          args: --all-features --all-targets --all
       - name: Run cargo check
         uses: actions-rs/cargo@v1
         with:
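Review note: the two new steps make the workflow fail fast on formatting and lint errors before the build runs. The same checks can be reproduced locally with `cargo fmt -- --check` and `cargo clippy --all-features --all-targets --all`, mirroring the `args` passed to `actions-rs/cargo` above.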
public/images/websurfx_logo.png
ADDED
public/templates/index.html
CHANGED
@@ -1,6 +1,6 @@
 {{>header this}}
 <main class="search-container">
-    <img src="images/
+    <img src="../images/websurfx_logo.png" alt="Websurfx meta-search engine logo" />
     {{>search_bar}}
 </main>
 <script src="static/index.js"></script>
src/cache/mod.rs
CHANGED
@@ -1 +1 @@
-pub mod cacher;
+pub mod cacher;
src/config_parser/parser.rs
CHANGED
@@ -24,6 +24,14 @@ pub struct Config {
     pub binding_ip_addr: String,
     pub style: Style,
     pub redis_connection_url: String,
+    pub aggregator: AggreatorConfig,
+}
+
+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggreatorConfig {
+    /// Whether to introduce a random delay before sending the request to the search engine.
+    pub random_delay: bool,
 }
 
 impl Config {
@@ -45,6 +53,15 @@ impl Config {
             )?)
             .exec()?;
 
+        let production_use = globals.get::<_, bool>("production_use")?;
+        let aggregator_config = if production_use {
+            AggreatorConfig { random_delay: true }
+        } else {
+            AggreatorConfig {
+                random_delay: false,
+            }
+        };
+
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?,
@@ -53,6 +70,7 @@ impl Config {
                 globals.get::<_, String>("colorscheme")?,
            ),
            redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
+           aggregator: aggregator_config,
        })
    })
 }
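Review note: the `if production_use { .. } else { .. }` block above only copies the boolean through. A minimal standalone sketch of the equivalent mapping (illustrative only; `aggregator_config` is a hypothetical helper, not part of the crate, and `AggreatorConfig` is redeclared here with `Debug`/`PartialEq` so the asserts compile):

```rust
#[derive(Clone, Debug, PartialEq)]
pub struct AggreatorConfig {
    /// Whether to introduce a random delay before sending the request to the search engine.
    pub random_delay: bool,
}

// Equivalent to the commit's if/else: the flag maps straight through to the field.
fn aggregator_config(production_use: bool) -> AggreatorConfig {
    AggreatorConfig { random_delay: production_use }
}

fn main() {
    assert_eq!(aggregator_config(true), AggreatorConfig { random_delay: true });
    assert_eq!(aggregator_config(false), AggreatorConfig { random_delay: false });
}
```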
src/search_results_handler/aggregation_models.rs
CHANGED
@@ -116,7 +116,7 @@ impl RawSearchResult {
     }
 }
 
-/// A named struct to store, serialize, deserialize the all the search results scraped and
+/// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
 ///
 /// # Fields
src/search_results_handler/aggregator.rs
CHANGED
@@ -29,6 +29,7 @@ use crate::engines::{duckduckgo, searx};
 ///
 /// * `query` - Accepts a string to query with the above upstream search engines.
 /// * `page` - Accepts an u32 page number.
+/// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
 ///
 /// # Error
 ///
@@ -38,14 +39,17 @@ use crate::engines::{duckduckgo, searx};
 pub async fn aggregate(
     query: &str,
     page: u32,
+    random_delay: bool,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: String = random_user_agent();
     let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
 
     // Add a random delay before making the request.
-    let mut rng = rand::thread_rng();
-    let delay_secs = rng.gen_range(1..10);
-    std::thread::sleep(Duration::from_secs(delay_secs));
+    if random_delay {
+        let mut rng = rand::thread_rng();
+        let delay_secs = rng.gen_range(1..10);
+        std::thread::sleep(Duration::from_secs(delay_secs));
+    }
 
     // fetch results from upstream search engines simultaneously/concurrently.
     let (ddg_map_results, searx_map_results) = join!(
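Review note: `std::thread::sleep` inside an `async fn` blocks the executor thread for the whole delay. A non-blocking variant is sketched below, assuming a Tokio runtime is available (`maybe_delay` is a hypothetical helper; the diff alone does not confirm which async runtime the crate uses):

```rust
use rand::Rng;
use std::time::Duration;

// Same observable behaviour as the commit's delay block, but the thread is
// yielded back to the runtime while waiting (tokio is an assumption here,
// not shown in the diff).
async fn maybe_delay(random_delay: bool) {
    if random_delay {
        // ThreadRng is created and dropped before the await point,
        // so the future stays Send.
        let delay_secs = rand::thread_rng().gen_range(1..10);
        tokio::time::sleep(Duration::from_secs(delay_secs)).await;
    }
}

#[tokio::main]
async fn main() {
    maybe_delay(true).await; // pauses 1-9 seconds without blocking the thread
    println!("request would be sent now");
}
```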
src/server/routes.rs
CHANGED
@@ -81,10 +81,10 @@ pub async fn search(
             .insert_header(("location", "/"))
             .finish())
     } else {
-        let page_url: String;
+        let page_url: String; // Declare the page_url variable without initializing it
 
         // ...
-
+
         let page = match params.page {
             Some(page_number) => {
                 if page_number <= 1 {
@@ -98,7 +98,7 @@ pub async fn search(
                     "http://{}:{}/search?q={}&page={}",
                     config.binding_ip_addr, config.port, query, page_number
                 );
-
+
                 page_number
             }
         }
@@ -110,11 +110,11 @@ pub async fn search(
                     req.uri(),
                     1
                 );
-
+
                 1
             }
         };
-
+
         // fetch the cached results json.
         let cached_results_json = redis_cache.cached_results_json(&page_url);
         // check if fetched results was indeed fetched or it was an error and if so
@@ -127,7 +127,7 @@ pub async fn search(
             }
             Err(_) => {
                 let mut results_json: crate::search_results_handler::aggregation_models::SearchResults =
-                    aggregate(query, page).await?;
+                    aggregate(query, page, config.aggregator.random_delay).await?;
                 results_json.add_style(config.style.clone());
                 redis_cache
                     .cache_results(serde_json::to_string(&results_json)?, &page_url)?;
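Review note: aside from whitespace-only hunks, the substantive change in this file is in the `Err(_)` cache-miss arm, where `aggregate` now receives the parsed `config.aggregator.random_delay` flag instead of the delay being unconditional.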
websurfx/config.lua
CHANGED
@@ -19,3 +19,7 @@ theme = "simple" -- the theme name which should be used for the website
 
 -- Caching
 redis_connection_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
+
+production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users)
+-- if production_use is set to true
+-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.