neon_arch
committed on
Commit
•
fc69ace
1
Parent(s):
ed13a16
Improving source code documentation.
Browse files- src/bin/websurfx.rs +25 -4
- src/engines/duckduckgo.rs +1 -1
- src/engines/searx.rs +1 -1
- src/search_results_handler/aggregation_models.rs +66 -0
- src/search_results_handler/aggregator.rs +25 -11
- src/search_results_handler/user_agent.rs +7 -1
- src/server/routes.rs +35 -0
src/bin/websurfx.rs
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
use std::ops::RangeInclusive;
|
2 |
|
3 |
use websurfx::server::routes;
|
@@ -8,6 +13,7 @@ use clap::{command, Parser};
|
|
8 |
use env_logger::Env;
|
9 |
use handlebars::Handlebars;
|
10 |
|
|
|
11 |
#[derive(Parser, Debug, Default)]
|
12 |
#[clap(author = "neon_arch", version, about = "Websurfx server application")]
|
13 |
#[command(propagate_version = true)]
|
@@ -19,8 +25,18 @@ struct CliArgs {
|
|
19 |
|
20 |
const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
|
21 |
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
fn is_port_in_range(s: &str) -> Result<u16, String> {
|
25 |
let port: usize = s
|
26 |
.parse()
|
@@ -36,7 +52,12 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
|
|
36 |
}
|
37 |
}
|
38 |
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
40 |
#[actix_web::main]
|
41 |
async fn main() -> std::io::Result<()> {
|
42 |
let args = CliArgs::parse();
|
@@ -68,7 +89,7 @@ async fn main() -> std::io::Result<()> {
|
|
68 |
.service(routes::settings) // settings page
|
69 |
.default_service(web::route().to(routes::not_found)) // error page
|
70 |
})
|
71 |
-
// Start server on 127.0.0.1:8080
|
72 |
.bind(("127.0.0.1", args.port))?
|
73 |
.run()
|
74 |
.await
|
|
|
1 |
+
//! Main module of the application
|
2 |
+
//!
|
3 |
+
//! This module contains the main function which handles the logging of the application to the
|
4 |
+
//! stdout and handles the command line arguments provided and launches the `websurfx` server.
|
5 |
+
|
6 |
use std::ops::RangeInclusive;
|
7 |
|
8 |
use websurfx::server::routes;
|
|
|
13 |
use env_logger::Env;
|
14 |
use handlebars::Handlebars;
|
15 |
|
16 |
+
/// A commandline arguments struct.
|
17 |
#[derive(Parser, Debug, Default)]
|
18 |
#[clap(author = "neon_arch", version, about = "Websurfx server application")]
|
19 |
#[command(propagate_version = true)]
|
|
|
25 |
|
26 |
const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
|
27 |
|
28 |
+
/// A function to check whether port is a valid u16 number and is in range
|
29 |
+
/// between [1024-65535] otherwise display an appropriate error message.
|
30 |
+
///
|
31 |
+
/// # Arguments
|
32 |
+
///
|
33 |
+
/// * `s` - Takes a commandline argument port as a string.
|
34 |
+
///
|
35 |
+
/// # Errors
|
36 |
+
///
|
37 |
+
/// Check whether the provided argument to `--port` commandline option is a valid
|
38 |
+
/// u16 argument and returns it as a u16 value otherwise returns an error with an
|
39 |
+
/// appropriate error message.
|
40 |
fn is_port_in_range(s: &str) -> Result<u16, String> {
|
41 |
let port: usize = s
|
42 |
.parse()
|
|
|
52 |
}
|
53 |
}
|
54 |
|
55 |
+
/// The function that launches the main server and registers all the routes of the website.
|
56 |
+
///
|
57 |
+
/// # Errors
|
58 |
+
///
|
59 |
+
/// Returns an error if the port is being used by something else on the system and is not
|
60 |
+
/// available for being used for other applications.
|
61 |
#[actix_web::main]
|
62 |
async fn main() -> std::io::Result<()> {
|
63 |
let args = CliArgs::parse();
|
|
|
89 |
.service(routes::settings) // settings page
|
90 |
.default_service(web::route().to(routes::not_found)) // error page
|
91 |
})
|
92 |
+
// Start server on 127.0.0.1 with the user provided port number, for example 127.0.0.1:8080.
|
93 |
.bind(("127.0.0.1", args.port))?
|
94 |
.run()
|
95 |
.await
|
src/engines/duckduckgo.rs
CHANGED
@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
|
|
23 |
/// # Errors
|
24 |
///
|
25 |
/// Returns a reqwest error if the user is not connected to the internet or if their is failure to
|
26 |
-
/// reach the above
|
27 |
/// selector fails to initialize"
|
28 |
pub async fn results(
|
29 |
query: &str,
|
|
|
23 |
/// # Errors
|
24 |
///
|
25 |
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure to
|
26 |
+
/// reach the above `upstream search engine` page and also returns error if the scraping
|
27 |
/// selector fails to initialize.
|
28 |
pub async fn results(
|
29 |
query: &str,
|
src/engines/searx.rs
CHANGED
@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
|
|
23 |
/// # Errors
|
24 |
///
|
25 |
/// Returns a reqwest error if the user is not connected to the internet or if their is failure to
|
26 |
-
/// reach the above
|
27 |
/// selector fails to initialize"
|
28 |
pub async fn results(
|
29 |
query: &str,
|
|
|
23 |
/// # Errors
|
24 |
///
|
25 |
/// Returns a reqwest error if the user is not connected to the internet or if there is a failure to
|
26 |
+
/// reach the above `upstream search engine` page and also returns error if the scraping
|
27 |
/// selector fails to initialize.
|
28 |
pub async fn results(
|
29 |
query: &str,
|
src/search_results_handler/aggregation_models.rs
CHANGED
@@ -1,5 +1,19 @@
|
|
|
|
|
|
|
|
1 |
use serde::Serialize;
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
#[derive(Debug, Serialize)]
|
4 |
#[serde(rename_all = "camelCase")]
|
5 |
pub struct SearchResult {
|
@@ -11,6 +25,16 @@ pub struct SearchResult {
|
|
11 |
}
|
12 |
|
13 |
impl SearchResult {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
pub fn new(
|
15 |
title: String,
|
16 |
visiting_url: String,
|
@@ -28,6 +52,17 @@ impl SearchResult {
|
|
28 |
}
|
29 |
}
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
#[derive(Clone)]
|
32 |
pub struct RawSearchResult {
|
33 |
pub title: String,
|
@@ -37,6 +72,15 @@ pub struct RawSearchResult {
|
|
37 |
}
|
38 |
|
39 |
impl RawSearchResult {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
pub fn new(
|
41 |
title: String,
|
42 |
visiting_url: String,
|
@@ -50,6 +94,12 @@ impl RawSearchResult {
|
|
50 |
engine,
|
51 |
}
|
52 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
pub fn add_engines(&mut self, engine: String) {
|
54 |
self.engine.push(engine)
|
55 |
}
|
@@ -59,6 +109,14 @@ impl RawSearchResult {
|
|
59 |
}
|
60 |
}
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
#[derive(Debug, Serialize)]
|
63 |
#[serde(rename_all = "camelCase")]
|
64 |
pub struct SearchResults {
|
@@ -67,6 +125,14 @@ pub struct SearchResults {
|
|
67 |
}
|
68 |
|
69 |
impl SearchResults {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
|
71 |
SearchResults {
|
72 |
results,
|
|
|
1 |
+
//! This module provides public models for handling, storing and serializing of search results
|
2 |
+
//! data scraped from the upstream search engines.
|
3 |
+
|
4 |
use serde::Serialize;
|
5 |
|
6 |
+
/// A named struct to store and serialize the individual search result from all the scraped
|
7 |
+
/// and aggregated search results from the upstream search engines.
|
8 |
+
///
|
9 |
+
/// # Fields
|
10 |
+
///
|
11 |
+
/// * `title` - The title of the search result.
|
12 |
+
/// * `visiting_url` - The url which is accessed when clicked on it (href url in html in simple
|
13 |
+
/// words).
|
14 |
+
/// * `url` - The url to be displayed below the search result title in html.
|
15 |
+
/// * `description` - The description of the search result.
|
16 |
+
/// * `engine` - The names of the upstream engines from which these results were provided.
|
17 |
#[derive(Debug, Serialize)]
|
18 |
#[serde(rename_all = "camelCase")]
|
19 |
pub struct SearchResult {
|
|
|
25 |
}
|
26 |
|
27 |
impl SearchResult {
|
28 |
+
/// Constructs a new `SearchResult` with the given arguments needed for the struct.
|
29 |
+
///
|
30 |
+
/// # Arguments
|
31 |
+
///
|
32 |
+
/// * `title` - The title of the search result.
|
33 |
+
/// * `visiting_url` - The url which is accessed when clicked on it
|
34 |
+
/// (href url in html in simple words).
|
35 |
+
/// * `url` - The url to be displayed below the search result title in html.
|
36 |
+
/// * `description` - The description of the search result.
|
37 |
+
/// * `engine` - The names of the upstream engines from which these results were provided.
|
38 |
pub fn new(
|
39 |
title: String,
|
40 |
visiting_url: String,
|
|
|
52 |
}
|
53 |
}
|
54 |
|
55 |
+
/// A named struct to store the raw search results scraped from the
|
56 |
+
/// upstream search engines before aggregating them. It derives the `Clone` trait which is needed
|
57 |
+
/// to write idiomatic rust using `Iterators`.
|
58 |
+
///
|
59 |
+
/// # Fields
|
60 |
+
///
|
61 |
+
/// * `title` - The title of the search result.
|
62 |
+
/// * `visiting_url` - The url which is accessed when clicked on it
|
63 |
+
/// (href url in html in simple words).
|
64 |
+
/// * `description` - The description of the search result.
|
65 |
+
/// * `engine` - The names of the upstream engines from which these results were provided.
|
66 |
#[derive(Clone)]
|
67 |
pub struct RawSearchResult {
|
68 |
pub title: String,
|
|
|
72 |
}
|
73 |
|
74 |
impl RawSearchResult {
|
75 |
+
/// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
|
76 |
+
///
|
77 |
+
/// # Arguments
|
78 |
+
///
|
79 |
+
/// * `title` - The title of the search result.
|
80 |
+
/// * `visiting_url` - The url which is accessed when clicked on it
|
81 |
+
/// (href url in html in simple words).
|
82 |
+
/// * `description` - The description of the search result.
|
83 |
+
/// * `engine` - The names of the upstream engines from which these results were provided.
|
84 |
pub fn new(
|
85 |
title: String,
|
86 |
visiting_url: String,
|
|
|
94 |
engine,
|
95 |
}
|
96 |
}
|
97 |
+
|
98 |
+
/// A function which adds the engine name provided as a string into a vector of strings.
|
99 |
+
///
|
100 |
+
/// # Arguments
|
101 |
+
///
|
102 |
+
/// * `engine` - Takes an engine name provided as a String.
|
103 |
pub fn add_engines(&mut self, engine: String) {
|
104 |
self.engine.push(engine)
|
105 |
}
|
|
|
109 |
}
|
110 |
}
|
111 |
|
112 |
+
/// A named struct to store and serialize all the search results scraped and aggregated
|
113 |
+
/// from the upstream search engines.
|
114 |
+
///
|
115 |
+
/// # Fields
|
116 |
+
///
|
117 |
+
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
|
118 |
+
/// `SearchResult` structs.
|
119 |
+
/// * `page_query` - Stores the current page's search query `q` provided in the search url.
|
120 |
#[derive(Debug, Serialize)]
|
121 |
#[serde(rename_all = "camelCase")]
|
122 |
pub struct SearchResults {
|
|
|
125 |
}
|
126 |
|
127 |
impl SearchResults {
|
128 |
+
/// Constructs a new `SearchResults` with the given arguments needed for the struct.
|
129 |
+
///
|
130 |
+
/// # Arguments
|
131 |
+
///
|
132 |
+
/// * `results` - Takes an argument of individual serializable `SearchResult` struct
|
133 |
+
/// and stores it into a vector of `SearchResult` structs.
|
134 |
+
/// * `page_query` - Takes an argument of current page's search query `q` provided in
|
135 |
+
/// the search url.
|
136 |
pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
|
137 |
SearchResults {
|
138 |
results,
|
src/search_results_handler/aggregator.rs
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
use std::collections::HashMap;
|
2 |
|
3 |
use super::{
|
@@ -7,17 +10,28 @@ use super::{
|
|
7 |
|
8 |
use crate::engines::{duckduckgo, searx};
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
pub async fn aggregate(
|
22 |
query: &str,
|
23 |
page: Option<u32>,
|
|
|
1 |
+
//! This module provides the functionality to scrape and gather all the results from the upstream
|
2 |
+
//! search engines and then removes duplicate results.
|
3 |
+
|
4 |
use std::collections::HashMap;
|
5 |
|
6 |
use super::{
|
|
|
10 |
|
11 |
use crate::engines::{duckduckgo, searx};
|
12 |
|
13 |
+
/// A function that aggregates all the scraped results from the above upstream engines and
|
14 |
+
/// then removes duplicate results and if two results are found to be from two or more engines
|
15 |
+
/// then puts their names together to show the results are fetched from these upstream engines
|
16 |
+
/// and then removes all data from the HashMap and puts into a struct of all results aggregated
|
17 |
+
/// into a vector and also adds the query used into the struct. This is necessary because
|
18 |
+
/// otherwise the search bar in search remains empty if searched from the query url
|
19 |
+
///
|
20 |
+
/// # Example:
|
21 |
+
///
|
22 |
+
/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
|
23 |
+
/// contain the word huston and not remain empty.
|
24 |
+
///
|
25 |
+
/// # Arguments
|
26 |
+
///
|
27 |
+
/// * `query` - Accepts a string to query with the above upstream search engines.
|
28 |
+
/// * `page` - Accepts an `Option<u32>` which could either be a `None` or a valid page number.
|
29 |
+
///
|
30 |
+
/// # Errors
|
31 |
+
///
|
32 |
+
/// Returns a reqwest or scraping selector error if any error occurs in the results
|
33 |
+
/// function in either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
|
34 |
+
/// containing appropriate values.
|
35 |
pub async fn aggregate(
|
36 |
query: &str,
|
37 |
page: Option<u32>,
|
src/search_results_handler/user_agent.rs
CHANGED
@@ -1,6 +1,12 @@
|
|
|
|
|
|
1 |
use fake_useragent::{Browsers, UserAgentsBuilder};
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
4 |
pub fn random_user_agent() -> String {
|
5 |
UserAgentsBuilder::new()
|
6 |
.cache(false)
|
|
|
1 |
+
//! This module provides the functionality to generate random user agent string.
|
2 |
+
|
3 |
use fake_useragent::{Browsers, UserAgentsBuilder};
|
4 |
|
5 |
+
/// A function to generate random user agent to improve privacy of the user.
|
6 |
+
///
|
7 |
+
/// # Returns
|
8 |
+
///
|
9 |
+
/// A randomly generated user agent string.
|
10 |
pub fn random_user_agent() -> String {
|
11 |
UserAgentsBuilder::new()
|
12 |
.cache(false)
|
src/server/routes.rs
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
1 |
use std::fs::read_to_string;
|
2 |
|
3 |
use crate::search_results_handler::aggregator::aggregate;
|
@@ -5,12 +9,21 @@ use actix_web::{get, web, HttpRequest, HttpResponse};
|
|
5 |
use handlebars::Handlebars;
|
6 |
use serde::Deserialize;
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
#[derive(Debug, Deserialize)]
|
9 |
struct SearchParams {
|
10 |
q: Option<String>,
|
11 |
page: Option<u32>,
|
12 |
}
|
13 |
|
|
|
14 |
#[get("/")]
|
15 |
pub async fn index(
|
16 |
hbs: web::Data<Handlebars<'_>>,
|
@@ -19,6 +32,8 @@ pub async fn index(
|
|
19 |
Ok(HttpResponse::Ok().body(page_content))
|
20 |
}
|
21 |
|
|
|
|
|
22 |
pub async fn not_found(
|
23 |
hbs: web::Data<Handlebars<'_>>,
|
24 |
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
@@ -29,6 +44,20 @@ pub async fn not_found(
|
|
29 |
.body(page_content))
|
30 |
}
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
#[get("/search")]
|
33 |
pub async fn search(
|
34 |
hbs: web::Data<Handlebars<'_>>,
|
@@ -54,6 +83,7 @@ pub async fn search(
|
|
54 |
}
|
55 |
}
|
56 |
|
|
|
57 |
#[get("/robots.txt")]
|
58 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
59 |
let page_content: String = read_to_string("./public/robots.txt")?;
|
@@ -62,6 +92,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
|
|
62 |
.body(page_content))
|
63 |
}
|
64 |
|
|
|
65 |
#[get("/about")]
|
66 |
pub async fn about(
|
67 |
hbs: web::Data<Handlebars<'_>>,
|
@@ -70,6 +101,7 @@ pub async fn about(
|
|
70 |
Ok(HttpResponse::Ok().body(page_content))
|
71 |
}
|
72 |
|
|
|
73 |
#[get("/settings")]
|
74 |
pub async fn settings(
|
75 |
hbs: web::Data<Handlebars<'_>>,
|
@@ -77,3 +109,6 @@ pub async fn settings(
|
|
77 |
let page_content: String = hbs.render("settings", &"")?;
|
78 |
Ok(HttpResponse::Ok().body(page_content))
|
79 |
}
|
|
|
|
|
|
|
|
1 |
+
//! This module provides the functionality to handle different routes of the `websurfx`
|
2 |
+
//! meta search engine website and provide appropriate response to each route/page
|
3 |
+
//! when requested.
|
4 |
+
|
5 |
use std::fs::read_to_string;
|
6 |
|
7 |
use crate::search_results_handler::aggregator::aggregate;
|
|
|
9 |
use handlebars::Handlebars;
|
10 |
use serde::Deserialize;
|
11 |
|
12 |
+
/// A named struct which deserializes all the user provided search parameters and stores them.
|
13 |
+
///
|
14 |
+
/// # Fields
|
15 |
+
///
|
16 |
+
/// * `q` - It stores the search parameter option `q` (or query in simple words)
|
17 |
+
/// of the search url.
|
18 |
+
/// * `page` - It stores the search parameter `page` (or pageno in simple words)
|
19 |
+
/// of the search url.
|
20 |
#[derive(Debug, Deserialize)]
|
21 |
struct SearchParams {
|
22 |
q: Option<String>,
|
23 |
page: Option<u32>,
|
24 |
}
|
25 |
|
26 |
+
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
27 |
#[get("/")]
|
28 |
pub async fn index(
|
29 |
hbs: web::Data<Handlebars<'_>>,
|
|
|
32 |
Ok(HttpResponse::Ok().body(page_content))
|
33 |
}
|
34 |
|
35 |
+
/// Handles the route of any other accessed route/page which is not provided by the
|
36 |
+
/// website essentially the 404 error page.
|
37 |
pub async fn not_found(
|
38 |
hbs: web::Data<Handlebars<'_>>,
|
39 |
) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
|
|
44 |
.body(page_content))
|
45 |
}
|
46 |
|
47 |
+
/// Handles the route of search page of the `websurfx` meta search engine website and it takes
|
48 |
+
/// two search url parameters `q` and `page` where `page` parameter is optional.
|
49 |
+
///
|
50 |
+
/// # Example
|
51 |
+
///
|
52 |
+
/// ```bash
|
53 |
+
/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
|
54 |
+
/// ```
|
55 |
+
///
|
56 |
+
/// Or
|
57 |
+
///
|
58 |
+
/// ```bash
|
59 |
+
/// curl "http://127.0.0.1:8080/search?q=sweden"
|
60 |
+
/// ```
|
61 |
#[get("/search")]
|
62 |
pub async fn search(
|
63 |
hbs: web::Data<Handlebars<'_>>,
|
|
|
83 |
}
|
84 |
}
|
85 |
|
86 |
+
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
87 |
#[get("/robots.txt")]
|
88 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
89 |
let page_content: String = read_to_string("./public/robots.txt")?;
|
|
|
92 |
.body(page_content))
|
93 |
}
|
94 |
|
95 |
+
/// Handles the route of about page of the `websurfx` meta search engine website.
|
96 |
#[get("/about")]
|
97 |
pub async fn about(
|
98 |
hbs: web::Data<Handlebars<'_>>,
|
|
|
101 |
Ok(HttpResponse::Ok().body(page_content))
|
102 |
}
|
103 |
|
104 |
+
/// Handles the route of settings page of the `websurfx` meta search engine website.
|
105 |
#[get("/settings")]
|
106 |
pub async fn settings(
|
107 |
hbs: web::Data<Handlebars<'_>>,
|
|
|
109 |
let page_content: String = hbs.render("settings", &"")?;
|
110 |
Ok(HttpResponse::Ok().body(page_content))
|
111 |
}
|
112 |
+
|
113 |
+
// TODO: Write tests for testing parameters for search function that if provided with something
|
114 |
+
// other than u32 like alphabets and special characters then it should panic
|