Spaces:

alamin655
/

surfx

Running

App Files Files Community

neon_arch committed on Apr 27, 2023

Commit

fc69ace

1 Parent(s): ed13a16

Improving source code documentation.

Browse files

Files changed (7) hide show

src/bin/websurfx.rs +25 -4
src/engines/duckduckgo.rs +1 -1
src/engines/searx.rs +1 -1
src/search_results_handler/aggregation_models.rs +66 -0
src/search_results_handler/aggregator.rs +25 -11
src/search_results_handler/user_agent.rs +7 -1
src/server/routes.rs +35 -0

src/bin/websurfx.rs CHANGED Viewed

@@ -1,3 +1,8 @@
 use std::ops::RangeInclusive;
 use websurfx::server::routes;
@@ -8,6 +13,7 @@ use clap::{command, Parser};
 use env_logger::Env;
 use handlebars::Handlebars;
 #[derive(Parser, Debug, Default)]
 #[clap(author = "neon_arch", version, about = "Websurfx server application")]
 #[command(propagate_version = true)]
@@ -19,8 +25,18 @@ struct CliArgs {
 const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
-// A function to check whether port is valid u32 number or is in range
-// between [1024-65536] otherwise display an appropriate error message.
 fn is_port_in_range(s: &str) -> Result<u16, String> {
     let port: usize = s
         .parse()
@@ -36,7 +52,12 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
     }
 }
-// The function that launches the main server and handle routing functionality
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
     let args = CliArgs::parse();
@@ -68,7 +89,7 @@ async fn main() -> std::io::Result<()> {
             .service(routes::settings) // settings page
             .default_service(web::route().to(routes::not_found)) // error page
     })
-    // Start server on 127.0.0.1:8080
     .bind(("127.0.0.1", args.port))?
     .run()
     .await

+//! Main module of the application
+//!
+//! This module contains the main function which handles the logging of the application to the
+//! stdout and handles the command line arguments provided and launches the `websurfx` server.
 use std::ops::RangeInclusive;
 use websurfx::server::routes;
 use env_logger::Env;
 use handlebars::Handlebars;
+/// A commandline arguments struct.
 #[derive(Parser, Debug, Default)]
 #[clap(author = "neon_arch", version, about = "Websurfx server application")]
 #[command(propagate_version = true)]
 const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
+/// A function to check whether port is a valid u16 number and is in the range
+/// [1024-65535], otherwise display an appropriate error message.
+///
+/// # Arguments
+///
+/// * `s` - Takes a commandline argument port as a string.
+///
+/// # Errors
+///
+/// Checks whether the provided argument to the `--port` commandline option is a valid
+/// u16 argument and returns it as a u16 value, otherwise returns an error with an
+/// appropriate error message.
 fn is_port_in_range(s: &str) -> Result<u16, String> {
     let port: usize = s
         .parse()
     }
 }
+/// The function that launches the main server and registers all the routes of the website.
+///
+/// # Errors
+///
+/// Returns an error if the port is being used by something else on the system and is not
+/// available for being used for other applications.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
     let args = CliArgs::parse();
             .service(routes::settings) // settings page
             .default_service(web::route().to(routes::not_found)) // error page
     })
+    // Start server on 127.0.0.1 with the user provided port number, for example 127.0.0.1:8080.
     .bind(("127.0.0.1", args.port))?
     .run()
     .await

src/engines/duckduckgo.rs CHANGED Viewed

@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
-/// reach the above **upstream search engine** page and also returns error if the scraping
 /// selector fails to initialize"
 pub async fn results(
     query: &str,

 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if there is a failure to
+/// reach the above `upstream search engine` page and also returns an error if the scraping
 /// selector fails to initialize.
 pub async fn results(
     query: &str,

src/engines/searx.rs CHANGED Viewed

@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
-/// reach the above **upstream search engine** page and also returns error if the scraping
 /// selector fails to initialize"
 pub async fn results(
     query: &str,

 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if there is a failure to
+/// reach the above `upstream search engine` page and also returns an error if the scraping
 /// selector fails to initialize.
 pub async fn results(
     query: &str,

src/search_results_handler/aggregation_models.rs CHANGED Viewed

@@ -1,5 +1,19 @@
 use serde::Serialize;
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
@@ -11,6 +25,16 @@ pub struct SearchResult {
 }
 impl SearchResult {
     pub fn new(
         title: String,
         visiting_url: String,
@@ -28,6 +52,17 @@ impl SearchResult {
     }
 }
 #[derive(Clone)]
 pub struct RawSearchResult {
     pub title: String,
@@ -37,6 +72,15 @@ pub struct RawSearchResult {
 }
 impl RawSearchResult {
     pub fn new(
         title: String,
         visiting_url: String,
@@ -50,6 +94,12 @@ impl RawSearchResult {
             engine,
         }
     }
     pub fn add_engines(&mut self, engine: String) {
         self.engine.push(engine)
     }
@@ -59,6 +109,14 @@ impl RawSearchResult {
     }
 }
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
@@ -67,6 +125,14 @@ pub struct SearchResults {
 }
 impl SearchResults {
     pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
         SearchResults {
             results,

+//! This module provides public models for handling, storing and serializing of search results
+//! data scraped from the upstream search engines.
 use serde::Serialize;
+/// A named struct to store and serialize the individual search result from all the scraped
+/// and aggregated search results from the upstream search engines.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when clicked on it (href url in html in simple
+/// words).
+/// * `url` - The url to be displayed below the search result title in html.
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines from which these results were provided.
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
 }
 impl SearchResult {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `url` - The url to be displayed below the search result title in html.
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
     pub fn new(
         title: String,
         visiting_url: String,
     }
 }
+/// A named struct to store the raw scraped search results from the upstream search engines
+/// before aggregating them. It derives the Clone trait which is needed to write idiomatic
+/// rust using `Iterators`.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when clicked on it
+/// (href url in html in simple words).
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines from which these results were provided.
 #[derive(Clone)]
 pub struct RawSearchResult {
     pub title: String,
 }
 impl RawSearchResult {
+    /// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
     pub fn new(
         title: String,
         visiting_url: String,
             engine,
         }
     }
+    /// Appends the given engine name to the `engine` field of this `RawSearchResult`.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine` - The engine name, provided as a `String`, that is pushed onto `self.engine`.
     pub fn add_engines(&mut self, engine: String) {
         self.engine.push(engine)
     }
     }
 }
+/// A named struct to store and serialize all the search results scraped and aggregated
+/// from the upstream search engines.
+///
+/// # Fields
+///
+/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
+/// `SearchResult` structs.
+/// * `page_query` - Stores the current page's search query `q` provided in the search url.
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
 }
 impl SearchResults {
+    /// Constructs a new `SearchResults` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `results` - Takes a vector of individual serializable `SearchResult` structs
+    /// to be stored in the `results` field.
+    /// * `page_query` - Takes the current page's search query `q` provided in
+    /// the search url.
     pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
         SearchResults {
             results,

src/search_results_handler/aggregator.rs CHANGED Viewed

@@ -1,3 +1,6 @@
 use std::collections::HashMap;
 use super::{
@@ -7,17 +10,28 @@ use super::{
 use crate::engines::{duckduckgo, searx};
-// A function that aggregates all the scraped results from the above upstream engines and
-// then removes duplicate results and if two results are found to be from two or more engines
-// then puts their names together to show the results are fetched from these upstream engines
-// and then removes all data from the HashMap and puts into a struct of all results aggregated
-// into a vector and also adds the query used into the struct this is neccessory because otherwise
-// the search bar in search remains empty if searched from the query url
-//
-// For Example:
-//
-// If you search from the url like *https://127.0.0.1/search?q=huston* then the search bar should
-// contain the word huston and not remain empty.
 pub async fn aggregate(
     query: &str,
     page: Option<u32>,

+//! This module provides the functionality to scrape and gather all the results from the upstream
+//! search engines and then remove duplicate results.
 use std::collections::HashMap;
 use super::{
 use crate::engines::{duckduckgo, searx};
+/// A function that aggregates all the scraped results from the above upstream engines and
+/// then removes duplicate results. If a result is found to be from two or more engines, it
+/// puts their names together to show that the result was fetched from these upstream engines.
+/// It then removes all data from the HashMap and puts it into a struct of all results aggregated
+/// into a vector, and also adds the query used into the struct. This is necessary because
+/// otherwise the search bar in the search page remains empty if searched from the query url.
+///
+/// # Example
+///
+/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
+/// contain the word huston and not remain empty.
+///
+/// # Arguments
+///
+/// * `query` - Accepts a string to query with the above upstream search engines.
+/// * `page` - Accepts an `Option<u32>` which could either be `None` or a valid page number.
+///
+/// # Errors
+///
+/// Returns a reqwest or scraping selector error if any error occurs in the `results`
+/// function of either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults`
+/// struct containing appropriate values.
 pub async fn aggregate(
     query: &str,
     page: Option<u32>,

src/search_results_handler/user_agent.rs CHANGED Viewed

@@ -1,6 +1,12 @@
 use fake_useragent::{Browsers, UserAgentsBuilder};
-// A function to generate random user agent to improve privacy of the user.
 pub fn random_user_agent() -> String {
     UserAgentsBuilder::new()
         .cache(false)

+//! This module provides the functionality to generate random user agent string.
 use fake_useragent::{Browsers, UserAgentsBuilder};
+/// A function to generate random user agent to improve privacy of the user.
+///
+/// # Returns
+///
+/// A randomly generated user agent string.
 pub fn random_user_agent() -> String {
     UserAgentsBuilder::new()
         .cache(false)

src/server/routes.rs CHANGED Viewed

@@ -1,3 +1,7 @@
 use std::fs::read_to_string;
 use crate::search_results_handler::aggregator::aggregate;
@@ -5,12 +9,21 @@ use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use serde::Deserialize;
 #[derive(Debug, Deserialize)]
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
 }
 #[get("/")]
 pub async fn index(
     hbs: web::Data<Handlebars<'_>>,
@@ -19,6 +32,8 @@ pub async fn index(
     Ok(HttpResponse::Ok().body(page_content))
 }
 pub async fn not_found(
     hbs: web::Data<Handlebars<'_>>,
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
@@ -29,6 +44,20 @@ pub async fn not_found(
         .body(page_content))
 }
 #[get("/search")]
 pub async fn search(
     hbs: web::Data<Handlebars<'_>>,
@@ -54,6 +83,7 @@ pub async fn search(
     }
 }
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let page_content: String = read_to_string("./public/robots.txt")?;
@@ -62,6 +92,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std:
         .body(page_content))
 }
 #[get("/about")]
 pub async fn about(
     hbs: web::Data<Handlebars<'_>>,
@@ -70,6 +101,7 @@ pub async fn about(
     Ok(HttpResponse::Ok().body(page_content))
 }
 #[get("/settings")]
 pub async fn settings(
     hbs: web::Data<Handlebars<'_>>,
@@ -77,3 +109,6 @@ pub async fn settings(
     let page_content: String = hbs.render("settings", &"")?;
     Ok(HttpResponse::Ok().body(page_content))
 }

+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
 use std::fs::read_to_string;
 use crate::search_results_handler::aggregator::aggregate;
 use handlebars::Handlebars;
 use serde::Deserialize;
+/// A named struct which stores the user provided search parameters of the search url.
+/// It derives `Deserialize`, so it is populated by deserialization rather than by hand.
+///
+/// # Fields
+///
+/// * `q` - Stores the search parameter `q` (or the query, in simple words) of the
+/// search url as an `Option<String>`.
+/// * `page` - Stores the search parameter `page` (or the page number, in simple words)
+/// of the search url as an `Option<u32>`.
 #[derive(Debug, Deserialize)]
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
 }
+/// Handles the route of index page or main page of the `websurfx` meta search engine website.
 #[get("/")]
 pub async fn index(
     hbs: web::Data<Handlebars<'_>>,
     Ok(HttpResponse::Ok().body(page_content))
 }
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website, essentially the 404 error page.
 pub async fn not_found(
     hbs: web::Data<Handlebars<'_>>,
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
         .body(page_content))
 }
+/// Handles the route of search page of the `websurfx` meta search engine website and it takes
+/// two search url parameters `q` and `page` where `page` parameter is optional.
+///
+/// # Example
+///
+/// ```bash
+/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
+/// ```
+///
+/// Or
+///
+/// ```bash
+/// curl "http://127.0.0.1:8080/search?q=sweden"
+/// ```
 #[get("/search")]
 pub async fn search(
     hbs: web::Data<Handlebars<'_>>,
     }
 }
+/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let page_content: String = read_to_string("./public/robots.txt")?;
         .body(page_content))
 }
+/// Handles the route of about page of the `websurfx` meta search engine website.
 #[get("/about")]
 pub async fn about(
     hbs: web::Data<Handlebars<'_>>,
     Ok(HttpResponse::Ok().body(page_content))
 }
+/// Handles the route of settings page of the `websurfx` meta search engine website.
 #[get("/settings")]
 pub async fn settings(
     hbs: web::Data<Handlebars<'_>>,
     let page_content: String = hbs.render("settings", &"")?;
     Ok(HttpResponse::Ok().body(page_content))
 }
+// TODO: Write tests for testing parameters for search function that if provided with something
+// other than u32 like alphabets and special characters then it should panic