neon_arch committed on
Commit
f94ac50
·
1 Parent(s): bb61ee3

Refactoring code and separating code into files for better maintainability

Browse files
src/bin/websurfx.rs CHANGED
@@ -19,6 +19,8 @@ struct CliArgs {
19
 
20
  const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
21
 
 
 
22
  fn is_port_in_range(s: &str) -> Result<u16, String> {
23
  let port: usize = s
24
  .parse()
@@ -39,6 +41,7 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
39
  async fn main() -> std::io::Result<()> {
40
  let args = CliArgs::parse();
41
 
 
42
  env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
43
 
44
  log::info!("started server on port {}", args.port);
@@ -54,7 +57,7 @@ async fn main() -> std::io::Result<()> {
54
  HttpServer::new(move || {
55
  App::new()
56
  .app_data(handlebars_ref.clone())
57
- .wrap(Logger::default())
58
  // Serve images and static files (css and js files).
59
  .service(fs::Files::new("/static", "./public/static").show_files_listing())
60
  .service(fs::Files::new("/images", "./public/images").show_files_listing())
 
19
 
20
  const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
21
 
22
+ // A function to check whether the port is a valid number and is in range
23
+ // between [1024-65535], otherwise return an appropriate error message.
24
  fn is_port_in_range(s: &str) -> Result<u16, String> {
25
  let port: usize = s
26
  .parse()
 
41
  async fn main() -> std::io::Result<()> {
42
  let args = CliArgs::parse();
43
 
44
+ // Initialize the logger, taking the level from the environment or defaulting to info.
45
  env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
46
 
47
  log::info!("started server on port {}", args.port);
 
57
  HttpServer::new(move || {
58
  App::new()
59
  .app_data(handlebars_ref.clone())
60
+ .wrap(Logger::default()) // added logging middleware for logging.
61
  // Serve images and static files (css and js files).
62
  .service(fs::Files::new("/static", "./public/static").show_files_listing())
63
  .service(fs::Files::new("/images", "./public/images").show_files_listing())
src/engines/duckduckgo.rs CHANGED
@@ -48,49 +48,37 @@ pub async fn results(
48
  let result_url: Selector = Selector::parse(".result__url")?;
49
  let result_desc: Selector = Selector::parse(".result__snippet")?;
50
 
51
- let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
52
-
53
  // scrape all the results from the html
54
- for result in document.select(&results) {
55
- let search_result: RawSearchResult = RawSearchResult {
56
- title: result
57
- .select(&result_title)
58
- .next()
59
- .unwrap()
60
- .inner_html()
61
- .trim()
62
- .to_string(),
63
- visiting_url: format!(
64
- "https://{}",
65
  result
66
- .select(&result_url)
67
  .next()
68
  .unwrap()
69
  .inner_html()
70
  .trim()
71
- ),
72
- description: result
73
- .select(&result_desc)
74
- .next()
75
- .unwrap()
76
- .inner_html()
77
- .trim()
78
- .to_string(),
79
- engine: vec!["duckduckgo".to_string()],
80
- };
81
- search_results.insert(
82
- format!(
83
- "https://{}",
84
  result
85
- .select(&result_url)
86
  .next()
87
  .unwrap()
88
  .inner_html()
89
  .trim()
90
- ),
91
- search_result,
92
- );
93
- }
94
-
95
- Ok(search_results)
96
  }
 
48
  let result_url: Selector = Selector::parse(".result__url")?;
49
  let result_desc: Selector = Selector::parse(".result__snippet")?;
50
 
 
 
51
  // scrape all the results from the html
52
+ Ok(document
53
+ .select(&results)
54
+ .map(|result| {
55
+ RawSearchResult::new(
 
 
 
 
 
 
 
56
  result
57
+ .select(&result_title)
58
  .next()
59
  .unwrap()
60
  .inner_html()
61
  .trim()
62
+ .to_string(),
63
+ format!(
64
+ "https://{}",
65
+ result
66
+ .select(&result_url)
67
+ .next()
68
+ .unwrap()
69
+ .inner_html()
70
+ .trim()
71
+ ),
 
 
 
72
  result
73
+ .select(&result_desc)
74
  .next()
75
  .unwrap()
76
  .inner_html()
77
  .trim()
78
+ .to_string(),
79
+ vec!["duckduckgo".to_string()],
80
+ )
81
+ })
82
+ .map(|search_result| (search_result.visiting_url.clone(), search_result))
83
+ .collect())
84
  }
src/engines/searx.rs CHANGED
@@ -43,47 +43,36 @@ pub async fn results(
43
  let result_url: Selector = Selector::parse("h3>a")?;
44
  let result_desc: Selector = Selector::parse(".content")?;
45
 
46
- let mut search_results: HashMap<String, RawSearchResult> = HashMap::new();
47
-
48
  // scrape all the results from the html
49
- for result in document.select(&results) {
50
- let search_result: RawSearchResult = RawSearchResult {
51
- title: result
52
- .select(&result_title)
53
- .next()
54
- .unwrap()
55
- .inner_html()
56
- .trim()
57
- .to_string(),
58
- visiting_url: result
59
- .select(&result_url)
60
- .next()
61
- .unwrap()
62
- .value()
63
- .attr("href")
64
- .unwrap()
65
- .to_string(),
66
- description: result
67
- .select(&result_desc)
68
- .next()
69
- .unwrap()
70
- .inner_html()
71
- .trim()
72
- .to_string(),
73
- engine: vec!["searx".to_string()],
74
- };
75
- search_results.insert(
76
- result
77
- .select(&result_url)
78
- .next()
79
- .unwrap()
80
- .value()
81
- .attr("href")
82
- .unwrap()
83
- .to_string(),
84
- search_result,
85
- );
86
- }
87
-
88
- Ok(search_results)
89
  }
 
43
  let result_url: Selector = Selector::parse("h3>a")?;
44
  let result_desc: Selector = Selector::parse(".content")?;
45
 
 
 
46
  // scrape all the results from the html
47
+ Ok(document
48
+ .select(&results)
49
+ .map(|result| {
50
+ RawSearchResult::new(
51
+ result
52
+ .select(&result_title)
53
+ .next()
54
+ .unwrap()
55
+ .inner_html()
56
+ .trim()
57
+ .to_string(),
58
+ result
59
+ .select(&result_url)
60
+ .next()
61
+ .unwrap()
62
+ .value()
63
+ .attr("href")
64
+ .unwrap()
65
+ .to_string(),
66
+ result
67
+ .select(&result_desc)
68
+ .next()
69
+ .unwrap()
70
+ .inner_html()
71
+ .trim()
72
+ .to_string(),
73
+ vec!["searx".to_string()],
74
+ )
75
+ })
76
+ .map(|search_result| (search_result.visiting_url.clone(), search_result))
77
+ .collect())
 
 
 
 
 
 
 
 
 
78
  }
src/search_results_handler/aggregation_models.rs CHANGED
@@ -10,6 +10,24 @@ pub struct SearchResult {
10
  pub engine: Vec<String>,
11
  }
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  pub struct RawSearchResult {
14
  pub title: String,
15
  pub visiting_url: String,
@@ -17,9 +35,37 @@ pub struct RawSearchResult {
17
  pub engine: Vec<String>,
18
  }
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  #[derive(Debug, Serialize)]
21
  #[serde(rename_all = "camelCase")]
22
  pub struct SearchResults {
23
  pub results: Vec<SearchResult>,
24
  pub page_query: String,
25
  }
 
 
 
 
 
 
 
 
 
 
10
  pub engine: Vec<String>,
11
  }
12
 
13
+ impl SearchResult {
14
+ pub fn new(
15
+ title: String,
16
+ visiting_url: String,
17
+ url: String,
18
+ description: String,
19
+ engine: Vec<String>,
20
+ ) -> Self {
21
+ SearchResult {
22
+ title,
23
+ visiting_url,
24
+ url,
25
+ description,
26
+ engine,
27
+ }
28
+ }
29
+ }
30
+
31
  pub struct RawSearchResult {
32
  pub title: String,
33
  pub visiting_url: String,
 
35
  pub engine: Vec<String>,
36
  }
37
 
38
+ impl RawSearchResult {
39
+ pub fn new(
40
+ title: String,
41
+ visiting_url: String,
42
+ description: String,
43
+ engine: Vec<String>,
44
+ ) -> Self {
45
+ RawSearchResult {
46
+ title,
47
+ visiting_url,
48
+ description,
49
+ engine,
50
+ }
51
+ }
52
+ pub fn add_engines(&mut self, engine: String) {
53
+ self.engine.push(engine)
54
+ }
55
+ }
56
+
57
  #[derive(Debug, Serialize)]
58
  #[serde(rename_all = "camelCase")]
59
  pub struct SearchResults {
60
  pub results: Vec<SearchResult>,
61
  pub page_query: String,
62
  }
63
+
64
+ impl SearchResults {
65
+ pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
66
+ SearchResults {
67
+ results,
68
+ page_query,
69
+ }
70
+ }
71
+ }
src/search_results_handler/aggregator.rs CHANGED
@@ -1,8 +1,10 @@
1
  use std::collections::HashMap;
2
 
3
- use fake_useragent::{Browsers, UserAgentsBuilder};
 
 
 
4
 
5
- use super::aggregation_models::{RawSearchResult, SearchResult, SearchResults};
6
  use crate::engines::{duckduckgo, searx};
7
 
8
  // A function that aggregates all the scraped results from the above upstream engines and
@@ -20,23 +22,7 @@ pub async fn aggregate(
20
  query: &str,
21
  page: Option<u32>,
22
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
23
- // Generate random user agent to improve privacy of the user.
24
- let user_agent: String = UserAgentsBuilder::new()
25
- .cache(false)
26
- .dir("/tmp")
27
- .thread(1)
28
- .set_browsers(
29
- Browsers::new()
30
- .set_chrome()
31
- .set_safari()
32
- .set_edge()
33
- .set_firefox()
34
- .set_mozilla(),
35
- )
36
- .build()
37
- .random()
38
- .to_string();
39
-
40
  let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
41
 
42
  let ddg_map_results: HashMap<String, RawSearchResult> =
@@ -46,32 +32,35 @@ pub async fn aggregate(
46
 
47
  result_map.extend(ddg_map_results);
48
 
49
- for (key, value) in searx_map_results.into_iter() {
50
- if result_map.contains_key(&key) {
51
- result_map
52
- .get_mut(&key)
53
- .unwrap()
54
- .engine
55
- .push(value.engine.get(0).unwrap().to_string())
56
- } else {
57
- result_map.insert(key, value);
58
- }
59
- }
60
-
61
- let mut search_results: Vec<SearchResult> = Vec::new();
62
-
63
- for (key, value) in result_map.into_iter() {
64
- search_results.push(SearchResult {
65
- title: value.title,
66
- visiting_url: value.visiting_url,
67
- url: key,
68
- description: value.description,
69
- engine: value.engine,
70
- })
71
- }
72
 
73
- Ok(SearchResults {
74
- results: search_results,
75
- page_query: query.to_string(),
76
- })
 
 
 
 
 
 
 
 
 
 
 
77
  }
 
1
  use std::collections::HashMap;
2
 
3
+ use super::{
4
+ aggregation_models::{RawSearchResult, SearchResult, SearchResults},
5
+ user_agent::random_user_agent,
6
+ };
7
 
 
8
  use crate::engines::{duckduckgo, searx};
9
 
10
  // A function that aggregates all the scraped results from the above upstream engines and
 
22
  query: &str,
23
  page: Option<u32>,
24
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
25
+ let user_agent: String = random_user_agent();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
27
 
28
  let ddg_map_results: HashMap<String, RawSearchResult> =
 
32
 
33
  result_map.extend(ddg_map_results);
34
 
35
+ searx_map_results.into_iter().for_each(|(key, value)| {
36
+ result_map
37
+ .entry(key)
38
+ .and_modify(|result| {
39
+ result.add_engines(value.engine[0].clone());
40
+ })
41
+ .or_insert_with(|| -> RawSearchResult {
42
+ RawSearchResult::new(
43
+ value.title.clone(),
44
+ value.visiting_url.clone(),
45
+ value.description.clone(),
46
+ value.engine.clone(),
47
+ )
48
+ });
49
+ });
 
 
 
 
 
 
 
 
50
 
51
+ Ok(SearchResults::new(
52
+ result_map
53
+ .into_iter()
54
+ .map(|(key, value)| {
55
+ SearchResult::new(
56
+ value.title,
57
+ value.visiting_url,
58
+ key,
59
+ value.description,
60
+ value.engine,
61
+ )
62
+ })
63
+ .collect(),
64
+ query.to_string(),
65
+ ))
66
  }
src/search_results_handler/mod.rs CHANGED
@@ -1,2 +1,3 @@
1
  pub mod aggregation_models;
2
  pub mod aggregator;
 
 
1
  pub mod aggregation_models;
2
  pub mod aggregator;
3
+ pub mod user_agent;
src/search_results_handler/user_agent.rs ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ use fake_useragent::{Browsers, UserAgentsBuilder};
2
+
3
+ // A function to generate random user agent to improve privacy of the user.
4
+ pub fn random_user_agent() -> String {
5
+ UserAgentsBuilder::new()
6
+ .cache(false)
7
+ .dir("/tmp")
8
+ .thread(1)
9
+ .set_browsers(
10
+ Browsers::new()
11
+ .set_chrome()
12
+ .set_safari()
13
+ .set_edge()
14
+ .set_firefox()
15
+ .set_mozilla(),
16
+ )
17
+ .build()
18
+ .random()
19
+ .to_string()
20
+ }