jkaczmarkiewicz committed on
Commit
27bc52c
1 Parent(s): fc3b416

✨ `Brave` for the search engine (#335)

Browse files

* feat: implement brave engine

* refactor: correct indentations in stylelint config

* docs: add dummy config option to config.lua

* feat: implement safe_search_level in brave engine

* refactor: move var to format

* fix: make strict search above level 1

.stylelintrc.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "extends": "stylelint-config-standard",
3
  "rules": {
4
  "alpha-value-notation": "number",
5
  "selector-class-pattern": null
6
  },
7
- "overrides": [
8
  {
9
  "files": ["*.js"],
10
  "customSyntax": "postcss-lit"
 
1
  {
2
+ "extends": "stylelint-config-standard",
3
  "rules": {
4
  "alpha-value-notation": "number",
5
  "selector-class-pattern": null
6
  },
7
+ "overrides": [
8
  {
9
  "files": ["*.js"],
10
  "customSyntax": "postcss-lit"
src/engines/brave.rs ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! The `brave` module handles the scraping of results from the brave search engine
2
+ //! by querying the upstream brave search engine with user provided query and with a page
3
+ //! number if provided.
4
+
5
+ use std::collections::HashMap;
6
+
7
+ use reqwest::header::HeaderMap;
8
+ use scraper::Html;
9
+
10
+ use crate::models::aggregation_models::SearchResult;
11
+ use error_stack::{Report, Result, ResultExt};
12
+
13
+ use crate::models::engine_models::{EngineError, SearchEngine};
14
+
15
+ use super::search_result_parser::SearchResultParser;
16
+
17
+ /// Scrapes the results from the Brave search engine.
18
+ pub struct Brave {
19
+ /// Utilises generic logic for parsing search results.
20
+ parser: SearchResultParser,
21
+ }
22
+
23
+ impl Brave {
24
+ /// Creates the Brave parser.
25
+ pub fn new() -> Result<Brave, EngineError> {
26
+ Ok(Self {
27
+ parser: SearchResultParser::new(
28
+ "#results h4",
29
+ "#results [data-pos]",
30
+ "a > .url",
31
+ "a",
32
+ ".snippet-description",
33
+ )?,
34
+ })
35
+ }
36
+ }
37
+
38
+ #[async_trait::async_trait]
39
+ impl SearchEngine for Brave {
40
+ async fn results(
41
+ &self,
42
+ query: &str,
43
+ page: u32,
44
+ user_agent: &str,
45
+ request_timeout: u8,
46
+ safe_search: u8,
47
+ ) -> Result<HashMap<String, SearchResult>, EngineError> {
48
+ let url = format!("https://search.brave.com/search?q={query}&offset={page}");
49
+
50
+ let safe_search_level = match safe_search {
51
+ 0 => "off",
52
+ 1 => "moderate",
53
+ _ => "strict",
54
+ };
55
+
56
+ let header_map = HeaderMap::try_from(&HashMap::from([
57
+ ("USER_AGENT".to_string(), user_agent.to_string()),
58
+ (
59
+ "CONTENT_TYPE".to_string(),
60
+ "application/x-www-form-urlencoded".to_string(),
61
+ ),
62
+ ("REFERER".to_string(), "https://google.com/".to_string()),
63
+ (
64
+ "COOKIE".to_string(),
65
+ format!("safe_search={safe_search_level}"),
66
+ ),
67
+ ]))
68
+ .change_context(EngineError::UnexpectedError)?;
69
+
70
+ let document: Html = Html::parse_document(
71
+ &Brave::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
72
+ );
73
+
74
+ if let Some(no_result_msg) = self.parser.parse_for_no_results(&document).nth(0) {
75
+ if no_result_msg
76
+ .inner_html()
77
+ .contains("Not many great matches came back for your search")
78
+ {
79
+ return Err(Report::new(EngineError::EmptyResultSet));
80
+ }
81
+ }
82
+
83
+ self.parser
84
+ .parse_for_results(&document, |title, url, desc| {
85
+ url.value().attr("href").map(|url| {
86
+ SearchResult::new(
87
+ title.text().collect::<Vec<_>>().join("").trim(),
88
+ url.trim(),
89
+ desc.inner_html().trim(),
90
+ &["brave"],
91
+ )
92
+ })
93
+ })
94
+ }
95
+ }
src/engines/mod.rs CHANGED
@@ -3,6 +3,7 @@
3
  //! provide standard functions to be implemented for all the upstream search engine handling
4
  //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
5
 
 
6
  pub mod duckduckgo;
7
  pub mod search_result_parser;
8
  pub mod searx;
 
3
  //! provide standard functions to be implemented for all the upstream search engine handling
4
  //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
5
 
6
+ pub mod brave;
7
  pub mod duckduckgo;
8
  pub mod search_result_parser;
9
  pub mod searx;
src/models/engine_models.rs CHANGED
@@ -150,6 +150,10 @@ impl EngineHandler {
150
  let engine = crate::engines::searx::Searx::new()?;
151
  ("searx", Box::new(engine))
152
  }
 
 
 
 
153
  _ => {
154
  return Err(Report::from(EngineError::NoSuchEngineFound(
155
  engine_name.to_string(),
 
150
  let engine = crate::engines::searx::Searx::new()?;
151
  ("searx", Box::new(engine))
152
  }
153
+ "brave" => {
154
+ let engine = crate::engines::brave::Brave::new()?;
155
+ ("brave", Box::new(engine))
156
+ }
157
  _ => {
158
  return Err(Report::from(EngineError::NoSuchEngineFound(
159
  engine_name.to_string(),
websurfx/config.lua CHANGED
@@ -52,4 +52,5 @@ redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which th
52
  upstream_search_engines = {
53
  DuckDuckGo = true,
54
  Searx = false,
 
55
  } -- select the upstream search engines from which the results should be fetched.
 
52
  upstream_search_engines = {
53
  DuckDuckGo = true,
54
  Searx = false,
55
+ Brave = false,
56
  } -- select the upstream search engines from which the results should be fetched.