alamin655 committed on
Commit
86991a2
·
unverified ·
2 Parent(s): 9fec52f e581de3

Merge branch 'rolling' into feat-rate-limiter-for-websurfx

Browse files
.gitignore CHANGED
@@ -4,3 +4,4 @@ package-lock.json
4
  dump.rdb
5
  .vscode
6
  megalinter-reports/
 
 
4
  dump.rdb
5
  .vscode
6
  megalinter-reports/
7
+ dhat-heap.json
Cargo.lock CHANGED
@@ -300,12 +300,24 @@ version = "1.0.75"
300
  source = "registry+https://github.com/rust-lang/crates.io-index"
301
  checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
302
 
 
 
 
 
 
 
303
  [[package]]
304
  name = "askama_escape"
305
  version = "0.10.3"
306
  source = "registry+https://github.com/rust-lang/crates.io-index"
307
  checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
308
 
 
 
 
 
 
 
309
  [[package]]
310
  name = "async-trait"
311
  version = "0.1.73"
@@ -571,7 +583,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
571
  checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
572
  dependencies = [
573
  "bytes 1.4.0",
 
574
  "memchr",
 
 
 
575
  ]
576
 
577
  [[package]]
@@ -845,6 +861,22 @@ dependencies = [
845
  "syn 1.0.109",
846
  ]
847
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
848
  [[package]]
849
  name = "digest"
850
  version = "0.10.7"
@@ -1630,6 +1662,16 @@ version = "0.2.147"
1630
  source = "registry+https://github.com/rust-lang/crates.io-index"
1631
  checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
1632
 
 
 
 
 
 
 
 
 
 
 
1633
  [[package]]
1634
  name = "linux-raw-sys"
1635
  version = "0.4.5"
@@ -1767,6 +1809,15 @@ dependencies = [
1767
  "autocfg 1.1.0",
1768
  ]
1769
 
 
 
 
 
 
 
 
 
 
1770
  [[package]]
1771
  name = "mime"
1772
  version = "0.3.17"
@@ -1792,6 +1843,16 @@ dependencies = [
1792
  "adler",
1793
  ]
1794
 
 
 
 
 
 
 
 
 
 
 
1795
  [[package]]
1796
  name = "mio"
1797
  version = "0.6.23"
@@ -1835,6 +1896,20 @@ dependencies = [
1835
  "ws2_32-sys",
1836
  ]
1837
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1838
  [[package]]
1839
  name = "native-tls"
1840
  version = "0.2.11"
@@ -2202,6 +2277,26 @@ dependencies = [
2202
  "siphasher 0.3.11",
2203
  ]
2204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2205
  [[package]]
2206
  name = "pin-project-lite"
2207
  version = "0.2.13"
@@ -2504,12 +2599,21 @@ version = "0.23.3"
2504
  source = "registry+https://github.com/rust-lang/crates.io-index"
2505
  checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
2506
  dependencies = [
 
 
 
2507
  "combine",
 
 
2508
  "itoa 1.0.9",
2509
  "percent-encoding 2.3.0",
 
2510
  "ryu",
2511
  "sha1_smol",
2512
  "socket2 0.4.9",
 
 
 
2513
  "url 2.4.1",
2514
  ]
2515
 
@@ -2628,36 +2732,18 @@ dependencies = [
2628
  "winreg 0.50.0",
2629
  ]
2630
 
2631
- [[package]]
2632
- name = "rlua"
2633
- version = "0.19.7"
2634
- source = "registry+https://github.com/rust-lang/crates.io-index"
2635
- checksum = "5d33e5ba15c3d43178f283ed5863d4531e292fc0e56fb773f3bea45f18e3a42a"
2636
- dependencies = [
2637
- "bitflags 1.3.2",
2638
- "bstr",
2639
- "libc",
2640
- "num-traits",
2641
- "rlua-lua54-sys",
2642
- ]
2643
-
2644
- [[package]]
2645
- name = "rlua-lua54-sys"
2646
- version = "0.1.6"
2647
- source = "registry+https://github.com/rust-lang/crates.io-index"
2648
- checksum = "7aafabafe1895cb4a2be81a56d7ff3d46bf4b5d2f9cfdbea2ed404cdabe96474"
2649
- dependencies = [
2650
- "cc",
2651
- "libc",
2652
- "pkg-config",
2653
- ]
2654
-
2655
  [[package]]
2656
  name = "rustc-demangle"
2657
  version = "0.1.23"
2658
  source = "registry+https://github.com/rust-lang/crates.io-index"
2659
  checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
2660
 
 
 
 
 
 
 
2661
  [[package]]
2662
  name = "rustc_version"
2663
  version = "0.2.3"
@@ -2957,6 +3043,9 @@ name = "smallvec"
2957
  version = "1.11.0"
2958
  source = "registry+https://github.com/rust-lang/crates.io-index"
2959
  checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
 
 
 
2960
 
2961
  [[package]]
2962
  name = "socket2"
@@ -3098,6 +3187,16 @@ dependencies = [
3098
  "unicode-xid 0.2.4",
3099
  ]
3100
 
 
 
 
 
 
 
 
 
 
 
3101
  [[package]]
3102
  name = "tempfile"
3103
  version = "3.8.0"
@@ -3151,6 +3250,12 @@ dependencies = [
3151
  "syn 2.0.29",
3152
  ]
3153
 
 
 
 
 
 
 
3154
  [[package]]
3155
  name = "time"
3156
  version = "0.1.45"
@@ -3335,6 +3440,17 @@ dependencies = [
3335
  "tokio-sync",
3336
  ]
3337
 
 
 
 
 
 
 
 
 
 
 
 
3338
  [[package]]
3339
  name = "tokio-sync"
3340
  version = "0.1.8"
@@ -3688,24 +3804,29 @@ dependencies = [
3688
  "actix-files",
3689
  "actix-governor",
3690
  "actix-web",
 
3691
  "async-trait",
3692
  "criterion",
 
3693
  "env_logger",
3694
  "error-stack",
3695
  "fake-useragent",
 
3696
  "handlebars",
3697
  "log",
3698
  "md5",
 
 
3699
  "once_cell",
3700
  "rand 0.8.5",
3701
  "redis",
3702
  "regex",
3703
  "reqwest 0.11.20",
3704
- "rlua",
3705
  "rusty-hook",
3706
  "scraper",
3707
  "serde",
3708
  "serde_json",
 
3709
  "tempfile",
3710
  "tokio 1.32.0",
3711
  ]
 
300
  source = "registry+https://github.com/rust-lang/crates.io-index"
301
  checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
302
 
303
+ [[package]]
304
+ name = "arc-swap"
305
+ version = "1.6.0"
306
+ source = "registry+https://github.com/rust-lang/crates.io-index"
307
+ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
308
+
309
  [[package]]
310
  name = "askama_escape"
311
  version = "0.10.3"
312
  source = "registry+https://github.com/rust-lang/crates.io-index"
313
  checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
314
 
315
+ [[package]]
316
+ name = "async-once-cell"
317
+ version = "0.5.3"
318
+ source = "registry+https://github.com/rust-lang/crates.io-index"
319
+ checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb"
320
+
321
  [[package]]
322
  name = "async-trait"
323
  version = "0.1.73"
 
583
  checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
584
  dependencies = [
585
  "bytes 1.4.0",
586
+ "futures-core",
587
  "memchr",
588
+ "pin-project-lite",
589
+ "tokio 1.32.0",
590
+ "tokio-util",
591
  ]
592
 
593
  [[package]]
 
861
  "syn 1.0.109",
862
  ]
863
 
864
+ [[package]]
865
+ name = "dhat"
866
+ version = "0.3.2"
867
+ source = "registry+https://github.com/rust-lang/crates.io-index"
868
+ checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3"
869
+ dependencies = [
870
+ "backtrace",
871
+ "lazy_static",
872
+ "mintex",
873
+ "parking_lot 0.12.1",
874
+ "rustc-hash",
875
+ "serde",
876
+ "serde_json",
877
+ "thousands",
878
+ ]
879
+
880
  [[package]]
881
  name = "digest"
882
  version = "0.10.7"
 
1662
  source = "registry+https://github.com/rust-lang/crates.io-index"
1663
  checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
1664
 
1665
+ [[package]]
1666
+ name = "libmimalloc-sys"
1667
+ version = "0.1.34"
1668
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1669
+ checksum = "25d058a81af0d1c22d7a1c948576bee6d673f7af3c0f35564abd6c81122f513d"
1670
+ dependencies = [
1671
+ "cc",
1672
+ "libc",
1673
+ ]
1674
+
1675
  [[package]]
1676
  name = "linux-raw-sys"
1677
  version = "0.4.5"
 
1809
  "autocfg 1.1.0",
1810
  ]
1811
 
1812
+ [[package]]
1813
+ name = "mimalloc"
1814
+ version = "0.1.38"
1815
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1816
+ checksum = "972e5f23f6716f62665760b0f4cbf592576a80c7b879ba9beaafc0e558894127"
1817
+ dependencies = [
1818
+ "libmimalloc-sys",
1819
+ ]
1820
+
1821
  [[package]]
1822
  name = "mime"
1823
  version = "0.3.17"
 
1843
  "adler",
1844
  ]
1845
 
1846
+ [[package]]
1847
+ name = "mintex"
1848
+ version = "0.1.2"
1849
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1850
+ checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb"
1851
+ dependencies = [
1852
+ "once_cell",
1853
+ "sys-info",
1854
+ ]
1855
+
1856
  [[package]]
1857
  name = "mio"
1858
  version = "0.6.23"
 
1896
  "ws2_32-sys",
1897
  ]
1898
 
1899
+ [[package]]
1900
+ name = "mlua"
1901
+ version = "0.8.10"
1902
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1903
+ checksum = "0bb37b0ba91f017aa7ca2b98ef99496827770cd635b4a932a6047c5b4bbe678e"
1904
+ dependencies = [
1905
+ "bstr",
1906
+ "cc",
1907
+ "num-traits",
1908
+ "once_cell",
1909
+ "pkg-config",
1910
+ "rustc-hash",
1911
+ ]
1912
+
1913
  [[package]]
1914
  name = "native-tls"
1915
  version = "0.2.11"
 
2277
  "siphasher 0.3.11",
2278
  ]
2279
 
2280
+ [[package]]
2281
+ name = "pin-project"
2282
+ version = "1.1.3"
2283
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2284
+ checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
2285
+ dependencies = [
2286
+ "pin-project-internal",
2287
+ ]
2288
+
2289
+ [[package]]
2290
+ name = "pin-project-internal"
2291
+ version = "1.1.3"
2292
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2293
+ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
2294
+ dependencies = [
2295
+ "proc-macro2 1.0.66",
2296
+ "quote 1.0.33",
2297
+ "syn 2.0.29",
2298
+ ]
2299
+
2300
  [[package]]
2301
  name = "pin-project-lite"
2302
  version = "0.2.13"
 
2599
  source = "registry+https://github.com/rust-lang/crates.io-index"
2600
  checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
2601
  dependencies = [
2602
+ "arc-swap",
2603
+ "async-trait",
2604
+ "bytes 1.4.0",
2605
  "combine",
2606
+ "futures 0.3.28",
2607
+ "futures-util",
2608
  "itoa 1.0.9",
2609
  "percent-encoding 2.3.0",
2610
+ "pin-project-lite",
2611
  "ryu",
2612
  "sha1_smol",
2613
  "socket2 0.4.9",
2614
+ "tokio 1.32.0",
2615
+ "tokio-retry",
2616
+ "tokio-util",
2617
  "url 2.4.1",
2618
  ]
2619
 
 
2732
  "winreg 0.50.0",
2733
  ]
2734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2735
  [[package]]
2736
  name = "rustc-demangle"
2737
  version = "0.1.23"
2738
  source = "registry+https://github.com/rust-lang/crates.io-index"
2739
  checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
2740
 
2741
+ [[package]]
2742
+ name = "rustc-hash"
2743
+ version = "1.1.0"
2744
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2745
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
2746
+
2747
  [[package]]
2748
  name = "rustc_version"
2749
  version = "0.2.3"
 
3043
  version = "1.11.0"
3044
  source = "registry+https://github.com/rust-lang/crates.io-index"
3045
  checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
3046
+ dependencies = [
3047
+ "serde",
3048
+ ]
3049
 
3050
  [[package]]
3051
  name = "socket2"
 
3187
  "unicode-xid 0.2.4",
3188
  ]
3189
 
3190
+ [[package]]
3191
+ name = "sys-info"
3192
+ version = "0.9.1"
3193
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3194
+ checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
3195
+ dependencies = [
3196
+ "cc",
3197
+ "libc",
3198
+ ]
3199
+
3200
  [[package]]
3201
  name = "tempfile"
3202
  version = "3.8.0"
 
3250
  "syn 2.0.29",
3251
  ]
3252
 
3253
+ [[package]]
3254
+ name = "thousands"
3255
+ version = "0.2.0"
3256
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3257
+ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
3258
+
3259
  [[package]]
3260
  name = "time"
3261
  version = "0.1.45"
 
3440
  "tokio-sync",
3441
  ]
3442
 
3443
+ [[package]]
3444
+ name = "tokio-retry"
3445
+ version = "0.3.0"
3446
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3447
+ checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
3448
+ dependencies = [
3449
+ "pin-project",
3450
+ "rand 0.8.5",
3451
+ "tokio 1.32.0",
3452
+ ]
3453
+
3454
  [[package]]
3455
  name = "tokio-sync"
3456
  version = "0.1.8"
 
3804
  "actix-files",
3805
  "actix-governor",
3806
  "actix-web",
3807
+ "async-once-cell",
3808
  "async-trait",
3809
  "criterion",
3810
+ "dhat",
3811
  "env_logger",
3812
  "error-stack",
3813
  "fake-useragent",
3814
+ "futures 0.3.28",
3815
  "handlebars",
3816
  "log",
3817
  "md5",
3818
+ "mimalloc",
3819
+ "mlua",
3820
  "once_cell",
3821
  "rand 0.8.5",
3822
  "redis",
3823
  "regex",
3824
  "reqwest 0.11.20",
 
3825
  "rusty-hook",
3826
  "scraper",
3827
  "serde",
3828
  "serde_json",
3829
+ "smallvec 1.11.0",
3830
  "tempfile",
3831
  "tokio 1.32.0",
3832
  ]
Cargo.toml CHANGED
@@ -8,7 +8,7 @@ license = "AGPL-3.0"
8
 
9
  [dependencies]
10
  reqwest = {version="0.11.20",features=["json"]}
11
- tokio = {version="1.32.0",features=["full"]}
12
  serde = {version="1.0.188",features=["derive"]}
13
  handlebars = { version = "4.4.0", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
@@ -48,13 +48,17 @@ rpath = false
48
 
49
  [profile.release]
50
  opt-level = 3
51
- debug = false
 
52
  split-debuginfo = '...'
53
  debug-assertions = false
54
  overflow-checks = false
55
- lto = 'thin'
56
  panic = 'abort'
57
  incremental = false
58
- codegen-units = 16
59
  rpath = false
60
  strip = "debuginfo"
 
 
 
 
8
 
9
  [dependencies]
10
  reqwest = {version="0.11.20",features=["json"]}
11
+ tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
12
  serde = {version="1.0.188",features=["derive"]}
13
  handlebars = { version = "4.4.0", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
 
48
 
49
  [profile.release]
50
  opt-level = 3
51
+ debug = false # This should only be commented out when testing with the dhat profiler
52
+ # debug = 1 # This should only be uncommented when testing with dhat profiler
53
  split-debuginfo = '...'
54
  debug-assertions = false
55
  overflow-checks = false
56
+ lto = true
57
  panic = 'abort'
58
  incremental = false
59
+ codegen-units = 1
60
  rpath = false
61
  strip = "debuginfo"
62
+
63
+ [features]
64
+ dhat-heap = ["dep:dhat"]
Dockerfile CHANGED
@@ -19,7 +19,7 @@ COPY . .
19
  RUN cargo install --path .
20
 
21
  # We do not need the Rust toolchain to run the binary!
22
- FROM gcr.io/distroless/cc-debian11
23
  COPY --from=builder /app/public/ /opt/websurfx/public/
24
  COPY --from=builder /app/websurfx/config.lua /etc/xdg/websurfx/config.lua
25
  COPY --from=builder /usr/local/cargo/bin/* /usr/local/bin/
 
19
  RUN cargo install --path .
20
 
21
  # We do not need the Rust toolchain to run the binary!
22
+ FROM gcr.io/distroless/cc-debian12
23
  COPY --from=builder /app/public/ /opt/websurfx/public/
24
  COPY --from=builder /app/websurfx/config.lua /etc/xdg/websurfx/config.lua
25
  COPY --from=builder /usr/local/cargo/bin/* /usr/local/bin/
README.md CHANGED
@@ -5,7 +5,7 @@
5
  <b align="center"><a href="README.md">Readme</a></b> |
6
  <b><a href="https://discord.gg/SWnda7Mw5u">Discord</a></b> |
7
  <b><a href="https://github.com/neon-mmd/websurfx">GitHub</a></b> |
8
- <b><a href="./docs/README.md">Documentation</a></b>
9
  <br /><br />
10
  <a href="#">
11
  <img
 
5
  <b align="center"><a href="README.md">Readme</a></b> |
6
  <b><a href="https://discord.gg/SWnda7Mw5u">Discord</a></b> |
7
  <b><a href="https://github.com/neon-mmd/websurfx">GitHub</a></b> |
8
+ <b><a href="../../tree/HEAD/docs/">Documentation</a></b>
9
  <br /><br />
10
  <a href="#">
11
  <img
docs/installation.md CHANGED
@@ -109,7 +109,7 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo
109
  theme = "simple" -- the theme name which should be used for the website
110
 
111
  -- ### Caching ###
112
- redis_connection_url = "redis://redis:6379" -- redis connection url address on which the client should connect on.
113
 
114
  -- ### Search Engines ###
115
  upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
 
109
  theme = "simple" -- the theme name which should be used for the website
110
 
111
  -- ### Caching ###
112
+ redis_url = "redis://redis:6379" -- redis connection url address to which the client should connect.
113
 
114
  -- ### Search Engines ###
115
  upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
src/bin/websurfx.rs CHANGED
@@ -3,9 +3,19 @@
3
  //! This module contains the main function which handles the logging of the application to the
4
  //! stdout and handles the command line arguments provided and launches the `websurfx` server.
5
 
 
6
  use std::net::TcpListener;
7
  use websurfx::{config::parser::Config, run};
8
 
 
 
 
 
 
 
 
 
 
9
  /// The function that launches the main server and registers all the routes of the website.
10
  ///
11
  /// # Error
@@ -14,6 +24,10 @@ use websurfx::{config::parser::Config, run};
14
  /// available for being used for other applications.
15
  #[actix_web::main]
16
  async fn main() -> std::io::Result<()> {
 
 
 
 
17
  // Initialize the parsed config file.
18
  let config = Config::parse(false).unwrap();
19
 
 
3
  //! This module contains the main function which handles the logging of the application to the
4
  //! stdout and handles the command line arguments provided and launches the `websurfx` server.
5
 
6
+ use mimalloc::MiMalloc;
7
  use std::net::TcpListener;
8
  use websurfx::{config::parser::Config, run};
9
 
10
+ /// A dhat heap memory profiler
11
+ #[cfg(feature = "dhat-heap")]
12
+ #[global_allocator]
13
+ static ALLOC: dhat::Alloc = dhat::Alloc;
14
+
15
+ #[cfg(not(feature = "dhat-heap"))]
16
+ #[global_allocator]
17
+ static GLOBAL: MiMalloc = MiMalloc;
18
+
19
  /// The function that launches the main server and registers all the routes of the website.
20
  ///
21
  /// # Error
 
24
  /// available for being used for other applications.
25
  #[actix_web::main]
26
  async fn main() -> std::io::Result<()> {
27
+ // A dhat heap profiler initialization.
28
+ #[cfg(feature = "dhat-heap")]
29
+ let _profiler = dhat::Profiler::new_heap();
30
+
31
  // Initialize the parsed config file.
32
  let config = Config::parse(false).unwrap();
33
 
src/cache/cacher.rs CHANGED
@@ -1,17 +1,27 @@
1
  //! This module provides the functionality to cache the aggregated results fetched and aggregated
2
  //! from the upstream search engines in a json format.
3
 
 
 
4
  use md5::compute;
5
- use redis::{Client, Commands, Connection};
 
 
6
 
7
  /// A named struct which stores the redis Connection url address to which the client will
8
  /// connect to.
9
  ///
10
  /// # Fields
11
  ///
12
- /// * `redis_connection_url` - It stores the redis Connection url address.
 
 
 
 
13
  pub struct RedisCache {
14
- connection: Connection,
 
 
15
  }
16
 
17
  impl RedisCache {
@@ -19,11 +29,25 @@ impl RedisCache {
19
  ///
20
  /// # Arguments
21
  ///
22
- /// * `redis_connection_url` - It stores the redis Connection url address.
23
- pub fn new(redis_connection_url: String) -> Result<Self, Box<dyn std::error::Error>> {
 
 
 
 
 
24
  let client = Client::open(redis_connection_url)?;
25
- let connection = client.get_connection()?;
26
- let redis_cache = RedisCache { connection };
 
 
 
 
 
 
 
 
 
27
  Ok(redis_cache)
28
  }
29
 
@@ -32,7 +56,7 @@ impl RedisCache {
32
  /// # Arguments
33
  ///
34
  /// * `url` - It takes an url as string.
35
- fn hash_url(url: &str) -> String {
36
  format!("{:?}", compute(url))
37
  }
38
 
@@ -41,9 +65,42 @@ impl RedisCache {
41
  /// # Arguments
42
  ///
43
  /// * `url` - It takes an url as a string.
44
- pub fn cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
45
- let hashed_url_string = Self::hash_url(url);
46
- Ok(self.connection.get(hashed_url_string)?)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
 
49
  /// A function which caches the results by using the hashed `url` as the key and
@@ -54,21 +111,45 @@ impl RedisCache {
54
  ///
55
  /// * `json_results` - It takes the json results string as an argument.
56
  /// * `url` - It takes the url as a String.
57
- pub fn cache_results(
58
  &mut self,
59
- json_results: String,
60
  url: &str,
61
- ) -> Result<(), Box<dyn std::error::Error>> {
62
- let hashed_url_string = Self::hash_url(url);
63
-
64
- // put results_json into cache
65
- self.connection.set(&hashed_url_string, json_results)?;
66
 
67
- // Set the TTL for the key to 60 seconds
68
- self.connection
69
- .expire::<String, u32>(hashed_url_string, 60)
70
- .unwrap();
71
 
72
- Ok(())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  }
74
  }
 
1
  //! This module provides the functionality to cache the aggregated results fetched and aggregated
2
  //! from the upstream search engines in a json format.
3
 
4
+ use error_stack::Report;
5
+ use futures::future::try_join_all;
6
  use md5::compute;
7
+ use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
8
+
9
+ use super::error::PoolError;
10
 
11
  /// A named struct which stores the redis Connection url address to which the client will
12
  /// connect.
13
  ///
14
  /// # Fields
15
  ///
16
+ /// * `connection_pool` - It stores a pool of connections ready to be used.
17
+ /// * `pool_size` - It stores the size of the connection pool (in other words the number of
18
+ /// connections that should be stored in the pool).
19
+ /// * `current_connection` - It stores the index of which connection is being used at the moment.
20
+ #[derive(Clone)]
21
  pub struct RedisCache {
22
+ connection_pool: Vec<ConnectionManager>,
23
+ pool_size: u8,
24
+ current_connection: u8,
25
  }
26
 
27
  impl RedisCache {
 
29
  ///
30
  /// # Arguments
31
  ///
32
+ /// * `redis_connection_url` - It takes the redis Connection url address.
33
+ /// * `pool_size` - It takes the size of the connection pool (in other words the number of
34
+ /// connections that should be stored in the pool).
35
+ pub async fn new(
36
+ redis_connection_url: &str,
37
+ pool_size: u8,
38
+ ) -> Result<Self, Box<dyn std::error::Error>> {
39
  let client = Client::open(redis_connection_url)?;
40
+ let mut tasks: Vec<_> = Vec::new();
41
+
42
+ for _ in 0..pool_size {
43
+ tasks.push(client.get_tokio_connection_manager());
44
+ }
45
+
46
+ let redis_cache = RedisCache {
47
+ connection_pool: try_join_all(tasks).await?,
48
+ pool_size,
49
+ current_connection: Default::default(),
50
+ };
51
  Ok(redis_cache)
52
  }
53
 
 
56
  /// # Arguments
57
  ///
58
  /// * `url` - It takes an url as string.
59
+ fn hash_url(&self, url: &str) -> String {
60
  format!("{:?}", compute(url))
61
  }
62
 
 
65
  /// # Arguments
66
  ///
67
  /// * `url` - It takes an url as a string.
68
+ pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
69
+ self.current_connection = Default::default();
70
+ let hashed_url_string: &str = &self.hash_url(url);
71
+
72
+ let mut result: Result<String, RedisError> = self.connection_pool
73
+ [self.current_connection as usize]
74
+ .get(hashed_url_string)
75
+ .await;
76
+
77
+ // Check whether the connection currently being used was dropped with a connection error.
78
+ // If it was, the current connection is replaced with the next connection from the pool,
79
+ // which is then used to run the redis command again, and that connection is checked in
80
+ // the same way. If it was not dropped, the result is returned as a `Result`. Otherwise
81
+ // the same process repeats, and if all of the connections in the pool fail with a
82
+ // connection dropped error then a custom pool error is
83
+ // returned.
84
+ loop {
85
+ match result {
86
+ Err(error) => match error.is_connection_dropped() {
87
+ true => {
88
+ self.current_connection += 1;
89
+ if self.current_connection == self.pool_size {
90
+ return Err(Report::new(
91
+ PoolError::PoolExhaustionWithConnectionDropError,
92
+ ));
93
+ }
94
+ result = self.connection_pool[self.current_connection as usize]
95
+ .get(hashed_url_string)
96
+ .await;
97
+ continue;
98
+ }
99
+ false => return Err(Report::new(PoolError::RedisError(error))),
100
+ },
101
+ Ok(res) => return Ok(res),
102
+ }
103
+ }
104
  }
105
 
106
  /// A function which caches the results by using the hashed `url` as the key and
 
111
  ///
112
  /// * `json_results` - It takes the json results string as an argument.
113
  /// * `url` - It takes the url as a String.
114
+ pub async fn cache_results(
115
  &mut self,
116
+ json_results: &str,
117
  url: &str,
118
+ ) -> Result<(), Report<PoolError>> {
119
+ self.current_connection = Default::default();
120
+ let hashed_url_string: &str = &self.hash_url(url);
 
 
121
 
122
+ let mut result: Result<(), RedisError> = self.connection_pool
123
+ [self.current_connection as usize]
124
+ .set_ex(hashed_url_string, json_results, 60)
125
+ .await;
126
 
127
+ // Check whether the connection currently being used was dropped with a connection error.
128
+ // If it was, the current connection is replaced with the next connection from the pool,
129
+ // which is then used to run the redis command again, and that connection is checked in
130
+ // the same way. If it was not dropped, the result is returned as a `Result`. Otherwise
131
+ // the same process repeats, and if all of the connections in the pool fail with a
132
+ // connection dropped error then a custom pool error is
133
+ // returned.
134
+ loop {
135
+ match result {
136
+ Err(error) => match error.is_connection_dropped() {
137
+ true => {
138
+ self.current_connection += 1;
139
+ if self.current_connection == self.pool_size {
140
+ return Err(Report::new(
141
+ PoolError::PoolExhaustionWithConnectionDropError,
142
+ ));
143
+ }
144
+ result = self.connection_pool[self.current_connection as usize]
145
+ .set_ex(hashed_url_string, json_results, 60)
146
+ .await;
147
+ continue;
148
+ }
149
+ false => return Err(Report::new(PoolError::RedisError(error))),
150
+ },
151
+ Ok(_) => return Ok(()),
152
+ }
153
+ }
154
  }
155
  }
src/cache/error.rs ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! This module provides the error enum to handle the different errors that can occur while requesting data from
2
+ //! the redis server using an async connection pool.
3
+ use std::fmt;
4
+
5
+ use redis::RedisError;
6
+
7
+ /// A custom error type used for handling redis async pool associated errors.
8
+ ///
9
+ /// This enum provides variants for two different categories of errors:
10
+ /// * `RedisError` - This variant handles all errors related to `RedisError`,
11
+ /// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
12
+ /// which occurs when all the connections in the connection pool return a connection
13
+ /// dropped redis error.
14
+ #[derive(Debug)]
15
+ pub enum PoolError {
16
+ RedisError(RedisError),
17
+ PoolExhaustionWithConnectionDropError,
18
+ }
19
+
20
+ impl fmt::Display for PoolError {
21
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22
+ match self {
23
+ PoolError::RedisError(redis_error) => {
24
+ if let Some(detail) = redis_error.detail() {
25
+ write!(f, "{}", detail)
26
+ } else {
27
+ write!(f, "")
28
+ }
29
+ }
30
+ PoolError::PoolExhaustionWithConnectionDropError => {
31
+ write!(
32
+ f,
33
+ "Error all connections from the pool dropped with connection error"
34
+ )
35
+ }
36
+ }
37
+ }
38
+ }
39
+
40
+ impl error_stack::Context for PoolError {}
src/cache/mod.rs CHANGED
@@ -1 +1,2 @@
1
  pub mod cacher;
 
 
1
  pub mod cacher;
2
+ pub mod error;
src/config/parser.rs CHANGED
@@ -5,7 +5,7 @@ use crate::handler::paths::{file_path, FileType};
5
 
6
  use super::parser_models::{AggregatorConfig, RateLimiter, Style};
7
  use log::LevelFilter;
8
- use rlua::Lua;
9
  use std::{collections::HashMap, fs, thread::available_parallelism};
10
 
11
  /// A named struct which stores the parsed config file options.
@@ -53,30 +53,31 @@ impl Config {
53
  /// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
54
  /// Config struct with all the parsed config options from the parsed config file.
55
  pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
56
- Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
57
- let globals = context.globals();
58
 
59
- context
60
- .load(&fs::read_to_string(file_path(FileType::Config)?)?)
61
- .exec()?;
62
 
63
- let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
64
 
65
- let debug: bool = globals.get::<_, bool>("debug")?;
66
- let logging:bool= globals.get::<_, bool>("logging")?;
67
 
68
- if !logging_initialized {
69
- set_logging_level(debug, logging);
70
- }
71
 
72
- let threads: u8 = if parsed_threads == 0 {
73
- let total_num_of_threads: usize = available_parallelism()?.get() / 2;
74
- log::error!("Config Error: The value of `threads` option should be a non zero positive integer");
75
- log::error!("Falling back to using {} threads", total_num_of_threads);
76
- total_num_of_threads as u8
77
- } else {
78
- parsed_threads
79
- };
 
 
80
 
81
  let rate_limter = globals.get::<_,HashMap<String, u8>>("rate_limiter")?;
82
 
 
5
 
6
  use super::parser_models::{AggregatorConfig, RateLimiter, Style};
7
  use log::LevelFilter;
8
+ use mlua::Lua;
9
  use std::{collections::HashMap, fs, thread::available_parallelism};
10
 
11
  /// A named struct which stores the parsed config file options.
 
53
  /// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
54
  /// Config struct with all the parsed config options from the parsed config file.
55
  pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
56
+ let lua = Lua::new();
57
+ let globals = lua.globals();
58
 
59
+ lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
60
+ .exec()?;
 
61
 
62
+ let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
63
 
64
+ let debug: bool = globals.get::<_, bool>("debug")?;
65
+ let logging: bool = globals.get::<_, bool>("logging")?;
66
 
67
+ if !logging_initialized {
68
+ set_logging_level(debug, logging);
69
+ }
70
 
71
+ let threads: u8 = if parsed_threads == 0 {
72
+ let total_num_of_threads: usize = available_parallelism()?.get() / 2;
73
+ log::error!(
74
+ "Config Error: The value of `threads` option should be a non zero positive integer"
75
+ );
76
+ log::error!("Falling back to using {} threads", total_num_of_threads);
77
+ total_num_of_threads as u8
78
+ } else {
79
+ parsed_threads
80
+ };
81
 
82
  let rate_limter = globals.get::<_,HashMap<String, u8>>("rate_limiter")?;
83
 
src/config/parser_models.rs CHANGED
@@ -18,7 +18,7 @@ use serde::{Deserialize, Serialize};
18
  /// * `theme` - It stores the parsed theme option used to set a theme for the website.
19
  /// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
20
  /// theme being used.
21
- #[derive(Serialize, Deserialize, Clone)]
22
  pub struct Style {
23
  pub theme: String,
24
  pub colorscheme: String,
 
18
  /// * `theme` - It stores the parsed theme option used to set a theme for the website.
19
  /// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
20
  /// theme being used.
21
+ #[derive(Serialize, Deserialize, Clone, Default)]
22
  pub struct Style {
23
  pub theme: String,
24
  pub colorscheme: String,
src/engines/duckduckgo.rs CHANGED
@@ -4,14 +4,14 @@
4
 
5
  use std::collections::HashMap;
6
 
7
- use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
8
  use scraper::{Html, Selector};
9
 
10
  use crate::results::aggregation_models::SearchResult;
11
 
12
  use super::engine_models::{EngineError, SearchEngine};
13
 
14
- use error_stack::{IntoReport, Report, Result, ResultExt};
15
 
16
  /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
17
  /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
39
  /// or HeaderMap fails to initialize.
40
  async fn results(
41
  &self,
42
- query: String,
43
  page: u32,
44
- user_agent: String,
45
  request_timeout: u8,
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
@@ -61,38 +61,19 @@ impl SearchEngine for DuckDuckGo {
61
  };
62
 
63
  // initializing HeaderMap and adding appropriate headers.
64
- let mut header_map = HeaderMap::new();
65
- header_map.insert(
66
- USER_AGENT,
67
- user_agent
68
- .parse()
69
- .into_report()
70
- .change_context(EngineError::UnexpectedError)?,
71
- );
72
- header_map.insert(
73
- REFERER,
74
- "https://google.com/"
75
- .parse()
76
- .into_report()
77
- .change_context(EngineError::UnexpectedError)?,
78
- );
79
- header_map.insert(
80
- CONTENT_TYPE,
81
- "application/x-www-form-urlencoded"
82
- .parse()
83
- .into_report()
84
- .change_context(EngineError::UnexpectedError)?,
85
- );
86
- header_map.insert(
87
- COOKIE,
88
- "kl=wt-wt"
89
- .parse()
90
- .into_report()
91
- .change_context(EngineError::UnexpectedError)?,
92
- );
93
 
94
  let document: Html = Html::parse_document(
95
- &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
96
  );
97
 
98
  let no_result: Selector = Selector::parse(".no-results")
@@ -126,8 +107,7 @@ impl SearchEngine for DuckDuckGo {
126
  .next()
127
  .unwrap()
128
  .inner_html()
129
- .trim()
130
- .to_string(),
131
  format!(
132
  "https://{}",
133
  result
@@ -136,15 +116,15 @@ impl SearchEngine for DuckDuckGo {
136
  .unwrap()
137
  .inner_html()
138
  .trim()
139
- ),
 
140
  result
141
  .select(&result_desc)
142
  .next()
143
  .unwrap()
144
  .inner_html()
145
- .trim()
146
- .to_string(),
147
- vec!["duckduckgo".to_string()],
148
  )
149
  })
150
  .map(|search_result| (search_result.url.clone(), search_result))
 
4
 
5
  use std::collections::HashMap;
6
 
7
+ use reqwest::header::HeaderMap;
8
  use scraper::{Html, Selector};
9
 
10
  use crate::results::aggregation_models::SearchResult;
11
 
12
  use super::engine_models::{EngineError, SearchEngine};
13
 
14
+ use error_stack::{Report, Result, ResultExt};
15
 
16
  /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
17
  /// reduce code duplication as well as allows to create vector of different search engines easily.
 
39
  /// or HeaderMap fails to initialize.
40
  async fn results(
41
  &self,
42
+ query: &str,
43
  page: u32,
44
+ user_agent: &str,
45
  request_timeout: u8,
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
 
61
  };
62
 
63
  // initializing HeaderMap and adding appropriate headers.
64
+ let header_map = HeaderMap::try_from(&HashMap::from([
65
+ ("USER_AGENT".to_string(), user_agent.to_string()),
66
+ ("REFERER".to_string(), "https://google.com/".to_string()),
67
+ (
68
+ "CONTENT_TYPE".to_string(),
69
+ "application/x-www-form-urlencoded".to_string(),
70
+ ),
71
+ ("COOKIE".to_string(), "kl=wt-wt".to_string()),
72
+ ]))
73
+ .change_context(EngineError::UnexpectedError)?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  let document: Html = Html::parse_document(
76
+ &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
77
  );
78
 
79
  let no_result: Selector = Selector::parse(".no-results")
 
107
  .next()
108
  .unwrap()
109
  .inner_html()
110
+ .trim(),
 
111
  format!(
112
  "https://{}",
113
  result
 
116
  .unwrap()
117
  .inner_html()
118
  .trim()
119
+ )
120
+ .as_str(),
121
  result
122
  .select(&result_desc)
123
  .next()
124
  .unwrap()
125
  .inner_html()
126
+ .trim(),
127
+ &["duckduckgo"],
 
128
  )
129
  })
130
  .map(|search_result| (search_result.url.clone(), search_result))
src/engines/engine_models.rs CHANGED
@@ -2,7 +2,7 @@
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
  use crate::results::aggregation_models::SearchResult;
5
- use error_stack::{IntoReport, Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used for handling engine associated errors.
@@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
48
  pub trait SearchEngine: Sync + Send {
49
  async fn fetch_html_from_upstream(
50
  &self,
51
- url: String,
52
  header_map: reqwest::header::HeaderMap,
53
  request_timeout: u8,
54
  ) -> Result<String, EngineError> {
@@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
59
  .headers(header_map) // add spoofed headers to emulate human behavior
60
  .send()
61
  .await
62
- .into_report()
63
  .change_context(EngineError::RequestError)?
64
  .text()
65
  .await
66
- .into_report()
67
  .change_context(EngineError::RequestError)?)
68
  }
69
 
70
  async fn results(
71
  &self,
72
- query: String,
73
  page: u32,
74
- user_agent: String,
75
  request_timeout: u8,
76
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
77
  }
 
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
  use crate::results::aggregation_models::SearchResult;
5
+ use error_stack::{Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used for handle engine associated errors.
 
48
  pub trait SearchEngine: Sync + Send {
49
  async fn fetch_html_from_upstream(
50
  &self,
51
+ url: &str,
52
  header_map: reqwest::header::HeaderMap,
53
  request_timeout: u8,
54
  ) -> Result<String, EngineError> {
 
59
  .headers(header_map) // add spoofed headers to emulate human behavior
60
  .send()
61
  .await
 
62
  .change_context(EngineError::RequestError)?
63
  .text()
64
  .await
 
65
  .change_context(EngineError::RequestError)?)
66
  }
67
 
68
  async fn results(
69
  &self,
70
+ query: &str,
71
  page: u32,
72
+ user_agent: &str,
73
  request_timeout: u8,
74
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
75
  }
src/engines/searx.rs CHANGED
@@ -2,14 +2,14 @@
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
- use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
6
  use scraper::{Html, Selector};
7
  use std::collections::HashMap;
8
 
9
  use crate::results::aggregation_models::SearchResult;
10
 
11
  use super::engine_models::{EngineError, SearchEngine};
12
- use error_stack::{IntoReport, Report, Result, ResultExt};
13
 
14
  /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
15
  /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -38,9 +38,9 @@ impl SearchEngine for Searx {
38
 
39
  async fn results(
40
  &self,
41
- query: String,
42
  page: u32,
43
- user_agent: String,
44
  request_timeout: u8,
45
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
46
  // Page number can be missing or empty string and so appropriate handling is required
@@ -51,32 +51,16 @@ impl SearchEngine for Searx {
51
  };
52
 
53
  // initializing headers and adding appropriate headers.
54
- let mut header_map = HeaderMap::new();
55
- header_map.insert(
56
- USER_AGENT,
57
- user_agent
58
- .parse()
59
- .into_report()
60
- .change_context(EngineError::UnexpectedError)?,
61
- );
62
- header_map.insert(
63
- REFERER,
64
- "https://google.com/"
65
- .parse()
66
- .into_report()
67
- .change_context(EngineError::UnexpectedError)?,
68
- );
69
- header_map.insert(
70
- CONTENT_TYPE,
71
- "application/x-www-form-urlencoded"
72
- .parse()
73
- .into_report()
74
- .change_context(EngineError::UnexpectedError)?,
75
- );
76
- header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
77
 
78
  let document: Html = Html::parse_document(
79
- &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
80
  );
81
 
82
  let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
@@ -117,24 +101,21 @@ impl SearchEngine for Searx {
117
  .next()
118
  .unwrap()
119
  .inner_html()
120
- .trim()
121
- .to_string(),
122
  result
123
  .select(&result_url)
124
  .next()
125
  .unwrap()
126
  .value()
127
  .attr("href")
128
- .unwrap()
129
- .to_string(),
130
  result
131
  .select(&result_desc)
132
  .next()
133
  .unwrap()
134
  .inner_html()
135
- .trim()
136
- .to_string(),
137
- vec!["searx".to_string()],
138
  )
139
  })
140
  .map(|search_result| (search_result.url.clone(), search_result))
 
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
+ use reqwest::header::HeaderMap;
6
  use scraper::{Html, Selector};
7
  use std::collections::HashMap;
8
 
9
  use crate::results::aggregation_models::SearchResult;
10
 
11
  use super::engine_models::{EngineError, SearchEngine};
12
+ use error_stack::{Report, Result, ResultExt};
13
 
14
  /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
15
  /// reduce code duplication as well as allows to create vector of different search engines easily.
 
38
 
39
  async fn results(
40
  &self,
41
+ query: &str,
42
  page: u32,
43
+ user_agent: &str,
44
  request_timeout: u8,
45
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
46
  // Page number can be missing or empty string and so appropriate handling is required
 
51
  };
52
 
53
  // initializing headers and adding appropriate headers.
54
+ let header_map = HeaderMap::try_from(&HashMap::from([
55
+ ("USER_AGENT".to_string(), user_agent.to_string()),
56
+ ("REFERER".to_string(), "https://google.com/".to_string()),
57
+ ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
58
+ ("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
59
+ ]))
60
+ .change_context(EngineError::UnexpectedError)?;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  let document: Html = Html::parse_document(
63
+ &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
64
  );
65
 
66
  let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
 
101
  .next()
102
  .unwrap()
103
  .inner_html()
104
+ .trim(),
 
105
  result
106
  .select(&result_url)
107
  .next()
108
  .unwrap()
109
  .value()
110
  .attr("href")
111
+ .unwrap(),
 
112
  result
113
  .select(&result_desc)
114
  .next()
115
  .unwrap()
116
  .inner_html()
117
+ .trim(),
118
+ &["searx"],
 
119
  )
120
  })
121
  .map(|search_result| (search_result.url.clone(), search_result))
src/handler/paths.rs CHANGED
@@ -4,6 +4,7 @@
4
  use std::collections::HashMap;
5
  use std::io::Error;
6
  use std::path::Path;
 
7
 
8
  // ------- Constants --------
9
  static PUBLIC_DIRECTORY_NAME: &str = "public";
@@ -20,57 +21,7 @@ pub enum FileType {
20
  Theme,
21
  }
22
 
23
- static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, Vec<String>>> =
24
- once_cell::sync::Lazy::new(|| {
25
- HashMap::from([
26
- (
27
- FileType::Config,
28
- vec![
29
- format!(
30
- "{}/.config/{}/{}",
31
- std::env::var("HOME").unwrap(),
32
- COMMON_DIRECTORY_NAME,
33
- CONFIG_FILE_NAME
34
- ),
35
- format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
36
- format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
37
- ],
38
- ),
39
- (
40
- FileType::Theme,
41
- vec![
42
- format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
43
- format!("./{}/", PUBLIC_DIRECTORY_NAME),
44
- ],
45
- ),
46
- (
47
- FileType::AllowList,
48
- vec![
49
- format!(
50
- "{}/.config/{}/{}",
51
- std::env::var("HOME").unwrap(),
52
- COMMON_DIRECTORY_NAME,
53
- ALLOWLIST_FILE_NAME
54
- ),
55
- format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
56
- format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
57
- ],
58
- ),
59
- (
60
- FileType::BlockList,
61
- vec![
62
- format!(
63
- "{}/.config/{}/{}",
64
- std::env::var("HOME").unwrap(),
65
- COMMON_DIRECTORY_NAME,
66
- BLOCKLIST_FILE_NAME
67
- ),
68
- format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
69
- format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
70
- ],
71
- ),
72
- ])
73
- });
74
 
75
  /// A helper function which returns an appropriate config file path checking if the config
76
  /// file exists on that path.
@@ -95,11 +46,64 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, V
95
  /// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
96
  /// 2. Under project folder ( or codebase in other words) if it is not present
97
  /// here then it returns an error as mentioned above.
98
- pub fn file_path(file_type: FileType) -> Result<String, Error> {
99
- let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  for (idx, _) in file_path.iter().enumerate() {
101
  if Path::new(file_path[idx].as_str()).exists() {
102
- return Ok(file_path[idx].clone());
103
  }
104
  }
105
 
 
4
  use std::collections::HashMap;
5
  use std::io::Error;
6
  use std::path::Path;
7
+ use std::sync::OnceLock;
8
 
9
  // ------- Constants --------
10
  static PUBLIC_DIRECTORY_NAME: &str = "public";
 
21
  Theme,
22
  }
23
 
24
+ static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  /// A helper function which returns an appropriate config file path checking if the config
27
  /// file exists on that path.
 
46
  /// 1. `/opt/websurfx` if it is not present here then it falls back to the next one (2)
47
  /// 2. Under project folder ( or codebase in other words) if it is not present
48
  /// here then it returns an error as mentioned above.
49
+ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
50
+ let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
51
+ .get_or_init(|| {
52
+ HashMap::from([
53
+ (
54
+ FileType::Config,
55
+ vec![
56
+ format!(
57
+ "{}/.config/{}/{}",
58
+ std::env::var("HOME").unwrap(),
59
+ COMMON_DIRECTORY_NAME,
60
+ CONFIG_FILE_NAME
61
+ ),
62
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
63
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
64
+ ],
65
+ ),
66
+ (
67
+ FileType::Theme,
68
+ vec![
69
+ format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
70
+ format!("./{}/", PUBLIC_DIRECTORY_NAME),
71
+ ],
72
+ ),
73
+ (
74
+ FileType::AllowList,
75
+ vec![
76
+ format!(
77
+ "{}/.config/{}/{}",
78
+ std::env::var("HOME").unwrap(),
79
+ COMMON_DIRECTORY_NAME,
80
+ ALLOWLIST_FILE_NAME
81
+ ),
82
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
83
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
84
+ ],
85
+ ),
86
+ (
87
+ FileType::BlockList,
88
+ vec![
89
+ format!(
90
+ "{}/.config/{}/{}",
91
+ std::env::var("HOME").unwrap(),
92
+ COMMON_DIRECTORY_NAME,
93
+ BLOCKLIST_FILE_NAME
94
+ ),
95
+ format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
96
+ format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
97
+ ],
98
+ ),
99
+ ])
100
+ })
101
+ .get(&file_type)
102
+ .unwrap();
103
+
104
  for (idx, _) in file_path.iter().enumerate() {
105
  if Path::new(file_path[idx].as_str()).exists() {
106
+ return Ok(std::mem::take(&mut &*file_path[idx]));
107
  }
108
  }
109
 
src/lib.rs CHANGED
@@ -43,7 +43,7 @@ use handler::paths::{file_path, FileType};
43
  pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
44
  let mut handlebars: Handlebars = Handlebars::new();
45
 
46
- let public_folder_path: String = file_path(FileType::Theme)?;
47
 
48
  handlebars
49
  .register_templates_directory(".html", format!("{}/templates", public_folder_path))
 
43
  pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
44
  let mut handlebars: Handlebars = Handlebars::new();
45
 
46
+ let public_folder_path: &str = file_path(FileType::Theme)?;
47
 
48
  handlebars
49
  .register_templates_directory(".html", format!("{}/templates", public_folder_path))
src/results/aggregation_models.rs CHANGED
@@ -2,6 +2,7 @@
2
  //! data scraped from the upstream search engines.
3
 
4
  use serde::{Deserialize, Serialize};
 
5
 
6
  use crate::{config::parser_models::Style, engines::engine_models::EngineError};
7
 
@@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
16
  /// (href url in html in simple words).
17
  /// * `description` - The description of the search result.
18
  /// * `engine` - The names of the upstream engines from which these results were provided.
19
- #[derive(Clone, Serialize, Deserialize)]
20
  #[serde(rename_all = "camelCase")]
21
  pub struct SearchResult {
22
  pub title: String,
23
  pub url: String,
24
  pub description: String,
25
- pub engine: Vec<String>,
26
  }
27
 
28
  impl SearchResult {
@@ -35,12 +36,12 @@ impl SearchResult {
35
  /// (href url in html in simple words).
36
  /// * `description` - The description of the search result.
37
  /// * `engine` - The names of the upstream engines from which these results were provided.
38
- pub fn new(title: String, url: String, description: String, engine: Vec<String>) -> Self {
39
  SearchResult {
40
- title,
41
- url,
42
- description,
43
- engine,
44
  }
45
  }
46
 
@@ -49,8 +50,8 @@ impl SearchResult {
49
  /// # Arguments
50
  ///
51
  /// * `engine` - Takes an engine name provided as a String.
52
- pub fn add_engines(&mut self, engine: String) {
53
- self.engine.push(engine)
54
  }
55
 
56
  /// A function which returns the engine name stored from the struct as a string.
@@ -58,13 +59,12 @@ impl SearchResult {
58
  /// # Returns
59
  ///
60
  /// An engine name stored as a string from the struct.
61
- pub fn engine(self) -> String {
62
- self.engine.get(0).unwrap().to_string()
63
  }
64
  }
65
 
66
- ///
67
- #[derive(Serialize, Deserialize)]
68
  pub struct EngineErrorInfo {
69
  pub error: String,
70
  pub engine: String,
@@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
72
  }
73
 
74
  impl EngineErrorInfo {
75
- pub fn new(error: &EngineError, engine: String) -> Self {
76
  Self {
77
  error: match error {
78
- EngineError::RequestError => String::from("RequestError"),
79
- EngineError::EmptyResultSet => String::from("EmptyResultSet"),
80
- EngineError::UnexpectedError => String::from("UnexpectedError"),
81
  },
82
- engine,
83
  severity_color: match error {
84
- EngineError::RequestError => String::from("green"),
85
- EngineError::EmptyResultSet => String::from("blue"),
86
- EngineError::UnexpectedError => String::from("red"),
87
  },
88
  }
89
  }
@@ -108,7 +108,7 @@ pub struct SearchResults {
108
  pub results: Vec<SearchResult>,
109
  pub page_query: String,
110
  pub style: Style,
111
- pub engine_errors_info: Vec<EngineErrorInfo>,
112
  }
113
 
114
  impl SearchResults {
@@ -124,19 +124,19 @@ impl SearchResults {
124
  /// given search query.
125
  pub fn new(
126
  results: Vec<SearchResult>,
127
- page_query: String,
128
- engine_errors_info: Vec<EngineErrorInfo>,
129
  ) -> Self {
130
- SearchResults {
131
  results,
132
- page_query,
133
- style: Style::new("".to_string(), "".to_string()),
134
- engine_errors_info,
135
  }
136
  }
137
 
138
  /// A setter function to add website style to the return search results.
139
- pub fn add_style(&mut self, style: Style) {
140
- self.style = style;
141
  }
142
  }
 
2
  //! data scraped from the upstream search engines.
3
 
4
  use serde::{Deserialize, Serialize};
5
+ use smallvec::SmallVec;
6
 
7
  use crate::{config::parser_models::Style, engines::engine_models::EngineError};
8
 
 
17
  /// (href url in html in simple words).
18
  /// * `description` - The description of the search result.
19
  /// * `engine` - The names of the upstream engines from which these results were provided.
20
+ #[derive(Clone, Serialize, Deserialize, Debug)]
21
  #[serde(rename_all = "camelCase")]
22
  pub struct SearchResult {
23
  pub title: String,
24
  pub url: String,
25
  pub description: String,
26
+ pub engine: SmallVec<[String; 0]>,
27
  }
28
 
29
  impl SearchResult {
 
36
  /// (href url in html in simple words).
37
  /// * `description` - The description of the search result.
38
  /// * `engine` - The names of the upstream engines from which these results were provided.
39
+ pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
40
  SearchResult {
41
+ title: title.to_owned(),
42
+ url: url.to_owned(),
43
+ description: description.to_owned(),
44
+ engine: engine.iter().map(|name| name.to_string()).collect(),
45
  }
46
  }
47
 
 
50
  /// # Arguments
51
  ///
52
  /// * `engine` - Takes an engine name provided as a string slice (`&str`).
53
+ pub fn add_engines(&mut self, engine: &str) {
54
+ self.engine.push(engine.to_owned())
55
  }
56
 
57
  /// A function which returns the engine name stored from the struct as a string.
 
59
  /// # Returns
60
  ///
61
  /// An engine name stored as a string from the struct.
62
+ pub fn engine(&mut self) -> String {
63
+ std::mem::take(&mut self.engine[0])
64
  }
65
  }
66
 
67
+ #[derive(Serialize, Deserialize, Clone)]
 
68
  pub struct EngineErrorInfo {
69
  pub error: String,
70
  pub engine: String,
 
72
  }
73
 
74
  impl EngineErrorInfo {
75
+ pub fn new(error: &EngineError, engine: &str) -> Self {
76
  Self {
77
  error: match error {
78
+ EngineError::RequestError => "RequestError".to_owned(),
79
+ EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
80
+ EngineError::UnexpectedError => "UnexpectedError".to_owned(),
81
  },
82
+ engine: engine.to_owned(),
83
  severity_color: match error {
84
+ EngineError::RequestError => "green".to_owned(),
85
+ EngineError::EmptyResultSet => "blue".to_owned(),
86
+ EngineError::UnexpectedError => "red".to_owned(),
87
  },
88
  }
89
  }
 
108
  pub results: Vec<SearchResult>,
109
  pub page_query: String,
110
  pub style: Style,
111
+ pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
112
  }
113
 
114
  impl SearchResults {
 
124
  /// given search query.
125
  pub fn new(
126
  results: Vec<SearchResult>,
127
+ page_query: &str,
128
+ engine_errors_info: &[EngineErrorInfo],
129
  ) -> Self {
130
+ Self {
131
  results,
132
+ page_query: page_query.to_owned(),
133
+ style: Style::default(),
134
+ engine_errors_info: SmallVec::from(engine_errors_info),
135
  }
136
  }
137
 
138
  /// A setter function to add website style to the return search results.
139
+ pub fn add_style(&mut self, style: &Style) {
140
+ self.style = style.to_owned();
141
  }
142
  }
src/results/aggregator.rs CHANGED
@@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
64
  /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
65
  /// containing appropriate values.
66
  pub async fn aggregate(
67
- query: String,
68
  page: u32,
69
  random_delay: bool,
70
  debug: bool,
71
- upstream_search_engines: Vec<EngineHandler>,
72
  request_timeout: u8,
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
- let user_agent: String = random_user_agent();
75
 
76
  // Add a random delay before making the request.
77
  if random_delay || !debug {
@@ -80,19 +80,18 @@ pub async fn aggregate(
80
  tokio::time::sleep(Duration::from_secs(delay_secs)).await;
81
  }
82
 
83
- let mut names: Vec<&str> = vec![];
84
 
85
  // create tasks for upstream result fetching
86
  let mut tasks: FutureVec = FutureVec::new();
87
 
88
  for engine_handler in upstream_search_engines {
89
- let (name, search_engine) = engine_handler.into_name_engine();
90
  names.push(name);
91
- let query: String = query.clone();
92
- let user_agent: String = user_agent.clone();
93
  tasks.push(tokio::spawn(async move {
94
  search_engine
95
- .results(query, page, user_agent.clone(), request_timeout)
96
  .await
97
  }));
98
  }
@@ -110,7 +109,7 @@ pub async fn aggregate(
110
  let mut result_map: HashMap<String, SearchResult> = HashMap::new();
111
  let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
112
 
113
- let mut handle_error = |error: Report<EngineError>, engine_name: String| {
114
  log::error!("Engine Error: {:?}", error);
115
  engine_errors_info.push(EngineErrorInfo::new(
116
  error.downcast_ref::<EngineError>().unwrap(),
@@ -120,7 +119,7 @@ pub async fn aggregate(
120
 
121
  for _ in 0..responses.len() {
122
  let response = responses.pop().unwrap();
123
- let engine = names.pop().unwrap().to_string();
124
 
125
  if result_map.is_empty() {
126
  match response {
@@ -128,7 +127,7 @@ pub async fn aggregate(
128
  result_map = results.clone();
129
  }
130
  Err(error) => {
131
- handle_error(error, engine);
132
  }
133
  }
134
  continue;
@@ -140,13 +139,13 @@ pub async fn aggregate(
140
  result_map
141
  .entry(key)
142
  .and_modify(|result| {
143
- result.add_engines(engine.clone());
144
  })
145
  .or_insert_with(|| -> SearchResult { value });
146
  });
147
  }
148
  Err(error) => {
149
- handle_error(error, engine);
150
  }
151
  }
152
  }
@@ -155,24 +154,20 @@ pub async fn aggregate(
155
  filter_with_lists(
156
  &mut result_map,
157
  &mut blacklist_map,
158
- &file_path(FileType::BlockList)?,
159
  )?;
160
 
161
  filter_with_lists(
162
  &mut blacklist_map,
163
  &mut result_map,
164
- &file_path(FileType::AllowList)?,
165
  )?;
166
 
167
  drop(blacklist_map);
168
 
169
  let results: Vec<SearchResult> = result_map.into_values().collect();
170
 
171
- Ok(SearchResults::new(
172
- results,
173
- query.to_string(),
174
- engine_errors_info,
175
- ))
176
  }
177
 
178
  /// Filters a map of search results using a list of regex patterns.
@@ -203,7 +198,10 @@ pub fn filter_with_lists(
203
  || re.is_match(&search_result.description.to_lowercase())
204
  {
205
  // If the search result matches the regex pattern, move it from the original map to the resultant map
206
- resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
 
 
 
207
  }
208
  }
209
  }
@@ -214,6 +212,7 @@ pub fn filter_with_lists(
214
  #[cfg(test)]
215
  mod tests {
216
  use super::*;
 
217
  use std::collections::HashMap;
218
  use std::io::Write;
219
  use tempfile::NamedTempFile;
@@ -223,22 +222,22 @@ mod tests {
223
  // Create a map of search results to filter
224
  let mut map_to_be_filtered = HashMap::new();
225
  map_to_be_filtered.insert(
226
- "https://www.example.com".to_string(),
227
  SearchResult {
228
- title: "Example Domain".to_string(),
229
- url: "https://www.example.com".to_string(),
230
  description: "This domain is for use in illustrative examples in documents."
231
- .to_string(),
232
- engine: vec!["Google".to_string(), "Bing".to_string()],
233
  },
234
  );
235
  map_to_be_filtered.insert(
236
- "https://www.rust-lang.org/".to_string(),
237
  SearchResult {
238
- title: "Rust Programming Language".to_string(),
239
- url: "https://www.rust-lang.org/".to_string(),
240
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
241
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
242
  },
243
  );
244
 
@@ -267,22 +266,22 @@ mod tests {
267
  fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
268
  let mut map_to_be_filtered = HashMap::new();
269
  map_to_be_filtered.insert(
270
- "https://www.example.com".to_string(),
271
  SearchResult {
272
- title: "Example Domain".to_string(),
273
- url: "https://www.example.com".to_string(),
274
  description: "This domain is for use in illustrative examples in documents."
275
- .to_string(),
276
- engine: vec!["Google".to_string(), "Bing".to_string()],
277
  },
278
  );
279
  map_to_be_filtered.insert(
280
- "https://www.rust-lang.org/".to_string(),
281
  SearchResult {
282
- title: "Rust Programming Language".to_string(),
283
- url: "https://www.rust-lang.org/".to_string(),
284
- description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
285
- engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
286
  },
287
  );
288
 
@@ -327,13 +326,13 @@ mod tests {
327
  fn test_filter_with_lists_invalid_regex() {
328
  let mut map_to_be_filtered = HashMap::new();
329
  map_to_be_filtered.insert(
330
- "https://www.example.com".to_string(),
331
  SearchResult {
332
- title: "Example Domain".to_string(),
333
- url: "https://www.example.com".to_string(),
334
  description: "This domain is for use in illustrative examples in documents."
335
- .to_string(),
336
- engine: vec!["Google".to_string(), "Bing".to_string()],
337
  },
338
  );
339
 
 
64
  /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults` struct
65
  /// containing appropriate values.
66
  pub async fn aggregate(
67
+ query: &str,
68
  page: u32,
69
  random_delay: bool,
70
  debug: bool,
71
+ upstream_search_engines: &[EngineHandler],
72
  request_timeout: u8,
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
+ let user_agent: &str = random_user_agent();
75
 
76
  // Add a random delay before making the request.
77
  if random_delay || !debug {
 
80
  tokio::time::sleep(Duration::from_secs(delay_secs)).await;
81
  }
82
 
83
+ let mut names: Vec<&str> = Vec::with_capacity(0);
84
 
85
  // create tasks for upstream result fetching
86
  let mut tasks: FutureVec = FutureVec::new();
87
 
88
  for engine_handler in upstream_search_engines {
89
+ let (name, search_engine) = engine_handler.to_owned().into_name_engine();
90
  names.push(name);
91
+ let query: String = query.to_owned();
 
92
  tasks.push(tokio::spawn(async move {
93
  search_engine
94
+ .results(&query, page, user_agent, request_timeout)
95
  .await
96
  }));
97
  }
 
109
  let mut result_map: HashMap<String, SearchResult> = HashMap::new();
110
  let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
111
 
112
+ let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
113
  log::error!("Engine Error: {:?}", error);
114
  engine_errors_info.push(EngineErrorInfo::new(
115
  error.downcast_ref::<EngineError>().unwrap(),
 
119
 
120
  for _ in 0..responses.len() {
121
  let response = responses.pop().unwrap();
122
+ let engine = names.pop().unwrap();
123
 
124
  if result_map.is_empty() {
125
  match response {
 
127
  result_map = results.clone();
128
  }
129
  Err(error) => {
130
+ handle_error(&error, engine);
131
  }
132
  }
133
  continue;
 
139
  result_map
140
  .entry(key)
141
  .and_modify(|result| {
142
+ result.add_engines(engine);
143
  })
144
  .or_insert_with(|| -> SearchResult { value });
145
  });
146
  }
147
  Err(error) => {
148
+ handle_error(&error, engine);
149
  }
150
  }
151
  }
 
154
  filter_with_lists(
155
  &mut result_map,
156
  &mut blacklist_map,
157
+ file_path(FileType::BlockList)?,
158
  )?;
159
 
160
  filter_with_lists(
161
  &mut blacklist_map,
162
  &mut result_map,
163
+ file_path(FileType::AllowList)?,
164
  )?;
165
 
166
  drop(blacklist_map);
167
 
168
  let results: Vec<SearchResult> = result_map.into_values().collect();
169
 
170
+ Ok(SearchResults::new(results, query, &engine_errors_info))
 
 
 
 
171
  }
172
 
173
  /// Filters a map of search results using a list of regex patterns.
 
198
  || re.is_match(&search_result.description.to_lowercase())
199
  {
200
  // If the search result matches the regex pattern, move it from the original map to the resultant map
201
+ resultant_map.insert(
202
+ url.to_owned(),
203
+ map_to_be_filtered.remove(&url.to_owned()).unwrap(),
204
+ );
205
  }
206
  }
207
  }
 
212
  #[cfg(test)]
213
  mod tests {
214
  use super::*;
215
+ use smallvec::smallvec;
216
  use std::collections::HashMap;
217
  use std::io::Write;
218
  use tempfile::NamedTempFile;
 
222
  // Create a map of search results to filter
223
  let mut map_to_be_filtered = HashMap::new();
224
  map_to_be_filtered.insert(
225
+ "https://www.example.com".to_owned(),
226
  SearchResult {
227
+ title: "Example Domain".to_owned(),
228
+ url: "https://www.example.com".to_owned(),
229
  description: "This domain is for use in illustrative examples in documents."
230
+ .to_owned(),
231
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
232
  },
233
  );
234
  map_to_be_filtered.insert(
235
+ "https://www.rust-lang.org/".to_owned(),
236
  SearchResult {
237
+ title: "Rust Programming Language".to_owned(),
238
+ url: "https://www.rust-lang.org/".to_owned(),
239
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
240
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
241
  },
242
  );
243
 
 
266
  fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
267
  let mut map_to_be_filtered = HashMap::new();
268
  map_to_be_filtered.insert(
269
+ "https://www.example.com".to_owned(),
270
  SearchResult {
271
+ title: "Example Domain".to_owned(),
272
+ url: "https://www.example.com".to_owned(),
273
  description: "This domain is for use in illustrative examples in documents."
274
+ .to_owned(),
275
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
276
  },
277
  );
278
  map_to_be_filtered.insert(
279
+ "https://www.rust-lang.org/".to_owned(),
280
  SearchResult {
281
+ title: "Rust Programming Language".to_owned(),
282
+ url: "https://www.rust-lang.org/".to_owned(),
283
+ description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
284
+ engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
285
  },
286
  );
287
 
 
326
  fn test_filter_with_lists_invalid_regex() {
327
  let mut map_to_be_filtered = HashMap::new();
328
  map_to_be_filtered.insert(
329
+ "https://www.example.com".to_owned(),
330
  SearchResult {
331
+ title: "Example Domain".to_owned(),
332
+ url: "https://www.example.com".to_owned(),
333
  description: "This domain is for use in illustrative examples in documents."
334
+ .to_owned(),
335
+ engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
336
  },
337
  );
338
 
src/results/user_agent.rs CHANGED
@@ -1,28 +1,32 @@
1
  //! This module provides the functionality to generate random user agent string.
2
 
 
 
3
  use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
4
 
5
- static USER_AGENTS: once_cell::sync::Lazy<UserAgents> = once_cell::sync::Lazy::new(|| {
6
- UserAgentsBuilder::new()
7
- .cache(false)
8
- .dir("/tmp")
9
- .thread(1)
10
- .set_browsers(
11
- Browsers::new()
12
- .set_chrome()
13
- .set_safari()
14
- .set_edge()
15
- .set_firefox()
16
- .set_mozilla(),
17
- )
18
- .build()
19
- });
20
 
21
  /// A function to generate random user agent to improve privacy of the user.
22
  ///
23
  /// # Returns
24
  ///
25
  /// A randomly generated user agent string.
26
- pub fn random_user_agent() -> String {
27
- USER_AGENTS.random().to_string()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
 
1
  //! This module provides the functionality to generate random user agent string.
2
 
3
+ use std::sync::OnceLock;
4
+
5
  use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
6
 
7
+ static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  /// A function to generate random user agent to improve privacy of the user.
10
  ///
11
  /// # Returns
12
  ///
13
  /// A randomly generated user agent string.
14
+ pub fn random_user_agent() -> &'static str {
15
+ USER_AGENTS
16
+ .get_or_init(|| {
17
+ UserAgentsBuilder::new()
18
+ .cache(false)
19
+ .dir("/tmp")
20
+ .thread(1)
21
+ .set_browsers(
22
+ Browsers::new()
23
+ .set_chrome()
24
+ .set_safari()
25
+ .set_edge()
26
+ .set_firefox()
27
+ .set_mozilla(),
28
+ )
29
+ .build()
30
+ })
31
+ .random()
32
  }
src/server/routes.rs CHANGED
@@ -16,6 +16,10 @@ use handlebars::Handlebars;
16
  use serde::Deserialize;
17
  use tokio::join;
18
 
 
 
 
 
19
  /// A named struct which deserializes all the user provided search parameters and stores them.
20
  ///
21
  /// # Fields
@@ -62,10 +66,10 @@ pub async fn not_found(
62
  /// * `engines` - It stores the user selected upstream search engines selected from the UI.
63
  #[allow(dead_code)]
64
  #[derive(Deserialize)]
65
- struct Cookie {
66
- theme: String,
67
- colorscheme: String,
68
- engines: Vec<String>,
69
  }
70
 
71
  /// Handles the route of search page of the `websurfx` meta search engine website and it takes
@@ -111,9 +115,9 @@ pub async fn search(
111
  page - 1
112
  ),
113
  &config,
114
- query.to_string(),
115
  page - 1,
116
- req.clone(),
117
  ),
118
  results(
119
  format!(
@@ -121,9 +125,9 @@ pub async fn search(
121
  config.binding_ip, config.port, query, page
122
  ),
123
  &config,
124
- query.to_string(),
125
  page,
126
- req.clone(),
127
  ),
128
  results(
129
  format!(
@@ -134,9 +138,9 @@ pub async fn search(
134
  page + 1
135
  ),
136
  &config,
137
- query.to_string(),
138
  page + 1,
139
- req.clone(),
140
  )
141
  );
142
 
@@ -154,30 +158,35 @@ pub async fn search(
154
  async fn results(
155
  url: String,
156
  config: &Config,
157
- query: String,
158
  page: u32,
159
- req: HttpRequest,
160
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
161
- //Initialize redis cache connection struct
162
- let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
 
 
 
 
 
 
163
  // fetch the cached results json.
164
- let cached_results_json = redis_cache.cached_json(&url);
 
165
  // check if fetched cache results was indeed fetched or it was an error and if so
166
  // handle the data accordingly.
167
  match cached_results_json {
168
- Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
169
  Err(_) => {
170
  // check if the cookie value is empty or not if it is empty then use the
171
  // default selected upstream search engines from the config file otherwise
172
  // parse the non-empty cookie and grab the user selected engines from the
173
  // UI and use that.
174
- let mut results: crate::results::aggregation_models::SearchResults = match req
175
- .cookie("appCookie")
176
- {
177
  Some(cookie_value) => {
178
  let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
179
 
180
- let engines = cookie_value
181
  .engines
182
  .iter()
183
  .filter_map(|name| EngineHandler::new(name))
@@ -188,7 +197,7 @@ async fn results(
188
  page,
189
  config.aggregator.random_delay,
190
  config.debug,
191
- engines,
192
  config.request_timeout,
193
  )
194
  .await?
@@ -199,14 +208,18 @@ async fn results(
199
  page,
200
  config.aggregator.random_delay,
201
  config.debug,
202
- config.upstream_search_engines.clone(),
203
  config.request_timeout,
204
  )
205
  .await?
206
  }
207
  };
208
- results.add_style(config.style.clone());
209
- redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
 
 
 
 
210
  Ok(results)
211
  }
212
  }
 
16
  use serde::Deserialize;
17
  use tokio::join;
18
 
19
+ // ---- Constants ----
20
+ /// Initialize redis cache connection once and store it on the heap.
21
+ // NOTE: this must be a `static`, not a `const`. A `const` with interior mutability is
+ // re-instantiated at every use site, so each `get_or_init` call would operate on a fresh,
+ // empty cell and the connection pool would be rebuilt on every request.
+ static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
22
+
23
  /// A named struct which deserializes all the user provided search parameters and stores them.
24
  ///
25
  /// # Fields
 
66
  /// * `engines` - It stores the user selected upstream search engines selected from the UI.
67
  #[allow(dead_code)]
68
  #[derive(Deserialize)]
69
+ struct Cookie<'a> {
70
+ theme: &'a str,
71
+ colorscheme: &'a str,
72
+ engines: Vec<&'a str>,
73
  }
74
 
75
  /// Handles the route of search page of the `websurfx` meta search engine website and it takes
 
115
  page - 1
116
  ),
117
  &config,
118
+ query,
119
  page - 1,
120
+ &req,
121
  ),
122
  results(
123
  format!(
 
125
  config.binding_ip, config.port, query, page
126
  ),
127
  &config,
128
+ query,
129
  page,
130
+ &req,
131
  ),
132
  results(
133
  format!(
 
138
  page + 1
139
  ),
140
  &config,
141
+ query,
142
  page + 1,
143
+ &req,
144
  )
145
  );
146
 
 
158
  async fn results(
159
  url: String,
160
  config: &Config,
161
+ query: &str,
162
  page: u32,
163
+ req: &HttpRequest,
164
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
165
+ let redis_cache: RedisCache = REDIS_CACHE
166
+ .get_or_init(async {
167
+ // Initialize redis cache connection pool only once and store it in the heap.
168
+ RedisCache::new(&config.redis_url, 5).await.unwrap()
169
+ })
170
+ .await
171
+ .clone();
172
+
173
  // fetch the cached results json.
174
+ let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
175
+ redis_cache.clone().cached_json(&url).await;
176
  // check if fetched cache results was indeed fetched or it was an error and if so
177
  // handle the data accordingly.
178
  match cached_results_json {
179
+ Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
180
  Err(_) => {
181
  // check if the cookie value is empty or not if it is empty then use the
182
  // default selected upstream search engines from the config file otherwise
183
  // parse the non-empty cookie and grab the user selected engines from the
184
  // UI and use that.
185
+ let mut results: SearchResults = match req.cookie("appCookie") {
 
 
186
  Some(cookie_value) => {
187
  let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
188
 
189
+ let engines: Vec<EngineHandler> = cookie_value
190
  .engines
191
  .iter()
192
  .filter_map(|name| EngineHandler::new(name))
 
197
  page,
198
  config.aggregator.random_delay,
199
  config.debug,
200
+ &engines,
201
  config.request_timeout,
202
  )
203
  .await?
 
208
  page,
209
  config.aggregator.random_delay,
210
  config.debug,
211
+ &config.upstream_search_engines,
212
  config.request_timeout,
213
  )
214
  .await?
215
  }
216
  };
217
+
218
+ results.add_style(&config.style);
219
+ redis_cache
220
+ .clone()
221
+ .cache_results(&serde_json::to_string(&results)?, &url)
222
+ .await?;
223
  Ok(results)
224
  }
225
  }