alamin655 committed on
Commit
17fc24a
2 Parent(s): 2b85095 b1df4f1

Merge branch 'neon-mmd:rolling' into hf-rolling

Browse files
Cargo.lock CHANGED
@@ -136,6 +136,7 @@ checksum = "28f32d40287d3f402ae0028a9d54bef51af15c8769492826a69d28f81893151d"
136
  dependencies = [
137
  "futures-core",
138
  "tokio 1.36.0",
 
139
  ]
140
 
141
  [[package]]
@@ -150,8 +151,9 @@ dependencies = [
150
  "futures-core",
151
  "futures-util",
152
  "mio 0.8.11",
153
- "socket2",
154
  "tokio 1.36.0",
 
155
  "tracing",
156
  ]
157
 
@@ -211,7 +213,7 @@ dependencies = [
211
  "serde_json",
212
  "serde_urlencoded 0.7.1",
213
  "smallvec 1.13.1",
214
- "socket2",
215
  "time 0.3.34",
216
  "url 2.5.0",
217
  ]
@@ -243,6 +245,12 @@ version = "1.0.2"
243
  source = "registry+https://github.com/rust-lang/crates.io-index"
244
  checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
245
 
 
 
 
 
 
 
246
  [[package]]
247
  name = "aead"
248
  version = "0.5.2"
@@ -1667,7 +1675,7 @@ dependencies = [
1667
  "httpdate",
1668
  "itoa 1.0.10",
1669
  "pin-project-lite",
1670
- "socket2",
1671
  "tokio 1.36.0",
1672
  "tower-service",
1673
  "tracing",
@@ -1762,6 +1770,16 @@ dependencies = [
1762
  "generic-array",
1763
  ]
1764
 
 
 
 
 
 
 
 
 
 
 
1765
  [[package]]
1766
  name = "iovec"
1767
  version = "0.1.4"
@@ -1828,6 +1846,16 @@ dependencies = [
1828
  "winapi-build",
1829
  ]
1830
 
 
 
 
 
 
 
 
 
 
 
1831
  [[package]]
1832
  name = "language-tags"
1833
  version = "0.3.2"
@@ -1846,6 +1874,26 @@ version = "0.2.153"
1846
  source = "registry+https://github.com/rust-lang/crates.io-index"
1847
  checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
1848
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1849
  [[package]]
1850
  name = "libmimalloc-sys"
1851
  version = "0.1.35"
@@ -3017,6 +3065,12 @@ dependencies = [
3017
  "windows-sys 0.52.0",
3018
  ]
3019
 
 
 
 
 
 
 
3020
  [[package]]
3021
  name = "rustc-demangle"
3022
  version = "0.1.23"
@@ -3127,6 +3181,12 @@ dependencies = [
3127
  "windows-sys 0.52.0",
3128
  ]
3129
 
 
 
 
 
 
 
3130
  [[package]]
3131
  name = "scopeguard"
3132
  version = "1.2.0"
@@ -3372,6 +3432,16 @@ dependencies = [
3372
  "serde",
3373
  ]
3374
 
 
 
 
 
 
 
 
 
 
 
3375
  [[package]]
3376
  name = "socket2"
3377
  version = "0.5.6"
@@ -3403,6 +3473,15 @@ version = "1.2.0"
3403
  source = "registry+https://github.com/rust-lang/crates.io-index"
3404
  checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
3405
 
 
 
 
 
 
 
 
 
 
3406
  [[package]]
3407
  name = "string"
3408
  version = "0.2.1"
@@ -3579,6 +3658,25 @@ dependencies = [
3579
  "utf-8",
3580
  ]
3581
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3582
  [[package]]
3583
  name = "thousands"
3584
  version = "0.2.0"
@@ -3685,7 +3783,7 @@ dependencies = [
3685
  "parking_lot 0.12.1",
3686
  "pin-project-lite",
3687
  "signal-hook-registry",
3688
- "socket2",
3689
  "tokio-macros",
3690
  "windows-sys 0.48.0",
3691
  ]
@@ -3836,6 +3934,20 @@ dependencies = [
3836
  "tokio-executor",
3837
  ]
3838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3839
  [[package]]
3840
  name = "tokio-util"
3841
  version = "0.7.10"
@@ -3942,6 +4054,12 @@ dependencies = [
3942
  "tinyvec",
3943
  ]
3944
 
 
 
 
 
 
 
3945
  [[package]]
3946
  name = "unicode-width"
3947
  version = "0.1.11"
@@ -4157,7 +4275,7 @@ checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
4157
 
4158
  [[package]]
4159
  name = "websurfx"
4160
- version = "1.10.9"
4161
  dependencies = [
4162
  "actix-cors",
4163
  "actix-files",
@@ -4177,6 +4295,7 @@ dependencies = [
4177
  "error-stack",
4178
  "fake-useragent",
4179
  "futures 0.3.30",
 
4180
  "lightningcss",
4181
  "log",
4182
  "maud",
@@ -4192,7 +4311,9 @@ dependencies = [
4192
  "serde",
4193
  "serde_json",
4194
  "smallvec 1.13.1",
 
4195
  "tempfile",
 
4196
  "tokio 1.36.0",
4197
  ]
4198
 
 
136
  dependencies = [
137
  "futures-core",
138
  "tokio 1.36.0",
139
+ "tokio-uring",
140
  ]
141
 
142
  [[package]]
 
151
  "futures-core",
152
  "futures-util",
153
  "mio 0.8.11",
154
+ "socket2 0.5.6",
155
  "tokio 1.36.0",
156
+ "tokio-uring",
157
  "tracing",
158
  ]
159
 
 
213
  "serde_json",
214
  "serde_urlencoded 0.7.1",
215
  "smallvec 1.13.1",
216
+ "socket2 0.5.6",
217
  "time 0.3.34",
218
  "url 2.5.0",
219
  ]
 
245
  source = "registry+https://github.com/rust-lang/crates.io-index"
246
  checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
247
 
248
+ [[package]]
249
+ name = "adler32"
250
+ version = "1.2.0"
251
+ source = "registry+https://github.com/rust-lang/crates.io-index"
252
+ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
253
+
254
  [[package]]
255
  name = "aead"
256
  version = "0.5.2"
 
1675
  "httpdate",
1676
  "itoa 1.0.10",
1677
  "pin-project-lite",
1678
+ "socket2 0.5.6",
1679
  "tokio 1.36.0",
1680
  "tower-service",
1681
  "tracing",
 
1770
  "generic-array",
1771
  ]
1772
 
1773
+ [[package]]
1774
+ name = "io-uring"
1775
+ version = "0.5.13"
1776
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1777
+ checksum = "dd1e1a01cfb924fd8c5c43b6827965db394f5a3a16c599ce03452266e1cf984c"
1778
+ dependencies = [
1779
+ "bitflags 1.3.2",
1780
+ "libc",
1781
+ ]
1782
+
1783
  [[package]]
1784
  name = "iovec"
1785
  version = "0.1.4"
 
1846
  "winapi-build",
1847
  ]
1848
 
1849
+ [[package]]
1850
+ name = "keyword_extraction"
1851
+ version = "1.3.0"
1852
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1853
+ checksum = "c25710ba2c50e4762b267b7387a989d8d1a8235f5cf26cd84e34aac30b263140"
1854
+ dependencies = [
1855
+ "regex",
1856
+ "unicode-segmentation",
1857
+ ]
1858
+
1859
  [[package]]
1860
  name = "language-tags"
1861
  version = "0.3.2"
 
1874
  source = "registry+https://github.com/rust-lang/crates.io-index"
1875
  checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
1876
 
1877
+ [[package]]
1878
+ name = "libflate"
1879
+ version = "1.4.0"
1880
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1881
+ checksum = "5ff4ae71b685bbad2f2f391fe74f6b7659a34871c08b210fdc039e43bee07d18"
1882
+ dependencies = [
1883
+ "adler32",
1884
+ "crc32fast",
1885
+ "libflate_lz77",
1886
+ ]
1887
+
1888
+ [[package]]
1889
+ name = "libflate_lz77"
1890
+ version = "1.2.0"
1891
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1892
+ checksum = "a52d3a8bfc85f250440e4424db7d857e241a3aebbbe301f3eb606ab15c39acbf"
1893
+ dependencies = [
1894
+ "rle-decode-fast",
1895
+ ]
1896
+
1897
  [[package]]
1898
  name = "libmimalloc-sys"
1899
  version = "0.1.35"
 
3065
  "windows-sys 0.52.0",
3066
  ]
3067
 
3068
+ [[package]]
3069
+ name = "rle-decode-fast"
3070
+ version = "1.0.3"
3071
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3072
+ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
3073
+
3074
  [[package]]
3075
  name = "rustc-demangle"
3076
  version = "0.1.23"
 
3181
  "windows-sys 0.52.0",
3182
  ]
3183
 
3184
+ [[package]]
3185
+ name = "scoped-tls"
3186
+ version = "1.0.1"
3187
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3188
+ checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
3189
+
3190
  [[package]]
3191
  name = "scopeguard"
3192
  version = "1.2.0"
 
3432
  "serde",
3433
  ]
3434
 
3435
+ [[package]]
3436
+ name = "socket2"
3437
+ version = "0.4.10"
3438
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3439
+ checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
3440
+ dependencies = [
3441
+ "libc",
3442
+ "winapi 0.3.9",
3443
+ ]
3444
+
3445
  [[package]]
3446
  name = "socket2"
3447
  version = "0.5.6"
 
3473
  source = "registry+https://github.com/rust-lang/crates.io-index"
3474
  checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
3475
 
3476
+ [[package]]
3477
+ name = "stop-words"
3478
+ version = "0.8.0"
3479
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3480
+ checksum = "8500024d809de02ecbf998472b7bed3c4fca380df2be68917f6a473bdb28ddcc"
3481
+ dependencies = [
3482
+ "serde_json",
3483
+ ]
3484
+
3485
  [[package]]
3486
  name = "string"
3487
  version = "0.2.1"
 
3658
  "utf-8",
3659
  ]
3660
 
3661
+ [[package]]
3662
+ name = "thesaurus"
3663
+ version = "0.5.2"
3664
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3665
+ checksum = "3e33ea271e53da683cd3439c04ff3b734f3d6052ea33a65ec9e0fa89a4f96369"
3666
+ dependencies = [
3667
+ "lazy_static",
3668
+ "thesaurus-moby",
3669
+ ]
3670
+
3671
+ [[package]]
3672
+ name = "thesaurus-moby"
3673
+ version = "0.2.0"
3674
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3675
+ checksum = "28f7806d5dbe7d9b627e332f88269a014a6a1d40ec411d4ea66cb702aabce4cf"
3676
+ dependencies = [
3677
+ "libflate",
3678
+ ]
3679
+
3680
  [[package]]
3681
  name = "thousands"
3682
  version = "0.2.0"
 
3783
  "parking_lot 0.12.1",
3784
  "pin-project-lite",
3785
  "signal-hook-registry",
3786
+ "socket2 0.5.6",
3787
  "tokio-macros",
3788
  "windows-sys 0.48.0",
3789
  ]
 
3934
  "tokio-executor",
3935
  ]
3936
 
3937
+ [[package]]
3938
+ name = "tokio-uring"
3939
+ version = "0.4.0"
3940
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3941
+ checksum = "0d5e02bb137e030b3a547c65a3bd2f1836d66a97369fdcc69034002b10e155ef"
3942
+ dependencies = [
3943
+ "io-uring",
3944
+ "libc",
3945
+ "scoped-tls",
3946
+ "slab",
3947
+ "socket2 0.4.10",
3948
+ "tokio 1.36.0",
3949
+ ]
3950
+
3951
  [[package]]
3952
  name = "tokio-util"
3953
  version = "0.7.10"
 
4054
  "tinyvec",
4055
  ]
4056
 
4057
+ [[package]]
4058
+ name = "unicode-segmentation"
4059
+ version = "1.11.0"
4060
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4061
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
4062
+
4063
  [[package]]
4064
  name = "unicode-width"
4065
  version = "0.1.11"
 
4275
 
4276
  [[package]]
4277
  name = "websurfx"
4278
+ version = "1.12.1"
4279
  dependencies = [
4280
  "actix-cors",
4281
  "actix-files",
 
4295
  "error-stack",
4296
  "fake-useragent",
4297
  "futures 0.3.30",
4298
+ "keyword_extraction",
4299
  "lightningcss",
4300
  "log",
4301
  "maud",
 
4311
  "serde",
4312
  "serde_json",
4313
  "smallvec 1.13.1",
4314
+ "stop-words",
4315
  "tempfile",
4316
+ "thesaurus",
4317
  "tokio 1.36.0",
4318
  ]
4319
 
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
  [package]
2
  name = "websurfx"
3
- version = "1.10.9"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
@@ -13,45 +13,93 @@ bench = false
13
  path = "src/bin/websurfx.rs"
14
 
15
  [dependencies]
16
- reqwest = {version="0.11.24", default-features=false, features=["rustls-tls","brotli", "gzip"]}
17
- tokio = {version="1.32.0",features=["rt-multi-thread","macros", "fs", "io-util"], default-features = false}
18
- serde = {version="1.0.196", default-features=false, features=["derive"]}
19
- serde_json = {version="1.0.109", default-features=false}
20
- maud = {version="0.25.0", default-features=false, features=["actix-web"]}
21
- scraper = {version="0.18.1", default-features = false}
22
- actix-web = {version="4.4.0", features = ["cookies", "macros", "compress-brotli"], default-features=false}
23
- actix-files = {version="0.6.5", default-features=false}
24
- actix-cors = {version="0.7.0", default-features=false}
25
- fake-useragent = {version="0.1.3", default-features=false}
26
- env_logger = {version="0.11.1", default-features=false}
27
- log = {version="0.4.21", default-features=false}
28
- mlua = {version="0.9.1", features=["luajit", "vendored"], default-features=false}
29
- redis = {version="0.24.0", features=["tokio-comp","connection-manager"], default-features = false, optional = true}
30
- blake3 = {version="1.5.0", default-features=false}
31
- error-stack = {version="0.4.0", default-features=false, features=["std"]}
32
- async-trait = {version="0.1.76", default-features=false}
33
- regex = {version="1.9.4", features=["perf"], default-features = false}
34
- smallvec = {version="1.13.1", features=["union", "serde"], default-features=false}
35
- futures = {version="0.3.30", default-features=false, features=["alloc"]}
36
- dhat = {version="0.3.2", optional = true, default-features=false}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  mimalloc = { version = "0.1.38", default-features = false }
38
- async-once-cell = {version="0.5.3", default-features=false}
39
- actix-governor = {version="0.5.0", default-features=false}
40
- mini-moka = { version="0.10", optional = true, default-features=false, features=["sync"]}
41
- async-compression = { version = "0.4.6", default-features = false, features=["brotli","tokio"], optional=true}
42
- chacha20poly1305={version="0.10.1", default-features=false, features=["alloc","getrandom"], optional=true}
43
- chacha20 = {version="0.9.1", default-features=false, optional=true}
44
- base64 = {version="0.21.5", default-features=false, features=["std"], optional=true}
45
- cfg-if = {version="1.0.0", default-features=false,optional=true}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  [dev-dependencies]
48
- rusty-hook = {version="^0.11.2", default-features=false}
49
- criterion = {version="0.5.1", default-features=false}
50
- tempfile = {version="3.10.1", default-features=false}
51
 
52
  [build-dependencies]
53
- lightningcss = {version="1.0.0-alpha.52", default-features=false, features=["grid"]}
54
- minify-js = {version="0.6.0", default-features=false}
 
 
55
 
56
  [profile.dev]
57
  opt-level = 0
@@ -72,19 +120,21 @@ debug = false # This should only be commented when testing with dhat profiler
72
  split-debuginfo = '...'
73
  debug-assertions = false
74
  overflow-checks = false
75
- lto = true
76
  panic = 'abort'
77
  incremental = false
78
  codegen-units = 1
79
  rpath = false
80
- strip = "debuginfo"
81
 
82
  [features]
 
83
  default = ["memory-cache"]
84
- dhat-heap = ["dep:dhat"]
85
  memory-cache = ["dep:mini-moka"]
86
- redis-cache = ["dep:redis","dep:base64"]
87
- compress-cache-results = ["dep:async-compression","dep:cfg-if"]
88
- encrypt-cache-results = ["dep:chacha20poly1305","dep:chacha20"]
89
- cec-cache-results = ["compress-cache-results","encrypt-cache-results"]
90
-
 
 
1
  [package]
2
  name = "websurfx"
3
+ version = "1.12.1"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
 
13
  path = "src/bin/websurfx.rs"
14
 
15
  [dependencies]
16
+ reqwest = { version = "0.11.24", default-features = false, features = [
17
+ "rustls-tls",
18
+ "brotli",
19
+ "gzip",
20
+ ] }
21
+ tokio = { version = "1.32.0", features = [
22
+ "rt-multi-thread",
23
+ "macros",
24
+ "fs",
25
+ "io-util",
26
+ ], default-features = false }
27
+ serde = { version = "1.0.196", default-features = false, features = ["derive"] }
28
+ serde_json = { version = "1.0.109", default-features = false }
29
+ maud = { version = "0.25.0", default-features = false, features = [
30
+ "actix-web",
31
+ ] }
32
+ scraper = { version = "0.18.1", default-features = false }
33
+ actix-web = { version = "4.4.0", features = [
34
+ "cookies",
35
+ "macros",
36
+ "compress-brotli",
37
+ ], default-features = false }
38
+ actix-files = { version = "0.6.5", default-features = false }
39
+ actix-cors = { version = "0.7.0", default-features = false }
40
+ fake-useragent = { version = "0.1.3", default-features = false }
41
+ env_logger = { version = "0.11.1", default-features = false }
42
+ log = { version = "0.4.21", default-features = false }
43
+ mlua = { version = "0.9.1", features = [
44
+ "luajit",
45
+ "vendored",
46
+ ], default-features = false }
47
+ redis = { version = "0.24.0", features = [
48
+ "tokio-comp",
49
+ "connection-manager",
50
+ ], default-features = false, optional = true }
51
+ blake3 = { version = "1.5.0", default-features = false }
52
+ error-stack = { version = "0.4.0", default-features = false, features = [
53
+ "std",
54
+ ] }
55
+ async-trait = { version = "0.1.76", default-features = false }
56
+ regex = { version = "1.9.4", features = ["perf"], default-features = false }
57
+ smallvec = { version = "1.13.1", features = [
58
+ "union",
59
+ "serde",
60
+ ], default-features = false }
61
+ futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
62
+ dhat = { version = "0.3.2", optional = true, default-features = false }
63
  mimalloc = { version = "0.1.38", default-features = false }
64
+ async-once-cell = { version = "0.5.3", default-features = false }
65
+ actix-governor = { version = "0.5.0", default-features = false }
66
+ mini-moka = { version = "0.10", optional = true, default-features = false, features = [
67
+ "sync",
68
+ ] }
69
+ async-compression = { version = "0.4.6", default-features = false, features = [
70
+ "brotli",
71
+ "tokio",
72
+ ], optional = true }
73
+ chacha20poly1305 = { version = "0.10.1", default-features = false, features = [
74
+ "alloc",
75
+ "getrandom",
76
+ ], optional = true }
77
+ chacha20 = { version = "0.9.1", default-features = false, optional = true }
78
+ base64 = { version = "0.21.5", default-features = false, features = [
79
+ "std",
80
+ ], optional = true }
81
+ cfg-if = { version = "1.0.0", default-features = false, optional = true }
82
+ keyword_extraction = { version = "1.3.0", default-features = false, features = [
83
+ "tf_idf",
84
+
85
+
86
+ ] }
87
+
88
+ stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
89
+ thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
90
+ "moby",
91
+ ] }
92
 
93
  [dev-dependencies]
94
+ rusty-hook = { version = "^0.11.2", default-features = false }
95
+ criterion = { version = "0.5.1", default-features = false }
96
+ tempfile = { version = "3.10.1", default-features = false }
97
 
98
  [build-dependencies]
99
+ lightningcss = { version = "1.0.0-alpha.52", default-features = false, features = [
100
+ "grid",
101
+ ] }
102
+ minify-js = { version = "0.6.0", default-features = false }
103
 
104
  [profile.dev]
105
  opt-level = 0
 
120
  split-debuginfo = '...'
121
  debug-assertions = false
122
  overflow-checks = false
123
+ lto = 'thin'
124
  panic = 'abort'
125
  incremental = false
126
  codegen-units = 1
127
  rpath = false
128
+ strip = "symbols"
129
 
130
  [features]
131
+ use-synonyms-search = ["thesaurus/static"]
132
  default = ["memory-cache"]
133
+ dhat-heap = ["dep:dhat"]
134
  memory-cache = ["dep:mini-moka"]
135
+ redis-cache = ["dep:redis", "dep:base64"]
136
+ compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
137
+ encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
138
+ cec-cache-results = ["compress-cache-results", "encrypt-cache-results"]
139
+ experimental-io-uring = ["actix-web/experimental-io-uring"]
140
+ use-non-static-synonyms-search = ["thesaurus"]
public/static/colorschemes/rose-pine-dawn.css ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --background-color: #faf4ed;
3
+ --foreground-color: #575279;
4
+ --logo-color: #d7827e;
5
+ --color-one: #f2e9e1;
6
+ --color-two: #907aa9;
7
+ --color-three: #56949f;
8
+ --color-four: #ea9d34;
9
+ --color-five: #d7827e;
10
+ --color-six: #9893a5;
11
+ --color-seven: #575279;
12
+ }
public/static/colorschemes/rose-pine-moon.css ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --background-color: #232136;
3
+ --foreground-color: #e0def4;
4
+ --logo-color: #ea9a97;
5
+ --color-one: #393552;
6
+ --color-two: #c4a7e7;
7
+ --color-three: #9ccfd8;
8
+ --color-four: #f6c177;
9
+ --color-five: #ea9a97;
10
+ --color-six: #6e6a86;
11
+ --color-seven: #e0def4;
12
+ }
public/static/colorschemes/rose-pine.css ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --background-color: #191724;
3
+ --foreground-color: #e0def4;
4
+ --logo-color: #ebbcba;
5
+ --color-one: #26233a;
6
+ --color-two: #c4a7e7;
7
+ --color-three: #9ccfd8;
8
+ --color-four: #f6c177;
9
+ --color-five: #eb6f92;
10
+ --color-six: #6e6a86;
11
+ --color-seven: #e0def4;
12
+ }
public/static/themes/simple.css CHANGED
@@ -482,6 +482,7 @@ footer div {
482
 
483
  .about-container article .logo-container svg {
484
  width: clamp(200px, 530px, 815px);
 
485
  }
486
 
487
  .about-container article .text-block {
@@ -858,4 +859,4 @@ input:checked + .slider::before {
858
  .feature-card {
859
  border-radius: 0;
860
  }
861
- }
 
482
 
483
  .about-container article .logo-container svg {
484
  width: clamp(200px, 530px, 815px);
485
+ color: var(--logo-color);
486
  }
487
 
488
  .about-container article .text-block {
 
859
  .feature-card {
860
  border-radius: 0;
861
  }
862
+ }
src/config/parser.rs CHANGED
@@ -42,6 +42,10 @@ pub struct Config {
42
  /// It stores the level of safe search to be used for restricting content in the
43
  /// search results.
44
  pub safe_search: u8,
 
 
 
 
45
  }
46
 
47
  impl Config {
@@ -131,6 +135,8 @@ impl Config {
131
  upstream_search_engines: globals
132
  .get::<_, HashMap<String, bool>>("upstream_search_engines")?,
133
  request_timeout: globals.get::<_, u8>("request_timeout")?,
 
 
134
  threads,
135
  rate_limiter: RateLimiter {
136
  number_of_requests: rate_limiter["number_of_requests"],
 
42
  /// It stores the level of safe search to be used for restricting content in the
43
  /// search results.
44
  pub safe_search: u8,
45
+ /// It stores the TCP connection keepalive duration in seconds.
46
+ pub tcp_connection_keepalive: u8,
47
+ /// It stores the pool idle connection timeout in seconds.
48
+ pub pool_idle_connection_timeout: u8,
49
  }
50
 
51
  impl Config {
 
135
  upstream_search_engines: globals
136
  .get::<_, HashMap<String, bool>>("upstream_search_engines")?,
137
  request_timeout: globals.get::<_, u8>("request_timeout")?,
138
+ tcp_connection_keepalive: globals.get::<_, u8>("tcp_connection_keepalive")?,
139
+ pool_idle_connection_timeout: globals.get::<_, u8>("pool_idle_connection_timeout")?,
140
  threads,
141
  rate_limiter: RateLimiter {
142
  number_of_requests: rate_limiter["number_of_requests"],
src/models/aggregation_models.rs CHANGED
@@ -4,7 +4,11 @@
4
  use super::engine_models::EngineError;
5
  use serde::{Deserialize, Serialize};
6
  use smallvec::SmallVec;
7
-
 
 
 
 
8
  /// A named struct to store the raw scraped search results scraped search results from the
9
  /// upstream search engines before aggregating it.It derives the Clone trait which is needed
10
  /// to write idiomatic rust using `Iterators`.
@@ -20,6 +24,8 @@ pub struct SearchResult {
20
  pub description: String,
21
  /// The names of the upstream engines from which this results were provided.
22
  pub engine: SmallVec<[String; 0]>,
 
 
23
  }
24
 
25
  impl SearchResult {
@@ -37,9 +43,49 @@ impl SearchResult {
37
  title: title.to_owned(),
38
  url: url.to_owned(),
39
  description: description.to_owned(),
 
40
  engine: engine.iter().map(|name| name.to_string()).collect(),
41
  }
42
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  /// A function which adds the engine name provided as a string into a vector of strings.
45
  ///
@@ -182,3 +228,53 @@ impl SearchResults {
182
  self.no_engines_selected = true;
183
  }
184
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  use super::engine_models::EngineError;
5
  use serde::{Deserialize, Serialize};
6
  use smallvec::SmallVec;
7
+ #[cfg(any(
8
+ feature = "use-synonyms-search",
9
+ feature = "use-non-static-synonyms-search"
10
+ ))]
11
+ use thesaurus::synonyms;
12
  /// A named struct to store the raw scraped search results scraped search results from the
13
  /// upstream search engines before aggregating it.It derives the Clone trait which is needed
14
  /// to write idiomatic rust using `Iterators`.
 
24
  pub description: String,
25
  /// The names of the upstream engines from which this results were provided.
26
  pub engine: SmallVec<[String; 0]>,
27
+ /// The tf-idf score of the result with regard to its title, url and description and the user's query
28
+ pub relevance_score: f32,
29
  }
30
 
31
  impl SearchResult {
 
43
  title: title.to_owned(),
44
  url: url.to_owned(),
45
  description: description.to_owned(),
46
+ relevance_score: 0.0,
47
  engine: engine.iter().map(|name| name.to_string()).collect(),
48
  }
49
  }
50
+ /// calculates and update the relevance score of the current search.
51
+
52
+ /// # Arguments
53
+ ///
54
+ /// * query - the query string used to obtain the results
55
+ ///
56
+ ///
57
+
58
+ pub fn calculate_relevance(&mut self, query: &str) {
59
+ use stop_words::{get, LANGUAGE};
60
+ // when language settings can change to any of the ones supported on this crate: https://docs.rs/crate/stop-words/0.8.0
61
+ let documents = [
62
+ self.title.clone(),
63
+ self.url.clone(),
64
+ self.description.clone(),
65
+ ];
66
+
67
+ let stop_words = get(LANGUAGE::English);
68
+ let punctuation = [
69
+ ".".to_owned(),
70
+ ",".to_owned(),
71
+ ":".to_owned(),
72
+ ";".to_owned(),
73
+ "!".to_owned(),
74
+ "?".to_owned(),
75
+ "(".to_owned(),
76
+ ")".to_owned(),
77
+ "[".to_owned(),
78
+ "]".to_owned(),
79
+ "{".to_owned(),
80
+ "}".to_owned(),
81
+ "\"".to_owned(),
82
+ "'".to_owned(),
83
+ "<".to_owned(),
84
+ ">".to_owned(),
85
+ ];
86
+
87
+ self.relevance_score = calculate_tf_idf(query, &documents, &stop_words, &punctuation);
88
+ }
89
 
90
  /// A function which adds the engine name provided as a string into a vector of strings.
91
  ///
 
228
  self.no_engines_selected = true;
229
  }
230
  }
231
+
232
+ /// Helper function to calculate the tf-idf for the search query.
233
+ /// <br> The approach is as [`as`](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
234
+ /// <br> Find a sample article about TF-IDF [`here`](https://medium.com/analytics-vidhya/tf-idf-term-frequency-technique-easiest-explanation-for-text-classification-in-nlp-with-code-8ca3912e58c3)
235
+ /// ### Arguments
236
+ /// * `query` - a user's search query
237
+ /// * `documents` - a list of text used for comparision (url, title, description)
238
+ /// * `stop_words` - A list of language specific stop words.
239
+ /// * `punctuation` - list of punctuation symbols.
240
+ /// ### Returns
241
+ /// * `score` - The average tf-idf score of the word tokens (and synonyms) in the query
242
+ fn calculate_tf_idf(
243
+ query: &str,
244
+ documents: &[String],
245
+ stop_words: &[String],
246
+ punctuation: &[String],
247
+ ) -> f32 {
248
+ use keyword_extraction::{
249
+ tf_idf::{TfIdf, TfIdfParams},
250
+ tokenizer::Tokenizer,
251
+ };
252
+
253
+ let params = TfIdfParams::UnprocessedDocuments(documents, stop_words, Some(punctuation));
254
+ let tf_idf = TfIdf::new(params);
255
+ let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
256
+ let query_tokens = tokener.split_into_words();
257
+ let mut search_tokens = vec![];
258
+
259
+ for token in query_tokens {
260
+ #[cfg(any(
261
+ feature = "use-synonyms-search",
262
+ feature = "use-non-static-synonyms-search"
263
+ ))]
264
+ {
265
+ // find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
266
+ let synonyms = synonyms(&token);
267
+ search_tokens.extend(synonyms)
268
+ }
269
+ search_tokens.push(token);
270
+ }
271
+
272
+ let mut total_score = 0.0f32;
273
+ for token in search_tokens.iter() {
274
+ total_score += tf_idf.get_score(token);
275
+ }
276
+
277
+ let result = total_score / (search_tokens.len() as f32);
278
+
279
+ f32::from(!result.is_nan()) * result
280
+ }
src/results/aggregator.rs CHANGED
@@ -8,6 +8,7 @@ use crate::models::{
8
  aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
9
  engine_models::{EngineError, EngineHandler},
10
  };
 
11
  use error_stack::Report;
12
  use futures::stream::FuturesUnordered;
13
  use regex::Regex;
@@ -77,6 +78,11 @@ pub async fn aggregate(
77
  let client = CLIENT.get_or_init(|| {
78
  ClientBuilder::new()
79
  .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
 
 
 
 
 
80
  .https_only(true)
81
  .gzip(true)
82
  .brotli(true)
@@ -182,7 +188,17 @@ pub async fn aggregate(
182
  drop(blacklist_map);
183
  }
184
 
185
- let results: Vec<SearchResult> = result_map.iter().map(|(_, value)| value.clone()).collect();
 
 
 
 
 
 
 
 
 
 
186
 
187
  Ok(SearchResults::new(results, &engine_errors_info))
188
  }
@@ -231,7 +247,21 @@ pub async fn filter_with_lists(
231
 
232
  Ok(())
233
  }
234
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  #[cfg(test)]
236
  mod tests {
237
  use super::*;
@@ -250,6 +280,7 @@ mod tests {
250
  url: "https://www.example.com".to_owned(),
251
  description: "This domain is for use in illustrative examples in documents."
252
  .to_owned(),
 
253
  engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
254
  },
255
  ));
@@ -260,6 +291,7 @@ mod tests {
260
  url: "https://www.rust-lang.org/".to_owned(),
261
  description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
262
  engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
 
263
  },)
264
  );
265
 
@@ -300,6 +332,7 @@ mod tests {
300
  description: "This domain is for use in illustrative examples in documents."
301
  .to_owned(),
302
  engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
 
303
  },
304
  ));
305
  map_to_be_filtered.push((
@@ -309,6 +342,7 @@ mod tests {
309
  url: "https://www.rust-lang.org/".to_owned(),
310
  description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
311
  engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
 
312
  },
313
  ));
314
 
@@ -365,6 +399,7 @@ mod tests {
365
  description: "This domain is for use in illustrative examples in documents."
366
  .to_owned(),
367
  engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
 
368
  },
369
  ));
370
 
 
8
  aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
9
  engine_models::{EngineError, EngineHandler},
10
  };
11
+
12
  use error_stack::Report;
13
  use futures::stream::FuturesUnordered;
14
  use regex::Regex;
 
78
  let client = CLIENT.get_or_init(|| {
79
  ClientBuilder::new()
80
  .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
81
+ .pool_idle_timeout(Duration::from_secs(
82
+ config.pool_idle_connection_timeout as u64,
83
+ ))
84
+ .tcp_keepalive(Duration::from_secs(config.tcp_connection_keepalive as u64))
85
+ .connect_timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
86
  .https_only(true)
87
  .gzip(true)
88
  .brotli(true)
 
188
  drop(blacklist_map);
189
  }
190
 
191
+ let mut results: Vec<SearchResult> = result_map
192
+ .iter()
193
+ .map(|(_, value)| {
194
+ let mut copy = value.clone();
195
+ if !copy.url.contains("temu.com") {
196
+ copy.calculate_relevance(query.as_str())
197
+ }
198
+ copy
199
+ })
200
+ .collect();
201
+ sort_search_results(&mut results);
202
 
203
  Ok(SearchResults::new(results, &engine_errors_info))
204
  }
 
247
 
248
  Ok(())
249
  }
250
+ /// Sorts SearchResults by relevance score.
251
+ /// <br> sort_unstable is used as its faster,stability is not an issue on our side.
252
+ /// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
253
+ /// # Arguments
254
+ /// * `results` - A mutable slice or Vec of SearchResults
255
+ ///
256
+ fn sort_search_results(results: &mut [SearchResult]) {
257
+ results.sort_unstable_by(|a, b| {
258
+ use std::cmp::Ordering;
259
+
260
+ b.relevance_score
261
+ .partial_cmp(&a.relevance_score)
262
+ .unwrap_or(Ordering::Less)
263
+ })
264
+ }
265
  #[cfg(test)]
266
  mod tests {
267
  use super::*;
 
280
  url: "https://www.example.com".to_owned(),
281
  description: "This domain is for use in illustrative examples in documents."
282
  .to_owned(),
283
+ relevance_score: 0.0,
284
  engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
285
  },
286
  ));
 
291
  url: "https://www.rust-lang.org/".to_owned(),
292
  description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
293
  engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
294
+ relevance_score:0.0
295
  },)
296
  );
297
 
 
332
  description: "This domain is for use in illustrative examples in documents."
333
  .to_owned(),
334
  engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
335
+ relevance_score: 0.0,
336
  },
337
  ));
338
  map_to_be_filtered.push((
 
342
  url: "https://www.rust-lang.org/".to_owned(),
343
  description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
344
  engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
345
+ relevance_score:0.0
346
  },
347
  ));
348
 
 
399
  description: "This domain is for use in illustrative examples in documents."
400
  .to_owned(),
401
  engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
402
+ relevance_score: 0.0,
403
  },
404
  ));
405
 
websurfx/config.lua CHANGED
@@ -10,6 +10,8 @@ production_use = false -- whether to use production mode or not (in other words
10
  -- if production_use is set to true
11
  -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
12
  request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
 
 
13
  rate_limiter = {
14
  number_of_requests = 50, -- The number of request that are allowed within a provided time limit.
15
  time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
 
10
  -- if production_use is set to true
11
  -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
12
  request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
13
+ tcp_connection_keepalive = 30 -- the amount of time the tcp connection should remain alive (or connected to the server). (value in seconds).
14
+ pool_idle_connection_timeout = 30 -- timeout for the idle connections in the reqwest HTTP connection pool (value in seconds).
15
  rate_limiter = {
16
  number_of_requests = 50, -- The number of request that are allowed within a provided time limit.
17
  time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.