Hansimov committed on
Commit
c7c538d
1 Parent(s): 3dda344

:zap: [Enhance] WebpageContentExtractor: Escape dash, and ignore

Browse files
Files changed (1) hide show
  1. networks/network_configs.py +3 -2
networks/network_configs.py CHANGED
@@ -10,13 +10,14 @@ IGNORE_CLASSES = [
10
  "navbar",
11
  # 163.com
12
  "post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
13
- "ntes-.*nav",
14
- "nav-bottom",
15
  ]
16
 
17
  IGNORE_HOSTS = [
18
  "weibo.com",
19
  "hymson.com",
 
20
  ]
21
 
22
  REQUESTS_HEADERS = {
 
10
  "navbar",
11
  # 163.com
12
  "post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
13
+ "ntes\-.*nav",
14
+ "nav\-bottom",
15
  ]
16
 
17
  IGNORE_HOSTS = [
18
  "weibo.com",
19
  "hymson.com",
20
+ "yahoo.com",
21
  ]
22
 
23
  REQUESTS_HEADERS = {