Hansimov committed on
Commit
138c09e
1 Parent(s): 8bf48d8

:gem: [Feature] Enable SearchAPIApp: /queries_to_search_results

Browse files
Files changed (2) hide show
  1. apis/__init__.py +0 -0
  2. apis/search_api.py +112 -0
apis/__init__.py ADDED
File without changes
apis/search_api.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import uvicorn
5
+
6
+ from fastapi import FastAPI, Depends
7
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
8
+ from pydantic import BaseModel, Field
9
+ from typing import Union
10
+ from sse_starlette.sse import EventSourceResponse, ServerSentEvent
11
+ from utils.logger import logger
12
+ from networks.google_searcher import GoogleSearcher
13
+ from networks.html_fetcher import HTMLFetcher
14
+ from documents.query_results_extractor import QueryResultsExtractor
15
+ from utils.logger import logger
16
+
17
+
18
class SearchAPIApp:
    """Wrapper that builds the Web Search API FastAPI application.

    Constructing an instance creates the FastAPI app (with the interactive
    docs mounted at ``/``), registers the routes, and exposes the result as
    ``self.app``.
    """

    def __init__(self):
        """Create the FastAPI app and register its routes."""
        app_options = {
            "docs_url": "/",
            "title": "Web Search API",
            "swagger_ui_parameters": {"defaultModelsExpandDepth": -1},
            "version": "1.0",
        }
        self.app = FastAPI(**app_options)
        self.setup_routes()

    # Request body for POST /queries_to_search_results.
    # (Documented with comments rather than a docstring so that the generated
    # OpenAPI schema is left untouched.)
    class QueriesToSearchResultsPostItem(BaseModel):
        queries: list = Field(
            default=[""],
            description="(list[str]) Queries to search",
        )
        result_num: int = Field(
            default=10,
            description="(int) Number of search results",
        )
        safe: bool = Field(
            default=False,
            description="(bool) Enable SafeSearch",
        )
        # NOTE: `types` is accepted in the request but is not read by the
        # handler in this class.
        types: list = Field(
            default=["web"],
            description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`",
        )

    def queries_to_search_results(self, item: QueriesToSearchResultsPostItem):
        # Endpoint handler: for each query, in request order, run a search,
        # hand the value returned by the searcher to the extractor, and
        # collect one extracted result per query.
        # (No docstring on purpose: FastAPI would publish it as the
        # operation description.)
        searcher = GoogleSearcher()
        extractor = QueryResultsExtractor()
        queries_search_results = [
            extractor.extract(
                searcher.search(
                    query=query, result_num=item.result_num, safe=item.safe
                )
            )
            for query in item.queries
        ]
        logger.note(queries_search_results)

        # Fetching the HTML of every result is currently disabled:
        # html_fetcher = HTMLFetcher()
        # for query_search_result in queries_search_results:
        #     for query_result in query_search_result["query_results"]:
        #         html_path = html_fetcher.fetch(query_result["url"])
        #         query_result["html_path"] = str(html_path)
        return queries_search_results

    def setup_routes(self):
        """Register the POST route served by ``queries_to_search_results``."""
        register_post = self.app.post(
            "/queries_to_search_results",
            summary="Search queries, and extract contents from results",
        )
        register_post(self.queries_to_search_results)
70
+
71
+
72
class ArgParser(argparse.ArgumentParser):
    """Command-line options for launching the Web Search API server.

    Parsing happens during construction: the options found in
    ``sys.argv[1:]`` are parsed immediately and stored on ``self.args``.

    Options:
        -s / --server: server IP (str, default ``"0.0.0.0"``).
        -p / --port:   server port (int, default ``21111``).
        -d / --dev:    flag, run in dev mode (default ``False``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # (flags, keyword arguments) for each supported option, in the order
        # they are registered.
        option_specs = (
            (
                ("-s", "--server"),
                {
                    "type": str,
                    "default": "0.0.0.0",
                    "help": "Server IP for Web Search API",
                },
            ),
            (
                ("-p", "--port"),
                {
                    "type": int,
                    "default": 21111,
                    "help": "Server Port for Web Search API",
                },
            ),
            (
                ("-d", "--dev"),
                {
                    "default": False,
                    "action": "store_true",
                    "help": "Run in dev mode",
                },
            ),
        )
        for flags, spec in option_specs:
            self.add_argument(*flags, **spec)

        self.args = self.parse_args(sys.argv[1:])
100
+
101
+
102
# Module-level application object; the "__main__:app" import string passed to
# uvicorn below refers to this name.
app = SearchAPIApp().app

if __name__ == "__main__":
    args = ArgParser().args
    # Auto-reload is switched on only when the -d/--dev flag is given.
    uvicorn.run("__main__:app", host=args.server, port=args.port, reload=args.dev)
110
+
111
+ # python -m apis.search_api # [Docker] in production mode
112
+ # python -m apis.search_api -d # [Dev] in development mode