Jorge Henao committed on
Commit
47753be
·
1 Parent(s): fcc4f87
__pycache__/config.cpython-38.pyc ADDED
Binary file (575 Bytes). View file
 
__pycache__/document_quieries.cpython-38.pyc ADDED
Binary file (3.2 kB). View file
 
app.py CHANGED
@@ -9,7 +9,6 @@ extractive_query = ExtractiveProposalQueries(es_host = Config.es_host, es_index
9
  reader_name_or_path = Config.reader_model_name_or_path,
10
  use_gpu = Config.use_gpu)
11
 
12
-
13
  def fake_search(question, retriever_top_k, reader_top_k):
14
  #p1_result = query.search_by_query(query = question, retriever_top_k = retriever_top_k, reader_top_k = reader_top_k, es_index = "informecomisionverdad")
15
  result = []
@@ -30,7 +29,6 @@ def search(question, retriever_top_k, reader_top_k):
30
 
31
  return result
32
 
33
-
34
  if __name__ == "__main__":
35
  # streamlit part starts here with title
36
  title = """
@@ -58,7 +56,7 @@ if __name__ == "__main__":
58
  # set start time
59
  stt = time.time()
60
  # retrieve top 5 documents
61
- results = search(query, retriever_top_k=5, reader_top_k=3)
62
  # set endtime
63
  ent = time.time()
64
  # measure resulting time
 
9
  reader_name_or_path = Config.reader_model_name_or_path,
10
  use_gpu = Config.use_gpu)
11
 
 
12
  def fake_search(question, retriever_top_k, reader_top_k):
13
  #p1_result = query.search_by_query(query = question, retriever_top_k = retriever_top_k, reader_top_k = reader_top_k, es_index = "informecomisionverdad")
14
  result = []
 
29
 
30
  return result
31
 
 
32
  if __name__ == "__main__":
33
  # streamlit part starts here with title
34
  title = """
 
56
  # set start time
57
  stt = time.time()
58
  # retrieve top 5 documents
59
+ results = fake_search(query, retriever_top_k=5, reader_top_k=3)
60
  # set endtime
61
  ent = time.time()
62
  # measure resulting time
document_quieries.py CHANGED
@@ -3,6 +3,9 @@ from haystack.nodes import BM25Retriever, FARMReader
3
  from haystack.document_stores import ElasticsearchDocumentStore
4
  from haystack.pipelines import ExtractiveQAPipeline
5
  import certifi
 
 
 
6
 
7
  ca_certs=certifi.where()
8
 
@@ -17,7 +20,8 @@ class ExtractiveProposalQueries(DocumentQueries):
17
  def __init__(self, es_host: str, es_index: str, es_user, es_password, reader_name_or_path: str, use_gpu = True) -> None:
18
  reader = FARMReader(model_name_or_path = reader_name_or_path, use_gpu = use_gpu, num_processes=1, context_window_size=200)
19
  self._initialize_pipeline(es_host, es_index, es_user, es_password, reader = reader)
20
-
 
21
  def _initialize_pipeline(self, es_host, es_index, es_user, es_password, reader = None):
22
  if reader is not None:
23
  self.reader = reader
@@ -29,8 +33,28 @@ class ExtractiveProposalQueries(DocumentQueries):
29
  self.pipe = ExtractiveQAPipeline(self.reader, self.retriever)
30
 
31
  def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
 
32
  if es_index is not None:
33
  self._initialize_pipeline(self.es_host, es_index, self.es_user, self.es_password)
34
  params = {"Retriever": {"top_k": retriever_top_k}, "Reader": {"top_k": reader_top_k}}
35
  prediction = self.pipe.run( query = query, params = params)
36
  return prediction["answers"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from haystack.document_stores import ElasticsearchDocumentStore
4
  from haystack.pipelines import ExtractiveQAPipeline
5
  import certifi
6
+ import datetime
7
+ import requests
8
+ from base64 import b64encode
9
 
10
  ca_certs=certifi.where()
11
 
 
20
  def __init__(self, es_host: str, es_index: str, es_user, es_password, reader_name_or_path: str, use_gpu = True) -> None:
21
  reader = FARMReader(model_name_or_path = reader_name_or_path, use_gpu = use_gpu, num_processes=1, context_window_size=200)
22
  self._initialize_pipeline(es_host, es_index, es_user, es_password, reader = reader)
23
+ self.log = Log(es_host= es_host, es_index="log", es_user = es_user, es_password= es_password)
24
+
25
  def _initialize_pipeline(self, es_host, es_index, es_user, es_password, reader = None):
26
  if reader is not None:
27
  self.reader = reader
 
33
  self.pipe = ExtractiveQAPipeline(self.reader, self.retriever)
34
 
35
  def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
36
+ self.log.write_log(query, "hfspace-informecomision")
37
  if es_index is not None:
38
  self._initialize_pipeline(self.es_host, es_index, self.es_user, self.es_password)
39
  params = {"Retriever": {"top_k": retriever_top_k}, "Reader": {"top_k": reader_top_k}}
40
  prediction = self.pipe.run( query = query, params = params)
41
  return prediction["answers"]
42
+
43
class Log():
    """Write query-log entries to an Elasticsearch index over HTTPS.

    Each entry is POSTed as a JSON document to
    ``https://{es_host}:443/{es_index}/_doc`` with an HTTP Basic
    ``Authorization`` header built from the supplied user and password.
    """

    # Seconds to wait for Elasticsearch before abandoning a log write, so a
    # stalled logging endpoint cannot hang the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, es_host: str, es_index: str, es_user, es_password) -> None:
        """Prepare the document endpoint URL and the Basic-auth header.

        Args:
            es_host: Host name of the Elasticsearch server (port 443 is used).
            es_index: Name of the index that receives the log documents.
            es_user: User name for HTTP Basic authentication.
            es_password: Password for HTTP Basic authentication.
        """
        self.elastic_endpoint = f"https://{es_host}:443/{es_index}/_doc"
        # Build the credentials from the arguments instead of a literal
        # embedded in source, so secrets stay in configuration.
        # NOTE(review): any password that was ever committed to the repository
        # should be considered leaked and rotated.
        raw_credentials = f"{es_user}:{es_password}".encode("utf-8")
        self.credentials = b64encode(raw_credentials).decode("ascii")
        self.auth_header = {'Authorization': 'Basic %s' % self.credentials}

    def write_log(self, message: str, source: str) -> None:
        """Send one log document to Elasticsearch and print the response body.

        Logging is best-effort: transport errors (connection failure, timeout,
        TLS problems) are reported on stdout and do not propagate, so a broken
        logging backend cannot abort the search that triggered the log entry.

        Args:
            message: Text to store in the document's ``message`` field.
            source: Identifier of the application that produced the entry.
        """
        # The format string ends in a literal "Z" (UTC designator), so the
        # timestamp must really be taken in UTC rather than naive local time.
        created_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        post_data = {
            "message": message,
            "createdDate": {
                "date": created_date
            },
            "source": source
        }
        try:
            r = requests.post(
                self.elastic_endpoint,
                json=post_data,
                headers=self.auth_header,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as err:
            print(f"Log write failed: {err}")
            return
        print(r.text)