Commit 312fcc5, committed by Jorge Henao (1 parent: 3a0a9ab)

openapi generative pipeline integration

Files changed:
- .vscode/launch.json +26 -9
- about.py +1 -1
- hallazgos.py +50 -8
- main_page.py +7 -2
- pinecode_quieries.py +28 -14
- reformas.py +61 -10
.vscode/launch.json
CHANGED
@@ -1,16 +1,33 @@
+// {
+//     // Use IntelliSense to learn about possible attributes.
+//     // Hover to view descriptions of existing attributes.
+//     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+//     "version": "0.2.0",
+//     "configurations": [
+//         {
+//             "name": "Python: Current File",
+//             "type": "python",
+//             "request": "launch",
+//             "program": "${file}",
+//             "console": "integratedTerminal",
+//             "justMyCode": true
+//         }
+//     ]
+// }
 {
     // Use IntelliSense to learn about possible attributes.
     // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    // For more information, visit: Debugging in Visual Studio Code
     "version": "0.2.0",
     "configurations": [
-        {
-            "name": "Python: Current File",
-            "type": "python",
-            "request": "launch",
-            "program": "${file}",
-            "console": "integratedTerminal",
-            "justMyCode": true
-        }
+        {
+            "name": "debug streamlit",
+            "type": "python",
+            "request": "launch",
+            "program": "/Users/jorge.henao/oosource/ask2democracycol/venv/bin/streamlit",
+            "args": [
+                "run",
+                "main_page.py"]
+        }
     ]
 }
about.py
CHANGED
@@ -1,6 +1,6 @@
 import streamlit as st
 
-def about_ask2democracy():
+def about_ask2democracy(api_key):
     st.markdown("""
     <h1 style='
     text-align: center;
hallazgos.py
CHANGED
@@ -10,14 +10,15 @@ import logging
 logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING)
 logging.getLogger("haystack").setLevel(logging.INFO)
 
-extractive_query = PinecodeProposalQueries(index_name= Config.index_name,
+
+def hallazgos_comision_verdad_2022(api_key):
+
+    queries = PinecodeProposalQueries (index_name= Config.index_name,
                     api_key = Config.es_password,
                     environment = Config.pinecone_environment,
                     embedding_dim = Config.embedding_dim,
                     reader_name_or_path = Config.reader_model_name_or_path,
-                    use_gpu = Config.use_gpu)
-
-def hallazgos_comision_verdad_2022():
+                    use_gpu = Config.use_gpu, OPENAI_key= api_key)
     title = """
     <h1 style='
     text-align: center;
@@ -55,8 +56,7 @@ def hallazgos_comision_verdad_2022():
 
     def search(question, retriever_top_k, reader_top_k):
         filters = {"source_title": "Hallazgos y recomendaciones - 28 de Junio 2022"}
-
-        query_result = extractive_query.search_by_query(query = question,
+        query_result = queries.search_by_query(query = question,
                     retriever_top_k = retriever_top_k,
                     reader_top_k = reader_top_k,
                     filters = filters)
@@ -80,7 +80,7 @@ def hallazgos_comision_verdad_2022():
         elapsed_time = round(ent - stt, 2)
 
         # show which query was entered, and what was searching time
-        st.write(f"**Resultados
+        st.write(f"**Resultados encontrados de las fuentes** \"{query}\" ({elapsed_time} sec.):")
         # then we use loop to show results
         for i, answer in enumerate(results):
             # answer starts with header
@@ -92,9 +92,49 @@ def hallazgos_comision_verdad_2022():
 
             st.markdown("---")
 
+    def search_and_generate_answer(question, retriever_top_k, generator_top_k):
+        filters = {"source_title": "Hallazgos y recomendaciones - 28 de Junio 2022"}
+        query_result = queries.genenerate_answer_OpenAI(query = question,
+                    retriever_top_k = retriever_top_k,
+                    generator_top_k = generator_top_k,
+                    filters = filters, OPENAI_key = st.session_state.get("OPENAI_API_KEY"))
+
+        result = []
+        for i in range(0, len(query_result)):
+            item = query_result[i]
+            source_title = item.meta['doc_metas'][0]['source_title']
+            source_url = item.meta['doc_metas'][0]['source_url']
+            chapter_titles = [source['title'] for source in item.meta['doc_metas']]
+            result.append([[i+1], item.answer.replace("\n",""),
+                    source_title, source_url, str(chapter_titles)])
+
+    def search_and_show_generative_results():
+        # set start time
+        stt = time.time()
+
+        # retrieve top 5 documents
+        results = search_and_generate_answer(query, retriever_top_k=5, generator_top_k=1)
+        # set endtime
+        ent = time.time()
+        # measure resulting time
+        elapsed_time = round(ent - stt, 2)
+
+        # show which query was entered, and what was searching time
+        st.write(f"**Respuesta generada a partir de los resultados** \"{query}\" ({elapsed_time} sec.):")
+        # then we use loop to show results
+        if results != None:
+            for i, answer in enumerate(results):
+                # answer starts with header
+                st.subheader(f"{answer[1]}")
+                st.markdown(f"[**Lee más aquí**]({answer[3]})")
+                st.caption(f"Fuentes: {answer[2]} - {answer[4]}")
+
+                st.markdown("---")
+
     #results = search("que es el adres", retriever_top_k=5, reader_top_k=3)
 
-    st.markdown(f"""<br><p>Cuanto más contexto le des a la pregunta mejores resultados se obtienen.
+
+    st.markdown(f"""<br><p>Cuanto más contexto le des a la pregunta mejores resultados se obtienen.
     No es un sistema basado en palabras claves, puedes escribir preguntas elaboradas.
     Una serie de modelos de lenguaje transformers intervienen en cada consulta para ayudar a entenderlas.""",
     unsafe_allow_html=True)
@@ -106,4 +146,6 @@ def hallazgos_comision_verdad_2022():
         st.error("¡escribe una pregunta!")
     else:
         st.session_state["submit"] = True
+        if api_key:
+            search_and_show_generative_results()
         search_and_show_results()
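Taken together, the hallazgos.py changes move pipeline construction inside the page function (so the user's key can reach it) and add a generative path beside the extractive one. A minimal usage sketch of the new path, not part of the commit: the question text, key placeholder, and top_k values are illustrative assumptions, while the constructor and method signatures follow this diff.

    # Sketch: exercising the new generative path (hypothetical values).
    from config import Config
    from pinecode_quieries import PinecodeProposalQueries

    queries = PinecodeProposalQueries(index_name = Config.index_name,
                                      api_key = Config.es_password,
                                      environment = Config.pinecone_environment,
                                      embedding_dim = Config.embedding_dim,
                                      reader_name_or_path = Config.reader_model_name_or_path,
                                      use_gpu = Config.use_gpu,
                                      OPENAI_key = "sk-...")  # user-supplied key, placeholder here

    # Returns generated answers when a key was configured, otherwise None.
    answers = queries.genenerate_answer_OpenAI(
        query = "¿Qué es el ADRES?",
        retriever_top_k = 5,
        generator_top_k = 1,
        filters = {"source_title": "Hallazgos y recomendaciones - 28 de Junio 2022"})

One caveat worth noting: as rendered in this diff, search_and_generate_answer in hallazgos.py fills result but never returns it, while the reformas.py counterpart ends with return result, so the generative display loop here would receive None.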
main_page.py
CHANGED
@@ -6,7 +6,11 @@ from pinecode_quieries import PinecodeProposalQueries
 from config import Config
 
 # Define the sidebar
-api_key = st.sidebar.text_input("OpenAI API Key", type="password")
+api_key = st.sidebar.text_input("OpenAI API Key", type="password",
+                                value=st.session_state.get("OPENAI_API_KEY", ""))
+if api_key:
+    st.session_state["OPENAI_API_KEY"] = api_key
+
 
 # Define the navigation between pages
 page_options = {
@@ -19,4 +23,5 @@ page_options = {
 selected_page = st.sidebar.radio("Selecciona la página que deseas explorar:", list(page_options.keys()))
 
 # Render the selected page
-page_options[selected_page]()
+#print("key: " + api_key)
+page_options[selected_page](api_key)
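Since the dispatcher now calls page_options[selected_page](api_key), every page callable must accept the key, which is why about_ask2democracy, hallazgos_comision_verdad_2022, and reforma_salud_febrero_2023 all gained an api_key parameter in this commit. A self-contained sketch of the same wiring, with a hypothetical page table standing in for the real one:

    # Sketch: the sidebar-key dispatch pattern used by main_page.py.
    # "Demo" and demo_page are hypothetical placeholders.
    import streamlit as st

    api_key = st.sidebar.text_input("OpenAI API Key", type="password",
                                    value=st.session_state.get("OPENAI_API_KEY", ""))
    if api_key:
        st.session_state["OPENAI_API_KEY"] = api_key

    def demo_page(api_key):
        st.write("generative answers enabled" if api_key else "extractive search only")

    page_options = {"Demo": demo_page}
    selected_page = st.sidebar.radio("Selecciona la página:", list(page_options.keys()))
    page_options[selected_page](api_key)

Storing the key in st.session_state keeps it across Streamlit reruns, so the user does not have to retype it when switching pages.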
pinecode_quieries.py
CHANGED
@@ -76,24 +76,24 @@ class DocumentQueries(ABC):
 class PinecodeProposalQueries(DocumentQueries):
 
     def __init__(self, index_name: str, api_key, reader_name_or_path: str, use_gpu = True,
-                 embedding_dim = 384, environment = "us-east1-gcp") -> None:
+                 embedding_dim = 384, environment = "us-east1-gcp", OPENAI_key = None) -> None:
 
         reader = FARMReader(model_name_or_path = reader_name_or_path,
                             use_gpu = use_gpu, num_processes = 1,
                             context_window_size = 200)
 
         self._initialize_pipeline(index_name, api_key, reader = reader, embedding_dim=
-                                  embedding_dim, environment = environment)
+                                  embedding_dim, environment = environment, OPENAI_key= OPENAI_key)
         #self.log = Log(es_host= es_host, es_index="log", es_user = es_user, es_password= es_password)
 
     def _initialize_pipeline(self, index_name, api_key, similarity = "cosine",
                              embedding_dim = 384, reader = None,
                              environment = "us-east1-gcp",
-                             metadata_config = {"indexed": ["title", "source_title"]}):
+                             metadata_config = {"indexed": ["title", "source_title"]},
+                             OPENAI_key = None):
         if reader is not None:
             self.reader = reader
-        self.OPENAI_generator = OpenAIAnswerGenerator(api_key = api_key,
-                                model="text-davinci-003", temperature=.5, max_tokens=60)
+
         #pinecone.init(api_key=es_password, environment="us-east1-gcp")
 
         self.document_store = PineconeDocumentStore(
@@ -113,9 +113,14 @@ class PinecodeProposalQueries(DocumentQueries):
 
         self.extractive_pipe = ExtractiveQAPipeline (reader = self.reader,
                                                      retriever = self.retriever)
-        self.generative_OPENAI_pipe = GenerativeQAPipeline(generator = self.OPENAI_generator,
-                                                           retriever = self.retriever)
 
+        self.generative_OPENAI_pipe = None
+        if (OPENAI_key != None and OPENAI_key != ""):
+            OPENAI_generator = OpenAIAnswerGenerator(api_key = OPENAI_key,
+                                model="text-davinci-003", temperature=.5, max_tokens=60)
+            self.generative_OPENAI_pipe = GenerativeQAPipeline(generator = OPENAI_generator,
+                                retriever = self.retriever)
+
     def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, index_name: str = None, filters = None):
         #self.document_store.update_embeddings(self.retriever, update_existing_embeddings=False)
         params = {"Retriever": {"top_k": retriever_top_k,
@@ -124,19 +129,28 @@ class PinecodeProposalQueries(DocumentQueries):
         prediction = self.extractive_pipe.run( query = query, params = params, debug = True)
         return prediction["answers"]
 
-    def genenerate_answer_OpenAI(self, query : str, retriever_top_k: int,
-
-
-
-
-
+    def genenerate_answer_OpenAI(self, query : str, retriever_top_k: int, generator_top_k: int, filters = None, OPENAI_key = None):
+        # if (OPENAI_key != None and OPENAI_key != ""):
+        #     OPENAI_generator = OpenAIAnswerGenerator(api_key=OPENAI_key,
+        #         model="text-davinci-003", temperature=.5, max_tokens=60)
+        #     self.generative_OPENAI_pipe = GenerativeQAPipeline(generator = OPENAI_generator,
+        #         retriever = self.retriever)
+
+        if (self.generative_OPENAI_pipe != None):
+            params = {"Retriever": {"top_k": retriever_top_k,
+                                    "filters": filters},
+                      "Generator": {"top_k": generator_top_k}}
+            prediction = self.generative_OPENAI_pipe.run( query = query, params = params)
+            return prediction["answers"]
+        else:
+            return None
 
     def genenerate_answer_HF(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None, filters = None) :
         params = {"Retriever": {"top_k": retriever_top_k,
                                 "filters": filters},
                   "Generator": {"top_k": reader_top_k}}
         prediction = self.generative_HF_pipe.run( query = query, params = params)
-        return prediction
+        return prediction["answers"]
 
 class Log():
 
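The design choice here is to build the OpenAI generative pipeline once at construction time, and only when a key is present, leaving self.generative_OPENAI_pipe as None otherwise (the commented-out block inside genenerate_answer_OpenAI shows the rejected per-query alternative). Callers therefore have to handle a None result; a caller-side sketch of that contract, assuming a queries instance built as in the page modules:

    # Sketch: handling genenerate_answer_OpenAI's None return (no-key case).
    # Assumes `queries` is a PinecodeProposalQueries built as in hallazgos.py.
    answers = queries.genenerate_answer_OpenAI(query = "¿Qué es el ADRES?",
                                               retriever_top_k = 5,
                                               generator_top_k = 1)
    if answers is not None:
        for answer in answers:
            print(answer.answer)  # generated Haystack Answer objects
    else:
        # No OpenAI key was configured: fall back to the extractive reader.
        answers = queries.search_by_query(query = "¿Qué es el ADRES?",
                                          retriever_top_k = 5, reader_top_k = 3)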
reformas.py
CHANGED
@@ -10,14 +10,16 @@ import logging
 logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING)
 logging.getLogger("haystack").setLevel(logging.INFO)
 
-extractive_query = PinecodeProposalQueries(index_name= Config.index_name,
+def reforma_salud_febrero_2023(api_key):
+
+    queries = PinecodeProposalQueries (index_name= Config.index_name,
                     api_key = Config.es_password,
                     environment = Config.pinecone_environment,
                     embedding_dim = Config.embedding_dim,
                     reader_name_or_path = Config.reader_model_name_or_path,
-                    use_gpu = Config.use_gpu)
+                    use_gpu = Config.use_gpu,
+                    OPENAI_key=api_key)
 
-def reforma_salud_febrero_2023():
     title = """
     <h1 style='
     text-align: center;
@@ -74,7 +76,7 @@ def reforma_salud_febrero_2023():
     def search(question, retriever_top_k, reader_top_k):
         filters = {"source_title": "Reforma de la salud 13 Febrero 2023"}
 
-        query_result = extractive_query.search_by_query(query = question,
+        query_result = queries.search_by_query(query = question,
                     retriever_top_k = retriever_top_k,
                     reader_top_k = reader_top_k,
                     filters = filters)
@@ -86,7 +88,7 @@ def reforma_salud_febrero_2023():
                 int(item.meta['page']), item.meta['source_url']])
             #result.append([[i+1], item.answer, item.context[:200], item.meta['title']])
             return result
-
+
     def search_and_show_results():
         # set start time
         stt = time.time()
@@ -98,7 +100,7 @@ def reforma_salud_febrero_2023():
         elapsed_time = round(ent - stt, 2)
 
         # show which query was entered, and what was searching time
-        st.write(f"**Resultados
+        st.write(f"**Resultados encontrados de las fuentes** \"{query}\" ({elapsed_time} sec.):")
        # then we use loop to show results
         for i, answer in enumerate(results):
             # answer starts with header
@@ -106,12 +108,54 @@ def reforma_salud_febrero_2023():
             # cropped answer
             doc = answer[2][:250] + "..."
             st.markdown(f"{doc}[**Lee más aquí**]({answer[6]})")
-            st.caption(f"Fuente: {answer[4]} -
+            st.caption(f"Fuente: {answer[4]} - Artículo: {answer[3]} - Página: {answer[5]}")
 
-            st.markdown("---")
+    #st.markdown("---")
 
-    #results = search("que es el adres", retriever_top_k=5, reader_top_k=3)
+    def search_and_generate_answer(question, retriever_top_k, generator_top_k):
+        filters = {"source_title": "Reforma de la salud 13 Febrero 2023"}
+
+        query_result = queries.genenerate_answer_OpenAI(query = question,
+                    retriever_top_k = retriever_top_k,
+                    generator_top_k = generator_top_k,
+                    filters = filters)
+
+        result = []
+        for i in range(0, len(query_result)):
+            item = query_result[i]
+            source_title = item.meta['doc_metas'][0]['source_title']
+            source_url = item.meta['doc_metas'][0]['source_url']
+            chapter_titles = [source['title'] for source in item.meta['doc_metas']]
+            result.append([[i+1], item.answer.replace("\n",""),
+                    source_title, source_url, str(chapter_titles)])
+        return result
 
+    def search_and_show_generative_results():
+        # set start time
+        stt = time.time()
+
+        # retrieve top 5 documents
+        results = search_and_generate_answer(query, retriever_top_k=5, generator_top_k=1)
+        # set endtime
+        ent = time.time()
+        # measure resulting time
+        elapsed_time = round(ent - stt, 2)
+
+        # show which query was entered, and what was searching time
+        st.write(f"**Respuesta generada a partir de los resultados** \"{query}\" ({elapsed_time} sec.):")
+        if results != None:
+            for i, answer in enumerate(results):
+                # answer starts with header
+                st.subheader(f"{answer[1]}")
+                st.markdown(f"[**Lee más aquí**]({answer[3]})")
+                st.caption(f"Fuentes: {answer[2]} - {answer[4]}")
+
+                st.markdown("---")
+
+    #st.markdown("---")
+
+    #results = search("que es el adres", retriever_top_k=5, reader_top_k=3)
+
     st.write(f"""<br><p>Cuanto más contexto le des a la pregunta mejores resultados se obtienen.
     No es un sistema basado en palabras claves, puedes escribir preguntas elaboradas.
     Una serie de modelos de lenguaje transformers intervienen en cada consulta para ayudar a entenderlas."""
@@ -124,4 +168,11 @@ def reforma_salud_febrero_2023():
         st.error("¡escribe una pregunta!")
     else:
         st.session_state["submit"] = True
-        search_and_show_results()
+        #if not st.session_state.get("OPENAI_API_KEY"):
+        if api_key:
+            search_and_show_generative_results()
+        search_and_show_results()
+
+    #r = search_and_generate_answer("que es el ADRES?", retriever_top_k = 5, generator_top_k = 1)
+
+