Spaces:
Sleeping
Sleeping
import shutil | |
from haystack.document_stores import FAISSDocumentStore | |
from haystack.nodes import EmbeddingRetriever | |
from haystack.pipelines import ExtractiveQAPipeline | |
from haystack.nodes import FARMReader | |
import streamlit as st | |
from app_utils.config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT, | |
READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH) | |
# cached to make index and models load only at start | |
def start_haystack():
    """
    Build the extractive QA pipeline.

    Copies the document-store database file from INDEX_DIR into the current
    directory, opens the FAISS document store from its saved index/config
    files, prints the number of indexed documents, then creates the
    embedding retriever and the CPU-only reader and wires them together.

    Returns the resulting ExtractiveQAPipeline.
    """
    # The database file is copied next to the running script before the
    # store is opened from the saved index and config.
    db_source = f'{INDEX_DIR}/faiss_document_store.db'
    shutil.copy(db_source, '.')

    index_file = f'{INDEX_DIR}/my_faiss_index.faiss'
    config_file = f'{INDEX_DIR}/my_faiss_index.json'
    store = FAISSDocumentStore(faiss_index_path=index_file,
                               faiss_config_path=config_file)

    doc_count = store.get_document_count()
    print(f'Index size: {doc_count}')

    embedding_retriever = EmbeddingRetriever(
        document_store=store,
        embedding_model=RETRIEVER_MODEL,
        model_format=RETRIEVER_MODEL_FORMAT,
    )
    farm_reader = FARMReader(
        model_name_or_path=READER_MODEL,
        use_gpu=False,
        confidence_threshold=READER_CONFIG_THRESHOLD,
    )
    return ExtractiveQAPipeline(farm_reader, embedding_retriever)
# Module-level pipeline instance, created by calling start_haystack() when
# this file runs; query() below uses it directly rather than receiving it
# as an argument.
pipe = start_haystack()
# The pipeline is not passed as a parameter to the following function,
# because it is difficult to cache.
def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
    """Send a question through the module-level pipeline and return its output.

    Args:
        question: Text handed to ``pipe.run`` as the query.
        retriever_top_k: ``top_k`` value forwarded to the "Retriever" node.
        reader_top_k: ``top_k`` value forwarded to the "Reader" node.

    Returns:
        Whatever ``pipe.run`` returns for this question.
    """
    # Per-node parameters, keyed by node name.
    node_params = {}
    node_params["Retriever"] = {"top_k": retriever_top_k}
    node_params["Reader"] = {"top_k": reader_top_k}
    return pipe.run(question, params=node_params)
def load_questions():
    """Load the selected questions from the file at ``QUESTIONS_PATH``.

    The file is read as UTF-8, one question per line. Each line is stripped
    of surrounding whitespace; blank lines and comment lines (whose first
    non-whitespace character is ``#``) are skipped.

    Returns:
        A list of question strings, in file order.

    Raises:
        OSError: If the questions file cannot be opened.
    """
    # Explicit encoding: the platform default is locale-dependent and can
    # fail on non-ASCII questions.
    with open(QUESTIONS_PATH, encoding="utf-8") as fin:
        stripped_lines = (line.strip() for line in fin)
        # Filter after stripping so that whitespace-only lines and indented
        # comments do not end up in the result as bogus questions.
        return [line for line in stripped_lines
                if line and not line.startswith('#')]