# NOTE(review): the three lines below were non-code residue from the hosting
# page scrape ("Spaces: / Runtime error / Runtime error"); kept as a comment
# so the file remains valid Python.
# Standard library
import os
import warnings

# Third-party: LlamaIndex readers/indices and LangChain index/vectorstore tooling
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, GPTListIndex
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Project-local utility modules
import utils.constants as constants_utils
import utils.data_loader as data_loader_utils
import utils.langchain_utils as langchain_utils
import utils.weather as weather_utils
import utils.mandi_price as mandi_utils
import utils.translator as translator_utils

# Silence noisy library warnings for the hosted demo
warnings.filterwarnings('ignore')
class KKMS_KSSW:
    """Smart-search wrapper around a document vector store.

    Builds (or loads) a vector index over documents found under
    ``constants_utils.DATA_PATH`` and exposes a ``query`` method to
    retrieve the most relevant documents.  Also instantiates helper
    objects for LangChain utilities, mandi (market) prices, weather,
    and translation — presumably consumed by the surrounding app;
    their APIs are not visible from this file.
    """

    def __init__(self):
        # Vector store (populated by initialize_index), loaded documents,
        # and the most recent query response.
        self.index = None
        self.documents = []
        self.response = None

        # Helper utility objects from the project's utils package.
        self.langchain_utils_obj = langchain_utils.LANGCHAIN_UTILS()
        self.mandi_utils_obj = mandi_utils.MANDI_PRICE()
        self.weather_utils_obj = weather_utils.WEATHER()
        self.translator_utils_obj = translator_utils.TRANSLATOR()

        # Ensure the data/output directories exist.  exist_ok=True avoids
        # the check-then-create race of os.path.exists + os.makedirs.
        os.makedirs(constants_utils.DATA_PATH, exist_ok=True)
        os.makedirs(constants_utils.OUTPUT_PATH, exist_ok=True)

    def initialize_index(self, save_index_to_disk=True, index_type='GPTSimpleVectorIndex'):
        """Load a pre-built index from disk, or build one from DATA_PATH.

        Args:
            save_index_to_disk: persist a newly built index to
                ``constants_utils.INDEX_FILENAME`` (a pre-loaded index is
                not re-saved).
            index_type: 'GPTSimpleVectorIndex' (LlamaIndex) or 'FAISS'
                (LangChain vector store).
        """
        if os.path.exists(constants_utils.INDEX_FILENAME):
            # Reuse the previously generated index.
            print(f'Loading pre-generated index from: {constants_utils.INDEX_FILENAME}')
            self.index = self.langchain_utils_obj.load_index(
                index_type='GPTSimpleVectorIndex',
                filepath=constants_utils.INDEX_FILENAME,
            )
        else:
            # Load data from Docs
            if os.path.exists(constants_utils.DATA_PATH):
                doc_documents = SimpleDirectoryReader(constants_utils.DATA_PATH).load_data()
                self.documents = doc_documents[:]

            # Load data from PDFs only
            # pdf_documents = data_loader_utils.load_document(doc_type='pdf', doc_filepath=doc_filepath)

            # Load data from URLs & append it to the documents that we read from PDFs
            # url_documents = data_loader_utils.load_document(doc_type='url', urls=urls)
            # self.documents.extend(url_documents)

            # Build the vector store for the loaded documents.
            if index_type == 'GPTSimpleVectorIndex':
                self.index = GPTSimpleVectorIndex.from_documents(self.documents)
            elif index_type == 'FAISS':
                self.index = FAISS.from_documents(
                    self.documents,
                    OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
                )

            if save_index_to_disk:
                # Persist only a newly generated index; a loaded index is
                # unchanged and need not be re-written.
                print(f'Saving newly generated index: {constants_utils.INDEX_FILENAME}')
                if index_type == 'GPTSimpleVectorIndex':
                    self.index.save_to_disk(constants_utils.INDEX_FILENAME)
                elif index_type == 'FAISS':
                    self.index.save_local(constants_utils.INDEX_FILENAME)

    def merge_documents_from_different_sources(self, doc_documents, url_documents):
        """Merge doc- and URL-derived indices into a single list index.

        Builds one GPTSimpleVectorIndex per source, tags each with a
        summary text, merges them into a GPTListIndex stored on
        ``self.index``, and returns it.
        (Promoted to an instance method: the original nested definition
        assigned ``self.index`` but did not take ``self``.)
        """
        # Build the Vector store for docs
        doc_index = GPTSimpleVectorIndex.from_documents(doc_documents)
        # Build the Vector store for URLs
        url_index = GPTSimpleVectorIndex.from_documents(url_documents)

        # Set summary of each index
        doc_index.set_text("index_from_docs")
        url_index.set_text("index_from_urls")

        # Merge index of different data sources
        self.index = GPTListIndex([doc_index])
        self.index.insert(url_index)  # can also be passed directly as GPTListIndex([doc_index, url_index])
        return self.index

    def query(self,
              question,
              mode='default',
              response_mode="default",
              similarity_top_k=1,
              required_keywords=None,
              exclude_keywords=None,
              verbose=False
              ):
        """Query the index for the top-K most relevant documents.

        Args:
            question: the natural-language query string.
            mode: can be any of [default, embedding]
            response_mode: can be any of [default, compact, tree_summarize]
            similarity_top_k: number of top matches to retrieve.
            required_keywords / exclude_keywords: keyword filters; default
                to empty lists (None sentinel avoids the shared
                mutable-default-argument pitfall).
            verbose: pass-through verbosity flag.

        Returns:
            The index query response (also cached on ``self.response``).
        """
        # Normalize the sentinel defaults so callers see the original
        # empty-list behavior.
        required_keywords = [] if required_keywords is None else required_keywords
        exclude_keywords = [] if exclude_keywords is None else exclude_keywords

        # Querying the index
        self.response = self.index.query(question,
                                         mode=mode,
                                         response_mode=response_mode,
                                         similarity_top_k=similarity_top_k,
                                         required_keywords=required_keywords,
                                         exclude_keywords=exclude_keywords,
                                         verbose=verbose)
        return self.response