from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
import time

# read every .txt file in ./infotext
loader = DirectoryLoader("./infotext", glob="*.txt", loader_cls=TextLoader)

# interpret information in the documents
documents = loader.load()

# split the documents into chunks (default chunk size and overlap)
splitter = RecursiveCharacterTextSplitter()
texts = splitter.split_documents(documents)

# embed the chunks with a small CPU-friendly sentence-transformer
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})

# create and save the local vector database
db = FAISS.from_documents(texts, embeddings)
db.save_local("faiss")

# prepare the template we will use when prompting the AI
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

# load the language model (a quantized Llama 2 13B chat model via ctransformers)
config = {'max_new_tokens': 256, 'temperature': 0.01}
llm = CTransformers(model="TheBloke/Llama-2-13B-chat-GGML",
                    model_file="llama-2-13b-chat.ggmlv3.q2_K.bin",
                    model_type="llama",
                    config=config)

# load the interpreted information back from the local database; the embedding
# model is recreated here so this half of the script can run on its own,
# without repeating the indexing step above
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})
db = FAISS.load_local("faiss", embeddings)

# prepare a retriever that supplies the two most relevant chunks as context
retriever = db.as_retriever(search_kwargs={'k': 2})
prompt = PromptTemplate(
    template=template,
    input_variables=['context', 'question'])

def query(question):
    # build a "stuff" chain: the retrieved chunks are stuffed into the
    # prompt's {context} slot before the question is sent to the model
    model = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type='stuff',
                                        retriever=retriever,
                                        return_source_documents=True,
                                        chain_type_kwargs={'prompt': prompt})
    time_start = time.time()
    output = model({'query': question})
    response = output["result"]
    time_elapsed = time.time() - time_start
    return [response, time_elapsed]
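
# Example usage: a minimal sketch. The question string below is hypothetical;
# the answer and timing depend on the .txt files in ./infotext and on your
# hardware (the 13B model is slow on CPU).
if __name__ == "__main__":
    answer, seconds = query("What topics do these documents cover?")
    print(f"Answer: {answer}")
    print(f"Answered in {seconds:.1f}s")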