"""Question answering over a local text file with LangChain, FAISS and a Hugging Face Hub model.

Importing this module loads ``./KS-all-info_rev1.txt``, splits it into chunks,
embeds the chunks into a FAISS index and builds a "stuff" question-answering
chain. Use :func:`run_chain` to ask a question.
"""

import os
import textwrap
import time

import keyboard
import requests
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import TextLoader  # for text files
from langchain.document_loaders import UnstructuredPDFLoader  # load pdf
from langchain.document_loaders import UnstructuredURLLoader  # load urls into document loader
from langchain.embeddings import HuggingFaceEmbeddings  # for using Hugging Face models
from langchain.indexes import VectorstoreIndexCreator  # vectorize db index with chromadb
from langchain.text_splitter import CharacterTextSplitter  # text splitter
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS  # facebook vectorization
# from langchain.vectorstores.weaviate import Weaviate

# SECURITY: the API token used to be hard-coded here. Secrets must not live in
# source control, so the token has to be supplied through the environment
# (the previously committed token should be revoked).
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; export your Hugging Face API "
        "token in the environment before running this script."
    )

loader = TextLoader('./KS-all-info_rev1.txt')
documents = loader.load()


def wrap_text_preserve_newlines(text: str, width: int = 110) -> str:
    """Wrap *text* to *width* columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own with ``textwrap.fill``,
    and the wrapped lines are joined back together with newlines.
    """
    return '\n'.join(
        textwrap.fill(line, width=width) for line in text.split('\n')
    )


text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=10)
docs = text_splitter.split_documents(documents)

# Embeddings
embeddings = HuggingFaceEmbeddings()

# Create the vectorized db
db = FAISS.from_documents(docs, embeddings)

llm = HuggingFaceHub(
    repo_id="MBZUAI/LaMini-Flan-T5-783M",
    model_kwargs={"temperature": 0, "max_length": 512},
)
chain = load_qa_chain(llm, chain_type="stuff")


def run_chain(query: str, k: int = 4):
    """Answer *query* using the chunks of the source text most similar to it.

    The FAISS index ``db`` is searched for the *k* chunks closest to the
    query, and only those chunks are handed to the "stuff" chain. Previously
    every chunk in ``docs`` was stuffed into the prompt and the index was
    never consulted.

    Args:
        query: The question to ask.
        k: Number of chunks to retrieve from the index as context.

    Returns:
        Whatever ``chain.run`` returns for the retrieved chunks and question.
    """
    relevant_docs = db.similarity_search(query, k=k)
    return chain.run(input_documents=relevant_docs, question=query)

#keyboard.unhook_all()###########################