import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFium2Loader
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI


class PDFQuery:
    def __init__(self) -> None:
        # Set your own OpenAI API key here, or export OPENAI_API_KEY before running.
        os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"
        self.embeddings = OpenAIEmbeddings()
        # Split documents into 800-character chunks with 200 characters of overlap.
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0)
        self.chain = None
        self.db = None

    def ask(self, question: str) -> str:
        if self.chain is None:
            response = "Please, add a document."
        else:
            # Retrieve the chunks most relevant to the question, then run the QA chain over them.
            docs = self.db.get_relevant_documents(question)
            response = self.chain.run(input_documents=docs, question=question)
        return response

    def ingest(self, file_path: os.PathLike) -> None:
        # Load the PDF, split it into chunks, and index the chunks in a Chroma vector store.
        loader = PyPDFium2Loader(file_path)
        documents = loader.load()
        splitted_documents = self.text_splitter.split_documents(documents)
        self.db = Chroma.from_documents(splitted_documents, self.embeddings).as_retriever()
        # Reuse the LLM created in __init__ instead of instantiating a second one.
        self.chain = load_qa_chain(self.llm, chain_type="stuff")
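
A minimal usage sketch of the class follows; the file name "example.pdf" and the question string are placeholders, not part of the original code.

if __name__ == "__main__":
    pdf_query = PDFQuery()
    pdf_query.ingest("example.pdf")  # placeholder path: index a local PDF first
    print(pdf_query.ask("What is this document about?"))  # then ask questions against it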