docverifyrag / ingest.py
Carlos Salgado
ignore flakes, add ingest
d17ba2d
raw
history blame
223 Bytes
from langchain_community.document_loaders import UnstructuredPDFLoader
def ingest_pdf(path):
    """Load a PDF and split its contents into text chunks.

    Args:
        path: Location of the PDF file; handed unchanged to
            ``UnstructuredPDFLoader``.

    Returns:
        The list of documents produced by splitting the loaded PDF with a
        ``CharacterTextSplitter`` (``chunk_size=1000``, ``chunk_overlap=0``).
    """
    # Imported here because the module header only imports the loader.
    # Recent LangChain releases ship the splitter in
    # ``langchain_text_splitters``; older ones expose it from
    # ``langchain.text_splitter`` only.
    try:
        from langchain_text_splitters import CharacterTextSplitter
    except ImportError:
        from langchain.text_splitter import CharacterTextSplitter

    # The loader needs the file path; without it nothing can be read.
    loader = UnstructuredPDFLoader(path)
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    data = text_splitter.split_documents(documents)
    return data