AgenticRagNCERT / utils /document_loader.py
Ashvanth.S
Update changes
65d74a7
raw
history blame
317 Bytes
import os
from langchain_community.document_loaders import PDFPlumberLoader
def load_pdf(file_path):
    """Load a PDF through ``PDFPlumberLoader`` and return the loaded result.

    Args:
        file_path: Location of the PDF; handed unchanged to
            ``PDFPlumberLoader``.

    Returns:
        Whatever ``PDFPlumberLoader(file_path).load()`` returns
        (presumably a list of page-level documents -- confirm against the
        loader's documentation).
    """
    return PDFPlumberLoader(file_path).load()
def create_unique_ids(documents):
    """Build one ID string per document, guaranteed unique within the call.

    The base ID is ``"<source>_page_<page>"``, taken from each document's
    ``metadata``. The first document with a given base ID keeps it
    unchanged; any later document that would collide gets a numeric suffix
    (``_1``, ``_2``, ...) so that no two returned IDs are equal. When every
    document already has a distinct source/page pair, the output is exactly
    the plain base IDs.

    Args:
        documents: Iterable of objects exposing a ``metadata`` mapping with
            ``'source'`` and ``'page'`` keys.

    Returns:
        A list of ID strings, in the same order as ``documents``.

    Raises:
        KeyError: If a document's metadata lacks ``'source'`` or ``'page'``.
    """
    used_ids = set()
    suffix_counters = {}
    unique_ids = []
    for doc in documents:
        base_id = f"{doc.metadata['source']}_page_{doc.metadata['page']}"
        candidate = base_id
        # Several documents can share a source/page pair (e.g. chunks cut
        # from the same page). Keep bumping the suffix until the candidate
        # is unused, which also protects against a suffixed ID clashing
        # with another document's natural base ID.
        while candidate in used_ids:
            suffix_counters[base_id] = suffix_counters.get(base_id, 0) + 1
            candidate = f"{base_id}_{suffix_counters[base_id]}"
        used_ids.add(candidate)
        unique_ids.append(candidate)
    return unique_ids