Spaces:
Paused
Paused
import os | |
from langchain_community.document_loaders import PDFPlumberLoader | |
def load_pdf(file_path):
    """Load a PDF through ``PDFPlumberLoader`` and return the loader's output.

    Args:
        file_path: Location of the PDF; handed unchanged to
            ``PDFPlumberLoader``.

    Returns:
        Whatever ``PDFPlumberLoader(file_path).load()`` returns.
        NOTE(review): presumably a list of LangChain ``Document`` objects,
        one per page -- confirm against the loader's documentation.
    """
    loader = PDFPlumberLoader(file_path)
    document = loader.load()
    return document
def create_unique_ids(documents):
    """Build one identifier string per document from its metadata.

    Each identifier has the form ``"<source>_page_<page>"``, where both
    values are read from the document's ``metadata`` mapping under the keys
    ``'source'`` and ``'page'``.

    Args:
        documents: Iterable of objects exposing a ``metadata`` mapping that
            contains ``'source'`` and ``'page'`` entries.

    Returns:
        A list of identifier strings, in the same order as ``documents``.
        An empty input yields an empty list.

    NOTE(review): identifiers are only distinct when every document has a
    distinct (source, page) pair -- verify this holds for the caller's data.
    """
    return [
        f"{doc.metadata['source']}_page_{doc.metadata['page']}"
        for doc in documents
    ]