Spaces:
Paused
Paused
File size: 317 Bytes
dbb2933 65d74a7 dbb2933 |
1 2 3 4 5 6 7 8 9 10 11 |
import os
from langchain_community.document_loaders import PDFPlumberLoader
def load_pdf(file_path):
    """Read a PDF with ``PDFPlumberLoader`` and return the loaded result.

    Args:
        file_path: Location of the PDF; handed unchanged to the
            ``PDFPlumberLoader`` constructor.

    Returns:
        Whatever the loader's ``load()`` call produces.
        NOTE(review): presumably a list of per-page document objects
        carrying ``metadata`` -- confirm against the loader's docs.
    """
    # Build the loader and run it in one step; no intermediate state is kept.
    return PDFPlumberLoader(file_path).load()
def create_unique_ids(documents):
    """Build one identifier string per document from its metadata.

    Each identifier has the form ``"<source>_page_<page>"``, where both
    values are read from the document's ``metadata`` mapping.

    Args:
        documents: Iterable of objects exposing a ``metadata`` mapping that
            contains the keys ``'source'`` and ``'page'``.

    Returns:
        A list of identifier strings, in the same order as ``documents``.

    Raises:
        KeyError: If a document's metadata lacks ``'source'`` or ``'page'``.
    """
    identifiers = []
    for entry in documents:
        meta = entry.metadata
        identifiers.append(f"{meta['source']}_page_{meta['page']}")
    return identifiers
|