import glob
import os

from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    SentenceTransformersTokenTextSplitter,
)
from langchain_community.document_loaders import PyMuPDFLoader
from transformers import AutoTokenizer

# Directory that holds the input PDF files.
path_to_data = "./data/"


def process_pdf():
    """Load the known PDF reports with ``PyMuPDFLoader``.

    Each PDF is loaded independently. If loading one of them raises, the
    exception is printed and that entry is left out of the result, so a
    single bad file does not prevent the others from loading.

    Returns:
        dict: Maps each label (``'ABC'``, ``'XYZ'``) to the value returned
        by ``PyMuPDFLoader(path).load()`` for that label's PDF. Labels
        whose file failed to load are absent.
    """
    # Label -> PDF path. os.path.join keeps the "./data/" prefix intact
    # because path_to_data already ends with a separator.
    files = {
        'ABC': os.path.join(path_to_data, 'MWTS2021.pdf'),
        'XYZ': os.path.join(path_to_data, 'Consolidated2021.pdf'),
    }

    docs = {}
    for label, pdf_path in files.items():
        try:
            docs[label] = PyMuPDFLoader(pdf_path).load()
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going
            # with the remaining files.
            print("Exception: ", e)

    # Hand the loaded documents back to the caller; without this the work
    # done above would be discarded when the function returns.
    return docs