JBHF committed on
Commit
bff81b1
·
verified ·
1 Parent(s): 81d7480

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -1
app.py CHANGED
@@ -16,6 +16,7 @@ from langchain_community.embeddings import OllamaEmbeddings
16
 
17
  # JB:
18
  from langchain.embeddings import FastEmbedEmbeddings
 
19
 
20
  from langchain_community.vectorstores import FAISS
21
  # from langchain.vectorstores import Chroma
@@ -47,9 +48,30 @@ if "vector" not in st.session_state:
47
  # https://python.langchain.com/docs/integrations/document_loaders/merge_doc
48
  # from langchain_community.document_loaders import PyPDFLoader
49
  # loader_pdf = PyPDFLoader("../MachineLearning-Lecture01.pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  pdf_file_path = "*.pdf" # JB
51
  # st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
52
- st.session_state.loader = PyPDFLoader(*.pdf).load() # JB
 
 
53
  # chunks = self.text_splitter.split_documents(docs)
54
  # chunks = filter_complex_metadata(chunks)
55
 
 
16
 
17
  # JB:
18
  from langchain.embeddings import FastEmbedEmbeddings
19
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
20
 
21
  from langchain_community.vectorstores import FAISS
22
  # from langchain.vectorstores import Chroma
 
48
  # https://python.langchain.com/docs/integrations/document_loaders/merge_doc
49
  # from langchain_community.document_loaders import PyPDFLoader
50
  # loader_pdf = PyPDFLoader("../MachineLearning-Lecture01.pdf")
51
+ #
52
+ # https://stackoverflow.com/questions/60215731/pypdf-to-read-each-pdf-in-a-folder
53
+ #
54
+ # https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html
55
+ # https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#pypdf-directory
56
+ # !!!!!
57
+ # PyPDF Directory
58
+ # Load PDFs from directory
59
+ # from langchain_community.document_loaders import PyPDFDirectoryLoader
60
+ # loader = PyPDFDirectoryLoader("example_data/")
61
+ # docs = loader.load()
62
+ #
63
+ # ZIE OOK:
64
+ # https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#using-pypdf
65
+ # Using MathPix
66
+ # Inspired by Daniel Gross's https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
67
+ # from langchain_community.document_loaders import MathpixPDFLoader
68
+ # loader = MathpixPDFLoader("example_data/layout-parser-paper.pdf")
69
+ # data = loader.load()
70
  pdf_file_path = "*.pdf" # JB
71
  # st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
72
+ # st.session_state.loader = PyPDFLoader(*.pdf).load() # JB syntax error *.pdf !
73
+ st.session_state.loader = PyPDFDirectoryLoader("") # JB PyPDFDirectoryLoader("example_data/")
74
+
75
  # chunks = self.text_splitter.split_documents(docs)
76
  # chunks = filter_complex_metadata(chunks)
77