JBHF committed on
Commit
2142374
1 Parent(s): 523a632

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -18,6 +18,12 @@ from langchain_community.embeddings import OllamaEmbeddings
18
  from langchain.embeddings import FastEmbedEmbeddings
19
  from langchain_community.document_loaders import PyPDFDirectoryLoader
20
 
 
 
 
 
 
 
21
  from langchain_community.vectorstores import FAISS
22
  # from langchain.vectorstores import Chroma
23
  # from langchain_community.vectorstores import Chroma
@@ -84,7 +90,9 @@ if "vector" not in st.session_state:
84
  # JB:
85
  # https://python.langchain.com/docs/modules/data_connection/document_loaders/file_directory
86
  text_loader_kwargs={'autodetect_encoding': True}
87
- loader = DirectoryLoader(path, glob="**/*.pdf", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
 
 
88
  docs = loader.load()
89
  st.session_state.docs = docs
90
 
 
18
  from langchain.embeddings import FastEmbedEmbeddings
19
  from langchain_community.document_loaders import PyPDFDirectoryLoader
20
 
21
+ # JB:
22
+ # File Directory
23
+ # This covers how to load all documents in a directory.
24
+ # Under the hood, by default this uses the UnstructuredLoader.
25
+ from langchain_community.document_loaders import DirectoryLoader
26
+
27
  from langchain_community.vectorstores import FAISS
28
  # from langchain.vectorstores import Chroma
29
  # from langchain_community.vectorstores import Chroma
 
90
  # JB:
91
  # https://python.langchain.com/docs/modules/data_connection/document_loaders/file_directory
92
  text_loader_kwargs={'autodetect_encoding': True}
93
+ # loader = DirectoryLoader(path, glob="**/*.pdf", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
94
+ # PyPDFDirectoryLoader (TEST):
95
+ loader = PyPDFDirectoryLoader(path, glob="**/*.pdf", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
96
  docs = loader.load()
97
  st.session_state.docs = docs
98