Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,12 @@ from langchain_community.embeddings import OllamaEmbeddings
|
|
18 |
from langchain.embeddings import FastEmbedEmbeddings
|
19 |
from langchain_community.document_loaders import PyPDFDirectoryLoader
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
from langchain_community.vectorstores import FAISS
|
22 |
# from langchain.vectorstores import Chroma
|
23 |
# from langchain_community.vectorstores import Chroma
|
@@ -84,7 +90,9 @@ if "vector" not in st.session_state:
|
|
84 |
# JB:
|
85 |
# https://python.langchain.com/docs/modules/data_connection/document_loaders/file_directory
|
86 |
text_loader_kwargs={'autodetect_encoding': True}
|
87 |
-
loader = DirectoryLoader(path, glob="**/*.pdf", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
|
|
|
|
|
88 |
docs = loader.load()
|
89 |
st.session_state.docs = docs
|
90 |
|
|
|
18 |
from langchain.embeddings import FastEmbedEmbeddings
|
19 |
from langchain_community.document_loaders import PyPDFDirectoryLoader
|
20 |
|
21 |
+
# JB:
|
22 |
+
# File Directory
|
23 |
+
# This covers how to load all documents in a directory.
|
24 |
+
# Under the hood, by default this uses the UnstructuredLoader.
|
25 |
+
from langchain_community.document_loaders import DirectoryLoader
|
26 |
+
|
27 |
from langchain_community.vectorstores import FAISS
|
28 |
# from langchain.vectorstores import Chroma
|
29 |
# from langchain_community.vectorstores import Chroma
|
|
|
90 |
# JB:
|
91 |
# https://python.langchain.com/docs/modules/data_connection/document_loaders/file_directory
|
92 |
text_loader_kwargs={'autodetect_encoding': True}
|
93 |
+
# loader = DirectoryLoader(path, glob="**/*.pdf", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
|
94 |
+
# PyPDFDirectoryLoader (TEST):
|
95 |
+
# PyPDFDirectoryLoader always parses each file with PyPDF and accepts no
# loader_cls / loader_kwargs (those are DirectoryLoader-only); passing them
# raises TypeError, so only the directory path and the glob are given here.
loader = PyPDFDirectoryLoader(path, glob="**/*.pdf")
|
96 |
docs = loader.load()
|
97 |
st.session_state.docs = docs
|
98 |
|