Update app.py
Browse files
app.py
CHANGED
@@ -16,6 +16,7 @@ from langchain_community.embeddings import OllamaEmbeddings
|
|
16 |
|
17 |
# JB:
|
18 |
from langchain.embeddings import FastEmbedEmbeddings
|
|
|
19 |
|
20 |
from langchain_community.vectorstores import FAISS
|
21 |
# from langchain.vectorstores import Chroma
|
@@ -47,9 +48,30 @@ if "vector" not in st.session_state:
|
|
47 |
# https://python.langchain.com/docs/integrations/document_loaders/merge_doc
|
48 |
# from langchain_community.document_loaders import PyPDFLoader
|
49 |
# loader_pdf = PyPDFLoader("../MachineLearning-Lecture01.pdf")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
pdf_file_path = "*.pdf" # JB
|
51 |
# st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
|
52 |
-
st.session_state.loader = PyPDFLoader(*.pdf).load()
|
|
|
|
|
53 |
# chunks = self.text_splitter.split_documents(docs)
|
54 |
# chunks = filter_complex_metadata(chunks)
|
55 |
|
|
|
16 |
|
17 |
# JB:
|
18 |
from langchain.embeddings import FastEmbedEmbeddings
|
19 |
+
from langchain_community.document_loaders import PyPDFDirectoryLoader
|
20 |
|
21 |
from langchain_community.vectorstores import FAISS
|
22 |
# from langchain.vectorstores import Chroma
|
|
|
48 |
# https://python.langchain.com/docs/integrations/document_loaders/merge_doc
|
49 |
# from langchain_community.document_loaders import PyPDFLoader
|
50 |
# loader_pdf = PyPDFLoader("../MachineLearning-Lecture01.pdf")
|
51 |
+
#
|
52 |
+
# https://stackoverflow.com/questions/60215731/pypdf-to-read-each-pdf-in-a-folder
|
53 |
+
#
|
54 |
+
# https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html
|
55 |
+
# https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#pypdf-directory
|
56 |
+
# !!!!!
|
57 |
+
# PyPDF Directory
|
58 |
+
# Load PDFs from directory
|
59 |
+
# from langchain_community.document_loaders import PyPDFDirectoryLoader
|
60 |
+
# loader = PyPDFDirectoryLoader("example_data/")
|
61 |
+
# docs = loader.load()
|
62 |
+
#
|
63 |
+
# SEE ALSO:
|
64 |
+
# https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#using-pypdf
|
65 |
+
# Using MathPix
|
66 |
+
# Inspired by Daniel Gross's https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
|
67 |
+
# from langchain_community.document_loaders import MathpixPDFLoader
|
68 |
+
# loader = MathpixPDFLoader("example_data/layout-parser-paper.pdf")
|
69 |
+
# data = loader.load()
|
70 |
pdf_file_path = "*.pdf" # JB
|
71 |
# st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
|
72 |
+
# st.session_state.loader = PyPDFLoader(*.pdf).load() # JB: syntax error - a bare *.pdf is not valid Python; the path must be a quoted string
|
73 |
+
st.session_state.loader = PyPDFDirectoryLoader("") # JB PyPDFDirectoryLoader("example_data/")
|
74 |
+
|
75 |
# chunks = self.text_splitter.split_documents(docs)
|
76 |
# chunks = filter_complex_metadata(chunks)
|
77 |
|