NonToxicGlazeAdvisor_Chat_with_Docs_Groq_Edition_1

Running

App Files Files Community

JBHF committed on Mar 8, 2024

Commit

bff81b1

verified ·

1 Parent(s): 81d7480

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -1

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ from langchain_community.embeddings import OllamaEmbeddings
 # JB:
 from langchain.embeddings import FastEmbedEmbeddings
 from langchain_community.vectorstores import FAISS
 # from langchain.vectorstores import Chroma
@@ -47,9 +48,30 @@ if "vector" not in st.session_state:
     # https://python.langchain.com/docs/integrations/document_loaders/merge_doc
     # from langchain_community.document_loaders import PyPDFLoader
     # loader_pdf = PyPDFLoader("../MachineLearning-Lecture01.pdf")
     pdf_file_path = "*.pdf"                                                            # JB
     # st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load()            # JB
-    st.session_state.loader = PyPDFLoader(*.pdf).load()                                # JB
     # chunks = self.text_splitter.split_documents(docs)
     # chunks = filter_complex_metadata(chunks)

 # JB:
 from langchain.embeddings import FastEmbedEmbeddings
+from langchain_community.document_loaders import PyPDFDirectoryLoader
 from langchain_community.vectorstores import FAISS
 # from langchain.vectorstores import Chroma
     # https://python.langchain.com/docs/integrations/document_loaders/merge_doc
     # from langchain_community.document_loaders import PyPDFLoader
     # loader_pdf = PyPDFLoader("../MachineLearning-Lecture01.pdf")
+    #
+    # https://stackoverflow.com/questions/60215731/pypdf-to-read-each-pdf-in-a-folder
+    #
+    # https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html
+    # https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#pypdf-directory
+    # !!!!!
+    # PyPDF Directory
+    # Load PDFs from directory
+    # from langchain_community.document_loaders import PyPDFDirectoryLoader
+    # loader = PyPDFDirectoryLoader("example_data/")
+    # docs = loader.load()
+    #
+    # SEE ALSO:
+    # https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf#using-pypdf
+    # Using MathPix
+    # Inspired by Daniel Gross's https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
+    # from langchain_community.document_loaders import MathpixPDFLoader
+    # loader = MathpixPDFLoader("example_data/layout-parser-paper.pdf")
+    # data = loader.load()
     pdf_file_path = "*.pdf"                                                            # JB
     # st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load()            # JB
+    # st.session_state.loader = PyPDFLoader(*.pdf).load()                              # JB syntax error *.pdf !
+    st.session_state.loader = PyPDFDirectoryLoader("")                    # JB PyPDFDirectoryLoader("example_data/")
     # chunks = self.text_splitter.split_documents(docs)
     # chunks = filter_complex_metadata(chunks)