Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,8 @@ import os
|
|
10 |
import streamlit as st
|
11 |
from langchain_groq import ChatGroq
|
12 |
from langchain_community.document_loaders import WebBaseLoader
|
|
|
|
|
13 |
from langchain_community.embeddings import OllamaEmbeddings
|
14 |
|
15 |
# JB:
|
@@ -39,8 +41,14 @@ if "vector" not in st.session_state:
|
|
39 |
st.session_state.embeddings = FastEmbedEmbeddings() # JB
|
40 |
|
41 |
|
42 |
-
st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html")
|
43 |
-
st.session_state.docs = st.session_state.loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
46 |
st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
|
|
|
10 |
import streamlit as st
|
11 |
from langchain_groq import ChatGroq
|
12 |
from langchain_community.document_loaders import WebBaseLoader
|
13 |
+
# JB:
|
14 |
+
from langchain_community.document_loaders import PyPDFLoader
|
15 |
from langchain_community.embeddings import OllamaEmbeddings
|
16 |
|
17 |
# JB:
|
|
|
41 |
st.session_state.embeddings = FastEmbedEmbeddings() # JB
|
42 |
|
43 |
|
44 |
+
# st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html") # ORIGINAL
|
45 |
+
# st.session_state.docs = st.session_state.loader.load() # ORIGINAL
|
46 |
+
pdf_file_path = ""  # JB: TODO placeholder — empty path; set this to the PDF that PyPDFLoader should read
|
47 |
+
# JB: keep the loader object and its loaded pages in separate session keys,
# mirroring the original WebBaseLoader code. The split_documents() call
# further down reads st.session_state.docs, so it must be populated here.
st.session_state.loader = PyPDFLoader(file_path=pdf_file_path)
st.session_state.docs = st.session_state.loader.load()
|
48 |
+
# chunks = self.text_splitter.split_documents(docs)
|
49 |
+
# chunks = filter_complex_metadata(chunks)
|
50 |
+
|
51 |
+
|
52 |
|
53 |
st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
54 |
st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
|