JBHF committed on
Commit
52d4f49
1 Parent(s): 3179492

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -10,6 +10,8 @@ import os
10
  import streamlit as st
11
  from langchain_groq import ChatGroq
12
  from langchain_community.document_loaders import WebBaseLoader
 
 
13
  from langchain_community.embeddings import OllamaEmbeddings
14
 
15
  # JB:
@@ -39,8 +41,14 @@ if "vector" not in st.session_state:
39
  st.session_state.embeddings = FastEmbedEmbeddings() # JB
40
 
41
 
42
- st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html")
43
- st.session_state.docs = st.session_state.loader.load()
 
 
 
 
 
 
44
 
45
  st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
46
  st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
 
10
  import streamlit as st
11
  from langchain_groq import ChatGroq
12
  from langchain_community.document_loaders import WebBaseLoader
13
+ # JB:
14
+ from langchain_community.document_loaders import PyPDFLoader
15
  from langchain_community.embeddings import OllamaEmbeddings
16
 
17
  # JB:
 
41
  st.session_state.embeddings = FastEmbedEmbeddings() # JB
42
 
43
 
44
+ # st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html") # ORIGINAL
45
+ # st.session_state.docs = st.session_state.loader.load() # ORIGINAL
46
+ pdf_file_path = "" # JB
47
+ st.session_state.loader = PyPDFLoader(file_path=pdf_file_path).load() # JB
48
+ # chunks = self.text_splitter.split_documents(docs)
49
+ # chunks = filter_complex_metadata(chunks)
50
+
51
+
52
 
53
  st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
54
  st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)