mikepastor11 committed on
Commit
b27a1d5
1 Parent(s): c81b4f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -93
app.py CHANGED
@@ -7,102 +7,12 @@
7
  import streamlit as st
8
  from dotenv import load_dotenv
9
 
10
- from PyPDF2 import PdfReader
11
- from langchain.text_splitter import CharacterTextSplitter
12
-
13
- from InstructorEmbedding import INSTRUCTOR
14
- from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
15
- from langchain.vectorstores import FAISS
16
- from langchain.chat_models import ChatOpenAI
17
- from langchain.memory import ConversationBufferMemory
18
- from langchain.chains import ConversationalRetrievalChain
19
- from htmlTemplates import css, bot_template, user_template
20
- from langchain.llms import HuggingFaceHub
21
-
22
- def get_pdf_text(pdf_docs):
23
- text = ""
24
- for pdf in pdf_docs:
25
- pdf_reader = PdfReader(pdf)
26
- for page in pdf_reader.pages:
27
- text += page.extract_text()
28
- return text
29
-
30
- # Chunk size and overlap must not exceed the models capacity!
31
- #
32
- def get_text_chunks(text):
33
- text_splitter = CharacterTextSplitter(
34
- separator="\n",
35
- chunk_size=800, # 1000
36
- chunk_overlap=200,
37
- length_function=len
38
- )
39
- chunks = text_splitter.split_text(text)
40
- return chunks
41
-
42
-
43
- def get_vectorstore(text_chunks):
44
- # embeddings = OpenAIEmbeddings()
45
-
46
- # pip install InstructorEmbedding
47
- # pip install sentence-transformers==2.2.2
48
- embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
49
-
50
- # from InstructorEmbedding import INSTRUCTOR
51
- # model = INSTRUCTOR('hkunlp/instructor-xl')
52
- # sentence = "3D ActionSLAM: wearable person tracking in multi-floor environments"
53
- # instruction = "Represent the Science title:"
54
- # embeddings = model.encode([[instruction, sentence]])
55
-
56
- # embeddings = model.encode(text_chunks)
57
- print('have Embeddings: ')
58
-
59
- # text_chunks="this is a test"
60
- # FAISS, Chroma and other vector databases
61
- #
62
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
63
- print('FAISS succeeds: ')
64
-
65
- return vectorstore
66
-
67
- def get_conversation_chain(vectorstore):
68
- # llm = ChatOpenAI()
69
- # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
70
- # google/bigbird-roberta-base facebook/bart-large
71
- llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.5, "max_length": 512})
72
-
73
- memory = ConversationBufferMemory(
74
- memory_key='chat_history', return_messages=True)
75
- conversation_chain = ConversationalRetrievalChain.from_llm(
76
- llm=llm,
77
- retriever=vectorstore.as_retriever(),
78
- memory=memory,
79
- )
80
- return conversation_chain
81
-
82
- def handle_userinput(user_question):
83
-
84
- response = st.session_state.conversation({'question': user_question})
85
- # response = st.session_state.conversation({'summarization': user_question})
86
- st.session_state.chat_history = response['chat_history']
87
-
88
-
89
- # st.empty()
90
-
91
- for i, message in enumerate(st.session_state.chat_history):
92
- if i % 2 == 0:
93
- st.write(user_template.replace(
94
- "{{MSG}}", message.content), unsafe_allow_html=True)
95
-
96
- else:
97
- st.write(bot_template.replace(
98
- "{{MSG}}", message.content), unsafe_allow_html=True)
99
-
100
 
101
 
102
 
103
  def main():
104
 
105
- load_dotenv()
106
  st.set_page_config(page_title="MLP Chat with multiple PDFs",
107
  page_icon=":books:")
108
 
@@ -116,8 +26,8 @@ def main():
116
  st.header("Mike's PDF Chat :books:")
117
 
118
  user_question = st.text_input("Ask a question about your documents:")
119
- if user_question:
120
- handle_userinput(user_question)
121
 
122
  # st.write( user_template, unsafe_allow_html=True)
123
  # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
 
7
  import streamlit as st
8
  from dotenv import load_dotenv
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
 
13
  def main():
14
 
15
+ # load_dotenv()
16
  st.set_page_config(page_title="MLP Chat with multiple PDFs",
17
  page_icon=":books:")
18
 
 
26
  st.header("Mike's PDF Chat :books:")
27
 
28
  user_question = st.text_input("Ask a question about your documents:")
29
+ # if user_question:
30
+ # handle_userinput(user_question)
31
 
32
  # st.write( user_template, unsafe_allow_html=True)
33
  # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)