binqiangliu committed on
Commit
887bf36
1 Parent(s): 7f0efd4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
+ from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
7
+ from langchain import HuggingFaceHub
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from htmlTemplates import bot_template, user_template, css
13
+ from transformers import pipeline
14
+ import sys
15
+ import os
16
+ from dotenv import load_dotenv
17
+
18
# Load variables from a local .env file (if present) before any of them are
# read. main() also calls load_dotenv(), but that runs only after these
# module-level lookups have already executed, which is too late for values
# that are defined only in .env. With its default arguments load_dotenv()
# does not override variables already present in the process environment.
load_dotenv()

# Hugging Face Hub token and the model repository id used to build the LLM.
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
repo_id = os.getenv("repo_id")

# OpenAI API key; both the upper-case and lower-case variable names are read.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
openai_api_key = os.environ.get('openai_api_key')

# Embedding model handed to the Pinecone vector store in get_vector_store().
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
24
+
25
+ #*******************************************#Pinecone Account: b***liu@gmail.com
26
+ #pinecone_index_name=os.environ.get('pinecone_index_name')
27
+ #pinecone_namespace=os.environ.get('pinecone_namespace')
28
+ #pinecone_api_key=os.environ.get('pinecone_api_key')
29
+ #pinecone_environment=os.environ.get('pinecone_environment')
30
+ #pinecone.init(
31
+ # api_key=pinecone_api_key,
32
+ # environment=pinecone_environment
33
+ #)
34
+ #index = pinecone.Index(pinecone_index_name)
35
+ #loaded_v_db_500_wt_metadata = Pinecone.from_existing_index(index_name=pinecone_index_name, embedding=embeddings, namespace=pinecone_namespace)
36
+ #*******************************************#
37
+
38
+ #*******************************************#Pinecone Account: ij***.l**@hotmail.com
39
# `pinecone` is used below but does not appear in the import section at the
# top of this file; without this import, loading the module fails with a
# NameError at pinecone.init().
import pinecone

# Connection settings for the Pinecone index, read from the environment.
pinecone_index_name_1 = os.environ.get('pinecone_index_name_1')
# pinecone_namespace_1 = os.environ.get('pinecone_namespace_1')  # no namespace under this Pinecone account
pinecone_api_key_1 = os.environ.get('pinecone_api_key_1')
pinecone_environment_1 = os.environ.get('pinecone_environment_1')

# Configure the Pinecone client, then open a handle to the configured index.
pinecone.init(
    api_key=pinecone_api_key_1,
    environment=pinecone_environment_1,
)
index = pinecone.Index(pinecone_index_name_1)
48
+ #vectorstore = Pinecone.from_existing_index(index_name=pinecone_index_name_1, embedding=embeddings)
49
+ #*******************************************#
50
+
51
# Hugging Face Hub settings, read from the environment. The token is read
# under both its upper-case and lower-case variable names.
repo_id = os.getenv('repo_id')
huggingfacehub_api_token = os.getenv('huggingfacehub_api_token')
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
55
+
56
def get_vector_store():
    """Return a vector store backed by the existing Pinecone index.

    The index named by the module-level ``pinecone_index_name_1`` is opened
    via ``Pinecone.from_existing_index`` and paired with the module-level
    ``embeddings`` object.

    Returns:
        The LangChain ``Pinecone`` vector store for the existing index.
    """
    # Imported locally: the file's top-level imports bring in only FAISS from
    # langchain.vectorstores, so the bare name `Pinecone` would otherwise
    # raise NameError when this function is called.
    from langchain.vectorstores import Pinecone

    # Earlier variant, kept for reference:
    # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return Pinecone.from_existing_index(
        index_name=pinecone_index_name_1,
        embedding=embeddings,
    )
60
+
61
def get_conversation_chain(vector_store):
    """Build a conversational retrieval chain on top of ``vector_store``.

    The chain combines a Hugging Face Hub LLM (repository taken from the
    module-level ``repo_id``), a retriever obtained from ``vector_store``,
    and a buffer memory stored under the ``chat_history`` key. The finished
    chain is dumped to stdout and to the Streamlit page before it is returned.

    Args:
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        The configured ``ConversationalRetrievalChain``.
    """
    # Alternatives noted previously in this function: ChatOpenAI(),
    # google/flan-t5-xxl, tiiuae/falcon-40b-instruct (hit a timed-out error),
    # meta-llama/Llama-2-70b-hf, and HuggingFaceH4/starchat-beta as repo_id.
    generation_settings = {
        "min_length": 1024,
        "max_new_tokens": 5632,
        "do_sample": True,
        "temperature": 0.1,
        "top_k": 50,
        "top_p": 0.95,
        "eos_token_id": 49155,
    }
    hub_llm = HuggingFaceHub(repo_id=repo_id, model_kwargs=generation_settings)
    chat_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=hub_llm,
        retriever=vector_store.as_retriever(),
        memory=chat_memory,
    )

    # Dump the chain first to stdout, then to the Streamlit page.
    opening_banner = "***Start of printing Conversation_Chain***"
    closing_banner = "***End of printing Conversation_Chain***"
    for emit in (print, st.write):
        emit(opening_banner)
        emit(chain)
        emit(closing_banner)

    return chain
88
+
89
def handle_user_input(question):
    """Send ``question`` to the conversation chain and render the chat history.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, the returned ``chat_history`` is saved back into the session
    state, and every message is rendered through the user/bot HTML templates
    (even positions use ``user_template``, odd positions ``bot_template``).

    Args:
        question: The text entered by the user.

    Returns:
        None. If no conversation chain has been created yet, a warning is
        shown and nothing else happens.
    """
    import html

    conversation = st.session_state.conversation
    if conversation is None:
        # main() initialises this slot to None and fills it only after the
        # sidebar "OK" button is pressed; calling None would raise TypeError.
        st.warning("Please press OK in the sidebar to prepare the conversation before asking a question.")
        return

    response = conversation({'question': question})
    st.session_state.chat_history = response['chat_history']

    for position, message in enumerate(st.session_state.chat_history):
        template = user_template if position % 2 == 0 else bot_template
        # The templates are rendered with unsafe_allow_html=True, so user and
        # model text is escaped to keep it from being interpreted as markup.
        safe_text = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_text), unsafe_allow_html=True)
97
+
98
def main():
    """Render the Streamlit chat page and wire up the sidebar controls.

    Sets up the page, makes sure the ``conversation`` and ``chat_history``
    session slots exist, forwards any entered question to
    ``handle_user_input``, and, when the sidebar "OK" button is pressed,
    builds the vector store and conversation chain and stores the chain in
    the session state.
    """
    load_dotenv()
    st.set_page_config(page_title='Chat with Your own PDFs', page_icon=':books:')
    st.write(css, unsafe_allow_html=True)

    # Create the session slots on first run; existing values are kept.
    for slot in ("conversation", "chat_history"):
        if slot not in st.session_state:
            st.session_state[slot] = None

    st.header('Chat with Your own PDFs :books:')
    user_question = st.text_input("Ask anything to your PDF: ")
    if user_question:
        handle_user_input(user_question)

    with st.sidebar:
        st.subheader("Upload your Documents Here: ")
        # The uploader's return value is not used anywhere in this function.
        uploaded_pdfs = st.file_uploader(
            "Choose your PDF Files and Press OK",
            type=['pdf'],
            accept_multiple_files=True,
        )
        if not st.button("OK"):
            return
        with st.spinner("Preparation under process..."):
            # Vector store first, then the chain that retrieves from it.
            store = get_vector_store()
            st.write("DONE")
            st.session_state.conversation = get_conversation_chain(store)
120
+
121
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()