Luca Foppiano committed on
Commit
9997b7b
β€’
1 Parent(s): fcde626

fix env variables (#9)

Browse files

* avoid writing env variables of api keys

README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: πŸ“ Scientific Document Insight QA
3
- emoji:
4
  colorFrom: yellow
5
  colorTo: pink
6
  sdk: streamlit
@@ -10,7 +10,7 @@ pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- # DocumentIQA: Scientific Document Insight QA
14
 
15
  **Work in progress** :construction_worker:
16
 
 
1
  ---
2
+ title: Scientific Document Insights Q/A
3
+ emoji: πŸ“
4
  colorFrom: yellow
5
  colorTo: pink
6
  sdk: streamlit
 
10
  license: apache-2.0
11
  ---
12
 
13
+ # DocumentIQA: Scientific Document Insights Q/A
14
 
15
  **Work in progress** :construction_worker:
16
 
document_qa/document_qa_engine.py CHANGED
@@ -205,6 +205,7 @@ class DocumentQAEngine:
205
  if doc_id:
206
  hash = doc_id
207
  else:
 
208
  hash = metadata[0]['hash']
209
 
210
  if hash not in self.embeddings_dict.keys():
 
205
  if doc_id:
206
  hash = doc_id
207
  else:
208
+
209
  hash = metadata[0]['hash']
210
 
211
  if hash not in self.embeddings_dict.keys():
requirements.txt CHANGED
@@ -19,7 +19,6 @@ chromadb==0.4.15
19
  tiktoken==0.4.0
20
  openai==0.27.7
21
  langchain==0.0.314
22
- promptlayer==0.2.4
23
  typing-inspect==0.9.0
24
  typing_extensions==4.8.0
25
  pydantic==2.4.2
 
19
  tiktoken==0.4.0
20
  openai==0.27.7
21
  langchain==0.0.314
 
22
  typing-inspect==0.9.0
23
  typing_extensions==4.8.0
24
  pydantic==2.4.2
streamlit_app.py CHANGED
@@ -10,7 +10,7 @@ from langchain.llms.huggingface_hub import HuggingFaceHub
10
  dotenv.load_dotenv(override=True)
11
 
12
  import streamlit as st
13
- from langchain.chat_models import PromptLayerChatOpenAI
14
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
15
 
16
  from document_qa.document_qa_engine import DocumentQAEngine
@@ -52,7 +52,7 @@ if 'uploaded' not in st.session_state:
52
  st.session_state['uploaded'] = False
53
 
54
  st.set_page_config(
55
- page_title="Document Insights QA",
56
  page_icon="πŸ“",
57
  initial_sidebar_state="expanded",
58
  menu_items={
@@ -70,13 +70,21 @@ def new_file():
70
 
71
 
72
  # @st.cache_resource
73
- def init_qa(model):
74
  if model == 'chatgpt-3.5-turbo':
75
- chat = PromptLayerChatOpenAI(model_name="gpt-3.5-turbo",
76
- temperature=0,
77
- return_pl_id=True,
78
- pl_tags=["streamlit", "chatgpt"])
79
- embeddings = OpenAIEmbeddings()
 
 
 
 
 
 
 
 
80
  elif model == 'mistral-7b-instruct-v0.1':
81
  chat = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1",
82
  model_kwargs={"temperature": 0.01, "max_length": 4096, "max_new_tokens": 2048})
@@ -162,12 +170,11 @@ with st.sidebar:
162
  st.markdown(
163
  ":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ")
164
 
165
- if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
166
  if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
167
  api_key = st.text_input('Huggingface API Key', type="password")
168
 
169
- st.markdown(
170
- "Get it [here](https://huggingface.co/docs/hub/security-tokens)")
171
  else:
172
  api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
173
 
@@ -176,33 +183,33 @@ with st.sidebar:
176
  if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
177
  with st.spinner("Preparing environment"):
178
  st.session_state['api_keys'][model] = api_key
179
- if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
180
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
181
  st.session_state['rqa'][model] = init_qa(model)
182
 
183
- elif model == 'chatgpt-3.5-turbo':
184
  if 'OPENAI_API_KEY' not in os.environ:
185
  api_key = st.text_input('OpenAI API Key', type="password")
186
- st.markdown(
187
- "Get it [here](https://platform.openai.com/account/api-keys)")
188
  else:
189
  api_key = os.environ['OPENAI_API_KEY']
190
 
191
  if api_key:
192
- # st.session_state['api_key'] = is_api_key_provided = True
193
  if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
194
  with st.spinner("Preparing environment"):
195
  st.session_state['api_keys'][model] = api_key
196
  if 'OPENAI_API_KEY' not in os.environ:
197
- os.environ['OPENAI_API_KEY'] = api_key
198
- st.session_state['rqa'][model] = init_qa(model)
 
199
  # else:
200
  # is_api_key_provided = st.session_state['api_key']
201
 
202
- st.title("πŸ“ Scientific Document Insight QA")
203
  st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
204
 
205
- st.markdown(":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
 
206
 
207
  uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
208
  disabled=st.session_state['model'] is not None and st.session_state['model'] not in
 
10
  dotenv.load_dotenv(override=True)
11
 
12
  import streamlit as st
13
+ from langchain.chat_models import ChatOpenAI
14
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
15
 
16
  from document_qa.document_qa_engine import DocumentQAEngine
 
52
  st.session_state['uploaded'] = False
53
 
54
  st.set_page_config(
55
+ page_title="Scientific Document Insights Q/A",
56
  page_icon="πŸ“",
57
  initial_sidebar_state="expanded",
58
  menu_items={
 
70
 
71
 
72
  # @st.cache_resource
73
+ def init_qa(model, api_key=None):
74
  if model == 'chatgpt-3.5-turbo':
75
+ if api_key:
76
+ chat = ChatOpenAI(model_name="gpt-3.5-turbo",
77
+ temperature=0,
78
+ openai_api_key=api_key,
79
+ frequency_penalty=0.1)
80
+ embeddings = OpenAIEmbeddings(openai_api_key=api_key)
81
+ else:
82
+ chat = ChatOpenAI(model_name="gpt-3.5-turbo",
83
+ temperature=0,
84
+ frequency_penalty=0.1)
85
+ embeddings = OpenAIEmbeddings()
86
+
87
+
88
  elif model == 'mistral-7b-instruct-v0.1':
89
  chat = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1",
90
  model_kwargs={"temperature": 0.01, "max_length": 4096, "max_new_tokens": 2048})
 
170
  st.markdown(
171
  ":warning: Mistral is free to use, however requests might hit limits of the huggingface free API and fail. :warning: ")
172
 
173
+ if model == 'mistral-7b-instruct-v0.1' and model not in st.session_state['api_keys']:
174
  if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
175
  api_key = st.text_input('Huggingface API Key', type="password")
176
 
177
+ st.markdown("Get it [here](https://huggingface.co/docs/hub/security-tokens)")
 
178
  else:
179
  api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
180
 
 
183
  if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
184
  with st.spinner("Preparing environment"):
185
  st.session_state['api_keys'][model] = api_key
186
+ # if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
187
+ # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
188
  st.session_state['rqa'][model] = init_qa(model)
189
 
190
+ elif model == 'chatgpt-3.5-turbo' and model not in st.session_state['api_keys']:
191
  if 'OPENAI_API_KEY' not in os.environ:
192
  api_key = st.text_input('OpenAI API Key', type="password")
193
+ st.markdown("Get it [here](https://platform.openai.com/account/api-keys)")
 
194
  else:
195
  api_key = os.environ['OPENAI_API_KEY']
196
 
197
  if api_key:
 
198
  if model not in st.session_state['rqa'] or model not in st.session_state['api_keys']:
199
  with st.spinner("Preparing environment"):
200
  st.session_state['api_keys'][model] = api_key
201
  if 'OPENAI_API_KEY' not in os.environ:
202
+ st.session_state['rqa'][model] = init_qa(model, api_key)
203
+ else:
204
+ st.session_state['rqa'][model] = init_qa(model)
205
  # else:
206
  # is_api_key_provided = st.session_state['api_key']
207
 
208
+ st.title("πŸ“ Scientific Document Insights Q/A")
209
  st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
210
 
211
+ st.markdown(
212
+ ":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
213
 
214
  uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
215
  disabled=st.session_state['model'] is not None and st.session_state['model'] not in