mtyrrell commited on
Commit
e64f989
1 Parent(s): 9918933

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -23
app.py CHANGED
@@ -1,5 +1,3 @@
1
- # inspiration from Ekimetrics climate qa
2
-
3
  import streamlit as st
4
  import os
5
  import re
@@ -13,13 +11,15 @@ import pandas as pd
13
  from haystack.document_stores import FAISSDocumentStore
14
  from haystack.nodes import EmbeddingRetriever
15
  from haystack.schema import Document
 
16
 
 
 
 
17
 
18
- # Enter openai API key
19
  openai_key = os.environ["OPENAI_API_KEY"]
20
 
21
- # Select model
22
- model_name = "gpt-3.5-turbo"
23
 
24
  # Define the template
25
  template = PromptTemplate(
@@ -30,10 +30,12 @@ Provide example quotes and citations using extracted text from the documents. \
30
  Use facts and numbers from the documents in your answer. \
31
  Reference information used from documents at the end of each applicable sentence (ex: [source: document_name]), where 'document_name' is the text provided at the start of each document (demarcated by '- &&&' and '&&&:')'. \
32
  If no relevant information to answer the question is present in the documents, just say you don't have enough information to answer. \
33
-
34
  Context: {' - '.join(['&&& '+d.meta['document_name']+' ref. '+str(d.meta['ref_id'])+' &&&: '+d.content for d in documents])}; Question: {query}; Answer:""",
35
  )
36
 
 
 
 
37
  # Create a list of options for the dropdown
38
  country_options = ['Angola','Botswana','Lesotho','Kenya','Malawi','Mozambique','Namibia','South Africa','Zambia','Zimbabwe']
39
 
@@ -98,14 +100,33 @@ def get_refs(docs, res):
98
 
99
  return result_str
100
 
101
- def run_query(input_text, country):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  docs = get_docs(input_text, country)
103
  # st.write('Selected country: ', country) # Debugging country
104
- res = pipe.run(query=input_text, documents=docs)
105
- output = res["results"][0]
 
 
 
 
 
 
106
  st.write('Response')
107
  st.success(output)
108
- references = get_refs(docs, res["results"][0])
109
  st.write('References')
110
  st.markdown('References are based on text automatically extracted from climate policy documents. These extracts may contain non-legible characters or disjointed text as an artifact of the extraction procedure')
111
  st.markdown(references, unsafe_allow_html=True)
@@ -121,19 +142,29 @@ retriever = EmbeddingRetriever(
121
  progress_bar=False,
122
  )
123
 
124
- # Initialize the PromptNode
125
- pn = PromptNode(model_name_or_path=model_name, default_prompt_template=template, api_key=openai_key, max_length=2000, model_kwargs={"generation_kwargs": {"do_sample": False, "temperature": 0}})
126
-
127
- # Initialize the pipeline
128
- pipe = Pipeline()
129
- pipe.add_node(component=pn, name="prompt_node", inputs=["Query"])
130
-
131
 
132
  # Guiding text
133
  st.title('Climate Policy Documents: Vulnerabilities Analysis Q&A (test)')
134
  st.markdown('This tool seeks to provide an interface for quering national climate policy documents (NDCs, LTS etc.). The current version is powered by chatGPT (3.5) and limited to 10 Southern African countries (Angola, Botswana, Eswatini, Lesotho, Malawi, Mozambique, Namibia, South Africa, Zambia, Zimbabwe) and Kenya. The intended use case is to allow users to interact with the documents and obtain valuable insights on various vulnerable groups affected by climate change.')
135
  st.markdown('**DISCLAIMER:** *This prototype tool based on LLMs (Language Models) is provided "as is" for experimental and exploratory purposes only, and should not be used for critical or production applications. Users are advised that the tool may contain errors, bugs, or limitations and should be used with caution and awareness of potential risks, and the developers make no warranties or guarantees regarding its performance, reliability, or suitability for any specific purpose.*')
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  # Dropdown selectbox
139
  country = st.selectbox('Select a country:', country_options)
@@ -148,10 +179,5 @@ else:
148
 
149
 
150
  if st.button('Submit'):
151
- run_query(text, country=country)
152
-
153
- import sys
154
- import streamlit as st
155
 
156
- python_version = sys.version
157
- st.text(f"Python Version: {python_version}")
 
 
 
1
  import streamlit as st
2
  import os
3
  import re
 
11
  from haystack.document_stores import FAISSDocumentStore
12
  from haystack.nodes import EmbeddingRetriever
13
  from haystack.schema import Document
14
+ from huggingface_hub import login, HfApi, hf_hub_download, InferenceClient
15
 
16
+ # Get HF token
17
+ hf_token = os.environ["HF_TOKEN"]
18
+ login(token=hf_token, add_to_git_credential=True)
19
 
20
+ # Get openai API key
21
  openai_key = os.environ["OPENAI_API_KEY"]
22
 
 
 
23
 
24
  # Define the template
25
  template = PromptTemplate(
 
30
  Use facts and numbers from the documents in your answer. \
31
  Reference information used from documents at the end of each applicable sentence (ex: [source: document_name]), where 'document_name' is the text provided at the start of each document (demarcated by '- &&&' and '&&&:')'. \
32
  If no relevant information to answer the question is present in the documents, just say you don't have enough information to answer. \
 
33
  Context: {' - '.join(['&&& '+d.meta['document_name']+' ref. '+str(d.meta['ref_id'])+' &&&: '+d.content for d in documents])}; Question: {query}; Answer:""",
34
  )
35
 
36
+ # Create a list of options for the dropdown
37
+ model_options = ['chatGPT','Llama2']
38
+
39
  # Create a list of options for the dropdown
40
  country_options = ['Angola','Botswana','Lesotho','Kenya','Malawi','Mozambique','Namibia','South Africa','Zambia','Zimbabwe']
41
 
 
100
 
101
  return result_str
102
 
103
# Build the Llama 2 prompt by hand (the Haystack PromptTemplate is only wired
# up for the OpenAI PromptNode, so it can't be reused on this path).
def get_prompt_llama2(docs, query):
    """Assemble the full text prompt for the Llama 2 inference endpoint.

    Parameters
    ----------
    docs : iterable of Haystack ``Document``-like objects; each must expose
        ``.content`` and a ``.meta`` dict with ``'ref_id'`` and
        ``'document_name'`` keys (as produced by ``get_docs``).
    query : str
        The user's question.

    Returns
    -------
    str
        Instruction text followed by ``; Context: ...; Question: ...; Answer:``,
        with each document demarcated as ``&&& [ref. #] document_name &&&:`` so
        the model can cite it.
    """
    # Adjacent string literals (instead of backslash-continued lines) avoid
    # accidentally embedding indentation whitespace into the prompt.
    base_prompt = (
        "Answer the given question using the following documents. "
        "Formulate your answer in the style of an academic report. "
        "Provide example quotes and citations using extracted text from the documents. "
        "Use facts and numbers from the documents in your answer. "
        "ALWAYS include references for information used from documents at the end "
        "of each applicable sentence using the format: '[ref. #]', where '[ref. #]' "
        "is included in the text provided at the start of each document "
        "(demarcated by the pattern '- &&& [ref. #] document_name &&&:')'. "
        "Do not include page numbers in the references. "
        "If no relevant information to answer the question is present in the "
        "documents, just say you don't have enough information to answer."
    )
    # Prefix each retrieved chunk with its reference marker and source name.
    context = ' - '.join(
        '&&& [ref. ' + str(d.meta['ref_id']) + '] ' + d.meta['document_name']
        + ' &&&: ' + d.content
        for d in docs
    )
    return base_prompt + "; Context: " + context + "; Question: " + query + "; Answer:"
116
+
117
def run_query(input_text, country, model_sel):
    """Answer ``input_text`` against the selected country's documents and
    render the response plus its references in the Streamlit page.

    Dispatches on ``model_sel``: "chatGPT" goes through the Haystack pipeline
    (``pipe``), anything else through the Hugging Face ``client`` with a
    hand-built Llama 2 prompt.
    """
    retrieved = get_docs(input_text, country)
    # st.write('Selected country: ', country) # Debugging country
    if model_sel == "chatGPT":
        pipeline_result = pipe.run(query=input_text, documents=retrieved)
        output = pipeline_result["results"][0]
        references = get_refs(retrieved, output)
    else:
        output = client.text_generation(
            get_prompt_llama2(retrieved, query=input_text),
            max_new_tokens=4000,
            temperature=0.01,
            model=model,
        )
        references = get_refs(retrieved, output)
    st.write('Response')
    st.success(output)
    st.write('References')
    st.markdown('References are based on text automatically extracted from climate policy documents. These extracts may contain non-legible characters or disjointed text as an artifact of the extraction procedure')
    st.markdown(references, unsafe_allow_html=True)
 
142
  progress_bar=False,
143
  )
144
 
 
 
 
 
 
 
 
145
 
146
# Guiding text: page title, usage notes, and liability disclaimer.
st.title('Climate Policy Documents: Vulnerabilities Analysis Q&A (test)')
# NOTE: country list below must stay in sync with `country_options`.
st.markdown('This tool seeks to provide an interface for querying national climate policy documents (NDCs, LTS etc.). The current version is powered by chatGPT (3.5) or Llama 2 and limited to 9 Southern African countries (Angola, Botswana, Lesotho, Malawi, Mozambique, Namibia, South Africa, Zambia, Zimbabwe) and Kenya. The intended use case is to allow users to interact with the documents and obtain valuable insights on various vulnerable groups affected by climate change.')
st.markdown('**DISCLAIMER:** *This prototype tool based on LLMs (Language Models) is provided "as is" for experimental and exploratory purposes only, and should not be used for critical or production applications. Users are advised that the tool may contain errors, bugs, or limitations and should be used with caution and awareness of potential risks, and the developers make no warranties or guarantees regarding its performance, reliability, or suitability for any specific purpose.*')
150
 
151
# Dropdown selectbox: model
model_sel = st.selectbox('Select a model:', model_options)

#----Model Select logic-------
if model_sel == "chatGPT":
    # OpenAI path: wrap the chat model in a Haystack PromptNode and expose it
    # as a single-node pipeline for run_query().
    model_name = "gpt-3.5-turbo"
    pn = PromptNode(
        model_name_or_path=model_name,
        default_prompt_template=template,
        api_key=openai_key,
        max_length=2000,
        model_kwargs={"generation_kwargs": {"do_sample": False, "temperature": 0}},
    )
    pipe = Pipeline()
    pipe.add_node(component=pn, name="prompt_node", inputs=["Query"])
else:
    # Hugging Face path: bare inference client; the model id is supplied
    # per call in run_query().
    model = "meta-llama/Llama-2-70b-chat-hf"
    client = InferenceClient()
167
+
168
 
169
  # Dropdown selectbox
170
  country = st.selectbox('Select a country:', country_options)
 
179
 
180
 
181
  if st.button('Submit'):
182
+ run_query(text, country=country, model_sel=model_sel)
 
 
 
183