Spaces:

mtyrrell
/

cpv_poc

Running

mtyrrell committed on Aug 8, 2023

Commit

8e99f61

•

1 Parent(s): f5548b0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -79,21 +79,19 @@ def get_docs(input_query, country = None):
         ls_dict.append(doc)
     return(ls_dict)
-def get_refs(res):
   '''
   Parse response for engineered reference ids (refer to prompt template)
   Extract documents using reference ids
   '''
-  import re
-  text = res["results"][0]
-  # This pattern should be returned by gpt3.5
   # pattern = r'ref\. (\d+)\]\.'
   pattern = r'ref\. (\d+)'
-  ref_ids = [int(match) for match in re.findall(pattern, text)]
   # extract
   result_str = ""  # Initialize an empty string to store the result
   for i in range(len(res['documents'])):
-      doc = res['documents'][i].to_dict()
       ref_id = doc['meta']['ref_id']
       if ref_id in ref_ids:
              result_str += "**Ref. " + str(ref_id) + " [" + doc['meta']['country'] + " " + doc['meta']['document_name'] + "]:** " + "*'" + doc['content'] + "'*<br> <br>" # Add <br> for a line break
@@ -106,7 +104,7 @@ def run_query(input_text, country):
     output = res["results"][0]
     st.write('Response')
     st.success(output)
-    references = get_refs(res)
     st.write('References')
     st.markdown('References are based on text automatically extracted from climate policy documents. These extracts may contain non-legible characters or disjointed text as an artifact of the extraction procedure')
     st.markdown(references, unsafe_allow_html=True)

         ls_dict.append(doc)
     return(ls_dict)
+def get_refs(docs, res):
   '''
   Parse response for engineered reference ids (refer to prompt template)
   Extract documents using reference ids
   '''
+  # This pattern should be returned by gpt3.5 & llama2
   # pattern = r'ref\. (\d+)\]\.'
   pattern = r'ref\. (\d+)'
+  ref_ids = [int(match) for match in re.findall(pattern, res)]
   # extract
   result_str = ""  # Initialize an empty string to store the result
   for i in range(len(res['documents'])):
+      doc = docs[i].to_dict()
       ref_id = doc['meta']['ref_id']
       if ref_id in ref_ids:
              result_str += "**Ref. " + str(ref_id) + " [" + doc['meta']['country'] + " " + doc['meta']['document_name'] + "]:** " + "*'" + doc['content'] + "'*<br> <br>" # Add <br> for a line break
     output = res["results"][0]
     st.write('Response')
     st.success(output)
+    references = get_refs(docs, res["results"][0])
     st.write('References')
     st.markdown('References are based on text automatically extracted from climate policy documents. These extracts may contain non-legible characters or disjointed text as an artifact of the extraction procedure')
     st.markdown(references, unsafe_allow_html=True)