mtyrrell committed on
Commit
f5548b0
1 Parent(s): d7e50c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -45,14 +45,15 @@ examples = [
45
  ]
46
 
47
  def get_docs(input_query, country = None):
48
- # Construct a hacky query to focus the retriever on the target country (see notes below)
 
 
 
49
  if country:
50
  query = "For the country of "+country+", "+input_query
51
  else:
52
  query = input_query
53
-
54
- # Get top 150 because we want to make sure we have 10 pertaining to the selected country
55
- # TEMP SOLUTION: not ideal, but FAISS document store doesn't allow metadata filtering. Needs to be tested with the full dataset
56
  docs = retriever.retrieve(query=query,top_k = 150)
57
  # Break out the key fields and convert to pandas for filtering
58
  docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs]
@@ -61,8 +62,9 @@ def get_docs(input_query, country = None):
61
  df_docs = df_docs.query('country in @country')
62
  # Take the top 10
63
  df_docs = df_docs.head(10)
 
64
  df_docs = df_docs.reset_index()
65
- df_docs['ref_id'] = df_docs.index + 1
66
  # Convert back to Document format
67
  ls_dict = []
68
  # Iterate over df and add relevant fields to the dict object
@@ -72,7 +74,7 @@ def get_docs(input_query, country = None):
72
  row['content'],
73
  meta={'country': row['country'],'document_name': row['document'], 'ref_id': row['ref_id'], 'score': row['score']}
74
  )
75
-
76
  # Append the Document object to the documents list
77
  ls_dict.append(doc)
78
  return(ls_dict)
 
45
  ]
46
 
47
  def get_docs(input_query, country = None):
48
+ '''
49
+ Construct a hacky query to focus the retriever on the target country (see notes below)
50
+ We retrieve the top 150 documents (top_k = 150) because we want to make sure we have 10 pertaining to the selected country
51
+ '''
52
  if country:
53
  query = "For the country of "+country+", "+input_query
54
  else:
55
  query = input_query
56
+ # Retrieve top k documents
 
 
57
  docs = retriever.retrieve(query=query,top_k = 150)
58
  # Break out the key fields and convert to pandas for filtering
59
  docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs]
 
62
  df_docs = df_docs.query('country in @country')
63
  # Take the top 10
64
  df_docs = df_docs.head(10)
65
+ # Set up an index on which to base the source reference number (used in the prompt and for matching afterwards)
66
  df_docs = df_docs.reset_index()
67
+ df_docs['ref_id'] = df_docs.index + 1 # start the index at 1
68
  # Convert back to Document format
69
  ls_dict = []
70
  # Iterate over df and add relevant fields to the dict object
 
74
  row['content'],
75
  meta={'country': row['country'],'document_name': row['document'], 'ref_id': row['ref_id'], 'score': row['score']}
76
  )
77
+
78
  # Append the Document object to the documents list
79
  ls_dict.append(doc)
80
  return(ls_dict)