Update app.py
Browse files
app.py
CHANGED
@@ -45,14 +45,15 @@ examples = [
|
|
45 |
]
|
46 |
|
47 |
def get_docs(input_query, country = None):
|
48 |
-
|
|
|
|
|
|
|
49 |
if country:
|
50 |
query = "For the country of "+country+", "+input_query
|
51 |
else:
|
52 |
query = input_query
|
53 |
-
|
54 |
-
# Get top 150 because we want to make sure we have 10 pertaining to the selected country
|
55 |
-
# TEMP SOLUTION: not ideal, but FAISS document store doesnt allow metadata filtering. Needs to be tested with the full dataset
|
56 |
docs = retriever.retrieve(query=query,top_k = 150)
|
57 |
# Break out the key fields and convert to pandas for filtering
|
58 |
docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs]
|
@@ -61,8 +62,9 @@ def get_docs(input_query, country = None):
|
|
61 |
df_docs = df_docs.query('country in @country')
|
62 |
# Take the top 10
|
63 |
df_docs = df_docs.head(10)
|
|
|
64 |
df_docs = df_docs.reset_index()
|
65 |
-
df_docs['ref_id'] = df_docs.index + 1
|
66 |
# Convert back to Document format
|
67 |
ls_dict = []
|
68 |
# Iterate over df and add relevant fields to the dict object
|
@@ -72,7 +74,7 @@ def get_docs(input_query, country = None):
|
|
72 |
row['content'],
|
73 |
meta={'country': row['country'],'document_name': row['document'], 'ref_id': row['ref_id'], 'score': row['score']}
|
74 |
)
|
75 |
-
|
76 |
# Append the Document object to the documents list
|
77 |
ls_dict.append(doc)
|
78 |
return(ls_dict)
|
|
|
45 |
]
|
46 |
|
47 |
def get_docs(input_query, country = None):
|
48 |
+
'''
|
49 |
+
Construct a hacky query to focus the retriever on the target country (see notes below)
|
50 |
+
We retrieve the top 150 documents (top_k = 150) because we want to make sure we have 10 pertaining to the selected country
|
51 |
+
'''
|
52 |
if country:
|
53 |
query = "For the country of "+country+", "+input_query
|
54 |
else:
|
55 |
query = input_query
|
56 |
+
# Retrieve top k documents
|
|
|
|
|
57 |
docs = retriever.retrieve(query=query,top_k = 150)
|
58 |
# Break out the key fields and convert to pandas for filtering
|
59 |
docs = [{**x.meta,"score":x.score,"content":x.content} for x in docs]
|
|
|
62 |
df_docs = df_docs.query('country in @country')
|
63 |
# Take the top 10
|
64 |
df_docs = df_docs.head(10)
|
65 |
+
# Set up an index on which to base the source reference number (used in the prompt and for matching afterwards)
|
66 |
df_docs = df_docs.reset_index()
|
67 |
+
df_docs['ref_id'] = df_docs.index + 1 # start the index at 1
|
68 |
# Convert back to Document format
|
69 |
ls_dict = []
|
70 |
# Iterate over df and add relevant fields to the dict object
|
|
|
74 |
row['content'],
|
75 |
meta={'country': row['country'],'document_name': row['document'], 'ref_id': row['ref_id'], 'score': row['score']}
|
76 |
)
|
77 |
+
|
78 |
# Append the Document object to the documents list
|
79 |
ls_dict.append(doc)
|
80 |
return(ls_dict)
|