Spaces:
Build error
Build error
Upload 17 files
#17
by
awinml
- opened
- utils/entity_extraction.py +36 -1
- utils/retriever.py +13 -6
utils/entity_extraction.py
CHANGED
@@ -21,6 +21,41 @@ def expand_list_of_lists(list_of_lists):
|
|
21 |
return expanded_list
|
22 |
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def all_keywords_combs(texts):
|
25 |
|
26 |
texts = [text.split(" ") for text in texts]
|
@@ -47,7 +82,7 @@ def extract_keywords(query_text, model):
|
|
47 |
prompt = f"###Instruction:Extract the important keywords which describe the context accurately.\n\nInput:{query_text}\n\n###Response:"
|
48 |
response = model.predict(prompt)
|
49 |
keywords = response.split(", ")
|
50 |
-
keywords =
|
51 |
return keywords
|
52 |
|
53 |
|
|
|
21 |
return expanded_list
|
22 |
|
23 |
|
24 |
+
def keywords_no_companies(texts):
|
25 |
+
# Company list (to remove companies from extracted entities)
|
26 |
+
|
27 |
+
company_list = [
|
28 |
+
"apple",
|
29 |
+
"amd",
|
30 |
+
"amazon",
|
31 |
+
"cisco",
|
32 |
+
"google",
|
33 |
+
"microsoft",
|
34 |
+
"nvidia",
|
35 |
+
"asml",
|
36 |
+
"intel",
|
37 |
+
"micron",
|
38 |
+
"aapl",
|
39 |
+
"csco",
|
40 |
+
"msft",
|
41 |
+
"asml",
|
42 |
+
"nvda",
|
43 |
+
"googl",
|
44 |
+
"mu",
|
45 |
+
"intc",
|
46 |
+
"amzn",
|
47 |
+
"amd",
|
48 |
+
]
|
49 |
+
|
50 |
+
texts = [text.split(" ") for text in texts]
|
51 |
+
texts = expand_list_of_lists(texts)
|
52 |
+
|
53 |
+
# Convert all strings to lowercase.
|
54 |
+
lower_texts = [text.lower() for text in texts]
|
55 |
+
keywords = [text for text in lower_texts if text not in company_list]
|
56 |
+
return keywords
|
57 |
+
|
58 |
+
|
59 |
def all_keywords_combs(texts):
|
60 |
|
61 |
texts = [text.split(" ") for text in texts]
|
|
|
82 |
prompt = f"###Instruction:Extract the important keywords which describe the context accurately.\n\nInput:{query_text}\n\n###Response:"
|
83 |
response = model.predict(prompt)
|
84 |
keywords = response.split(", ")
|
85 |
+
keywords = keywords_no_companies(keywords)
|
86 |
return keywords
|
87 |
|
88 |
|
utils/retriever.py
CHANGED
@@ -15,6 +15,9 @@ def query_pinecone_sparse(
|
|
15 |
else:
|
16 |
participant = "Question"
|
17 |
|
|
|
|
|
|
|
18 |
if year == "All":
|
19 |
if quarter == "All":
|
20 |
xc = index.query(
|
@@ -34,7 +37,7 @@ def query_pinecone_sparse(
|
|
34 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
35 |
"Ticker": {"$eq": ticker},
|
36 |
"QA_Flag": {"$eq": participant},
|
37 |
-
|
38 |
},
|
39 |
include_metadata=True,
|
40 |
)
|
@@ -56,7 +59,7 @@ def query_pinecone_sparse(
|
|
56 |
"Quarter": {"$eq": quarter},
|
57 |
"Ticker": {"$eq": ticker},
|
58 |
"QA_Flag": {"$eq": participant},
|
59 |
-
|
60 |
},
|
61 |
include_metadata=True,
|
62 |
)
|
@@ -71,7 +74,7 @@ def query_pinecone_sparse(
|
|
71 |
"Quarter": {"$eq": quarter},
|
72 |
"Ticker": {"$eq": ticker},
|
73 |
"QA_Flag": {"$eq": participant},
|
74 |
-
|
75 |
},
|
76 |
include_metadata=True,
|
77 |
)
|
@@ -100,6 +103,10 @@ def query_pinecone(
|
|
100 |
else:
|
101 |
participant = "Question"
|
102 |
|
|
|
|
|
|
|
|
|
103 |
if year == "All":
|
104 |
if quarter == "All":
|
105 |
xc = index.query(
|
@@ -118,7 +125,7 @@ def query_pinecone(
|
|
118 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
119 |
"Ticker": {"$eq": ticker},
|
120 |
"QA_Flag": {"$eq": participant},
|
121 |
-
|
122 |
},
|
123 |
include_metadata=True,
|
124 |
)
|
@@ -139,7 +146,7 @@ def query_pinecone(
|
|
139 |
"Quarter": {"$eq": quarter},
|
140 |
"Ticker": {"$eq": ticker},
|
141 |
"QA_Flag": {"$eq": participant},
|
142 |
-
|
143 |
},
|
144 |
include_metadata=True,
|
145 |
)
|
@@ -153,7 +160,7 @@ def query_pinecone(
|
|
153 |
"Quarter": {"$eq": quarter},
|
154 |
"Ticker": {"$eq": ticker},
|
155 |
"QA_Flag": {"$eq": participant},
|
156 |
-
|
157 |
},
|
158 |
include_metadata=True,
|
159 |
)
|
|
|
15 |
else:
|
16 |
participant = "Question"
|
17 |
|
18 |
+
# Create filter dictionary based on keywords
|
19 |
+
filter_dict = [{'Keywords': word} for word in keywords]
|
20 |
+
|
21 |
if year == "All":
|
22 |
if quarter == "All":
|
23 |
xc = index.query(
|
|
|
37 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
38 |
"Ticker": {"$eq": ticker},
|
39 |
"QA_Flag": {"$eq": participant},
|
40 |
+
'$and': filter_dict
|
41 |
},
|
42 |
include_metadata=True,
|
43 |
)
|
|
|
59 |
"Quarter": {"$eq": quarter},
|
60 |
"Ticker": {"$eq": ticker},
|
61 |
"QA_Flag": {"$eq": participant},
|
62 |
+
'$and': filter_dict
|
63 |
},
|
64 |
include_metadata=True,
|
65 |
)
|
|
|
74 |
"Quarter": {"$eq": quarter},
|
75 |
"Ticker": {"$eq": ticker},
|
76 |
"QA_Flag": {"$eq": participant},
|
77 |
+
'$and': filter_dict
|
78 |
},
|
79 |
include_metadata=True,
|
80 |
)
|
|
|
103 |
else:
|
104 |
participant = "Question"
|
105 |
|
106 |
+
# Create filter dictionary based on keywords
|
107 |
+
filter_dict = [{'Keywords': word} for word in keywords]
|
108 |
+
|
109 |
+
|
110 |
if year == "All":
|
111 |
if quarter == "All":
|
112 |
xc = index.query(
|
|
|
125 |
"Quarter": {"$in": ["Q1", "Q2", "Q3", "Q4"]},
|
126 |
"Ticker": {"$eq": ticker},
|
127 |
"QA_Flag": {"$eq": participant},
|
128 |
+
'$and': filter_dict
|
129 |
},
|
130 |
include_metadata=True,
|
131 |
)
|
|
|
146 |
"Quarter": {"$eq": quarter},
|
147 |
"Ticker": {"$eq": ticker},
|
148 |
"QA_Flag": {"$eq": participant},
|
149 |
+
'$and': filter_dict
|
150 |
},
|
151 |
include_metadata=True,
|
152 |
)
|
|
|
160 |
"Quarter": {"$eq": quarter},
|
161 |
"Ticker": {"$eq": ticker},
|
162 |
"QA_Flag": {"$eq": participant},
|
163 |
+
'$and': filter_dict
|
164 |
},
|
165 |
include_metadata=True,
|
166 |
)
|