Spaces:
Running
Running
Update custom_utils.py
Browse files- custom_utils.py +39 -18
custom_utils.py
CHANGED
@@ -25,7 +25,13 @@ def rag_ingestion(collection):
|
|
25 |
collection.insert_many(dataset)
|
26 |
return "Manually create a vector search index (in free tier, this feature is not available via SDK)"
|
27 |
|
28 |
-
def rag_retrieval(openai_api_key,
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
###
|
30 |
### Pre-retrieval processing: index filter
|
31 |
### Post-retrieval processing: result filter
|
@@ -108,7 +114,15 @@ def rag_retrieval(openai_api_key, prompt, db, collection, vector_index="vector_i
|
|
108 |
###
|
109 |
###
|
110 |
|
111 |
-
get_knowledge = vector_search(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
if not get_knowledge:
|
114 |
return "No results found.", "No source information available."
|
@@ -119,7 +133,9 @@ def rag_retrieval(openai_api_key, prompt, db, collection, vector_index="vector_i
|
|
119 |
|
120 |
return get_knowledge
|
121 |
|
122 |
-
def rag_inference(openai_api_key,
|
|
|
|
|
123 |
openai.api_key = openai_api_key
|
124 |
|
125 |
content = f"Answer this user question: {prompt} with the following context:\n{search_results}"
|
@@ -139,39 +155,44 @@ def rag_inference(openai_api_key, prompt, search_results):
|
|
139 |
|
140 |
return completion.choices[0].message.content
|
141 |
|
142 |
-
def vector_search(openai_api_key,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
query_embedding = get_text_embedding(openai_api_key, user_query)
|
144 |
|
145 |
if query_embedding is None:
|
146 |
return "Invalid query or embedding generation failed."
|
147 |
|
148 |
-
vector_search_stage = {
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
}
|
157 |
|
158 |
-
""" filter
|
159 |
vector_search_stage = {
|
160 |
"$vectorSearch": {
|
161 |
"index": vector_index,
|
162 |
"queryVector": query_embedding,
|
163 |
"path": "description_embedding",
|
164 |
"numCandidates": 150,
|
165 |
-
"limit":
|
166 |
"filter": {
|
167 |
"$and": [
|
168 |
-
{"accommodates": {"$eq":
|
169 |
-
{"bedrooms": {"$eq":
|
170 |
]
|
171 |
},
|
172 |
}
|
173 |
}
|
174 |
-
"""
|
175 |
|
176 |
remove_embedding_stage = {
|
177 |
"$unset": "description_embedding"
|
|
|
25 |
collection.insert_many(dataset)
|
26 |
return "Manually create a vector search index (in free tier, this feature is not available via SDK)"
|
27 |
|
28 |
+
def rag_retrieval(openai_api_key,
|
29 |
+
prompt,
|
30 |
+
accomodates,
|
31 |
+
bedrooms,
|
32 |
+
db,
|
33 |
+
collection,
|
34 |
+
vector_index="vector_index"):
|
35 |
###
|
36 |
### Pre-retrieval processing: index filter
|
37 |
### Post-retrieval processing: result filter
|
|
|
114 |
###
|
115 |
###
|
116 |
|
117 |
+
get_knowledge = vector_search(
|
118 |
+
openai_api_key,
|
119 |
+
prompt,
|
120 |
+
accomodates,
|
121 |
+
bedrooms,
|
122 |
+
db,
|
123 |
+
collection,
|
124 |
+
additional_stages,
|
125 |
+
vector_index)
|
126 |
|
127 |
if not get_knowledge:
|
128 |
return "No results found.", "No source information available."
|
|
|
133 |
|
134 |
return get_knowledge
|
135 |
|
136 |
+
def rag_inference(openai_api_key,
|
137 |
+
prompt,
|
138 |
+
search_results):
|
139 |
openai.api_key = openai_api_key
|
140 |
|
141 |
content = f"Answer this user question: {prompt} with the following context:\n{search_results}"
|
|
|
155 |
|
156 |
return completion.choices[0].message.content
|
157 |
|
158 |
+
def vector_search(openai_api_key,
|
159 |
+
user_query,
|
160 |
+
accommodates,
|
161 |
+
bedrooms,
|
162 |
+
db,
|
163 |
+
collection,
|
164 |
+
additional_stages=[],
|
165 |
+
vector_index="vector_index"):
|
166 |
query_embedding = get_text_embedding(openai_api_key, user_query)
|
167 |
|
168 |
if query_embedding is None:
|
169 |
return "Invalid query or embedding generation failed."
|
170 |
|
171 |
+
#vector_search_stage = {
|
172 |
+
# "$vectorSearch": {
|
173 |
+
# "index": vector_index,
|
174 |
+
# "queryVector": query_embedding,
|
175 |
+
# "path": "description_embedding",
|
176 |
+
# "numCandidates": 150,
|
177 |
+
# "limit": 3,
|
178 |
+
# }
|
179 |
+
#}
|
180 |
|
|
|
181 |
vector_search_stage = {
|
182 |
"$vectorSearch": {
|
183 |
"index": vector_index,
|
184 |
"queryVector": query_embedding,
|
185 |
"path": "description_embedding",
|
186 |
"numCandidates": 150,
|
187 |
+
"limit": 10,
|
188 |
"filter": {
|
189 |
"$and": [
|
190 |
+
{"accommodates": {"$eq": accommodates}},
|
191 |
+
{"bedrooms": {"$eq": bedrooms}}
|
192 |
]
|
193 |
},
|
194 |
}
|
195 |
}
|
|
|
196 |
|
197 |
remove_embedding_stage = {
|
198 |
"$unset": "description_embedding"
|