bstraehle committed on
Commit
fe97823
·
verified ·
1 Parent(s): 189fd24

Update custom_utils.py

Browse files
Files changed (1) hide show
  1. custom_utils.py +23 -25
custom_utils.py CHANGED
@@ -6,8 +6,8 @@ from document_model import Listing, SearchResultItem
6
  from pydantic import ValidationError
7
  from pymongo.collection import Collection
8
  from pymongo.errors import OperationFailure
9
- from pymongo.operations import SearchIndexModel
10
  from pymongo.mongo_client import MongoClient
 
11
 
12
  DB_NAME = "airbnb_dataset"
13
  COLLECTION_NAME = "listings_reviews"
@@ -28,6 +28,28 @@ def rag_ingestion(collection):
28
  collection.insert_many(listings)
29
  return "Manually create a vector search index (in free tier, this feature is not available via SDK)"
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def rag_retrieval(openai_api_key, prompt, db, collection, stages=[], vector_index="vector_index"):
32
  get_knowledge = vector_search(openai_api_key, prompt, db, collection, stages, vector_index)
33
 
@@ -67,30 +89,6 @@ def rag_inference(openai_api_key, prompt, search_results):
67
 
68
  return completion.choices[0].message.content
69
 
70
- def process_records(data_frame):
71
- records = data_frame.to_dict(orient="records")
72
-
73
- # Handle potential NaT values
74
- for record in records:
75
- print("###")
76
- print(record)
77
- #for key, value in record.items():
78
- # # List values
79
- # if isinstance(value, list):
80
- # processed_list = [None if pd.isnull(v) else v for v in value]
81
- # record[key] = processed_list
82
- # # Scalar values
83
- # else:
84
- # if pd.isnull(value):
85
- # record[key] = None
86
-
87
- try:
88
- # Convert each dictionary to a Listing instance
89
- return [Listing(**record).dict() for record in records]
90
- except ValidationError as e:
91
- print("Validation error:", e)
92
- return []
93
-
94
  def vector_search(openai_api_key, user_query, db, collection, additional_stages=[], vector_index="vector_index"):
95
  query_embedding = get_text_embedding(openai_api_key, user_query)
96
 
 
6
  from pydantic import ValidationError
7
  from pymongo.collection import Collection
8
  from pymongo.errors import OperationFailure
 
9
  from pymongo.mongo_client import MongoClient
10
+ from pymongo.operations import SearchIndexModel
11
 
12
  DB_NAME = "airbnb_dataset"
13
  COLLECTION_NAME = "listings_reviews"
 
28
  collection.insert_many(listings)
29
  return "Manually create a vector search index (in free tier, this feature is not available via SDK)"
30
 
31
+ def process_records(data_frame):
32
+ records = data_frame.to_dict(orient="records")
33
+
34
+ # Handle potential NaT values
35
+ for record in records:
36
+ for key, value in record.items():
37
+ # List values
38
+ if isinstance(value, list):
39
+ processed_list = [None if pd.isnull(v) else v for v in value]
40
+ record[key] = processed_list
41
+ # Scalar values
42
+ else:
43
+ if pd.isnull(value):
44
+ record[key] = None
45
+
46
+ try:
47
+ # Convert each dictionary to a Listing instance
48
+ return [Listing(**record).dict() for record in records]
49
+ except ValidationError as e:
50
+ print("Validation error:", e)
51
+ return []
52
+
53
  def rag_retrieval(openai_api_key, prompt, db, collection, stages=[], vector_index="vector_index"):
54
  get_knowledge = vector_search(openai_api_key, prompt, db, collection, stages, vector_index)
55
 
 
89
 
90
  return completion.choices[0].message.content
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def vector_search(openai_api_key, user_query, db, collection, additional_stages=[], vector_index="vector_index"):
93
  query_embedding = get_text_embedding(openai_api_key, user_query)
94