sebastianalgharaballi
committed on
Commit
•
d148b11
1
Parent(s):
3385eb9
working with basic boolean query
Browse files- test_two_phase_search.py +130 -64
test_two_phase_search.py
CHANGED
@@ -1,74 +1,140 @@
|
|
1 |
-
from
|
2 |
-
from opensearch_client import OpenSearchClient
|
3 |
from chroma_storage import ChromaMatchingSystem
|
4 |
-
from
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
self.opensearch_client = opensearch_client
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
|
24 |
-
|
|
|
|
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
n_results=n_results,
|
31 |
-
where_conditions={"jobseeker_id": {"$in": [r['jobseeker_id'] for r in opensearch_results]}}
|
32 |
-
)
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
if opensearch_result:
|
43 |
-
final_results.append({
|
44 |
-
'jobseeker_id': match.jobseeker_id,
|
45 |
-
'similarity_score': match.similarity_score,
|
46 |
-
'field_scores': match.field_scores,
|
47 |
-
'explanation': match.explanation,
|
48 |
-
'opensearch_score': opensearch_result.get('_score', 0.0)
|
49 |
-
})
|
50 |
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
73 |
|
74 |
-
|
|
|
|
1 |
+
from test_embeddings import create_test_data, transform_jobseeker_to_opensearch, IndependentJobSeekerAssessmentRDS
|
|
|
2 |
from chroma_storage import ChromaMatchingSystem
|
3 |
+
from opensearch_client import OpenSearchClient
|
4 |
+
from two_phase_search import TwoPhaseSearchSystem
|
5 |
+
import json
|
6 |
|
7 |
+
def test_single_jobseeker(opensearch_client):
    """Index one hand-built jobseeker into OpenSearch and report the outcome.

    Args:
        opensearch_client: Object exposing ``index_jobseeker(payload)``; its
            return value is treated as truthy on success.

    The function only prints; it neither asserts nor returns a value.
    """
    print("\nTesting OpenSearch with a single jobseeker...")

    # Profile sections for the single test jobseeker.
    experience_entries = [{
        "title": "Software Engineer",
        "company": "Tech Co.",
        "duration": "3 years",
        "description": "Developed scalable software systems and optimized cloud infrastructure."
    }]
    education_entries = [{
        "degree": "Bachelor's",
        "field": "Computer Science",
        "institution": "Tech University"
    }]
    certification_entries = [{
        "name": "AWS Solutions Architect",
        "organization": "AWS",
        "start_date": "2022-01",
        "end_date": "2025-01"
    }]

    seeker = IndependentJobSeekerAssessmentRDS(
        primary_skills=["Python", "AWS"],
        secondary_skills=["Docker", "CI/CD"],
        experiences=experience_entries,
        educations=education_entries,
        certifications=certification_entries
    )

    # Convert to the OpenSearch document shape, then index it.
    seeker_id = "test_seeker_1"
    document = transform_jobseeker_to_opensearch(seeker, seeker_id)
    index_response = opensearch_client.index_jobseeker(document)

    if not index_response:
        print(f"Failed to index jobseeker {seeker_id}")
        return
    print(f"Successfully indexed jobseeker {seeker_id}: {index_response}")
|
|
|
|
|
|
|
43 |
|
44 |
+
def test_two_phase_search():
    """Run the two-phase search pipeline end to end and print what it finds.

    This is a script-style smoke test: it prints progress and results and
    does not assert anything. Steps, in order:

    1. Build a ``ChromaMatchingSystem``, an ``OpenSearchClient`` and a
       ``TwoPhaseSearchSystem`` wired to both.
    2. Probe OpenSearch with a simple boolean query to see whether the
       index returns anything.
    3. Add the three seekers from ``create_test_data()`` to ChromaDB.
    4. Build search parameters for the job posting and run
       ``search_candidates``.
    5. Print each match, or, when there are none, re-query OpenSearch
       directly and print how many results it returned.
    """
    print("\nTesting two-phase search system...")

    # Initialize components
    chroma_matcher = ChromaMatchingSystem()
    opensearch_client = OpenSearchClient()
    search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client)

    # Check if OpenSearch database has any data
    print("\nChecking if OpenSearch database has any data...")
    test_payload = {
        "boolean_search_query": "Software AND (Python OR AWS)",  # Simple boolean query
        "skills": [],  # No skill requirements
        "size": 100,  # Retrieve up to 100 entries
        "sort_by": ["score"],
    }
    opensearch_results = opensearch_client.search_jobseekers(test_payload)
    if opensearch_results:
        print(f"OpenSearch returned {len(opensearch_results)} entries.")
    else:
        print("OpenSearch database is empty or the query returned no results.")

    # Get test data
    (job_posting, matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = create_test_data()

    print("\nAdding test seekers to ChromaDB...")
    # Each entry: (seeker id, processed data, unprocessed data, metadata)
    test_seekers = [
        ("seeker_1", matching_seeker, matching_info,
         {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info,
         {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info,
         {"experience_level": "mid"}),
    ]

    for seeker_id, processed, unprocessed, metadata in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        chroma_matcher.add_job_seeker(seeker_id, processed, unprocessed, metadata)

    # Build search parameters using updated method
    search_params = search_system.build_search_params(
        job_posting=job_posting,
        location={
            "state": "CA",
            "country": "US",
        },
        certifications={
            "name": "AWS Solutions Architect",
            "organization": "AWS",
        },
        tags=[
            {
                "tag_key": "seniority",
                "tag_value": "senior",
            },
        ],
    )

    print("\nConstructed Search Parameters:")
    print("=" * 60)
    print(json.dumps(search_params, indent=2))

    # Perform search
    print("\nPerforming search...")
    results = search_system.search_candidates(
        job_posting=job_posting,
        search_params=search_params,
        n_results=10,
    )

    # Print results
    print("\nSearch Results:")
    print("=" * 60)

    if not results:
        print("No matches found")
        print("\nDebug: Checking OpenSearch response...")
        opensearch_results = opensearch_client.search_jobseekers(search_params)
        # The probe above already treats this return value as possibly falsy;
        # guard here too so a None response cannot crash the diagnostic path
        # with a TypeError from len().
        print(f"OpenSearch returned {len(opensearch_results or [])} results")
        return

    for i, result in enumerate(results, 1):
        print(f"\nMatch {i}:")
        print(f"Jobseeker ID: {result['jobseeker_id']}")
        print(f"ChromaDB Score: {result['similarity_score']:.3f}")
        print(f"OpenSearch Score: {result['opensearch_score']:.3f}")
        print("\nField Scores:")
        for field, score in result['field_scores'].items():
            print(f"  {field}: {score:.3f}")
        print(f"\nExplanation: {result['explanation']}")
|
138 |
|
139 |
+
# Run the end-to-end check only when this file is executed as a script.
if __name__ == "__main__":
    test_two_phase_search()
|