embeddings-sebastian / test_two_phase_search.py
sebastianalgharaballi's picture
working with basic boolean query
d148b11 verified
raw
history blame
5.07 kB
from test_embeddings import create_test_data, transform_jobseeker_to_opensearch, IndependentJobSeekerAssessmentRDS
from chroma_storage import ChromaMatchingSystem
from opensearch_client import OpenSearchClient
from two_phase_search import TwoPhaseSearchSystem
import json
def test_single_jobseeker(opensearch_client):
    """Index one hand-built jobseeker through ``opensearch_client``.

    A single ``IndependentJobSeekerAssessmentRDS`` record is assembled,
    converted with ``transform_jobseeker_to_opensearch`` and handed to
    ``opensearch_client.index_jobseeker``. The outcome is only printed;
    nothing is asserted and nothing is returned.
    """
    print("\nTesting OpenSearch with a single jobseeker...")

    jobseeker_id = "test_seeker_1"

    # Sub-records of the profile, named separately to keep the constructor
    # call short.
    work_history = [{
        "title": "Software Engineer",
        "company": "Tech Co.",
        "duration": "3 years",
        "description": "Developed scalable software systems and optimized cloud infrastructure."
    }]
    schooling = [{
        "degree": "Bachelor's",
        "field": "Computer Science",
        "institution": "Tech University"
    }]
    credentials = [{
        "name": "AWS Solutions Architect",
        "organization": "AWS",
        "start_date": "2022-01",
        "end_date": "2025-01"
    }]

    profile = IndependentJobSeekerAssessmentRDS(
        primary_skills=["Python", "AWS"],
        secondary_skills=["Docker", "CI/CD"],
        experiences=work_history,
        educations=schooling,
        certifications=credentials,
    )

    # Convert the profile to the indexing payload and send it off.
    document = transform_jobseeker_to_opensearch(profile, jobseeker_id)
    response = opensearch_client.index_jobseeker(document)

    # A falsy response is reported as a failed indexing attempt.
    if not response:
        print(f"Failed to index jobseeker {jobseeker_id}")
        return
    print(f"Successfully indexed jobseeker {jobseeker_id}: {response}")
def test_two_phase_search():
    """Run the two-phase search pipeline end to end and print what happens.

    Steps, in order:

    1. Build a ``ChromaMatchingSystem``, an ``OpenSearchClient`` and a
       ``TwoPhaseSearchSystem`` that wraps both.
    2. Send a simple boolean query to OpenSearch and report how many
       entries came back.
    3. Add the three seekers produced by ``create_test_data()`` to the
       Chroma matcher.
    4. Build search parameters for the job posting and print them as JSON.
    5. Call ``search_candidates`` and print every match. When there are no
       matches, re-run the OpenSearch query with the built parameters and
       print the hit count as a debugging aid.

    The function only prints; it makes no assertions and returns ``None``.
    """
    print("\nTesting two-phase search system...")

    # Initialize components
    chroma_matcher = ChromaMatchingSystem()
    opensearch_client = OpenSearchClient()
    search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client)

    # Check if OpenSearch database has any data
    print("\nChecking if OpenSearch database has any data...")
    test_payload = {
        "boolean_search_query": "Software AND (Python OR AWS)",  # Simple boolean query
        "skills": [],  # No skill requirements
        "size": 100,  # Retrieve up to 100 entries
        "sort_by": ["score"]
    }
    opensearch_results = opensearch_client.search_jobseekers(test_payload)
    if opensearch_results:
        print(f"OpenSearch returned {len(opensearch_results)} entries.")
    else:
        print("OpenSearch database is empty or the query returned no results.")

    # Get test data
    (job_posting, matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = create_test_data()

    print("\nAdding test seekers to ChromaDB...")
    # Each entry: (seeker id, two seeker objects from create_test_data, metadata).
    test_seekers = [
        ("seeker_1", matching_seeker, matching_info,
         {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info,
         {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info,
         {"experience_level": "mid"})
    ]
    for seeker_id, processed, unprocessed, metadata in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        chroma_matcher.add_job_seeker(seeker_id, processed, unprocessed, metadata)

    # Build search parameters using updated method
    search_params = search_system.build_search_params(
        job_posting=job_posting,
        location={
            "state": "CA",
            "country": "US"
        },
        certifications={
            "name": "AWS Solutions Architect",
            "organization": "AWS"
        },
        tags=[
            {
                "tag_key": "seniority",
                "tag_value": "senior"
            }
        ]
    )
    print("\nConstructed Search Parameters:")
    print("=" * 60)
    print(json.dumps(search_params, indent=2))

    # Perform search
    print("\nPerforming search...")
    results = search_system.search_candidates(
        job_posting=job_posting,
        search_params=search_params,
        n_results=10
    )

    # Print results
    print("\nSearch Results:")
    print("=" * 60)
    if not results:
        print("No matches found")
        print("\nDebug: Checking OpenSearch response...")
        opensearch_results = opensearch_client.search_jobseekers(search_params)
        # The sanity check above already treats this call's result as possibly
        # falsy (presumably None on failure -- confirm against
        # OpenSearchClient). Guard the count so the debug path cannot itself
        # raise TypeError from len(None).
        hit_count = len(opensearch_results) if opensearch_results else 0
        print(f"OpenSearch returned {hit_count} results")
        return

    for i, result in enumerate(results, 1):
        print(f"\nMatch {i}:")
        print(f"Jobseeker ID: {result['jobseeker_id']}")
        print(f"ChromaDB Score: {result['similarity_score']:.3f}")
        print(f"OpenSearch Score: {result['opensearch_score']:.3f}")
        print("\nField Scores:")
        for field, score in result['field_scores'].items():
            print(f"  {field}: {score:.3f}")
        print(f"\nExplanation: {result['explanation']}")
# Script entry point: run the end-to-end two-phase search check only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    test_two_phase_search()