File size: 5,067 Bytes
d148b11 3385eb9 d148b11 3385eb9 d148b11 3385eb9 d148b11 3385eb9 d148b11 1cdaa9c d148b11 3385eb9 d148b11 3385eb9 d148b11 3385eb9 d148b11 1cdaa9c d148b11 1cdaa9c d148b11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
from test_embeddings import create_test_data, transform_jobseeker_to_opensearch, IndependentJobSeekerAssessmentRDS
from chroma_storage import ChromaMatchingSystem
from opensearch_client import OpenSearchClient
from two_phase_search import TwoPhaseSearchSystem
import json
def test_single_jobseeker(opensearch_client):
    """Index one hand-built jobseeker into OpenSearch and report the outcome.

    A sample ``IndependentJobSeekerAssessmentRDS`` record is converted with
    ``transform_jobseeker_to_opensearch`` and handed to
    ``opensearch_client.index_jobseeker``. A truthy response is printed as a
    success; a falsy one is printed as a failure.

    Args:
        opensearch_client: Object exposing ``index_jobseeker(payload)``.
    """
    print("\nTesting OpenSearch with a single jobseeker...")

    jobseeker_id = "test_seeker_1"

    # Sample profile sections, spelled out separately for readability.
    work_history = [
        {
            "title": "Software Engineer",
            "company": "Tech Co.",
            "duration": "3 years",
            "description": "Developed scalable software systems and optimized cloud infrastructure.",
        },
    ]
    schooling = [
        {
            "degree": "Bachelor's",
            "field": "Computer Science",
            "institution": "Tech University",
        },
    ]
    credentials = [
        {
            "name": "AWS Solutions Architect",
            "organization": "AWS",
            "start_date": "2022-01",
            "end_date": "2025-01",
        },
    ]

    profile = IndependentJobSeekerAssessmentRDS(
        primary_skills=["Python", "AWS"],
        secondary_skills=["Docker", "CI/CD"],
        experiences=work_history,
        educations=schooling,
        certifications=credentials,
    )

    # Convert to the OpenSearch document shape, then index it.
    document = transform_jobseeker_to_opensearch(profile, jobseeker_id)
    response = opensearch_client.index_jobseeker(document)

    if not response:
        print(f"Failed to index jobseeker {jobseeker_id}")
        return
    print(f"Successfully indexed jobseeker {jobseeker_id}: {response}")
def test_two_phase_search():
    """Exercise the complete two-phase (ChromaDB + OpenSearch) search pipeline.

    Steps, all reported via ``print``:

    1. Build ``ChromaMatchingSystem``, ``OpenSearchClient`` and a
       ``TwoPhaseSearchSystem`` wired to both.
    2. Run a simple boolean query against OpenSearch to see whether it
       returns anything at all.
    3. Load the fixtures from ``create_test_data()`` and add the three job
       seekers to ChromaDB.
    4. Build search parameters for the job posting and run
       ``search_candidates`` for up to 10 results.
    5. Print each match, reading the ``jobseeker_id``, ``similarity_score``,
       ``opensearch_score``, ``field_scores`` and ``explanation`` keys of
       every result. If there are no matches, re-run the raw OpenSearch
       query and print how many hits it produced.

    Returns:
        None. The function only prints diagnostics.
    """
    print("\nTesting two-phase search system...")

    # Initialize components
    chroma_matcher = ChromaMatchingSystem()
    opensearch_client = OpenSearchClient()
    search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client)

    # Check if OpenSearch database has any data
    print("\nChecking if OpenSearch database has any data...")
    test_payload = {
        "boolean_search_query": "Software AND (Python OR AWS)",  # Simple boolean query
        "skills": [],  # No skill requirements
        "size": 100,  # Retrieve up to 100 entries
        "sort_by": ["score"],
    }
    opensearch_results = opensearch_client.search_jobseekers(test_payload)
    if opensearch_results:
        print(f"OpenSearch returned {len(opensearch_results)} entries.")
    else:
        print("OpenSearch database is empty or the query returned no results.")

    # Get test data
    (job_posting, matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = create_test_data()

    print("\nAdding test seekers to ChromaDB...")
    # Each entry: (seeker id, processed data, unprocessed data, metadata).
    test_seekers = [
        ("seeker_1", matching_seeker, matching_info,
         {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info,
         {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info,
         {"experience_level": "mid"}),
    ]
    for seeker_id, processed, unprocessed, metadata in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        chroma_matcher.add_job_seeker(seeker_id, processed, unprocessed, metadata)

    # Build search parameters using updated method
    search_params = search_system.build_search_params(
        job_posting=job_posting,
        location={
            "state": "CA",
            "country": "US",
        },
        certifications={
            "name": "AWS Solutions Architect",
            "organization": "AWS",
        },
        tags=[
            {
                "tag_key": "seniority",
                "tag_value": "senior",
            },
        ],
    )
    print("\nConstructed Search Parameters:")
    print("=" * 60)
    print(json.dumps(search_params, indent=2))

    # Perform search
    print("\nPerforming search...")
    results = search_system.search_candidates(
        job_posting=job_posting,
        search_params=search_params,
        n_results=10,
    )

    # Print results
    print("\nSearch Results:")
    print("=" * 60)
    if not results:
        print("No matches found")
        print("\nDebug: Checking OpenSearch response...")
        opensearch_results = opensearch_client.search_jobseekers(search_params)
        # The availability check above already treats a falsy return from
        # search_jobseekers as "no data"; guard here too so a None response
        # is reported as 0 results instead of raising TypeError in len().
        print(f"OpenSearch returned {len(opensearch_results or [])} results")
        return

    for i, result in enumerate(results, 1):
        print(f"\nMatch {i}:")
        print(f"Jobseeker ID: {result['jobseeker_id']}")
        print(f"ChromaDB Score: {result['similarity_score']:.3f}")
        print(f"OpenSearch Score: {result['opensearch_score']:.3f}")
        print("\nField Scores:")
        for field, score in result['field_scores'].items():
            print(f" {field}: {score:.3f}")
        print(f"\nExplanation: {result['explanation']}")
if __name__ == "__main__":
    # Run the end-to-end pipeline check only when executed as a script.
    test_two_phase_search()