|
from test_embeddings import create_test_data, transform_jobseeker_to_opensearch, IndependentJobSeekerAssessmentRDS |
|
from chroma_storage import ChromaMatchingSystem |
|
from opensearch_client import OpenSearchClient |
|
from two_phase_search import TwoPhaseSearchSystem |
|
import json |
|
|
|
def test_single_jobseeker(opensearch_client):
    """Index one hand-built jobseeker through ``opensearch_client`` and report the outcome.

    Args:
        opensearch_client: Object exposing ``index_jobseeker(payload)``; its
            return value is treated as truthy on success and falsy on failure.

    Returns:
        None. The result is reported via ``print`` only.
    """
    print("\nTesting OpenSearch with a single jobseeker...")

    seeker_id = "test_seeker_1"

    # Fixture pieces are named individually so the constructor call stays short.
    work_history = [
        {
            "title": "Software Engineer",
            "company": "Tech Co.",
            "duration": "3 years",
            "description": "Developed scalable software systems and optimized cloud infrastructure.",
        },
    ]
    schooling = [
        {
            "degree": "Bachelor's",
            "field": "Computer Science",
            "institution": "Tech University",
        },
    ]
    credentials = [
        {
            "name": "AWS Solutions Architect",
            "organization": "AWS",
            "start_date": "2022-01",
            "end_date": "2025-01",
        },
    ]

    profile = IndependentJobSeekerAssessmentRDS(
        primary_skills=["Python", "AWS"],
        secondary_skills=["Docker", "CI/CD"],
        experiences=work_history,
        educations=schooling,
        certifications=credentials,
    )

    # Convert the assessment record into the document that gets indexed.
    document = transform_jobseeker_to_opensearch(profile, seeker_id)
    outcome = opensearch_client.index_jobseeker(document)

    # Guard clause: report the failure first, then fall through to success.
    if not outcome:
        print(f"Failed to index jobseeker {seeker_id}")
        return

    print(f"Successfully indexed jobseeker {seeker_id}: {outcome}")
|
|
|
def test_two_phase_search():
    """Exercise the two-phase search pipeline end to end and print the outcome.

    Steps, in order:
      1. Build a ``ChromaMatchingSystem``, an ``OpenSearchClient`` and the
         ``TwoPhaseSearchSystem`` that is constructed from both.
      2. Run a probe query against OpenSearch and report how many entries
         came back.
      3. Add the three seekers returned by ``create_test_data()`` to ChromaDB.
      4. Build search parameters for the job posting, print them as JSON,
         and call ``search_candidates``.
      5. Print each match, or OpenSearch debug information when there are
         no matches.

    Returns:
        None. All results are reported via ``print``.
    """
    print("\nTesting two-phase search system...")

    separator = "=" * 60

    chroma_matcher = ChromaMatchingSystem()
    opensearch_client = OpenSearchClient()
    search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client)

    # Probe OpenSearch first so an empty index is visible in the output
    # before the full pipeline runs.
    print("\nChecking if OpenSearch database has any data...")
    test_payload = {
        "boolean_search_query": "Software AND (Python OR AWS)",
        "skills": [],
        "size": 100,
        "sort_by": ["score"],
    }
    opensearch_results = opensearch_client.search_jobseekers(test_payload)
    if opensearch_results:
        print(f"OpenSearch returned {len(opensearch_results)} entries.")
    else:
        print("OpenSearch database is empty or the query returned no results.")

    (job_posting, matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = create_test_data()

    print("\nAdding test seekers to ChromaDB...")

    # (seeker id, processed data, unprocessed data, metadata)
    test_seekers = [
        ("seeker_1", matching_seeker, matching_info,
         {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info,
         {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info,
         {"experience_level": "mid"}),
    ]

    for seeker_id, processed, unprocessed, metadata in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        chroma_matcher.add_job_seeker(seeker_id, processed, unprocessed, metadata)

    search_params = search_system.build_search_params(
        job_posting=job_posting,
        location={
            "state": "CA",
            "country": "US",
        },
        certifications={
            "name": "AWS Solutions Architect",
            "organization": "AWS",
        },
        tags=[
            {
                "tag_key": "seniority",
                "tag_value": "senior",
            },
        ],
    )

    print("\nConstructed Search Parameters:")
    print(separator)
    print(json.dumps(search_params, indent=2))

    print("\nPerforming search...")
    results = search_system.search_candidates(
        job_posting=job_posting,
        search_params=search_params,
        n_results=10,
    )

    print("\nSearch Results:")
    print(separator)

    if not results:
        print("No matches found")
        print("\nDebug: Checking OpenSearch response...")
        debug_results = opensearch_client.search_jobseekers(search_params)
        # The probe above treats the return of search_jobseekers as possibly
        # falsy; guard the length the same way so a None response cannot
        # raise TypeError on the one path meant to help with diagnosis.
        print(f"OpenSearch returned {len(debug_results or [])} results")
        return

    for i, result in enumerate(results, 1):
        print(f"\nMatch {i}:")
        print(f"Jobseeker ID: {result['jobseeker_id']}")
        print(f"ChromaDB Score: {result['similarity_score']:.3f}")
        print(f"OpenSearch Score: {result['opensearch_score']:.3f}")
        print("\nField Scores:")
        for field, score in result['field_scores'].items():
            print(f"  {field}: {score:.3f}")
        print(f"\nExplanation: {result['explanation']}")
|
|
|
# Run the two-phase search test when this file is executed as a script.
if __name__ == "__main__":
    test_two_phase_search()