import json

from test_embeddings import (
    create_test_data,
    transform_jobseeker_to_opensearch,
    IndependentJobSeekerAssessmentRDS,
)
from chroma_storage import ChromaMatchingSystem
from opensearch_client import OpenSearchClient
from two_phase_search import TwoPhaseSearchSystem


def test_single_jobseeker(opensearch_client):
    """Index one hard-coded jobseeker into OpenSearch and print the outcome.

    Builds an ``IndependentJobSeekerAssessmentRDS`` record, converts it with
    ``transform_jobseeker_to_opensearch`` and passes the resulting payload to
    ``opensearch_client.index_jobseeker``. The response is only checked for
    truthiness; nothing is asserted or returned.

    Args:
        opensearch_client: Object exposing ``index_jobseeker(payload)``.

    NOTE(review): the ``__main__`` guard in this file does not call this
    function, and it requires an argument, so under pytest it presumably
    relies on a fixture named ``opensearch_client`` -- confirm one exists.
    """
    print("\nTesting OpenSearch with a single jobseeker...")

    # Create one test jobseeker
    test_jobseeker = IndependentJobSeekerAssessmentRDS(
        primary_skills=["Python", "AWS"],
        secondary_skills=["Docker", "CI/CD"],
        experiences=[{
            "title": "Software Engineer",
            "company": "Tech Co.",
            "duration": "3 years",
            "description": "Developed scalable software systems and optimized cloud infrastructure."
        }],
        educations=[{
            "degree": "Bachelor's",
            "field": "Computer Science",
            "institution": "Tech University"
        }],
        certifications=[{
            "name": "AWS Solutions Architect",
            "organization": "AWS",
            "start_date": "2022-01",
            "end_date": "2025-01"
        }]
    )

    # Transform and index into OpenSearch
    jobseeker_id = "test_seeker_1"
    payload = transform_jobseeker_to_opensearch(test_jobseeker, jobseeker_id)
    response = opensearch_client.index_jobseeker(payload)

    if response:
        print(f"Successfully indexed jobseeker {jobseeker_id}: {response}")
    else:
        print(f"Failed to index jobseeker {jobseeker_id}")


def test_two_phase_search():
    """Run the two-phase (OpenSearch + ChromaDB) search pipeline end to end.

    Steps, in order:
      1. Construct ``ChromaMatchingSystem``, ``OpenSearchClient`` and a
         ``TwoPhaseSearchSystem`` wrapping both.
      2. Issue a simple boolean query against OpenSearch and report whether
         anything came back.
      3. Load the fixtures from ``create_test_data()`` and add three seekers
         to ChromaDB.
      4. Build search parameters for the job posting, print them as JSON,
         and call ``search_candidates`` with ``n_results=10``.
      5. Print each match, or, when there are none, re-query OpenSearch with
         the same parameters and print the hit count for debugging.

    Results are printed only; nothing is asserted or returned.
    """
    print("\nTesting two-phase search system...")

    # Initialize components
    chroma_matcher = ChromaMatchingSystem()
    opensearch_client = OpenSearchClient()
    search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client)

    # Check if OpenSearch database has any data
    print("\nChecking if OpenSearch database has any data...")
    test_payload = {
        "boolean_search_query": "Software AND (Python OR AWS)",  # Simple boolean query
        "skills": [],  # No skill requirements
        "size": 100,  # Retrieve up to 100 entries
        "sort_by": ["score"]
    }
    opensearch_results = opensearch_client.search_jobseekers(test_payload)
    if opensearch_results:
        print(f"OpenSearch returned {len(opensearch_results)} entries.")
    else:
        print("OpenSearch database is empty or the query returned no results.")

    # Get test data
    (job_posting,
     matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = create_test_data()

    print("\nAdding test seekers to ChromaDB...")
    # Add test seekers to ChromaDB
    test_seekers = [
        ("seeker_1", matching_seeker, matching_info, {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info, {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info, {"experience_level": "mid"})
    ]

    for seeker_id, processed, unprocessed, metadata in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        chroma_matcher.add_job_seeker(seeker_id, processed, unprocessed, metadata)

    # Build search parameters using updated method
    search_params = search_system.build_search_params(
        job_posting=job_posting,
        location={
            "state": "CA",
            "country": "US"
        },
        certifications={
            "name": "AWS Solutions Architect",
            "organization": "AWS"
        },
        tags=[
            {
                "tag_key": "seniority",
                "tag_value": "senior"
            }
        ]
    )

    print("\nConstructed Search Parameters:")
    print("=" * 60)
    print(json.dumps(search_params, indent=2))

    # Perform search
    print("\nPerforming search...")
    results = search_system.search_candidates(
        job_posting=job_posting,
        search_params=search_params,
        n_results=10
    )

    # Print results
    print("\nSearch Results:")
    print("=" * 60)

    if not results:
        print("No matches found")
        print("\nDebug: Checking OpenSearch response...")
        # The earlier check in this function treats a falsy response as
        # "no data", so the client may return None here; normalise to an
        # empty list so this diagnostic path cannot fail inside len().
        opensearch_results = opensearch_client.search_jobseekers(search_params) or []
        print(f"OpenSearch returned {len(opensearch_results)} results")
        return

    for i, result in enumerate(results, 1):
        print(f"\nMatch {i}:")
        print(f"Jobseeker ID: {result['jobseeker_id']}")
        print(f"ChromaDB Score: {result['similarity_score']:.3f}")
        print(f"OpenSearch Score: {result['opensearch_score']:.3f}")
        print("\nField Scores:")
        for field, score in result['field_scores'].items():
            print(f"  {field}: {score:.3f}")
        print(f"\nExplanation: {result['explanation']}")


if __name__ == "__main__":
    test_two_phase_search()