File size: 5,067 Bytes
d148b11
3385eb9
d148b11
 
 
3385eb9
d148b11
 
 
3385eb9
d148b11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3385eb9
d148b11
 
 
 
1cdaa9c
d148b11
 
 
 
3385eb9
d148b11
 
 
 
 
 
 
 
3385eb9
d148b11
 
 
 
 
 
 
 
 
 
 
 
 
3385eb9
d148b11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1cdaa9c
d148b11
 
 
 
 
 
 
 
 
1cdaa9c
d148b11
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from test_embeddings import create_test_data, transform_jobseeker_to_opensearch, IndependentJobSeekerAssessmentRDS
from chroma_storage import ChromaMatchingSystem
from opensearch_client import OpenSearchClient
from two_phase_search import TwoPhaseSearchSystem
import json

def test_single_jobseeker(opensearch_client):
    """Index one hard-coded jobseeker via ``opensearch_client`` and print the outcome.

    Args:
        opensearch_client: Object exposing ``index_jobseeker(payload)``. A truthy
            return value is reported as success, a falsy one as failure.

    Returns:
        None. The result is only reported on stdout.
    """
    print("\nTesting OpenSearch with a single jobseeker...")

    seeker_id = "test_seeker_1"

    # Build the individual profile sections first so the constructor call stays short.
    work_history = [
        {
            "title": "Software Engineer",
            "company": "Tech Co.",
            "duration": "3 years",
            "description": "Developed scalable software systems and optimized cloud infrastructure.",
        },
    ]
    schooling = [
        {
            "degree": "Bachelor's",
            "field": "Computer Science",
            "institution": "Tech University",
        },
    ]
    credentials = [
        {
            "name": "AWS Solutions Architect",
            "organization": "AWS",
            "start_date": "2022-01",
            "end_date": "2025-01",
        },
    ]

    profile = IndependentJobSeekerAssessmentRDS(
        primary_skills=["Python", "AWS"],
        secondary_skills=["Docker", "CI/CD"],
        experiences=work_history,
        educations=schooling,
        certifications=credentials,
    )

    # Convert the profile into the OpenSearch document shape, then index it.
    document = transform_jobseeker_to_opensearch(profile, seeker_id)
    outcome = opensearch_client.index_jobseeker(document)

    if not outcome:
        print(f"Failed to index jobseeker {seeker_id}")
        return

    print(f"Successfully indexed jobseeker {seeker_id}: {outcome}")

def test_two_phase_search():
    """Run the two-phase (OpenSearch + ChromaDB) candidate search end to end.

    Steps, all reported on stdout:

    1. Build a ``ChromaMatchingSystem``, an ``OpenSearchClient`` and a
       ``TwoPhaseSearchSystem`` wired to both.
    2. Issue a simple boolean query against OpenSearch to see whether it
       returns anything at all.
    3. Add the three seekers produced by ``create_test_data()`` to ChromaDB.
    4. Build search parameters for the test job posting and print them as JSON.
    5. Run ``search_candidates`` and print each match, or, when there are
       no matches, re-query OpenSearch with the same parameters and print
       how many entries it returned.

    Returns:
        None.
    """
    print("\nTesting two-phase search system...")
    
    # Initialize components
    chroma_matcher = ChromaMatchingSystem()
    opensearch_client = OpenSearchClient()
    search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client)

    # Check if OpenSearch database has any data
    print("\nChecking if OpenSearch database has any data...")
    test_payload = {
        "boolean_search_query": "Software AND (Python OR AWS)",  # Simple boolean query
        "skills": [],                # No skill requirements
        "size": 100,                 # Retrieve up to 100 entries
        "sort_by": ["score"]
    }
    opensearch_results = opensearch_client.search_jobseekers(test_payload)
    if opensearch_results:
        print(f"OpenSearch returned {len(opensearch_results)} entries.")
    else:
        print("OpenSearch database is empty or the query returned no results.")

    # Get test data: one job posting plus (processed, unprocessed) pairs for a
    # matching, a partially matching and a non-matching seeker.
    (job_posting, matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = create_test_data()
    
    print("\nAdding test seekers to ChromaDB...")
    # Add test seekers to ChromaDB
    test_seekers = [
        ("seeker_1", matching_seeker, matching_info, 
         {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info,
         {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info,
         {"experience_level": "mid"})
    ]
    
    for seeker_id, processed, unprocessed, metadata in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        chroma_matcher.add_job_seeker(seeker_id, processed, unprocessed, metadata)
    
    # Build search parameters using updated method
    search_params = search_system.build_search_params(
        job_posting=job_posting,
        location={
            "state": "CA",
            "country": "US"
        },
        certifications={
            "name": "AWS Solutions Architect",
            "organization": "AWS"
        },
        tags=[
            {
                "tag_key": "seniority",
                "tag_value": "senior"
            }
        ]
    )
    
    print("\nConstructed Search Parameters:")
    print("=" * 60)
    print(json.dumps(search_params, indent=2))
    
    # Perform search
    print("\nPerforming search...")
    results = search_system.search_candidates(
        job_posting=job_posting,
        search_params=search_params,
        n_results=10
    )
    
    # Print results
    print("\nSearch Results:")
    print("=" * 60)
    
    if not results:
        print("No matches found")
        print("\nDebug: Checking OpenSearch response...")
        opensearch_results = opensearch_client.search_jobseekers(search_params)
        # The pre-check above already treats a falsy response as "no data", so
        # the response may be None here as well; fall back to an empty list so
        # this diagnostic path cannot itself fail with a TypeError from len().
        print(f"OpenSearch returned {len(opensearch_results or [])} results")
        return
        
    for i, result in enumerate(results, 1):
        print(f"\nMatch {i}:")
        print(f"Jobseeker ID: {result['jobseeker_id']}")
        print(f"ChromaDB Score: {result['similarity_score']:.3f}")
        print(f"OpenSearch Score: {result['opensearch_score']:.3f}")
        print("\nField Scores:")
        for field, score in result['field_scores'].items():
            print(f"  {field}: {score:.3f}")
        print(f"\nExplanation: {result['explanation']}")

if __name__ == "__main__":
    # Script entry point: only the two-phase search test is run here.
    # test_single_jobseeker requires a client argument and is not invoked.
    test_two_phase_search()