embeddings-sebastian / test_unseen_matches.py
sebastianalgharaballi's picture
Upload test_unseen_matches.py
f24b428 verified
raw
history blame
2.8 kB
from test_embeddings import create_test_data
from chroma_storage import ChromaMatchingSystem
import chromadb
from chromadb.config import Settings
def test_unseen_matches():
    """Walk through the seen/unseen matching flow and print each stage.

    Steps, in order: reset a ChromaDB client, load the fixtures returned by
    ``create_test_data()``, register three job seekers with a
    ``ChromaMatchingSystem`` created for ``"test_collection"``, query unseen
    matches, mark ``"seeker_1"`` as seen, query unseen matches again, and
    finally query matches through ``get_matches``.

    Results are only printed; nothing is asserted, so the output has to be
    inspected by hand.
    """

    def describe(matches, with_field_scores=False):
        # One report format shared by all three stages; only the first
        # stage asks for the per-field breakdown.
        for i, match in enumerate(matches, 1):
            print(f"\nMatch {i}:")
            print(f"Similarity Score: {match.similarity_score:.3f}")
            if not with_field_scores:
                continue
            print("\nField Scores:")
            for field, score in match.field_scores.items():
                print(f" {field}: {score:.3f}")

    print("Starting ChromaDB unseen matches test...")

    # Reset the client up front (intended to clear existing collections);
    # the client is created with allow_reset=True before reset() is called.
    reset_settings = Settings(allow_reset=True, is_persistent=True)
    chromadb.Client(reset_settings).reset()

    # Fixtures arrive as one posting followed by three
    # (processed, unprocessed) seeker pairs.
    fixtures = create_test_data()
    job_posting = fixtures[0]
    (matching_seeker, matching_info,
     partial_matching_seeker, partial_matching_info,
     non_matching_seeker, non_matching_info) = fixtures[1:]

    matcher = ChromaMatchingSystem("test_collection")

    # Register the three seekers.
    print("\nAdding test job seekers...")
    test_seekers = (
        ("seeker_1", matching_seeker, matching_info,
         {"experience_level": "senior"}),
        ("seeker_2", partial_matching_seeker, partial_matching_info,
         {"experience_level": "mid"}),
        ("seeker_3", non_matching_seeker, non_matching_info,
         {"experience_level": "mid"}),
    )
    for seeker_id, *seeker_payload in test_seekers:
        print(f"Adding job seeker: {seeker_id}")
        matcher.add_job_seeker(seeker_id, *seeker_payload)

    # Stage 1: unseen matches before anything has been marked as seen.
    print("\nTest 1: Getting initial unseen matches...")
    unseen_matches = matcher.get_unseen_matches(job_posting, n_results=3)
    print(f"\nFound {len(unseen_matches)} unseen matches:")
    describe(unseen_matches, with_field_scores=True)

    # "seeker_1" is hard-coded here as the seeker to mark as seen; the
    # script does not read the ID from the query results.
    print("\nMarking top match as seen...")
    matcher.mark_matches_as_seen(["seeker_1"])

    # Stage 2: unseen matches after one seeker has been marked as seen.
    print("\nTest 2: Getting unseen matches after marking one as seen...")
    remaining_unseen = matcher.get_unseen_matches(job_posting, n_results=3)
    print(f"\nFound {len(remaining_unseen)} remaining unseen matches:")
    describe(remaining_unseen)

    # Stage 3: query through get_matches instead of get_unseen_matches.
    print("\nTest 3: Getting all matches (including seen)...")
    all_matches = matcher.get_matches(job_posting, n_results=3)
    print(f"\nFound {len(all_matches)} total matches:")
    describe(all_matches)
# Run the walkthrough only when this file is executed directly as a script.
if __name__ == "__main__":
    test_unseen_matches()