sebastianalgharaballi
committed on
new scoring fin
Browse files - embeddings.py +23 -13
embeddings.py
CHANGED
@@ -611,9 +611,9 @@ class EmbeddingManager:
|
|
611 |
return RiskLevel.LOW_RISK # Different country is low risk per requirements
|
612 |
return RiskLevel.NO_RISK
|
613 |
def calculate_similarity(self, job_embeddings: Dict[str, np.ndarray],
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
"""Calculate similarity with comprehensive scoring system"""
|
618 |
# Calculate embedding-based similarity scores
|
619 |
field_scores = {}
|
@@ -640,6 +640,7 @@ class EmbeddingManager:
|
|
640 |
|
641 |
field_pair = f"{job_field}{seeker_field}"
|
642 |
field_scores[field_pair] = field_score
|
|
|
643 |
# Calculate all risk scores
|
644 |
risk_scores = {
|
645 |
'job_stability': self.calculate_job_stability_risk(processed_seeker.experiences),
|
@@ -664,6 +665,7 @@ class EmbeddingManager:
|
|
664 |
'soft_skills': self.evaluate_soft_skills(processed_seeker.experiences),
|
665 |
'certifications': self.evaluate_certifications(processed_seeker.certifications, job_posting)
|
666 |
}
|
|
|
667 |
# Calculate final score using the formula:
|
668 |
# Total Score = ∑(Risk Weight × Risk Level) + ∑(Bonus Weight × Bonus Level)
|
669 |
total_score = 0.0
|
@@ -672,26 +674,34 @@ class EmbeddingManager:
|
|
672 |
for category, score in risk_scores.items():
|
673 |
weight = FIELD_WEIGHTS.get(category, 0)
|
674 |
total_score += weight * score
|
675 |
-
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
)
|
|
|
|
|
|
|
|
|
680 |
|
681 |
# Add weighted bonus scores
|
682 |
for category, score in bonus_scores.items():
|
683 |
weight = FIELD_WEIGHTS.get(category, 0)
|
684 |
total_score += weight * score
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
)
|
|
|
|
|
|
|
690 |
# Normalize to 0-100 scale
|
691 |
min_possible = sum(w * RiskLevel.HIGH_RISK for w in FIELD_WEIGHTS.values())
|
692 |
max_possible = sum(w * max(RiskLevel.NO_RISK, BonusLevel.BEST) for w in FIELD_WEIGHTS.values())
|
693 |
normalized_score = ((total_score - min_possible) / (max_possible - min_possible)) * 100
|
694 |
final_score = max(0, min(100, normalized_score))
|
|
|
695 |
return MatchResult(
|
696 |
similarity_score=final_score,
|
697 |
field_scores={**field_scores, **risk_scores, **bonus_scores},
|
|
|
611 |
return RiskLevel.LOW_RISK # Different country is low risk per requirements
|
612 |
return RiskLevel.NO_RISK
|
613 |
def calculate_similarity(self, job_embeddings: Dict[str, np.ndarray],
|
614 |
+
seeker_embeddings: Dict[str, np.ndarray],
|
615 |
+
job_posting: JobPosting,
|
616 |
+
processed_seeker: IndependentJobSeekerAssessmentRDS) -> MatchResult:
|
617 |
"""Calculate similarity with comprehensive scoring system"""
|
618 |
# Calculate embedding-based similarity scores
|
619 |
field_scores = {}
|
|
|
640 |
|
641 |
field_pair = f"{job_field}{seeker_field}"
|
642 |
field_scores[field_pair] = field_score
|
643 |
+
|
644 |
# Calculate all risk scores
|
645 |
risk_scores = {
|
646 |
'job_stability': self.calculate_job_stability_risk(processed_seeker.experiences),
|
|
|
665 |
'soft_skills': self.evaluate_soft_skills(processed_seeker.experiences),
|
666 |
'certifications': self.evaluate_certifications(processed_seeker.certifications, job_posting)
|
667 |
}
|
668 |
+
|
669 |
# Calculate final score using the formula:
|
670 |
# Total Score = ∑(Risk Weight × Risk Level) + ∑(Bonus Weight × Bonus Level)
|
671 |
total_score = 0.0
|
|
|
674 |
for category, score in risk_scores.items():
|
675 |
weight = FIELD_WEIGHTS.get(category, 0)
|
676 |
total_score += weight * score
|
677 |
+
category_name = category.replace('_', '*').upper()
|
678 |
+
if score == RiskLevel.NO_RISK:
|
679 |
+
explanation_parts.append(f"{category_name}: No Risk")
|
680 |
+
elif score == RiskLevel.LOW_RISK:
|
681 |
+
explanation_parts.append(f"{category_name}: Low Risk")
|
682 |
+
elif score == RiskLevel.MEDIUM_RISK:
|
683 |
+
explanation_parts.append(f"{category_name}: Medium Risk")
|
684 |
+
elif score == RiskLevel.HIGH_RISK:
|
685 |
+
explanation_parts.append(f"{category_name}: High Risk")
|
686 |
|
687 |
# Add weighted bonus scores
|
688 |
for category, score in bonus_scores.items():
|
689 |
weight = FIELD_WEIGHTS.get(category, 0)
|
690 |
total_score += weight * score
|
691 |
+
category_name = category.replace('_', '*').upper()
|
692 |
+
if score == BonusLevel.BEST:
|
693 |
+
explanation_parts.append(f"{category_name}: Best Bonus")
|
694 |
+
elif score == BonusLevel.BETTER:
|
695 |
+
explanation_parts.append(f"{category_name}: Better Bonus")
|
696 |
+
elif score == BonusLevel.GOOD:
|
697 |
+
explanation_parts.append(f"{category_name}: Good Bonus")
|
698 |
+
|
699 |
# Normalize to 0-100 scale
|
700 |
min_possible = sum(w * RiskLevel.HIGH_RISK for w in FIELD_WEIGHTS.values())
|
701 |
max_possible = sum(w * max(RiskLevel.NO_RISK, BonusLevel.BEST) for w in FIELD_WEIGHTS.values())
|
702 |
normalized_score = ((total_score - min_possible) / (max_possible - min_possible)) * 100
|
703 |
final_score = max(0, min(100, normalized_score))
|
704 |
+
|
705 |
return MatchResult(
|
706 |
similarity_score=final_score,
|
707 |
field_scores={**field_scores, **risk_scores, **bonus_scores},
|