sebastianalgharaballi committed on
Commit 66bc137 · verified · 1 Parent(s): 0516311

new scoring

Files changed (1)
  1. embeddings.py +543 -82
embeddings.py CHANGED
@@ -58,6 +58,84 @@ class MatchResult:
     explanation: str
     status: str = "unseen"

 class EmbeddingManager:
     def __init__(self, job_encoder, seeker_encoder):
         self.job_encoder = job_encoder
@@ -65,10 +143,7 @@ class EmbeddingManager:

     def get_job_fields(self, job_posting: JobPosting) -> Dict[str, str]:
         """Extract relevant fields from job posting"""
-        # Convert primary skills list to string
         primary_skills_str = ', '.join([skill.skill_name for skill in job_posting.primary_skills]) if job_posting.primary_skills else ''
-
-        # Convert secondary skills list to string
         secondary_skills_str = ', '.join([skill.skill_name for skill in job_posting.secondary_skills]) if job_posting.secondary_skills else ''

         return {
@@ -78,8 +153,7 @@ class EmbeddingManager:
             'primary_skills': primary_skills_str,
             'secondary_skills': secondary_skills_str
         }
-
-    def get_seeker_fields(self, processed_seeker: IndependentJobSeekerAssessmentRDS,
                           unprocessed_seeker: JobseekerInfoRDS) -> Dict[str, str]:
         """Extract relevant fields from job seeker"""
         return {
@@ -90,8 +164,9 @@ class EmbeddingManager:
             'certifications': self._format_certifications(processed_seeker.certifications),
             'summary': unprocessed_seeker.summary
         }
-
     def _format_experience(self, experiences: List[dict]) -> str:
         exp_parts = []
         for exp in experiences:
             summaries = exp.get('experience_summaries', [])
@@ -99,8 +174,6 @@ class EmbeddingManager:
             exp_parts.append(exp_str)
         return ' | '.join(exp_parts)

-
-
     def _format_education(self, educations: List[dict]) -> str:
         """Format education entries into a single string"""
         edu_parts = []
@@ -111,17 +184,16 @@ class EmbeddingManager:
             edu_str = f"{degree} in {field} from {institution}"
             edu_parts.append(edu_str)
         return ' | '.join(edu_parts)
-
     def _format_certifications(self, certifications: List[dict]) -> str:
         """Format certification entries into a single string"""
         cert_parts = []
         for cert in certifications:
-            name = cert.get('name', '')  # This is required as per schema
             org = cert.get('organization', '')
             start = cert.get('start_date', '')
             end = cert.get('end_date', '')

-            # Build certification string
             cert_str = name
             if org:
                 cert_str += f" from {org}"
@@ -136,105 +208,494 @@ class EmbeddingManager:
             cert_parts.append(cert_str)

         return ' | '.join(cert_parts)

-    def embed_jobposting(self, job_posting: JobPosting) -> Dict[str, np.ndarray]:
-        """Generate embeddings for job posting fields"""
-        fields = self.get_job_fields(job_posting)
-        return self.job_encoder.encode_fields(fields)
-
-    def embed_jobseeker(self, processed_seeker: IndependentJobSeekerAssessmentRDS,
-                        unprocessed_seeker: JobseekerInfoRDS) -> Dict[str, np.ndarray]:
-        """Generate embeddings for job seeker fields"""
-        fields = self.get_seeker_fields(processed_seeker, unprocessed_seeker)
-        print("DEBUG - Seeker fields:", fields)
-        return self.seeker_encoder.encode_fields(fields)

-    # list of job seeker ids with their scores (from metadata)
     def calculate_similarity(self, job_embeddings: Dict[str, np.ndarray],
-                             seeker_embeddings: Dict[str, np.ndarray]) -> MatchResult:
-        """Calculate similarity with strict thresholds"""
         field_scores = {}
         explanation_parts = []

-        # Calculate similarity for each field pair
         for job_field, seeker_fields in FIELD_MAPPING.items():
             if job_field not in job_embeddings:
                 continue

             job_emb = job_embeddings[job_field]

-            # Handle multiple seeker fields for one job field
             for seeker_field in seeker_fields:
                 if seeker_field not in seeker_embeddings:
                     continue

                 seeker_emb = seeker_embeddings[seeker_field]
-
-                # Calculate raw cosine similarity
                 similarity = np.dot(job_emb, seeker_emb) / (
                     np.linalg.norm(job_emb) * np.linalg.norm(seeker_emb) + 1e-9
                 )

-                # Scale to [0, 1] much more aggressively
-                raw_score = (similarity * 0.8)  # Compress range
-                field_score = max(0, min(1, (raw_score + 1) / 2))  # Rescale to [0,1]
-
-                # Apply non-linear transformation for more discrimination
-                if field_score > 0.9:  # Only the very best get boosted
-                    field_score = min(field_score * 1.1, 1.0)
-                elif field_score < 0.7:  # More aggressive penalty for lower scores
-                    field_score = field_score * 0.6

-                field_pair_name = f"{job_field}_{seeker_field}"
-                field_scores[field_pair_name] = field_score
-
-                # Much stricter thresholds for quality descriptions
-                match_quality = "strong" if field_score > 0.9 else \
-                                "good" if field_score > 0.8 else \
-                                "moderate" if field_score > 0.6 else "weak"
-
-                explanation_parts.append(
-                    f"{match_quality.capitalize()} match on {job_field} to {seeker_field} "
-                    f"(similarity: {field_score:.2f})"
-                )
-
-        # Calculate weighted average with critical field emphasis
-        final_score = 0.0
-        total_weight = 0.0
-
-        # Critical fields get extra weight (subject to change)
-        critical_fields = {
-            'primary_skills_primary_skills': 1,
-            'role_description_experience': 1,
-            'role_description_certifications': 1,  # Added certifications with same weight
         }

-        for field_pair, score in field_scores.items():
-            base_weight = FIELD_WEIGHTS.get(field_pair, 0.0)
-
-            # Apply critical field multiplier
-            weight = base_weight * critical_fields.get(field_pair, 1.0)
-
-            final_score += score * weight
-            total_weight += weight
-
-        if total_weight > 0:
-            final_score = final_score / total_weight
-
-        # Final adjustments for extreme discrimination
-        if final_score > 0.9:  # Only truly exceptional matches
-            final_score = min(final_score * 1.1, 1.0)
-        elif final_score < 0.7:  # Really penalize poor matches
-            final_score = final_score * 0.6

-        explanation = " | ".join(explanation_parts)
-
-        print("DEBUG - All field scores:", field_scores)

         return MatchResult(
             similarity_score=final_score,
-            field_scores=field_scores,
-            explanation=explanation
         )

 def initialize_embedding_system(job_encoder, seeker_encoder):
 
     explanation: str
     status: str = "unseen"

+from typing import Dict, List, Optional, Tuple, Set
+import numpy as np
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from collections import defaultdict
+import re
+class RiskLevel:
+    NO_RISK = 5
+    LOW_RISK = -5
+    MEDIUM_RISK = -10
+    HIGH_RISK = -15
+class BonusLevel:
+    NO_BONUS = 0
+    GOOD = 1
+    BETTER = 2
+    BEST = 3
+# Your existing field mappings
+FIELD_MAPPING = {
+    'title': ['summary'],
+    'primary_skills': ['primary_skills'],
+    'secondary_skills': ['secondary_skills'],
+    'role_description': ['experience', 'certifications']
+}
+# Updated field weights incorporating all criteria
+FIELD_WEIGHTS = {
+    'job_stability': 12,
+    'job_duration': 12,
+    'responsibility_tenure': 1.5,
+    'employment_pedigree': 7.5,
+    'primary_skills_experience': 5,
+    'career_experience': 2.5,
+    'role_impact': 5,
+    'management_scope': 1.5,
+    'primary_skills_occurrence': 12,
+    'primary_skills_frequency': 2.5,
+    'primary_skills_recency': 15,
+    'soft_skills': 1,
+    'employment_recency': 7.5,
+    'location_match': 1,
+    'certifications': 2,
+    'job_title_experience': 5,
+    'job_title_match': 7,
+    'primary_skills_primary_skills': 0.5,
+    'secondary_skills_secondary_skills': 0.1,
+    'role_description_experience': 0.25,
+    'role_description_certifications': 0.05,
+    'title_summary': 0.1
+}
+SOFT_SKILLS_KEYWORDS = {
+    'communication': ['effectively communicated', 'presented to stakeholders', 'negotiated', 'collaborated with', 'mediated'],
+    'teamwork': ['worked in a team', 'collaborated with', 'partnered with', 'contributed to a team effort'],
+    'leadership': ['led a team', 'mentored', 'coached', 'managed', 'guided'],
+    'problem_solving': ['resolved', 'addressed challenges', 'innovated', 'strategized', 'implemented solutions'],
+    'adaptability': ['adapted to', 'quickly learned', 'flexible in', 'handled change'],
+    'emotional_intelligence': ['empathized with', 'understood needs', 'fostered relationships', 'built trust', 'managed conflict']
+}
+LEADERSHIP_KEYWORDS = [
+    'led', 'managed', 'directed', 'architected', 'innovated',
+    'spearheaded', 'strategized', 'developed', 'executed',
+    'owned', 'delivered', 'implemented'
+]
+IMPACT_PATTERNS = {
+    'revenue_growth': r'increased revenue by (\d+)%',
+    'cost_savings': r'saved \$(\d+) million',
+    'project_launch': r'launched .+ generated \$(\d+) million',
+    'project_completion': r'completed .+ ahead of schedule',
+    'budget_management': r'managed \$(\d+) million budget',
+    'risk_mitigation': r'decreased .+ by (\d+)%',
+    'client_retention': r'improved retention by (\d+)%',
+    'satisfaction': r'satisfaction .+ (\d+)% to (\d+)%',
+    'team_growth': r'grew team by (\d+)%'
+}
+EXPERIENCE_LEVELS = {
+    'junior': ['Junior', 'Associate'],
+    'mid': ['Staff', 'Senior', 'Sr.'],
+    'senior': ['Principal', 'Lead', 'Supervisor', 'Manager'],
+    'executive': ['Director', 'VP', 'CXO', 'President', 'Owner', 'Founder', 'Partner']
+}
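A rough sketch of how these constants are meant to combine: the weights multiply the integer risk/bonus levels and the products are summed, matching the formula used later in calculate_similarity. The category names and level values below are made up for illustration only.

# Illustrative only; not part of the committed file.
example_risks = {'job_stability': -10, 'primary_skills_recency': 5}   # RiskLevel values
example_bonuses = {'role_impact': 2, 'soft_skills': 1}                # BonusLevel values
weights = {'job_stability': 12, 'primary_skills_recency': 15, 'role_impact': 5, 'soft_skills': 1}
total = sum(weights[k] * v for k, v in example_risks.items()) \
      + sum(weights[k] * v for k, v in example_bonuses.items())
print(total)  # 12*(-10) + 15*5 + 5*2 + 1*1 = -34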
 class EmbeddingManager:
     def __init__(self, job_encoder, seeker_encoder):
         self.job_encoder = job_encoder

     def get_job_fields(self, job_posting: JobPosting) -> Dict[str, str]:
         """Extract relevant fields from job posting"""
         primary_skills_str = ', '.join([skill.skill_name for skill in job_posting.primary_skills]) if job_posting.primary_skills else ''
         secondary_skills_str = ', '.join([skill.skill_name for skill in job_posting.secondary_skills]) if job_posting.secondary_skills else ''

         return {
             'primary_skills': primary_skills_str,
             'secondary_skills': secondary_skills_str
         }
+    def get_seeker_fields(self, processed_seeker: IndependentJobSeekerAssessmentRDS,
                           unprocessed_seeker: JobseekerInfoRDS) -> Dict[str, str]:
         """Extract relevant fields from job seeker"""
         return {
             'certifications': self._format_certifications(processed_seeker.certifications),
             'summary': unprocessed_seeker.summary
         }
+
     def _format_experience(self, experiences: List[dict]) -> str:
+        """Format experience entries into a single string"""
         exp_parts = []
         for exp in experiences:
             summaries = exp.get('experience_summaries', [])

             exp_parts.append(exp_str)
         return ' | '.join(exp_parts)

     def _format_education(self, educations: List[dict]) -> str:
         """Format education entries into a single string"""
         edu_parts = []

             edu_str = f"{degree} in {field} from {institution}"
             edu_parts.append(edu_str)
         return ' | '.join(edu_parts)
+
     def _format_certifications(self, certifications: List[dict]) -> str:
         """Format certification entries into a single string"""
         cert_parts = []
         for cert in certifications:
+            name = cert.get('name', '')
             org = cert.get('organization', '')
             start = cert.get('start_date', '')
             end = cert.get('end_date', '')

             cert_str = name
             if org:
                 cert_str += f" from {org}"

             cert_parts.append(cert_str)

         return ' | '.join(cert_parts)
+    def calculate_job_stability_risk(self, experiences: List[dict]) -> float:
+        """Calculate risk based on employment gaps within last 2 years"""
+        if not experiences:
+            return RiskLevel.HIGH_RISK
+        sorted_experiences = sorted(
+            experiences,
+            key=lambda x: datetime.fromisoformat(x['start_date'])
+        )
+
+        two_years_ago = datetime.now() - timedelta(days=730)
+        gaps = []
+
+        for i in range(1, len(sorted_experiences)):
+            current_start = datetime.fromisoformat(sorted_experiences[i]['start_date'])
+            if current_start < two_years_ago:
+                continue
+
+            prev_end = sorted_experiences[i-1].get('end_date')
+            prev_end = datetime.fromisoformat(prev_end) if prev_end else datetime.now()
+
+            if current_start > prev_end:
+                gap_months = (current_start - prev_end).days / 30
+                gaps.append(int(gap_months))
+        if not gaps:
+            return RiskLevel.NO_RISK
+
+        max_gap = max(gaps)
+        if max_gap > 9:
+            return RiskLevel.HIGH_RISK
+        elif max_gap > 3:
+            return RiskLevel.MEDIUM_RISK
+        return RiskLevel.LOW_RISK
+    def calculate_job_duration_risk(self, experiences: List[dict]) -> float:
+        """Calculate risk based on number of jobs in recent periods"""
+        now = datetime.now()
+        one_year_ago = now - timedelta(days=365)
+        two_years_ago = now - timedelta(days=730)
+
+        jobs_last_year = sum(1 for exp in experiences
+                             if datetime.fromisoformat(exp['start_date']) >= one_year_ago)
+        jobs_last_two_years = sum(1 for exp in experiences
+                                  if datetime.fromisoformat(exp['start_date']) >= two_years_ago)
+
+        # One year criteria
+        if jobs_last_year >= 4:
+            return RiskLevel.HIGH_RISK
+        elif jobs_last_year == 3:
+            return RiskLevel.MEDIUM_RISK
+        elif jobs_last_year == 2:
+            return RiskLevel.LOW_RISK
+        elif jobs_last_year == 1:
+            return RiskLevel.NO_RISK
+
+        # Two year criteria
+        if jobs_last_two_years >= 5:
+            return RiskLevel.HIGH_RISK
+        elif jobs_last_two_years in [3, 4]:
+            return RiskLevel.MEDIUM_RISK
+        elif jobs_last_two_years <= 2:
+            return RiskLevel.LOW_RISK
+
+        return RiskLevel.NO_RISK
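A quick usage sketch of the gap logic above, assuming experiences carry ISO-8601 date strings and that an EmbeddingManager can be built with placeholder encoders (the date-based checks never touch them); the data below is hypothetical and stated relative to today so the result stays stable:

# Illustrative only; not part of the committed file.
from datetime import datetime, timedelta
manager = EmbeddingManager(None, None)   # encoders unused by the date-based checks
today = datetime.now()
experiences = [
    {'start_date': (today - timedelta(days=900)).date().isoformat(),
     'end_date':   (today - timedelta(days=400)).date().isoformat()},
    {'start_date': (today - timedelta(days=200)).date().isoformat(),
     'end_date':   None},   # current job, started after a roughly 6-7 month gap
]
print(manager.calculate_job_stability_risk(experiences))   # -10: a 3-9 month gap maps to MEDIUM_RISK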
+    def calculate_employment_recency_risk(self, experiences: List[dict]) -> float:
+        """Calculate risk based on most recent employment end date"""
+        if not experiences:
+            return RiskLevel.HIGH_RISK
+
+        latest_end_date = max(
+            datetime.fromisoformat(exp['end_date']) if exp.get('end_date')
+            else datetime.now()
+            for exp in experiences
+        )
+
+        months_since_end = (datetime.now() - latest_end_date).days / 30
+
+        if months_since_end > 9:
+            return RiskLevel.HIGH_RISK
+        elif months_since_end > 6:
+            return RiskLevel.MEDIUM_RISK
+        elif months_since_end > 3:
+            return RiskLevel.LOW_RISK
+        return RiskLevel.NO_RISK
+    def calculate_responsibility_tenure(self, experiences: List[dict]) -> float:
+        """Calculate bonus based on leadership tenure"""
+        leadership_months = 0
+
+        for exp in experiences:
+            description = ' '.join(exp.get('experience_summaries', []))
+            if any(keyword in description.lower() for keyword in LEADERSHIP_KEYWORDS):
+                start = datetime.fromisoformat(exp['start_date'])
+                end = datetime.fromisoformat(exp['end_date']) if exp.get('end_date') else datetime.now()
+                leadership_months += (end - start).days / 30
+
+        if leadership_months >= 24:
+            return BonusLevel.BEST
+        elif leadership_months >= 12:
+            return BonusLevel.BETTER
+        elif leadership_months > 0:
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
+    def evaluate_employment_pedigree(self, experiences: List[dict],
+                                     job_posting: JobPosting) -> float:
+        """Evaluate employment pedigree based on company comparisons"""
+        score = 0
+        recent_experiences = experiences[:3]  # Focus on most recent experiences
+
+        for exp in recent_experiences:
+            # Check industry match
+            if exp.get('industry') == job_posting.industry:
+                score += 1
+
+            # Check company size match
+            if abs(exp.get('company_size', 0) - job_posting.company_size) < 0.2:  # Within 20%
+                score += 1
+
+            # Check revenue match
+            if abs(exp.get('company_revenue', 0) - job_posting.company_revenue) < 0.2:
+                score += 1
+
+            # Check growth rate match
+            if abs(exp.get('growth_rate', 0) - job_posting.growth_rate) < 0.1:
+                score += 1
+
+        # Scale score to bonus levels
+        if score >= 10:  # High match across multiple factors
+            return BonusLevel.BEST
+        elif score >= 6:
+            return BonusLevel.BETTER
+        elif score >= 3:
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
+
+    def evaluate_job_title_experience(self, experiences: List[dict], required_years: int) -> float:
+        """Evaluate job title experience against required years"""
+        actual_years = sum((datetime.fromisoformat(exp.get('end_date', datetime.now().isoformat())) -
+                            datetime.fromisoformat(exp['start_date'])).days / 365
+                           for exp in experiences)
+
+        percentage = (actual_years / required_years) * 100 if required_years > 0 else 0
+
+        if percentage >= 100:
+            return RiskLevel.NO_RISK
+        elif percentage >= 71:
+            return RiskLevel.LOW_RISK
+        elif percentage >= 61:
+            return RiskLevel.MEDIUM_RISK
+        return RiskLevel.HIGH_RISK
+
+    def evaluate_primary_skills_quality(self, experience_description: str) -> float:
+        """Evaluate the quality of primary skills usage description"""
+        # Count technical terms and action verbs
+        tech_terms = len(re.findall(r'\b(?:API|REST|SDK|Framework|Platform|System|Database)\b',
+                                    experience_description, re.IGNORECASE))
+        action_verbs = len(re.findall(r'\b(?:Designed|Implemented|Developed|Engineered|Architected)\b',
+                                      experience_description, re.IGNORECASE))
+        context_richness = len(re.findall(r'using|with|through|by|via',
+                                          experience_description, re.IGNORECASE))
+
+        total_score = tech_terms + action_verbs + context_richness
+
+        if total_score >= 7:  # Complex technical implementation with context
+            return BonusLevel.BEST
+        elif total_score >= 5:  # Good technical description
+            return BonusLevel.BETTER
+        elif total_score >= 3:  # Basic technical mention
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
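For instance, evaluate_primary_skills_quality is a pure function of the description text: it only counts regex hits. With the hypothetical sentence below, the counts work out to 3 tech terms + 1 action verb + 2 context words = 6, which lands in the BETTER band:

# Illustrative only; not part of the committed file.
manager = EmbeddingManager(None, None)
desc = "Designed a REST API platform using Python with Docker"
print(manager.evaluate_primary_skills_quality(desc))
# tech terms: REST, API, platform (3); action verbs: Designed (1); context: using, with (2)
# total 6 -> BonusLevel.BETTER (2)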
+    def evaluate_role_impact(self, experiences: List[dict]) -> float:
+        """Evaluate quantifiable impact mentions in role descriptions"""
+        impact_score = 0
+
+        for exp in experiences:
+            description = ' '.join(exp.get('experience_summaries', []))
+
+            # Performance improvements
+            if perf_match := re.search(r'improv(?:ed|ing)\s+\w+\s+by\s+(\d+)%', description, re.IGNORECASE):
+                value = float(perf_match.group(1))
+                impact_score += min(3, value // 20)  # 20% = 1 point, 40% = 2 points, 60%+ = 3 points
+
+            # Cost savings
+            if cost_match := re.search(r'sav(?:ed|ing)\s+\$(\d+)K', description, re.IGNORECASE):
+                value = float(cost_match.group(1))
+                impact_score += min(3, value // 100)  # $100K = 1 point, $200K = 2 points, $300K+ = 3 points
+
+            # Time/efficiency improvements
+            if time_match := re.search(r'reduc(?:ed|ing)\s+\w+\s+(?:time|duration)\s+by\s+(\d+)%', description, re.IGNORECASE):
+                value = float(time_match.group(1))
+                impact_score += min(3, value // 25)  # 25% = 1 point, 50% = 2 points, 75%+ = 3 points

+        if impact_score >= 6:
+            return BonusLevel.BEST
+        elif impact_score >= 4:
+            return BonusLevel.BETTER
+        elif impact_score >= 2:
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
+
+    def evaluate_management_scope(self, experiences: List[dict]) -> float:
+        """Evaluate scope of management responsibilities"""
+        scope_score = 0
+
+        for exp in experiences:
+            description = ' '.join(exp.get('experience_summaries', []))
+
+            # Team size
+            if team_match := re.search(r'(?:manag|lead)(?:ed|ing)?\s+(?:a\s+)?team\s+of\s+(\d+)', description, re.IGNORECASE):
+                team_size = int(team_match.group(1))
+                scope_score += min(3, team_size // 3)  # 3 people = 1 point, 6 people = 2 points, 9+ = 3 points
+
+            # Mentoring/training
+            if mentor_match := re.search(r'mentor(?:ed|ing)?\s+(\d+)', description, re.IGNORECASE):
+                mentees = int(mentor_match.group(1))
+                scope_score += min(2, mentees // 2)  # 2 mentees = 1 point, 4+ mentees = 2 points
+
+            # Project leadership
+            if re.search(r'led\s+(?:development|migration|implementation)', description, re.IGNORECASE):
+                scope_score += 1

+        if scope_score >= 5:
+            return BonusLevel.BEST
+        elif scope_score >= 3:
+            return BonusLevel.BETTER
+        elif scope_score >= 1:
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
+
+    def evaluate_soft_skills(self, experiences: List[dict]) -> float:
+        """Evaluate presence of soft skills keywords"""
+        skill_matches = defaultdict(int)
+
+        for exp in experiences:
+            description = ' '.join(exp.get('experience_summaries', []))
+            for category, keywords in SOFT_SKILLS_KEYWORDS.items():
+                for keyword in keywords:
+                    if keyword in description.lower():
+                        skill_matches[category] += 1
+
+        # Count categories with significant matches
+        categories_present = sum(1 for matches in skill_matches.values() if matches > 0)
+
+        if categories_present >= 5:  # Strong soft skills across most categories
+            return BonusLevel.BEST
+        elif categories_present >= 3:
+            return BonusLevel.BETTER
+        elif categories_present >= 1:
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
+
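A small sketch of how the impact regexes above turn phrases into points, using a made-up experience summary (the numbers in the comment follow directly from the // divisions in evaluate_role_impact):

# Illustrative only; not part of the committed file.
manager = EmbeddingManager(None, None)
exps = [{'experience_summaries': [
    "Improved throughput by 40% and saved $250K by consolidating services"
]}]
print(manager.evaluate_role_impact(exps))
# 40% improvement -> 2 points, $250K savings -> 2 points, total 4 -> BonusLevel.BETTER (2)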
+    def analyze_primary_skills(self, job_posting: JobPosting,
+                               seeker: IndependentJobSeekerAssessmentRDS) -> Dict[str, float]:
+        """Analyze occurrence, frequency, and recency of primary skills"""
+        required_skills = {skill.skill_name.lower() for skill in job_posting.primary_skills}
+        skill_counts = defaultdict(int)
+        recent_skills = set()
+
+        # Define skill variations
+        skill_variations = {
+            'python': ['python', 'py', 'python3'],
+            'aws': ['aws', 'amazon web services', 'amazon aws', 'cloud'],
+            'microservices': ['microservices', 'micro-services', 'microservice', 'micro service'],
+            'rest apis': ['rest', 'restful', 'rest api', 'rest apis', 'restful api', 'restful apis'],
+            'api': ['api', 'apis', 'restful api', 'web api'],
+            'docker': ['docker', 'containerization', 'containers'],
+            'kubernetes': ['kubernetes', 'k8s', 'kubectl'],
+        }
+
+        for i, exp in enumerate(seeker.experiences):
+            exp_text = ' '.join(exp.get('experience_summaries', []))
+            exp_text = exp_text.lower()
+            exp_skills = set()
+
+            for skill in required_skills:
+                # Get variations for this skill
+                variations = skill_variations.get(skill.lower(), [skill.lower()])
+
+                # Count all variations
+                skill_count = sum(
+                    len(re.findall(rf'\b{re.escape(var)}\b', exp_text))
+                    for var in variations
+                )
+
+                skill_counts[skill] += skill_count
+                if skill_count > 0:
+                    exp_skills.add(skill)
+                    if i == 0:  # Most recent experience
+                        recent_skills.add(skill)
+
+        # Calculate occurrence risk
+        total_unique = len(set(skill for skill, count in skill_counts.items() if count > 0))
+        occurrence_percentage = (total_unique / len(required_skills)) * 100
+
+        if occurrence_percentage == 100:
+            occurrence_risk = RiskLevel.NO_RISK
+        elif occurrence_percentage >= 75:
+            occurrence_risk = RiskLevel.LOW_RISK
+        elif occurrence_percentage >= 50:
+            occurrence_risk = RiskLevel.MEDIUM_RISK
+        else:
+            occurrence_risk = RiskLevel.HIGH_RISK
+
+        # Calculate frequency risk with more lenient thresholds
+        total_mentions = sum(skill_counts.values())
+        if total_mentions >= 15:  # Changed from 20
+            frequency_risk = RiskLevel.LOW_RISK
+        elif total_mentions >= 8:  # Changed from 10
+            frequency_risk = RiskLevel.MEDIUM_RISK
+        else:
+            frequency_risk = RiskLevel.HIGH_RISK
+
+        # Calculate recency score
+        recent_percentage = (len(recent_skills) / len(required_skills)) * 100
+        if recent_percentage == 100:
+            recency_risk = RiskLevel.NO_RISK
+        elif recent_percentage >= 65:
+            recency_risk = RiskLevel.LOW_RISK
+        elif recent_percentage >= 50:
+            recency_risk = RiskLevel.MEDIUM_RISK
+        else:
+            recency_risk = RiskLevel.HIGH_RISK
+
+        return {
+            'primary_skills_occurrence': occurrence_risk,
+            'primary_skills_frequency': frequency_risk,
+            'primary_skills_recency': recency_risk
+        }
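The heart of the occurrence counting is word-boundary matching over the variation lists. The full method needs JobPosting and seeker objects, so here is a standalone sketch of just that inner loop on toy text:

# Illustrative only; not part of the committed file.
import re
exp_text = "built restful apis in python3 and deployed containers with k8s".lower()
variations = {'python': ['python', 'py', 'python3'],
              'kubernetes': ['kubernetes', 'k8s', 'kubectl'],
              'aws': ['aws', 'amazon web services']}
counts = {skill: sum(len(re.findall(rf'\b{re.escape(v)}\b', exp_text)) for v in vs)
          for skill, vs in variations.items()}
print(counts)   # {'python': 1, 'kubernetes': 1, 'aws': 0} -> 2 of 3 required skills present (~67% occurrence)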
+    def calculate_job_title_match(self, job_posting: JobPosting,
+                                  experiences: List[dict]) -> Tuple[float, float]:
+        """Calculate job title match percentage and risk level"""
+        target_title = job_posting.title.lower()
+        total_months = 0
+        matched_months = 0
+
+        for exp in experiences:
+            start_date = datetime.fromisoformat(exp['start_date'])
+            end_date = datetime.fromisoformat(exp['end_date']) if exp.get('end_date') else datetime.now()
+            months = (end_date - start_date).days / 30
+            total_months += months
+
+            # Check for exact or related title match
+            title = exp.get('title', '').lower()
+            if target_title in title or title in target_title:
+                matched_months += months
+
+        if total_months == 0:
+            return 0, RiskLevel.HIGH_RISK
+
+        match_percentage = (matched_months / total_months) * 100
+
+        # Determine risk level based on percentage
+        if match_percentage >= 80:
+            risk_level = RiskLevel.NO_RISK
+        elif match_percentage >= 70:
+            risk_level = RiskLevel.LOW_RISK
+        elif match_percentage >= 60:
+            risk_level = RiskLevel.MEDIUM_RISK
+        else:
+            risk_level = RiskLevel.HIGH_RISK
+
+        return match_percentage, risk_level
+    def evaluate_career_experience(self, experiences: List[dict],
+                                   required_years: int) -> float:
+        """Evaluate career experience level"""
+        if not experiences:
+            return RiskLevel.HIGH_RISK
+
+        first_job_date = min(datetime.fromisoformat(exp['start_date']) for exp in experiences)
+        years_experience = (datetime.now() - first_job_date).days / 365
+
+        if years_experience >= 7:
+            return RiskLevel.NO_RISK
+        elif years_experience >= 3:
+            return RiskLevel.LOW_RISK
+        return RiskLevel.HIGH_RISK
+    def evaluate_certifications(self, certifications: List[dict],
+                                job_posting: JobPosting) -> float:
+        """Evaluate relevance of certifications to job title"""
+        if not certifications:
+            return BonusLevel.NO_BONUS
+
+        relevant_certs = 0
+        job_keywords = set(job_posting.title.lower().split())
+
+        for cert in certifications:
+            cert_name = cert.get('name', '').lower()
+            if any(keyword in cert_name for keyword in job_keywords):
+                relevant_certs += 1
+
+        if relevant_certs >= 3:
+            return BonusLevel.BEST
+        elif relevant_certs == 2:
+            return BonusLevel.BETTER
+        elif relevant_certs == 1:
+            return BonusLevel.GOOD
+        return BonusLevel.NO_BONUS
+    def check_location_match(self, seeker_location: str, job_location: str) -> float:
+        """Check if experience locations match job location"""
+        if not seeker_location or not job_location:
+            return RiskLevel.HIGH_RISK
+
+        if seeker_location.lower() != job_location.lower():
+            return RiskLevel.LOW_RISK  # Different country is low risk per requirements
+        return RiskLevel.NO_RISK
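evaluate_career_experience keys everything off the earliest start_date (required_years is accepted but not used in the thresholds). A hypothetical call, with the date stated relative to today so the result is stable:

# Illustrative only; not part of the committed file.
from datetime import datetime, timedelta
manager = EmbeddingManager(None, None)
exps = [{'start_date': (datetime.now() - timedelta(days=5 * 365)).date().isoformat()}]
print(manager.evaluate_career_experience(exps, required_years=10))
# ~5 years since the first job -> RiskLevel.LOW_RISK (-5); 7+ years would be NO_RISK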
     def calculate_similarity(self, job_embeddings: Dict[str, np.ndarray],
+                             seeker_embeddings: Dict[str, np.ndarray],
+                             job_posting: JobPosting,
+                             processed_seeker: IndependentJobSeekerAssessmentRDS) -> MatchResult:
+        """Calculate similarity with comprehensive scoring system"""
+        # Calculate embedding-based similarity scores
         field_scores = {}
         explanation_parts = []

+        # Your existing embedding similarity calculation
         for job_field, seeker_fields in FIELD_MAPPING.items():
             if job_field not in job_embeddings:
                 continue

             job_emb = job_embeddings[job_field]

             for seeker_field in seeker_fields:
                 if seeker_field not in seeker_embeddings:
                     continue

                 seeker_emb = seeker_embeddings[seeker_field]
                 similarity = np.dot(job_emb, seeker_emb) / (
                     np.linalg.norm(job_emb) * np.linalg.norm(seeker_emb) + 1e-9
                 )

+                raw_score = (similarity * 0.8)
+                field_score = max(0, min(1, (raw_score + 1) / 2))

+                field_pair = f"{job_field}_{seeker_field}"
+                field_scores[field_pair] = field_score
+        # Calculate all risk scores
+        risk_scores = {
+            'job_stability': self.calculate_job_stability_risk(processed_seeker.experiences),
+            'job_duration': self.calculate_job_duration_risk(processed_seeker.experiences),
+            'employment_recency': self.calculate_employment_recency_risk(processed_seeker.experiences)
         }

+        # Add primary skills analysis
+        risk_scores.update(self.analyze_primary_skills(job_posting, processed_seeker))

+        # Calculate job title match
+        title_match_percentage, title_risk = self.calculate_job_title_match(
+            job_posting, processed_seeker.experiences)
+        risk_scores['job_title_experience'] = title_risk

+        # Calculate all bonus scores
+        bonus_scores = {
+            'responsibility_tenure': self.calculate_responsibility_tenure(processed_seeker.experiences),
+            'employment_pedigree': self.evaluate_employment_pedigree(processed_seeker.experiences, job_posting),
+            'role_impact': self.evaluate_role_impact(processed_seeker.experiences),
+            'management_scope': self.evaluate_management_scope(processed_seeker.experiences),
+            'soft_skills': self.evaluate_soft_skills(processed_seeker.experiences),
+            'certifications': self.evaluate_certifications(processed_seeker.certifications, job_posting)
+        }
+        # Calculate final score using the formula:
+        # Total Score = ∑(Risk Weight × Risk Level) + ∑(Bonus Weight × Bonus Level)
+        total_score = 0.0
+
+        # Add weighted risk scores
+        for category, score in risk_scores.items():
+            weight = FIELD_WEIGHTS.get(category, 0)
+            total_score += weight * score
+            if score != RiskLevel.NO_RISK:
+                explanation_parts.append(
+                    f"{category.replace('_', ' ').title()}: "
+                    f"{'High' if score == RiskLevel.HIGH_RISK else 'Medium' if score == RiskLevel.MEDIUM_RISK else 'Low'} Risk"
+                )
+
+        # Add weighted bonus scores
+        for category, score in bonus_scores.items():
+            weight = FIELD_WEIGHTS.get(category, 0)
+            total_score += weight * score
+            if score != BonusLevel.NO_BONUS:
+                explanation_parts.append(
+                    f"{category.replace('_', ' ').title()}: "
+                    f"{'Best' if score == BonusLevel.BEST else 'Better' if score == BonusLevel.BETTER else 'Good'} Bonus"
+                )
+        # Normalize to 0-100 scale
+        min_possible = sum(w * RiskLevel.HIGH_RISK for w in FIELD_WEIGHTS.values())
+        max_possible = sum(w * max(RiskLevel.NO_RISK, BonusLevel.BEST) for w in FIELD_WEIGHTS.values())
+        normalized_score = ((total_score - min_possible) / (max_possible - min_possible)) * 100
+        final_score = max(0, min(100, normalized_score))
         return MatchResult(
             similarity_score=final_score,
+            field_scores={**field_scores, **risk_scores, **bonus_scores},
+            explanation=" | ".join(explanation_parts)
         )
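To see what the normalization step does numerically: the FIELD_WEIGHTS above sum to 101, so min_possible = 101 × (-15) = -1515 and max_possible = 101 × 5 = 505. A hypothetical raw weighted total of 200 then maps onto the 0-100 scale like this:

# Illustrative only; not part of the committed file.
min_possible = 101 * (-15)          # -1515: every category at HIGH_RISK
max_possible = 101 * 5              # 505: every category at the top level
total_score = 200                   # hypothetical weighted sum of risks and bonuses
normalized = ((total_score - min_possible) / (max_possible - min_possible)) * 100
print(round(normalized, 1))         # ~84.9 on the 0-100 scale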
 
  def initialize_embedding_system(job_encoder, seeker_encoder):