|
import os |
|
import gradio as gr |
|
import pandas as pd |
|
from sentence_transformers import SentenceTransformer, util |
|
from PyPDF2 import PdfReader |
|
import docx |
|
import re |
|
import google.generativeai as genai |
|
import concurrent.futures |
|
from fuzzywuzzy import fuzz |
|
from typing import List, Dict, Tuple, Any |
|
from dataclasses import dataclass |
|
import logging |
|
from pathlib import Path |
|
|
|
|
|
# Configure root logging at import time: INFO and above, each record prefixed
# with its timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-scoped logger shared by the code in this file.
logger = logging.getLogger(__name__)
|
|
|
@dataclass
class Config:
    """Limits and model identifiers consumed by the resume analyzer."""

    # Largest number of resumes accepted in a single batch.
    MAX_RESUMES: int = 10

    # Years of experience at which the leadership / management sub-scores
    # reach their maximum.
    MAX_LEADERSHIP_EXP: int = 10
    MAX_MANAGEMENT_EXP: int = 10

    # Name handed to SentenceTransformer when the embedding model is loaded.
    MODEL_NAME: str = "paraphrase-MiniLM-L6-v2"

    # Name handed to genai.GenerativeModel for the resume analysis prompt.
    GEMINI_MODEL: str = "gemini-1.5-flash"
|
|
|
class ResumeAnalyzer:
    """Score resumes for leadership and management fit against a job description.

    Each resume is converted to plain text and sent to a Gemini model together
    with the job description. The structured reply is then parsed into contact
    fields and a weighted match percentage.
    """

    # Characters tolerated between a field label and its value: spaces, tabs
    # and Markdown emphasis asterisks. Newlines are deliberately excluded so
    # that an empty field never swallows the following line of the reply.
    _LABEL_PADDING = r"[ \t*]*"

    def __init__(self):
        """Create the configuration, load models and the scoring tables."""
        self.config = Config()
        self._initialize_models()
        self.required_skills = self._load_required_skills()
        self.role_hierarchy = self._load_role_hierarchy()

    def _initialize_models(self) -> None:
        """Load the sentence-embedding model and configure the Gemini client.

        Raises:
            ValueError: If the ``GOOGLE_API_KEY`` environment variable is
                unset or empty.
            Exception: Any other initialization failure is logged and
                re-raised unchanged.
        """
        try:
            self.sentence_model = SentenceTransformer(self.config.MODEL_NAME)

            api_key = os.getenv('GOOGLE_API_KEY')
            if not api_key:
                raise ValueError("Google API key not found. Please set GOOGLE_API_KEY.")
            genai.configure(api_key=api_key)
        except Exception as e:
            logger.error("Failed to initialize models: %s", e)
            raise

    @staticmethod
    def _load_required_skills() -> List[str]:
        """Return the leadership and management skills used for skill scoring."""
        return [
            "strategic planning", "team management", "project management",
            "decision making", "communication", "leadership",
            "conflict resolution", "delegation", "performance management",
            "budget management", "resource allocation", "staff development",
            "change management", "risk management", "problem solving",
            "negotiation", "executive leadership", "organizational skills",
            "business development", "stakeholder management", "collaboration",
            "emotional intelligence", "coaching", "mentoring",
            "time management", "cross-functional team leadership", "innovation",
            "organizational culture", "team motivation", "employee engagement",
            "organizational design", "continuous improvement",
            "decision-making under pressure", "adaptability", "accountability",
            "team building", "succession planning", "strategic partnerships",
            "executive presence", "influencing", "visionary leadership",
        ]

    @staticmethod
    def _load_role_hierarchy() -> Dict[str, int]:
        """Return the mapping of role title keywords to seniority levels (1-5)."""
        return {
            "CEO": 5, "CIO": 5, "CFO": 5, "COO": 5,
            "Director": 4, "VP": 4, "Head": 4,
            "Manager": 3, "Senior": 3,
            "Team Lead": 2, "Lead": 2,
            "Junior": 1, "Associate": 1,
        }

    def extract_text_from_file(self, file_path: str) -> str:
        """Extract plain text from a ``.txt``, ``.pdf`` or ``.docx`` file.

        Args:
            file_path: Path of the file to read.

        Returns:
            The extracted text, or an empty string when the file is missing,
            has an unsupported extension, or cannot be parsed (the failure is
            logged rather than raised).
        """
        try:
            path = Path(file_path)
            if not path.exists():
                raise FileNotFoundError(f"File not found: {path}")

            ext = path.suffix.lower()
            if ext == ".txt":
                # Undecodable bytes are replaced instead of discarding the
                # whole resume because of one non-UTF-8 character.
                return path.read_text(encoding='utf-8', errors='replace')
            if ext == ".pdf":
                with path.open('rb') as handle:
                    reader = PdfReader(handle)
                    # `or ""` keeps the join safe should a page yield None
                    # instead of a string.
                    return " ".join(page.extract_text() or "" for page in reader.pages)
            if ext == ".docx":
                document = docx.Document(str(path))
                return " ".join(para.text for para in document.paragraphs)
            raise ValueError(f"Unsupported file format: {ext}")
        except Exception as e:
            logger.error("Error extracting text from %s: %s", file_path, e)
            return ""

    def analyze_with_gemini(self, resume_text: str, job_desc: str) -> str:
        """Ask the configured Gemini model for a structured resume analysis.

        Args:
            resume_text: Plain text of the resume.
            job_desc: Job description the resume is compared against.

        Returns:
            The model's reply text with surrounding whitespace removed.

        Raises:
            Exception: Any failure while calling the model is logged and
                re-raised unchanged.
        """
        try:
            prompt = f"""
            Analyze the resume with respect to the job description.
            Resume: {resume_text}
            Job Description: {job_desc}

            Please provide a structured analysis with the following information:
            1. Candidate Name:
            2. Email Address:
            3. Contact Number:
            4. Relevant Skills:
            5. Educational Background:
            6. Team Leadership Experience (years):
            7. Management Experience (years):
            8. Management Skills:
            9. Match Percentage:

            Summary of Qualifications:
            •
            •
            •
            •
            •
            """

            model = genai.GenerativeModel(self.config.GEMINI_MODEL)
            response = model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            logger.error("Gemini analysis failed: %s", e)
            raise

    @classmethod
    def _extract_years(cls, text: str, label: str) -> int:
        """Return the whole number following ``"<label> (years):"`` on the same line, else 0."""
        pattern = (
            re.escape(label) + r" \(years\)"
            + cls._LABEL_PADDING + ":" + cls._LABEL_PADDING + r"(\d+)"
        )
        match = re.search(pattern, text)
        return int(match.group(1)) if match else 0

    @classmethod
    def _extract_skills(cls, text: str) -> str:
        """Return the "Management Skills" value, given inline or as a bullet list below the label."""
        match = re.search(
            "Management Skills" + cls._LABEL_PADDING + r"[:\-]?"
            + cls._LABEL_PADDING + r"([^\n]*)",
            text,
        )
        if not match:
            return ""

        inline = match.group(1).strip(" \t\r*")
        if inline:
            return inline

        # Nothing on the label's own line: collect the bullet items that
        # directly follow it, stopping at the first non-bullet line.
        bullets = []
        for line in text[match.end():].splitlines():
            stripped = line.strip()
            if not stripped:
                if bullets:
                    break
                continue
            bullet = re.match(r"[-*•]\s+(.*)", stripped)
            if not bullet:
                break
            bullets.append(bullet.group(1).strip(" \t*"))
        return ", ".join(bullets)

    def extract_management_details(self, gemini_response: str) -> Tuple[int, int, str]:
        """Pull leadership years, management years and management skills from a reply.

        Values are read from the same line as their label, so an empty field
        yields 0 (or "") instead of picking up the numbering or text of the
        next line. Markdown emphasis around labels is tolerated.

        Args:
            gemini_response: Text returned by the Gemini analysis.

        Returns:
            ``(leadership_years, management_years, skills)``; ``(0, 0, "")``
            when parsing fails.
        """
        try:
            leadership_years = self._extract_years(gemini_response, "Team Leadership Experience")
            management_years = self._extract_years(gemini_response, "Management Experience")
            skills = self._extract_skills(gemini_response)
            return leadership_years, management_years, skills
        except Exception as e:
            logger.error("Error extracting management details: %s", e)
            return 0, 0, ""

    def calculate_role_score(self, role_keywords: str) -> float:
        """Return the highest seniority level whose title appears in the text.

        Titles are matched case-insensitively as whole words (an optional
        trailing "s" is accepted). Whole-word matching prevents headings such
        as "Team Leadership" or "Management" from counting as the titles
        "Lead" / "Manager", and "COO" from matching inside "coordinate".

        Args:
            role_keywords: Free text that may mention role titles.

        Returns:
            The best matching level from the role hierarchy, or 0 when no
            title is found or an error occurs.
        """
        try:
            text = role_keywords.lower()
            best = 0
            for keyword, score in self.role_hierarchy.items():
                title = r"\b" + re.escape(keyword.lower()) + r"s?\b"
                if re.search(title, text):
                    best = max(best, score)
            return best
        except Exception as e:
            logger.error("Error calculating role score: %s", e)
            return 0

    def calculate_advanced_match(self, leadership_years: int, management_years: int,
                                 skills: str, role_keywords: str) -> float:
        """Combine experience, skills and seniority into one match percentage.

        Weights: leadership 35%, management 35%, skills 20%, role 10%.
        Experience scores are clamped to the 0-100 range, so negative years
        cannot produce a negative contribution.

        Args:
            leadership_years: Years of team leadership experience.
            management_years: Years of management experience.
            skills: Free-text list of the candidate's management skills.
            role_keywords: Text searched for role titles.

        Returns:
            The weighted score rounded to two decimals, or 0.0 on error.
        """
        try:
            weights = {
                'leadership': 0.35,
                'management': 0.35,
                'skills': 0.20,
                'role': 0.10,
            }

            leadership_score = min(
                max(leadership_years, 0) / self.config.MAX_LEADERSHIP_EXP, 1.0
            ) * 100
            management_score = min(
                max(management_years, 0) / self.config.MAX_MANAGEMENT_EXP, 1.0
            ) * 100

            # Hierarchy levels run 1-5, so x20 maps them onto 0-100.
            role_score = self.calculate_role_score(role_keywords) * 20

            skills_text = (skills or "").lower()
            if self.required_skills and skills_text:
                skills_matched = sum(
                    1 for skill in self.required_skills
                    if fuzz.partial_ratio(skill.lower(), skills_text) > 80
                )
                skill_match_score = (skills_matched / len(self.required_skills)) * 100
            else:
                # No skills text (or no reference list): nothing can match.
                skill_match_score = 0.0

            overall_match = sum([
                leadership_score * weights['leadership'],
                management_score * weights['management'],
                skill_match_score * weights['skills'],
                role_score * weights['role'],
            ])
            return round(overall_match, 2)
        except Exception as e:
            logger.error("Error calculating advanced match: %s", e)
            return 0.0

    @staticmethod
    def _resume_path(resume: Any) -> str:
        """Return the file path for an upload given as a path or as an object with ``.name``."""
        if isinstance(resume, (str, os.PathLike)):
            return os.fspath(resume)
        return str(getattr(resume, "name", resume))

    def process_resume(self, resume: Any, job_desc: str,
                       progress_callback: callable) -> Dict[str, Any]:
        """Analyze one resume and return a result row.

        Args:
            resume: Uploaded file, either a path string or an object exposing
                the path as ``.name``.
            job_desc: Job description to compare against.
            progress_callback: Optional callable invoked with ``1`` once this
                resume has been handled, whether it succeeded or failed.

        Returns:
            A dict with the resume path, contact fields, match percentage and
            the raw analysis; a standardized error row when processing fails.
        """
        resume_name = self._resume_path(resume)
        try:
            resume_text = self.extract_text_from_file(resume_name)
            if not resume_text.strip():
                result = self._create_error_result(
                    resume_name, "Failed to extract text from resume"
                )
            else:
                gemini_analysis = self.analyze_with_gemini(resume_text, job_desc)
                leadership_years, management_years, skills = (
                    self.extract_management_details(gemini_analysis)
                )
                overall_match = self.calculate_advanced_match(
                    leadership_years, management_years, skills, gemini_analysis.lower()
                )
                result = {
                    "Resume": resume_name,
                    "Candidate Name": self._extract_field(gemini_analysis, "Candidate Name"),
                    "Email": self._extract_field(gemini_analysis, "Email Address"),
                    "Contact": self._extract_field(gemini_analysis, "Contact Number"),
                    "Overall Match Percentage": f"{overall_match}%",
                    "Gemini Analysis": gemini_analysis,
                }
        except Exception as e:
            logger.error("Error processing resume %s: %s", resume_name, e)
            result = self._create_error_result(resume_name, str(e))

        if progress_callback:
            # A failing progress hook must not discard a finished analysis.
            try:
                progress_callback(1)
            except Exception as e:
                logger.warning("Progress callback failed for %s: %s", resume_name, e)

        return result

    @staticmethod
    def _extract_field(text: str, field: str) -> str:
        """Return the value that follows ``field`` on the same line of ``text``.

        The label is matched literally (regex metacharacters are escaped) and
        Markdown emphasis asterisks around the label or value are dropped.

        Args:
            text: Analysis text to search.
            field: Label to look for, e.g. ``"Candidate Name"``.

        Returns:
            The trimmed value, or ``"N/A"`` when the label is absent or its
            line carries no value.
        """
        padding = ResumeAnalyzer._LABEL_PADDING
        pattern = re.escape(field) + padding + r"[:\-]?" + padding + r"([^\n]*)"
        match = re.search(pattern, text)
        if not match:
            return "N/A"
        value = match.group(1).strip(" \t\r*")
        return value or "N/A"

    @staticmethod
    def _create_error_result(resume_name: str, error_message: str) -> Dict[str, str]:
        """Build a result row with placeholder fields and the failure reason."""
        return {
            "Resume": resume_name,
            "Candidate Name": "N/A",
            "Email": "N/A",
            "Contact": "N/A",
            "Overall Match Percentage": "0.0%",
            "Gemini Analysis": f"Analysis failed: {error_message}",
        }

    def analyze_resumes(self, resumes: List[Any], job_desc: str) -> pd.DataFrame:
        """Analyze several resumes concurrently.

        Args:
            resumes: Uploaded resume files; ``None`` or an empty list is
                reported as an error row rather than raising.
            job_desc: Job description to compare every resume against.

        Returns:
            A DataFrame with one row per resume in upload order, or a
            single-row DataFrame with an ``"Error"`` column when the request
            is rejected or the batch fails.
        """
        if not resumes:
            return pd.DataFrame([{"Error": "No resumes were uploaded."}])

        if len(resumes) > self.config.MAX_RESUMES:
            return pd.DataFrame([{
                "Error": f"Cannot process more than {self.config.MAX_RESUMES} resumes at once."
            }])

        # NOTE(review): Gradio documents gr.Progress as a default-valued
        # parameter of the event function; a tracker constructed here may not
        # be wired to the UI - confirm.
        progress = gr.Progress()

        try:
            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(self.process_resume, resume, job_desc, progress.update)
                    for resume in resumes
                ]
                # Collect in submission order so rows line up with the uploads
                # instead of depending on which request finishes first.
                results = [future.result() for future in futures]

            return pd.DataFrame(results)
        except Exception as e:
            logger.error("Error in batch resume analysis: %s", e)
            return pd.DataFrame([{"Error": f"Analysis failed: {str(e)}"}])
|
|
|
|
|
def create_interface():
    """Build the Gradio interface backed by a freshly constructed ResumeAnalyzer.

    Returns:
        The configured ``gr.Interface``; launching it is left to the caller.
    """
    analyzer = ResumeAnalyzer()

    # Components are created in the same order as they appear in the UI:
    # the two inputs first, then the results table.
    resume_upload = gr.File(
        label="Upload Resumes (max 10)",
        file_count="multiple",
    )
    job_description_box = gr.Textbox(
        label="Enter Job Description",
        placeholder="Paste the job description here...",
    )
    results_table = gr.DataFrame(label="Analysis Results")

    return gr.Interface(
        fn=analyzer.analyze_resumes,
        inputs=[resume_upload, job_description_box],
        outputs=[results_table],
        title="Resume Analysis Tool",
        description="Upload resumes and a job description to analyze candidates' leadership and management potential.",
        examples=[],
        cache_examples=False,
        theme="default",
    )
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI and launch it with sharing disabled
    # and debug enabled.
    create_interface().launch(debug=True, share=False)