|
import os |
|
import re |
|
import fitz |
|
import nltk |
|
from gensim.models.doc2vec import Doc2Vec, TaggedDocument |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import pandas as pd |
|
import gradio as gr |
|
|
|
|
|
# Download the NLTK 'punkt' and 'stopwords' resources when the module loads.
for _nltk_resource in ('punkt', 'stopwords'):
    nltk.download(_nltk_resource)
|
|
|
|
|
def preprocess_text(text):
    """Normalize *text* for keyword and similarity analysis.

    The text is lower-cased and every run of non-word characters
    (anything other than letters, digits and underscores) is replaced by
    a single space. Leading and trailing runs become a space as well;
    the result is not stripped.

    Args:
        text: The raw string to normalize.

    Returns:
        The normalized string.
    """
    lowered = text.lower()
    non_word_runs = re.compile(r'\W+')
    return non_word_runs.sub(' ', lowered)
|
|
|
|
|
def extract_keywords_tfidf(text, max_features=50):
    """Return the keywords of *text*, ranked by TF-IDF score.

    A ``TfidfVectorizer`` with English stop words removed is fitted on
    *text* as a single document, and its features are returned from the
    highest score to the lowest. Ties are broken by the keyword itself,
    in descending order, because (score, keyword) pairs are sorted.

    Args:
        text: The document to analyze.
        max_features: Maximum number of keywords to keep.

    Returns:
        A list of keywords, best first. The list is empty when *text* is
        empty, whitespace-only, or contains nothing but stop words.
    """
    # An empty document has no vocabulary to fit on, so skip the vectorizer.
    if not text or not text.strip():
        return []

    vectorizer = TfidfVectorizer(stop_words='english', max_features=max_features)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError as err:
        # scikit-learn reports a document made only of stop words (or of
        # tokens it discards) as an "empty vocabulary" ValueError. Treat
        # that as "no keywords" and let any other ValueError propagate.
        if "empty vocabulary" not in str(err):
            raise
        return []

    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray().ravel()
    ranked = sorted(zip(tfidf_scores, feature_names), reverse=True)
    return [keyword for _score, keyword in ranked]
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string containing the text of all pages.
    """
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as document:
        return "".join(page.get_text() for page in document)
|
|
|
|
|
def give_feedback(resume_text, job_description):
    """Build a list of improvement suggestions for a resume.

    The checks are simple text heuristics: mixed bullet styles, missing
    standard sections, keyword overlap with the job description, action
    verbs, a summary/objective statement, and the presence of numbers.

    Args:
        resume_text: The raw text of the resume.
        job_description: The job description to compare against.

    Returns:
        A list of suggestion strings. When no check fires, the list holds
        a single message saying the resume is well aligned.
    """
    feedback = []
    resume_lower = resume_text.lower()

    # Only a dash that starts a line counts as a bullet; hyphens inside
    # dates, phone numbers or compound words are not a bullet style.
    uses_dot_bullets = '•' in resume_text
    uses_dash_bullets = re.search(r'^\s*-\s', resume_text, re.MULTILINE)
    if uses_dot_bullets and uses_dash_bullets:
        feedback.append("Consider using a consistent bullet point style throughout your resume.")

    if not re.search(r'\bexperience\b|\beducation\b|\bskills\b', resume_lower):
        feedback.append("Make sure your resume includes sections like Experience, Education, and Skills.")

    preprocessed_resume = preprocess_text(resume_text)
    jd_keywords = extract_keywords_tfidf(preprocess_text(job_description))
    resume_keywords = extract_keywords_tfidf(preprocessed_resume)

    common_keywords = set(jd_keywords).intersection(resume_keywords)
    if len(common_keywords) < 8:
        # Suggest only keywords that do not occur anywhere in the resume,
        # keeping the job description's ranking order.
        resume_tokens = set(preprocessed_resume.split())
        missing_keywords = [kw for kw in jd_keywords if kw not in resume_tokens]
        if missing_keywords:
            feedback.append(f"Your resume could better match the job description. Consider adding keywords such as: {', '.join(missing_keywords[:5])}.")

    action_verbs = ["managed", "led", "developed", "designed", "implemented", "created"]
    # Match whole words so that e.g. "led" is not found inside "scheduled".
    action_verb_pattern = r'\b(?:' + '|'.join(map(re.escape, action_verbs)) + r')\b'
    if not re.search(action_verb_pattern, resume_lower):
        feedback.append("Consider using strong action verbs to describe your achievements and responsibilities.")

    if not re.search(r'\bsummary\b|\bobjective\b', resume_text, re.IGNORECASE):
        feedback.append("Consider adding a professional summary or objective statement to provide a quick overview of your qualifications.")

    if not re.search(r'\d', resume_text):
        feedback.append("Include quantifiable achievements in your experience section (e.g., increased sales by 20%).")

    if not feedback:
        feedback.append("Your resume is well-aligned with the job description. Ensure to keep it updated with relevant keywords and achievements.")

    return feedback
|
|
|
|
|
def tfidf_cosine_similarity(resume, jd):
    """Return the cosine similarity of two texts in a shared TF-IDF space.

    A default ``TfidfVectorizer`` is fitted on the two texts together, so
    both are represented over the same vocabulary.

    Args:
        resume: The resume text.
        jd: The job description text.

    Returns:
        The similarity score taken from the 1x1 result matrix.
    """
    matrix = TfidfVectorizer().fit_transform([resume, jd])
    resume_row = matrix[0:1]
    jd_row = matrix[1:2]
    similarity = cosine_similarity(resume_row, jd_row)
    return similarity[0][0]
|
|
|
|
|
def doc2vec_cosine_similarity(resume, jd, model):
    """Return the cosine similarity of two texts using inferred Doc2Vec vectors.

    Each text is split on whitespace and passed to ``model.infer_vector``,
    resume first and job description second.

    Args:
        resume: The resume text.
        jd: The job description text.
        model: An object exposing ``infer_vector(tokens)``.

    Returns:
        The similarity score taken from the 1x1 result matrix.
    """
    resume_vector, jd_vector = (
        model.infer_vector(document.split()) for document in (resume, jd)
    )
    similarity = cosine_similarity([resume_vector], [jd_vector])
    return similarity[0][0]
|
|
|
|
|
def extract_years_of_experience(text):
    """Return the sum of every "<number> year(s)" mention found in *text*.

    Matching is case-insensitive and accepts an optional ``+`` after the
    number ("5+ years"). The word must be exactly "year" or "years", so
    words that merely start with "year" (such as "yearly") are ignored.
    Every mention is added up; "5 years ... 3 years" yields 8.

    Args:
        text: The text to scan.

    Returns:
        The total as an int, or 0 when there is no match.
    """
    mentions = re.findall(r'(\d+)\s*\+?\s*years?\b', text, re.IGNORECASE)
    return sum(int(number) for number in mentions)
|
|
|
|
|
def _resume_path(file):
    """Return the filesystem path of an upload given as a path string or an object with ``.name``."""
    return getattr(file, "name", file)


def extract_info_from_resumes(resume_files, job_description):
    """Score each resume PDF against a job description.

    A Doc2Vec model is trained on all resumes plus the job description.
    Each resume then gets a keyword-overlap count, a TF-IDF cosine
    similarity, a Doc2Vec cosine similarity, a years-of-experience total
    and textual feedback.

    Args:
        resume_files: Uploaded resumes, each either a path string or an
            object whose ``name`` attribute is the path.
        job_description: The job description text.

    Returns:
        A list with one dict per resume, with the keys ``Name``,
        ``Keyword_Match_Score``, ``TFIDF_Score``, ``Doc2Vec_Score``,
        ``Years_of_Experience`` and ``Feedback``.
    """
    # Parse every PDF exactly once and reuse the text for both the model
    # training corpus and the per-resume scoring below.
    resumes = []
    for file in resume_files:
        path = _resume_path(file)
        text = extract_text_from_pdf(path)
        resumes.append((path, text, preprocess_text(text)))

    # The job description does not change between resumes, so its
    # preprocessing and keyword extraction are done once up front.
    preprocessed_jd = preprocess_text(job_description)
    jd_keywords = set(extract_keywords_tfidf(preprocessed_jd))

    documents = [preprocessed for _path, _text, preprocessed in resumes]
    documents.append(preprocessed_jd)
    tagged_docs = [TaggedDocument(doc.split(), [i]) for i, doc in enumerate(documents)]
    model = Doc2Vec(tagged_docs, vector_size=50, window=2, min_count=1, workers=4)

    data = []
    for path, text, preprocessed_text in resumes:
        resume_keywords = extract_keywords_tfidf(preprocessed_text)
        years_of_experience = extract_years_of_experience(text)
        if years_of_experience > 0:
            resume_keywords.append(f"{years_of_experience} years experience")

        name = os.path.splitext(os.path.basename(path))[0]
        feedback = give_feedback(text, job_description)

        common_keywords = jd_keywords.intersection(resume_keywords)
        data.append({
            'Name': name,
            'Keyword_Match_Score': len(common_keywords),
            'TFIDF_Score': tfidf_cosine_similarity(text, job_description),
            'Doc2Vec_Score': doc2vec_cosine_similarity(preprocessed_text, preprocessed_jd, model),
            'Years_of_Experience': years_of_experience,
            'Feedback': '; '.join(feedback),
        })

    return data
|
|
|
|
|
def save_to_excel(data, output_file):
    """Write *data* to an Excel file and report the outcome.

    Args:
        data: Records accepted by ``pd.DataFrame``.
        output_file: Destination path of the Excel file.

    Returns:
        *output_file* when the file was written, otherwise a string of
        the form ``"Error saving file: <exception>"``.
    """
    frame = pd.DataFrame(data)
    try:
        frame.to_excel(output_file, index=False)
    except Exception as exc:
        return f"Error saving file: {exc}"
    else:
        return output_file
|
|
|
|
|
def gradio_interface(resume_files, job_description):
    """Analyze the uploaded resumes and return the path of the Excel report.

    The return value feeds a file-download output, so it must always be a
    real file path. Problems are therefore reported by raising
    ``gr.Error`` rather than by returning a message string.

    Args:
        resume_files: The uploaded resume files.
        job_description: The job description text.

    Returns:
        The path of the generated Excel file.

    Raises:
        gr.Error: If no resumes were uploaded, the job description is
            blank, or the Excel file could not be written.
    """
    if not resume_files:
        raise gr.Error("No resumes to process.")
    if not job_description or not job_description.strip():
        raise gr.Error("Please provide a job description.")

    output_file = 'Resume_Analysis.xlsx'
    resumes = extract_info_from_resumes(resume_files, job_description)
    result = save_to_excel(resumes, output_file)

    # save_to_excel returns an error message instead of the path on failure.
    if result != output_file:
        raise gr.Error(result)
    return result
|
|
|
|
|
# Input and output widgets, created in the order the interface lists them.
resume_upload = gr.Files(label="Upload multiple Resumes", type="filepath")
job_description_box = gr.Textbox(
    label="Job Description",
    lines=5,
    placeholder="Enter the job description here...",
)
results_download = gr.File(label="Download Results")

# Wire the analysis function to the widgets.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[resume_upload, job_description_box],
    outputs=results_download,
    description="Upload multiple resume PDFs and provide a job description to analyze the resumes and get an Excel file with the results.",
)

# Start the web app.
iface.launch()