Spaces:

Prernas19
/

cv

Sleeping

App Files Files Community

cv / app.py

Prernas19

Create app.py

b3d91ee verified 5 months ago

raw

history blame contribute delete

7.24 kB

	import os
	import re
	import fitz # Importing PyMuPDF for PDF text extraction
	import nltk
	from gensim.models.doc2vec import Doc2Vec, TaggedDocument
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import pandas as pd
	import gradio as gr

	# Download the NLTK 'punkt' and 'stopwords' data packages at import time.
	for nltk_resource in ('punkt', 'stopwords'):
	    nltk.download(nltk_resource)

	# Function to preprocess text
	def preprocess_text(text):
	    """Lower-case ``text`` and collapse every run of non-word characters.

	    Args:
	        text: Raw input string.

	    Returns:
	        str: The lower-cased text in which each run of characters matched
	        by ``\\W`` is replaced by a single space.
	    """
	    lowered = text.lower()
	    return re.sub(r'\W+', ' ', lowered)

	# Function to extract keywords using TF-IDF
	def extract_keywords_tfidf(text, max_features=50):
	    """Return the TF-IDF keywords of ``text``, highest-scoring first.

	    Args:
	        text: Document to analyse. English stop words are ignored.
	        max_features: Maximum vocabulary size passed to the vectorizer.

	    Returns:
	        list: Keywords ordered by descending TF-IDF score (ties are broken
	        by reverse alphabetical order). An empty list when ``text`` has no
	        usable terms, e.g. it is blank or contains only stop words.
	    """
	    vectorizer = TfidfVectorizer(stop_words='english', max_features=max_features)
	    try:
	        tfidf_matrix = vectorizer.fit_transform([text])
	    except ValueError as err:
	        # scikit-learn signals "nothing to index" with a ValueError; treat
	        # that as "no keywords" instead of aborting the whole analysis.
	        # Any other ValueError (e.g. a bad parameter) is still propagated.
	        if "empty vocabulary" not in str(err):
	            raise
	        return []
	    feature_names = vectorizer.get_feature_names_out()
	    tfidf_scores = tfidf_matrix.toarray().flatten()
	    ranked = sorted(zip(tfidf_scores, feature_names), reverse=True)
	    return [keyword for _score, keyword in ranked]

	# Function to extract text from a PDF
	def extract_text_from_pdf(pdf_path):
	    """Return the concatenated text of every page of a PDF.

	    Args:
	        pdf_path: Path of the PDF file to open with PyMuPDF.

	    Returns:
	        str: Text of all pages joined in page order ("" for a PDF whose
	        pages yield no text).
	    """
	    # The context manager closes the document (and its file handle) even if
	    # text extraction fails part-way through.
	    with fitz.open(pdf_path) as document:
	        return "".join(page.get_text() for page in document)

	# Function to give feedback on resume
	def give_feedback(resume_text, job_description):
	    """Return a list of improvement suggestions for a resume.

	    Args:
	        resume_text: Raw text of the resume.
	        job_description: Text of the job description the resume is
	            compared against.

	    Returns:
	        list[str]: One message per detected issue. When no issue is found
	        the list holds a single positive message instead.
	    """
	    feedback = []
	    resume_lower = resume_text.lower()

	    # Check formatting: mixed bullet styles. Only markers at the start of a
	    # line count, so hyphens in dates or compound words are not mistaken
	    # for dash bullets.
	    has_dot_bullets = re.search(r'^\s*•', resume_text, re.MULTILINE)
	    has_dash_bullets = re.search(r'^\s*-\s', resume_text, re.MULTILINE)
	    if has_dot_bullets and has_dash_bullets:
	        feedback.append("Consider using a consistent bullet point style throughout your resume.")

	    # Check that at least one standard section heading word is present.
	    # (A plain `|` is required for alternation; `\|` matches a literal pipe.)
	    if not re.search(r'\b(?:experience|education|skills)\b', resume_lower):
	        feedback.append("Make sure your resume includes sections like Experience, Education, and Skills.")

	    # Extract keywords and check relevance
	    jd_keywords = extract_keywords_tfidf(preprocess_text(job_description))
	    resume_keywords = extract_keywords_tfidf(preprocess_text(resume_text))

	    common_keywords = set(jd_keywords).intersection(resume_keywords)
	    if len(common_keywords) < 8:
	        feedback.append(f"Your resume could better match the job description. Consider adding keywords such as: {', '.join(jd_keywords[:5])}.")

	    # Check for action verbs as whole words, so that e.g. "led" is not
	    # found inside "called" or "knowledge".
	    action_verbs = ["managed", "led", "developed", "designed", "implemented", "created"]
	    action_verb_pattern = r'\b(?:' + '|'.join(action_verbs) + r')\b'
	    if not re.search(action_verb_pattern, resume_lower):
	        feedback.append("Consider using strong action verbs to describe your achievements and responsibilities.")

	    # Check for a summary / objective statement
	    if not re.search(r'\b(?:summary|objective)\b', resume_text, re.IGNORECASE):
	        feedback.append("Consider adding a professional summary or objective statement to provide a quick overview of your qualifications.")

	    # Check for quantifiable achievements (any digit at all)
	    if not re.search(r'\d', resume_text):
	        feedback.append("Include quantifiable achievements in your experience section (e.g., increased sales by 20%).")

	    # Provide positive feedback if none of the above conditions are met
	    if not feedback:
	        feedback.append("Your resume is well-aligned with the job description. Ensure to keep it updated with relevant keywords and achievements.")

	    return feedback

	# Function to calculate TF-IDF cosine similarity score
	def tfidf_cosine_similarity(resume, jd):
	    """Return the cosine similarity of two texts in a shared TF-IDF space.

	    Args:
	        resume: Resume text.
	        jd: Job description text.

	    Returns:
	        The single similarity value between the two TF-IDF row vectors.
	    """
	    # Fit on both texts together so they share one vocabulary.
	    matrix = TfidfVectorizer().fit_transform([resume, jd])
	    resume_row = matrix[0:1]
	    jd_row = matrix[1:2]
	    similarity = cosine_similarity(resume_row, jd_row)
	    return similarity[0][0]

	# Function to calculate Doc2Vec cosine similarity score
	def doc2vec_cosine_similarity(resume, jd, model):
	    """Return the cosine similarity of two texts using inferred vectors.

	    Args:
	        resume: Resume text; split on whitespace into tokens.
	        jd: Job description text; split on whitespace into tokens.
	        model: Object providing ``infer_vector(tokens)``.

	    Returns:
	        The single similarity value between the two inferred vectors.
	    """
	    resume_tokens = resume.split()
	    jd_tokens = jd.split()
	    # Infer the resume vector first, then the job description vector.
	    vec_resume = model.infer_vector(resume_tokens)
	    vec_jd = model.infer_vector(jd_tokens)
	    return cosine_similarity([vec_resume], [vec_jd])[0][0]

	# Function to extract years of experience from resume
	def extract_years_of_experience(text):
	    """Sum every "<number> year(s)" mention found in ``text``.

	    Recognises forms such as "5 years", "5+ years", "3 yrs", "2-year" and
	    "2.5 years" (counted by its whole-number part, so the result stays an
	    int). Matching is case-insensitive.

	    Args:
	        text: Text to scan, typically the raw resume text.

	    Returns:
	        int: Sum of all matched year counts, or 0 when there are none.
	    """
	    pattern = (
	        r'(?<![\d.])'         # do not start in the middle of a number ("2.5" -> not 5)
	        r'(\d+)(?:\.\d+)?'    # whole part captured, optional decimals ignored
	        r'\s*\+?[\s-]*'       # optional "+", spaces or a hyphen before the unit
	        r'(?:years?|yrs?)\b'  # unit as a whole word, so "yearly" does not match
	    )
	    return sum(int(count) for count in re.findall(pattern, text, re.IGNORECASE))

	# Function to extract information from resumes in a folder
	def extract_info_from_resumes(resume_files, job_description):
	    """Score every uploaded resume against the job description.

	    Args:
	        resume_files: Iterable of resume PDFs, given either as path strings
	            or as objects exposing the path via a ``name`` attribute.
	        job_description: Text of the job description.

	    Returns:
	        list[dict]: One row per resume with the keys ``Name``,
	        ``Keyword_Match_Score``, ``TFIDF_Score``, ``Doc2Vec_Score``,
	        ``Years_of_Experience`` and ``Feedback``.
	    """
	    # Accept plain paths as well as upload objects that carry the path in `.name`.
	    paths = [
	        file if isinstance(file, (str, os.PathLike)) else file.name
	        for file in resume_files
	    ]

	    # Parse each PDF exactly once; the text is reused for training and scoring.
	    raw_texts = [extract_text_from_pdf(path) for path in paths]
	    clean_texts = [preprocess_text(text) for text in raw_texts]

	    # Job-description values are identical for every resume, so compute them once.
	    clean_jd = preprocess_text(job_description)
	    jd_keywords = set(extract_keywords_tfidf(clean_jd))

	    # Train Doc2Vec model on resumes and job description
	    corpus = clean_texts + [clean_jd]
	    tagged_docs = [TaggedDocument(doc.split(), [i]) for i, doc in enumerate(corpus)]
	    model = Doc2Vec(tagged_docs, vector_size=50, window=2, min_count=1, workers=4)

	    data = []
	    for path, text, preprocessed_text in zip(paths, raw_texts, clean_texts):
	        resume_keywords = extract_keywords_tfidf(preprocessed_text)
	        years_of_experience = extract_years_of_experience(text)

	        # Append years of experience to the resume keywords
	        if years_of_experience > 0:
	            resume_keywords.append(f"{years_of_experience} years experience")

	        # The file name without its extension identifies the candidate.
	        name = os.path.splitext(os.path.basename(path))[0]

	        feedback = give_feedback(text, job_description)

	        # Calculate scores
	        common_keywords = jd_keywords.intersection(resume_keywords)
	        keyword_match_score = len(common_keywords)  # Count of common keywords as a whole number
	        tfidf_score = tfidf_cosine_similarity(text, job_description)
	        doc2vec_score = doc2vec_cosine_similarity(preprocessed_text, clean_jd, model)

	        data.append({
	            'Name': name,
	            'Keyword_Match_Score': keyword_match_score,  # Whole number
	            'TFIDF_Score': tfidf_score,
	            'Doc2Vec_Score': doc2vec_score,
	            'Years_of_Experience': years_of_experience,
	            'Feedback': '; '.join(feedback),  # Combine feedback into a single string
	        })

	    return data

	# Function to save data to an Excel file
	def save_to_excel(data, output_file):
	    """Write ``data`` to an Excel file without the index column.

	    Args:
	        data: Rows to store; passed straight to ``pd.DataFrame``.
	        output_file: Destination path of the Excel file.

	    Returns:
	        str: ``output_file`` on success, otherwise a message of the form
	        ``"Error saving file: <exception>"``.
	    """
	    frame = pd.DataFrame(data)
	    try:
	        frame.to_excel(output_file, index=False)
	    except Exception as err:
	        return f"Error saving file: {err}"
	    return output_file

	# Gradio interface function
	def gradio_interface(resume_files, job_description):
	    """Analyse the uploaded resumes and return the path of the Excel report.

	    Args:
	        resume_files: Uploaded resume files (may be ``None`` or empty).
	        job_description: Job description text entered by the user.

	    Returns:
	        str: Path of the generated ``Resume_Analysis.xlsx`` file.

	    Raises:
	        gr.Error: If no resumes were uploaded, the job description is blank,
	            or the Excel file could not be written. The output component is
	            a file download, so a plain message string cannot be returned
	            there; raising shows the message in the UI instead.
	    """
	    if not resume_files:
	        raise gr.Error("No resumes to process.")
	    if not job_description or not job_description.strip():
	        raise gr.Error("Please enter a job description.")

	    output_file = 'Resume_Analysis.xlsx'
	    resumes = extract_info_from_resumes(resume_files, job_description)
	    result = save_to_excel(resumes, output_file)
	    # save_to_excel returns an error message instead of the path on failure.
	    if result != output_file:
	        raise gr.Error(result)

	    return result

	# Gradio UI setup
	# Input widgets: a multi-file upload for the resumes and a text box for
	# the job description.
	resume_upload = gr.Files(label="Upload multiple Resumes", type="filepath")
	job_description_box = gr.Textbox(
	    label="Job Description",
	    lines=5,
	    placeholder="Enter the job description here...",
	)

	# Output widget: the generated file offered for download.
	results_download = gr.File(label="Download Results")

	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=[resume_upload, job_description_box],
	    outputs=results_download,
	    description="Upload multiple resume PDFs and provide a job description to analyze the resumes and get an Excel file with the results.",
	)

	# Start serving the interface.
	iface.launch()