Spaces:

raj22rishi
/

Resume-Tracker

Build error

App Files Files Community

Resume-Tracker / app.py

raj22rishi

Upload app.py

ca4a0d5 verified 11 months ago

raw

history blame

3.76 kB

	import streamlit as st
	from io import BytesIO
	import tempfile
	from PyPDF2 import PdfReader
	import spacy
	from pyresparser import ResumeParser
	from sklearn.feature_extraction.text import TfidfVectorizer

	# Load the spaCy model for natural language processing
	nlp = spacy.load('en_core_web_sm')

	# Function to extract text from PDF resumes
	def extract_text_from_pdf(file):
	text = ""
	pdf_reader = PdfReader(file)
	for page in pdf_reader.pages:
	text += page.extract_text()
	return text

	# Function to preprocess text using spaCy
	def preprocess_text(text):
	doc = nlp(text)
	tokens = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
	return " ".join(tokens)

	# Function to preprocess and combine the relevant resume fields
	def preprocess_resume_data(resume_data):
	skills = " ".join(resume_data.get('skills', [])) if resume_data.get('skills') else ""
	experience = " ".join(resume_data.get('experience', [])) if resume_data.get('experience') else ""
	degree = " ".join(resume_data.get('degree', [])) if resume_data.get('degree') else ""
	combined_data = f"{skills} {experience} {degree}"
	return preprocess_text(combined_data)

	# Main function to create the Streamlit app
	def main():
	st.title("Resume Ranker and Prescreening Software")
	st.write("Upload resumes (in PDF format) and enter job descriptions or keywords to filter and rank them.")

	# Upload resumes
	uploaded_files = st.file_uploader("Upload Resumes (PDF files)", accept_multiple_files=True)

	# Input field for job description or keywords
	job_description = st.text_area("Enter Job Description or Keywords")

	if st.button("Rank Resumes"):
	if not uploaded_files:
	st.warning("Please upload one or more resumes.")
	return

	if not job_description:
	st.warning("Please enter a job description or keywords.")
	return

	# Preprocess the job description
	job_description_processed = preprocess_text(job_description)

	# Vectorize the job description and resumes
	vectorizer = TfidfVectorizer()
	job_vec = vectorizer.fit_transform([job_description_processed])

	# List to store responses along with file names and their matching percentages
	file_responses = []

	# Loop through uploaded resumes
	for file in uploaded_files:
	# Read the uploaded PDF file into memory
	pdf_data = BytesIO(file.read())

	# Save the PDF data to a temporary file
	with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
	temp_file.write(pdf_data.getvalue())
	temp_file_path = temp_file.name

	# Parse and preprocess resumes
	data = ResumeParser(temp_file_path).get_extracted_data()
	if data:
	combined_resume_data = preprocess_resume_data(data)
	resume_vec = vectorizer.transform([combined_resume_data])
	similarity = (resume_vec * job_vec.T).A[0][0] * 100

	# Append file name and similarity to the list
	file_responses.append((file.name, similarity))

	# Delete the temporary file
	temp_file.close()

	# Sort file responses based on the similarity in descending order
	file_responses.sort(key=lambda x: x[1], reverse=True)

	# Display sorted file names and similarity percentages
	st.header("Ranked Resumes")
	for file_name, similarity in file_responses:
	st.write(f"Resume: {file_name}, Match Percentage: {similarity:.2f}%")

	if __name__ == "__main__":
	main()