import os
import tempfile
from io import BytesIO

import streamlit as st
import spacy
from PyPDF2 import PdfReader
from pyresparser import ResumeParser
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the spaCy model for natural language processing
nlp = spacy.load('en_core_web_sm')
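# Note: this assumes the 'en_core_web_sm' model is already installed in the
# environment (e.g. via `python -m spacy download en_core_web_sm`);
# pyresparser additionally needs spaCy and NLTK data available at runtime.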


# Function to extract text from PDF resumes
def extract_text_from_pdf(file):
    text = ""
    pdf_reader = PdfReader(file)
    for page in pdf_reader.pages:
        # extract_text() may return None for image-only pages
        text += page.extract_text() or ""
    return text
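
# Note: this helper is currently not called below; resume text for
# ranking is extracted by pyresparser instead.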


# Function to preprocess text using spaCy
def preprocess_text(text):
    doc = nlp(text)
    tokens = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
    return " ".join(tokens)


# Function to preprocess and combine the relevant resume fields
def preprocess_resume_data(resume_data):
    skills = " ".join(resume_data.get('skills', [])) if resume_data.get('skills') else ""
    experience = " ".join(resume_data.get('experience', [])) if resume_data.get('experience') else ""
    degree = " ".join(resume_data.get('degree', [])) if resume_data.get('degree') else ""
    combined_data = f"{skills} {experience} {degree}"
    return preprocess_text(combined_data)
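
# pyresparser's get_extracted_data() returns a dict; the 'skills',
# 'experience', and 'degree' values may be lists or None, hence the
# empty-string fallbacks above.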


# Main function to create the Streamlit app
def main():
    st.title("Resume Ranker and Prescreening Software")
    st.write("Upload resumes (in PDF format) and enter job descriptions or keywords to filter and rank them.")

    # Upload resumes
    uploaded_files = st.file_uploader("Upload Resumes (PDF files)", type=['pdf'], accept_multiple_files=True)

    # Input field for job description or keywords
    job_description = st.text_area("Enter Job Description or Keywords")

    if st.button("Rank Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resumes.")
            return
        if not job_description:
            st.warning("Please enter a job description or keywords.")
            return

        # Preprocess the job description
        job_description_processed = preprocess_text(job_description)

        # Fit the vectorizer on the job description; resumes are
        # transformed against the same vocabulary below
        vectorizer = TfidfVectorizer()
        job_vec = vectorizer.fit_transform([job_description_processed])
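
        # TfidfVectorizer L2-normalizes its output by default, so the dot
        # product of a resume vector with job_vec is their cosine
        # similarity, measured over the job description's vocabulary only.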

        # List of (file name, match percentage) pairs
        file_responses = []

        # Loop through uploaded resumes
        for file in uploaded_files:
            # Read the uploaded PDF file into memory
            pdf_data = BytesIO(file.read())

            # Save the PDF data to a temporary file, since pyresparser
            # expects a file path
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
                temp_file.write(pdf_data.getvalue())
                temp_file_path = temp_file.name

            # Parse and preprocess the resume
            data = ResumeParser(temp_file_path).get_extracted_data()
            if data:
                combined_resume_data = preprocess_resume_data(data)
                resume_vec = vectorizer.transform([combined_resume_data])
                similarity = (resume_vec * job_vec.T).toarray()[0][0] * 100
                # Append file name and similarity to the list
                file_responses.append((file.name, similarity))

            # Delete the temporary file (the with block only closes it;
            # delete=False keeps it on disk)
            os.unlink(temp_file_path)

        # Sort file responses by similarity in descending order
        file_responses.sort(key=lambda x: x[1], reverse=True)

        # Display sorted file names and similarity percentages
        st.header("Ranked Resumes")
        for file_name, similarity in file_responses:
            st.write(f"Resume: {file_name}, Match Percentage: {similarity:.2f}%")


if __name__ == "__main__":
    main()
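
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py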