Spaces:
Sleeping
Sleeping
File size: 4,103 Bytes
039d1b5 1fe926a 039d1b5 1fe926a 039d1b5 1fe926a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from PyPDF2 import PdfReader
import os
from io import BytesIO
import pickle
import pdfminer
from pdfminer.high_level import extract_text
import re
# Fetch the NLTK data used below: 'punkt' backs word_tokenize and
# 'stopwords' backs stopwords.words('english').
for _nltk_resource in ('punkt', 'stopwords'):
    nltk.download(_nltk_resource)
def preprocess_text(text):
    """Normalise *text* into a space-separated string of stemmed tokens.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens
    found in NLTK's English stop-word list are dropped and the rest are
    stemmed with a ``PorterStemmer``.

    Args:
        text: Raw text to normalise.

    Returns:
        The surviving stems joined by single spaces.
    """
    tokens = word_tokenize(text.lower())
    english_stop_words = set(stopwords.words('english'))
    porter = PorterStemmer()
    stems = (
        porter.stem(token)
        for token in tokens
        if token not in english_stop_words
    )
    return ' '.join(stems)
def extract_text_from_pdf(pdf_content):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_content: The raw bytes of the PDF document.

    Returns:
        One string made of each page's ``extract_text()`` output, with no
        separator inserted between pages.
    """
    reader = PdfReader(BytesIO(pdf_content))
    return ''.join(page.extract_text() for page in reader.pages)
def clean_pdf_text(text):
    """Strip URLs, mentions, hashtags, punctuation and non-ASCII from *text*.

    Steps, in order:
      1. URLs (``http...``) are replaced by a space.
      2. The standalone tokens ``RT`` and ``cc`` are replaced by a space.
      3. Hashtags (``#...``) are removed.
      4. Mentions (``@...``) are replaced by a space.
      5. ASCII punctuation is replaced by a space.
      6. Non-ASCII characters are replaced by a space.
      7. Runs of whitespace are collapsed to a single space.

    Args:
        text: Text extracted from a resume.

    Returns:
        The cleaned text. Leading/trailing single spaces are not stripped.
    """
    text = re.sub(r'http\S+\s*', ' ', text)
    # Word boundaries matter here: a bare 'RT|cc' also matches inside
    # ordinary words and would turn "accounting" into "a ounting".
    text = re.sub(r'\b(?:RT|cc)\b', ' ', text)
    text = re.sub(r'#\S+', '', text)
    text = re.sub(r'@\S+', ' ', text)
    # Raw literal so the backslash in the punctuation set is kept verbatim
    # without relying on an invalid escape sequence.
    text = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', text)
    text = re.sub(r'[^\x00-\x7f]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text
def extract_candidate_name(text):
    """Return the first "Firstname Lastname" pair found in *text*.

    A name is two consecutive capitalised words (an uppercase letter
    followed by lowercase letters) separated by one whitespace character,
    optionally preceded by ``Mr.``, ``Ms.`` or ``Mrs.``. Adjust the pattern
    to suit other naming conventions.

    Args:
        text: Text to search.

    Returns:
        The matched name (including the title, if present), or the string
        ``"Candidate Name Not Found"`` when nothing matches.
    """
    pattern = r'(?:Mr\.|Ms\.|Mrs\.)?\s?([A-Z][a-z]+)\s([A-Z][a-z]+)'
    match = re.search(pattern, text)
    if match is None:
        return "Candidate Name Not Found"
    # The optional \s? can swallow the whitespace that precedes the name
    # when it is not at the start of the text; do not return it.
    return match.group(0).strip()
def calculate_similarity(job_description, cvs, cv_file_names):
    """Rank CVs by TF-IDF cosine similarity to the job description.

    The job description and every CV are passed through
    ``preprocess_text``, vectorised together with one ``TfidfVectorizer``,
    and each CV vector is compared with the job-description vector.

    Args:
        job_description: Job description text.
        cvs: CV texts, one string per CV.
        cv_file_names: File names paired positionally with ``cvs``.

    Returns:
        A list of ``(file_name, score)`` tuples sorted by descending score.
        Empty when ``cvs`` is empty.
    """
    if not cvs:
        # Nothing to compare against; also avoids handing cosine_similarity
        # a matrix with zero rows.
        return []

    processed_job_desc = preprocess_text(job_description)
    processed_cvs = [preprocess_text(cv) for cv in cvs]

    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([processed_job_desc] + processed_cvs)

    # Row 0 is the job description. Compare it with the CV rows only rather
    # than building the full pairwise matrix just to read its first row.
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    return sorted(
        zip(cv_file_names, similarity_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
def rank_and_shortlist(job_description, cv_files, threshold=0.15):
    """Rank uploaded CV PDFs against a job description and shortlist them.

    Each file is read in full, its text extracted with
    ``extract_text_from_pdf`` and cleaned with ``clean_pdf_text`` before
    being scored by ``calculate_similarity``.

    Args:
        job_description: Job description text.
        cv_files: File-like objects exposing ``read()`` and ``name``.
        threshold: A CV is shortlisted when its score is strictly greater
            than this value.

    Returns:
        ``(ranked_cvs, shortlisted_cvs)``: both are lists of
        ``(file_name, score)`` tuples in the order produced by
        ``calculate_similarity``; the second holds only entries above
        ``threshold``.
    """
    raw_texts = [extract_text_from_pdf(uploaded.read()) for uploaded in cv_files]
    names = [uploaded.name for uploaded in cv_files]
    cleaned_texts = [clean_pdf_text(raw) for raw in raw_texts]

    ranked_cvs = list(calculate_similarity(job_description, cleaned_texts, names))
    shortlisted_cvs = [entry for entry in ranked_cvs if entry[1] > threshold]
    return ranked_cvs, shortlisted_cvs
def main():
    """Render the Streamlit page and show rankings once Submit is pressed.

    The page has a text area for the job description and a multi-file PDF
    uploader. On Submit, if both inputs are present, every resume is listed
    with its similarity score, followed by the shortlisted subset (or
    "None" when the shortlist is empty). If either input is missing, a
    prompt asking for both is shown instead.
    """
    st.title("Resume Ranking App")

    st.write("Upload the Job Description:")
    job_description = st.text_area("Job Description", height=200, key='job_description')

    st.write("Upload the Resumes (PDFs):")
    cv_files = st.file_uploader("Choose PDF files", accept_multiple_files=True, type=["pdf"], key='cv_files')

    if not st.button("Submit"):
        return

    if not (job_description and cv_files):
        st.write("Please upload both the job description and resumes to proceed.")
        return

    ranked_cvs, shortlisted_cvs = rank_and_shortlist(job_description, cv_files)

    def show_rows(rows):
        # One markdown line per (file name, score) pair.
        for file_name, score in rows:
            st.markdown(f"**File Name:** {file_name}, **Similarity Score:** {score:.2f}")

    st.markdown("### Ranking of Resumes:")
    show_rows(ranked_cvs)

    st.markdown("### Shortlisted Candidates:")
    if shortlisted_cvs:
        show_rows(shortlisted_cvs)
    else:
        st.markdown("None")
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()