Jeet Paul committed on
Commit
039d1b5
·
1 Parent(s): 8781a84

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import nltk
3
+ from nltk.corpus import stopwords
4
+ from nltk.tokenize import word_tokenize
5
+ from nltk.stem import PorterStemmer
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ from PyPDF2 import PdfReader
9
+ import os
10
+ from io import BytesIO
11
+ import pickle
12
+ import pdfminer
13
+ from pdfminer.high_level import extract_text
14
+ import re
15
+
16
# Fetch the NLTK data used by preprocess_text (tokenizer models and the
# stop-word list). Newer NLTK releases load the Punkt tokenizer from the
# 'punkt_tab' package instead of the pickled 'punkt' one, so both are
# requested to keep word_tokenize working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
18
+
19
def preprocess_text(text):
    """Normalise free text into a space-separated string of stemmed tokens.

    The text is lower-cased and tokenised with NLTK's ``word_tokenize``,
    English stop words are discarded, and every remaining token is reduced
    to its Porter stem.
    """
    tokens = word_tokenize(text.lower())

    english_stops = set(stopwords.words('english'))
    porter = PorterStemmer()

    stems = []
    for token in tokens:
        if token in english_stops:
            continue
        stems.append(porter.stem(token))

    return ' '.join(stems)
29
+
30
def extract_text_from_pdf(pdf_content):
    """Extract the text of every page of a PDF supplied as raw bytes.

    Args:
        pdf_content: The PDF file content as a bytes-like object.

    Returns:
        The text of all pages, with a newline between consecutive pages.
    """
    pdf_reader = PdfReader(BytesIO(pdf_content))
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next (which would create a bogus token downstream).
    # `or ''` tolerates a page whose extraction yields nothing.
    return '\n'.join(page.extract_text() or '' for page in pdf_reader.pages)
36
+
37
def clean_pdf_text(text):
    """Strip URLs, tags, punctuation and non-ASCII characters from resume text.

    Args:
        text: Raw text extracted from a resume.

    Returns:
        The cleaned text with every run of whitespace collapsed to a single
        space. Leading/trailing whitespace is collapsed but not removed.
    """
    # Raw strings are used throughout so regex escapes such as \S are not
    # interpreted as (invalid) string escape sequences.
    text = re.sub(r'http\S+\s*', ' ', text)  # URLs
    # Only drop standalone "RT"/"cc" tokens; without the word boundaries the
    # pattern also matched inside words ("Accounting" -> "A ounting").
    text = re.sub(r'\b(?:RT|cc)\b', ' ', text)
    text = re.sub(r'#\S+', '', text)  # hashtags
    text = re.sub(r'@\S+', ' ', text)  # mentions
    # Replace every ASCII punctuation character with a space.
    text = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', text)
    text = re.sub(r'[^\x00-\x7f]', ' ', text)  # non-ASCII characters
    text = re.sub(r'\s+', ' ', text)  # collapse whitespace runs
    return text
47
+
48
def extract_candidate_name(text):
    """Return the first "Firstname Lastname"-style match found in *text*.

    The pattern looks for two consecutive capitalised words, optionally
    preceded by a title ("Mr.", "Ms." or "Mrs."). Adjust the pattern to
    your own naming conventions if needed.

    Args:
        text: Text to search.

    Returns:
        The matched name (including the title when present), or the string
        "Candidate Name Not Found" when nothing matches.
    """
    pattern = r'(?:Mr\.|Ms\.|Mrs\.)?\s?([A-Z][a-z]+)\s([A-Z][a-z]+)'
    match = re.search(pattern, text)
    if match is None:
        return "Candidate Name Not Found"
    # The optional \s? may capture the whitespace that precedes the name;
    # trim it so callers never receive a name with a leading space.
    return match.group(0).strip()
56
+
57
def calculate_similarity(job_description, cvs, cv_file_names):
    """Rank CVs by TF-IDF cosine similarity to a job description.

    Args:
        job_description: Job description text.
        cvs: CV texts, one per candidate.
        cv_file_names: File names paired positionally with ``cvs``.

    Returns:
        A list of ``(file_name, score)`` tuples sorted by score, highest
        first. Empty when no CVs are supplied.
    """
    cvs = list(cvs)
    if not cvs:
        # Nothing to rank; also avoids passing an empty matrix to
        # cosine_similarity below.
        return []

    processed_job_desc = preprocess_text(job_description)
    processed_cvs = [preprocess_text(cv) for cv in cvs]

    # Fit on the job description and CVs together so they share one vocabulary.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([processed_job_desc] + processed_cvs)

    # Row 0 is the job description and the remaining rows are the CVs. Only
    # the job-vs-CV similarities are needed, so compute that single row
    # instead of the full pairwise matrix.
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    return sorted(
        zip(cv_file_names, similarity_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
73
+
74
def rank_and_shortlist(job_description, cv_files, threshold=0.2):
    """Score uploaded CV PDFs against a job description and shortlist them.

    Each item of ``cv_files`` must provide ``read()`` (returning the PDF
    bytes) and a ``name`` attribute.

    Returns a pair ``(ranked_cvs, shortlisted_cvs)``: every CV as a
    ``(file_name, score)`` tuple in the order produced by
    ``calculate_similarity``, and the subset whose score is strictly greater
    than ``threshold``.
    """
    # Pull the raw text out of every uploaded PDF first.
    raw_texts = []
    for uploaded in cv_files:
        raw_texts.append(extract_text_from_pdf(uploaded.read()))

    names = [uploaded.name for uploaded in cv_files]
    cleaned_texts = list(map(clean_pdf_text, raw_texts))

    scored = calculate_similarity(job_description, cleaned_texts, names)

    ranked_cvs = list(scored)
    shortlisted_cvs = [entry for entry in ranked_cvs if entry[1] > threshold]

    return ranked_cvs, shortlisted_cvs
84
+
85
def _render_scores(heading, scored_cvs):
    """Write a markdown heading followed by one line per (file name, score)."""
    st.markdown(heading)
    for file_name, score in scored_cvs:
        st.markdown(f"**File Name:** {file_name}, **Similarity Score:** {score:.2f}")


def main():
    """Render the Streamlit UI: collect a job description and CV PDFs, then rank them.

    On "Submit" the uploaded resumes are scored against the job description
    with rank_and_shortlist, and both the full ranking and the shortlist are
    displayed.
    """
    st.title("Resume Ranking App")

    st.write("Upload the Job Description:")
    job_description = st.text_area("Job Description", height=200, key='job_description')

    st.write("Upload the Resumes (PDFs):")
    cv_files = st.file_uploader("Choose PDF files", accept_multiple_files=True, type=["pdf"], key='cv_files')

    if not st.button("Submit"):
        return

    # Tell the user what is missing instead of silently doing nothing.
    if not job_description or not job_description.strip() or not cv_files:
        st.warning("Please enter a job description and upload at least one resume PDF.")
        return

    # Rank and shortlist candidates
    ranked_cvs, shortlisted_cvs = rank_and_shortlist(job_description, cv_files)

    # Markdown headings give the section titles larger text.
    _render_scores("### Ranking of Resumes:", ranked_cvs)
    _render_scores("### Shortlisted Candidates:", shortlisted_cvs)


if __name__ == "__main__":
    main()