Spaces:

billusanda007
/

HireGPT

Sleeping

App Files Files Community

Jeet Paul committed on Jul 31, 2023

Commit

039d1b5

1 Parent(s): 8781a84

Create app.py

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import streamlit as st
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem import PorterStemmer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from PyPDF2 import PdfReader
+import os
+from io import BytesIO
+import pickle
+import pdfminer
+from pdfminer.high_level import extract_text
+import re
# Fetch the NLTK data packages this script relies on at import time:
# the 'punkt' tokenizer data and the 'stopwords' corpus.
for _nltk_resource in ('punkt', 'stopwords'):
    nltk.download(_nltk_resource)
def preprocess_text(text):
    """Normalise free text into a space-separated string of stemmed tokens.

    The text is lower-cased and tokenised with NLTK's ``word_tokenize``;
    English stop words are discarded and every remaining token is reduced
    with the Porter stemmer.

    Args:
        text: The raw text to normalise.

    Returns:
        The surviving stems joined by single spaces.
    """
    english_stop_words = set(stopwords.words('english'))
    porter = PorterStemmer()
    stems = (
        porter.stem(token)
        for token in word_tokenize(text.lower())
        if token not in english_stop_words
    )
    return ' '.join(stems)
def extract_text_from_pdf(pdf_content):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_content: The raw bytes of a PDF document.

    Returns:
        The extracted page texts joined without any separator. A page whose
        extraction yields nothing contributes an empty string.
    """
    pdf_reader = PdfReader(BytesIO(pdf_content))
    # ``or ''`` keeps a page with no extractable text (None/empty result) from
    # breaking the concatenation; ``join`` avoids repeated string rebuilding.
    return ''.join(page.extract_text() or '' for page in pdf_reader.pages)
def clean_pdf_text(text):
    """Strip URLs, social-media markers, punctuation and non-ASCII characters.

    The substitutions run in a fixed order, because later ones depend on
    earlier ones (for example, URLs must be removed before punctuation is
    blanked out):

    1. ``http...`` URLs (and trailing whitespace) become a single space.
    2. The standalone tokens ``RT`` and ``cc`` become a space. They are
       matched on word boundaries so that ordinary words containing those
       letters (``account``, ``REPORT``) are left intact.
    3. ``#hashtags`` are deleted.
    4. ``@mentions`` become two spaces.
    5. ASCII punctuation characters become spaces.
    6. Non-ASCII characters become spaces.
    7. Every run of whitespace collapses to a single space.

    Args:
        text: The raw text extracted from a resume.

    Returns:
        The cleaned text. Leading or trailing whitespace, if any, is
        collapsed to one space rather than removed.
    """
    punctuation_class = '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""")
    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        (r'http\S+\s*', ' '),
        (r'\b(?:RT|cc)\b', ' '),
        (r'#\S+', ''),
        (r'@\S+', '  '),
        (punctuation_class, ' '),
        (r'[^\x00-\x7f]', ' '),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text
def extract_candidate_name(text):
    """Return the first "Firstname Lastname"-shaped match found in ``text``.

    A name is two consecutive capitalised words (an uppercase letter followed
    by lowercase letters) separated by one whitespace character, optionally
    preceded by ``Mr.``, ``Ms.`` or ``Mrs.``. Adjust the pattern if your
    resumes follow a different naming convention.

    Args:
        text: The text to search.

    Returns:
        The matched name (including the title, when present) with any
        surrounding whitespace removed, or the string
        ``"Candidate Name Not Found"`` when nothing matches.
    """
    pattern = r'(?:Mr\.|Ms\.|Mrs\.)?\s?([A-Z][a-z]+)\s([A-Z][a-z]+)'
    match = re.search(pattern, text)
    if match is None:
        return "Candidate Name Not Found"
    # The optional ``\s?`` can swallow the space *before* the name, so the
    # whole-match text may start with whitespace; strip it off.
    return match.group(0).strip()
def calculate_similarity(job_description, cvs, cv_file_names):
    """Score each CV against the job description with TF-IDF cosine similarity.

    The job description and all CVs are run through ``preprocess_text`` and
    vectorised together, so they share one vocabulary and one set of IDF
    weights.

    Args:
        job_description: The job description text.
        cvs: The CV texts, one string per CV.
        cv_file_names: The file names, in the same order as ``cvs``.

    Returns:
        A list of ``(file_name, score)`` tuples sorted by descending score.
        The list is empty when ``cvs`` is empty.
    """
    if not cvs:
        # Nothing to rank; skip vectorising altogether.
        return []

    processed_job_desc = preprocess_text(job_description)
    processed_cvs = [preprocess_text(cv) for cv in cvs]

    # Row 0 is the job description; rows 1..n are the CVs.
    tfidf_matrix = TfidfVectorizer().fit_transform([processed_job_desc] + processed_cvs)

    # Compare only the job-description row with the CV rows instead of
    # building the full pairwise matrix and discarding all but one row.
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    return sorted(
        zip(cv_file_names, similarity_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
def rank_and_shortlist(job_description, cv_files, threshold=0.2):
    """Rank uploaded PDF resumes against a job description and shortlist them.

    Args:
        job_description: The job description text.
        cv_files: File-like objects for the PDF resumes. Each must provide
            ``read()`` and a ``name`` attribute.
        threshold: A resume is shortlisted when its similarity score is
            strictly greater than this value.

    Returns:
        A ``(ranked_cvs, shortlisted_cvs)`` tuple. Both are lists of
        ``(file_name, score)`` tuples in descending score order;
        ``shortlisted_cvs`` keeps only the entries above ``threshold``.
    """
    cv_file_names = []
    cvs = []
    for cv_file in cv_files:
        # Rewind when possible: a buffer that has already been read once
        # (for example on an earlier run of the app) would otherwise
        # return empty bytes.
        rewind = getattr(cv_file, 'seek', None)
        if callable(rewind):
            rewind(0)
        cv_file_names.append(cv_file.name)
        cvs.append(clean_pdf_text(extract_text_from_pdf(cv_file.read())))

    ranked_cvs = calculate_similarity(job_description, cvs, cv_file_names)
    shortlisted_cvs = [(cv_name, score) for cv_name, score in ranked_cvs if score > threshold]
    return ranked_cvs, shortlisted_cvs
def main():
    """Render the Streamlit page: collect inputs, rank the resumes, show results.

    The page has a text area for the job description and a multi-file PDF
    uploader. Pressing "Submit" ranks the uploaded resumes with
    ``rank_and_shortlist`` and lists every resume, followed by the
    shortlisted subset. If either input is missing, a warning is shown
    instead of silently doing nothing.
    """
    st.title("Resume Ranking App")
    st.write("Upload the Job Description:")
    job_description = st.text_area("Job Description", height=200, key='job_description')
    st.write("Upload the Resumes (PDFs):")
    cv_files = st.file_uploader("Choose PDF files", accept_multiple_files=True, type=["pdf"], key='cv_files')

    if not st.button("Submit"):
        return

    # A whitespace-only description carries no terms to match against.
    if not (job_description or "").strip() or not cv_files:
        st.warning("Please enter a job description and upload at least one PDF resume.")
        return

    ranked_cvs, shortlisted_cvs = rank_and_shortlist(job_description, cv_files)

    # Section headings are rendered as level-3 Markdown headers.
    st.markdown("### Ranking of Resumes:")
    for file_name, score in ranked_cvs:
        st.markdown(f"**File Name:** {file_name}, **Similarity Score:** {score:.2f}")

    st.markdown("### Shortlisted Candidates:")
    if not shortlisted_cvs:
        st.info("No resume scored above the shortlisting threshold.")
    for file_name, score in shortlisted_cvs:
        st.markdown(f"**File Name:** {file_name}, **Similarity Score:** {score:.2f}")


if __name__ == "__main__":
    main()