DreamStream-1 committed on
Commit
26fdc21
·
verified ·
1 Parent(s): a51874e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -96
app.py CHANGED
@@ -1,126 +1,183 @@
1
  import re
2
- import io
3
- import PyPDF2 as pdf
4
  import pandas as pd
5
- from sklearn.feature_extraction.text import TfidfVectorizer
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- from transformers import pipeline
8
  import google.generativeai as genai
 
 
9
  import streamlit as st
 
 
 
 
 
 
10
 
11
- # Initialize Google Gemini AI API for summarization (ensure API key is set up)
12
- api_key = 'YOUR_GOOGLE_API_KEY'
13
  genai.configure(api_key=api_key)
14
 
15
- # Set up NER pipelines for skill and education extraction
16
- skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
17
- education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
 
 
18
 
19
- # Extract text from uploaded PDF file
20
  def input_pdf_text(uploaded_file):
21
- file_stream = io.BytesIO(uploaded_file.read())
22
  reader = pdf.PdfReader(file_stream)
23
  text = ""
24
  for page in reader.pages:
25
  text += page.extract_text()
26
  return text
27
 
28
- # Extract contact information using regular expressions (email, phone)
29
  def extract_contact_info(resume_text):
 
 
 
 
 
 
 
 
 
 
 
30
  email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
31
- phone_match = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}", resume_text)
32
  email = email_match.group(0) if email_match else "Not Available"
33
- phone = phone_match.group(0) if phone_match else "Not Available"
34
- return email, phone
35
-
36
- # Extract skills using NER model or regex
37
- def extract_skills(resume_text):
38
- ner_results = skill_extractor(resume_text)
39
- skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
40
- return ", ".join(skills) if skills else "Not Available"
41
-
42
- # Extract education details using NER model
43
- def extract_education(resume_text):
44
- ner_results = education_extractor(resume_text)
45
- education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
46
 
47
- # If no direct education found, use regex patterns to capture education
48
- if education_entities:
49
- return ", ".join(education_entities)
50
- else:
51
- edu_patterns = [
52
- r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech)",
53
- r"(University of [A-Za-z]+.*)"
54
- ]
55
- education = []
56
- for pattern in edu_patterns:
57
- matches = re.findall(pattern, resume_text)
58
- education.extend(matches)
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- return ", ".join(education) if education else "Not Available"
61
-
62
- # Calculate match percentage between resume and job description
63
- def calculate_match_percentage(resume_text, job_description):
64
- documents = [resume_text, job_description]
65
- tfidf_vectorizer = TfidfVectorizer(stop_words='english')
66
- tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
67
- cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
68
- return cosine_sim[0][0] * 100 # Return percentage match
69
-
70
- # Generate resume summary using Gemini API
71
- def generate_resume_summary(resume_text):
72
- prompt = f"Summarize the resume below in 5 bullet points:\n\n{resume_text}"
73
- model = genai.GenerativeModel('gemini-1.5-flash')
74
- response = model.generate_content(prompt)
75
- return response.text.strip()
76
-
77
- # Main function to process and analyze the resume and job description
78
- def analyze_resume_and_job(resume_pdf, job_description):
79
- resume_text = input_pdf_text(resume_pdf)
80
 
81
- # Extract candidate details
82
- email, phone = extract_contact_info(resume_text)
83
- skills = extract_skills(resume_text)
84
- education = extract_education(resume_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- # Calculate match percentage
87
- match_percentage = calculate_match_percentage(resume_text, job_description)
 
88
 
89
- # Generate summary using Gemini
90
- resume_summary = generate_resume_summary(resume_text)
91
 
92
- # Prepare results for display
93
- result_data = {
94
- 'Email': email,
95
- 'Phone': phone,
96
- 'Skills': skills,
97
- 'Education': education,
98
- 'Match_Percentage': match_percentage,
99
- 'Resume_Summary': resume_summary
100
- }
101
 
102
- # Create a DataFrame for display
103
- df = pd.DataFrame([result_data])
 
 
104
 
105
- return df
 
 
106
 
107
- # Streamlit interface
108
- st.title("Resume & Job Description Analyzer")
109
- st.write("Upload your resume PDF and a job description to analyze how well the resume matches the job requirements.")
110
 
111
- # File uploader for resume PDF
112
- resume_pdf = st.file_uploader("Upload Resume (PDF)", type="pdf")
113
- job_description = st.text_area("Enter Job Description", height=200)
114
 
115
- if resume_pdf and job_description:
116
- # Analyze the resume and job description
117
- df = analyze_resume_and_job(resume_pdf, job_description)
118
-
119
- # Display results
120
- st.subheader("Analysis Results")
121
- st.dataframe(df)
122
-
123
- st.subheader("Detailed Resume Summary")
124
- st.write(df['Resume_Summary'][0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  else:
126
- st.write("Please upload a resume and provide a job description to get started.")
 
1
  import re
2
+ import os
 
3
  import pandas as pd
 
 
 
4
  import google.generativeai as genai
5
+ import PyPDF2 as pdf
6
+ import io
7
  import streamlit as st
8
+ from transformers import pipeline
9
+
10
# The Gemini API key is supplied through the environment (a Hugging Face
# Space secret); fail fast with a clear message when it is absent or empty.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")

# Register the key with the google.generativeai client for all later calls.
genai.configure(api_key=api_key)
16
 
17
def get_gemini_response(input_text):
    """Send ``input_text`` to Gemini and return the text of its reply.

    A fresh ``gemini-1.5-flash`` model handle is created on every call and
    the ``.text`` attribute of the generated response is returned unchanged.
    """
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    return gemini.generate_content(input_text).text
22
 
23
def input_pdf_text(uploaded_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        uploaded_file: Binary file-like object exposing ``read()`` (for
            example the value returned by ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order. A page for which the
        PDF library yields no text contributes an empty string.
    """
    # Rewind first: if the stream was already consumed by an earlier read,
    # ``read()`` would otherwise return b"" and the PDF parser would fail.
    is_seekable = getattr(uploaded_file, "seekable", None)
    if callable(is_seekable) and is_seekable():
        uploaded_file.seek(0)

    # Copy the bytes into an independent in-memory buffer for the PDF reader.
    file_stream = io.BytesIO(uploaded_file.read())
    reader = pdf.PdfReader(file_stream)

    # ``or ""`` keeps the join safe if a page produces no text object;
    # join avoids repeated string concatenation on long documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
31
 
32
def extract_contact_info(resume_text):
    """Extract the candidate's name, e-mail address and phone number.

    The name is taken from the first person entity found by a Hugging Face
    NER pipeline; e-mail and phone number are found with regular expressions.

    Args:
        resume_text: Plain text of the resume (may be empty or ``None``).

    Returns:
        A ``(name, email, contact)`` tuple of strings. Any field that cannot
        be found is the literal string ``"Not Available"``.
    """
    resume_text = resume_text or ""
    name = "Not Available"

    # Skip the (expensive) model entirely when there is nothing to analyse,
    # e.g. a scanned PDF that produced no extractable text.
    if resume_text.strip():
        # NOTE(review): the pipeline is rebuilt on every call, which reloads a
        # large BERT checkpoint each time; consider caching it at app level.
        nlp = pipeline(
            "ner",
            model="dbmdz/bert-large-cased-finetuned-conll03-english",
            aggregation_strategy="simple",
        )

        # With aggregation, word-pieces are merged into whole entities and the
        # label is exposed as ``entity_group`` without the B-/I- prefix.
        # Filtering raw tokens on 'B-PER' returned at best a single word-piece,
        # and presumably missed most names because this checkpoint's raw tags
        # are usually 'I-PER' (verify against the model card).
        entities = nlp(resume_text)
        name = next(
            (entity['word'] for entity in entities if entity['entity_group'] == 'PER'),
            "Not Available",
        )

    # First e-mail-looking token in the text.
    email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
    email = email_match.group(0) if email_match else "Not Available"

    # First phone-number-looking token (optional country/area code, then 3+4 digits).
    contact_match = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}", resume_text)
    contact = contact_match.group(0) if contact_match else "Not Available"

    return name, email, contact
53
+
54
def extract_expected_years(job_description):
    """Return every "<N> year(s)" mention found in a job description.

    Matching is case-insensitive and also accepts the common "5+ years" and
    "5-year" spellings, which the previous pattern skipped.

    Args:
        job_description: Free-text job description (may be empty or ``None``).

    Returns:
        A list of the matched substrings in order of appearance, e.g.
        ``["5+ years", "1 year"]``; an empty list when nothing matches.
    """
    if not job_description:
        return []
    # digits, optional "+", at most one space or hyphen, then "year"/"years"
    return re.findall(r"\d+\+?[\s-]?years?", job_description, flags=re.IGNORECASE)
59
+
60
def extract_direct_team_leadership_years(text, current_year=None):
    """Sum the durations, in years, of all date/duration phrases in ``text``.

    Recognised phrases (case-insensitive):
      * ``from 2019 to 2022``    -> end year minus start year
      * ``from 2019 to present`` -> ``current_year`` minus start year
      * ``since 2018``           -> ``current_year`` minus start year
      * ``for 3 years``          -> the stated number of years

    Every matching phrase in the text is counted; the function does not check
    that the phrase actually refers to a leadership role.

    Args:
        text: Resume text to scan (may be empty or ``None``).
        current_year: Year used to close open-ended ranges. Defaults to the
            current calendar year.

    Returns:
        The total number of years as an ``int`` (0 when nothing matches).
        Ranges that would be negative contribute 0.
    """
    if not text:
        return 0
    if current_year is None:
        from datetime import date
        current_year = date.today().year

    total_years = 0

    # Each pattern is handled explicitly. re.findall returns plain strings for
    # single-group patterns, so dispatching on len(match) measured digit count
    # ("10" looked like a 2-tuple, "2018" matched no branch at all).
    for start, end in re.findall(r"from (\d{4}) to (\d{4})", text, re.IGNORECASE):
        total_years += max(0, int(end) - int(start))

    # Open-ended ranges run up to the current year.
    for start in re.findall(r"from (\d{4}) to present", text, re.IGNORECASE):
        total_years += max(0, current_year - int(start))

    for start in re.findall(r"since (\d{4})", text, re.IGNORECASE):
        total_years += max(0, current_year - int(start))

    # Explicit durations ("for 1 year" / "for 10 years").
    for years in re.findall(r"for (\d+) years?", text, re.IGNORECASE):
        total_years += int(years)

    return total_years
84
+
85
# Prompt template sent to Gemini. The `{text}` and `{jd}` placeholders are
# filled with the resume text and the job description via str.format below.
input_prompt = """
Act as a sophisticated Applicant Tracking System (ATS) with expertise in evaluating resumes specifically for management and team leadership roles. Your task is to analyze the resume in relation to the job description, focusing on direct and indirect team management experience, skills, and qualifications.

Instructions:
1. **Candidate's Name**: Identify and state the candidate’s full name as it appears in the resume.
2. **Direct Team Leadership Experience**: Clearly outline instances where the candidate has formally managed teams, led projects, or directly supervised team members. Quantify this experience in years wherever possible.
3. **Direct Management Experience**: Identify instances where the candidate has taken on roles involving management responsibilities, such as directly overseeing projects, setting goals for a team, conducting performance reviews, or handling direct reports. Quantify this experience in years, and provide specific examples when available.
4. **Relevant Skills and Qualifications**: List the candidate’s relevant skills and qualifications that align with the job description requirements. Mention specific technologies, methodologies, and leadership frameworks mentioned in the resume.
5. **Educational Background**: Summarize the candidate’s educational qualifications relevant to the role.
6. **Match Percentage Calculation**: Based on the candidate's experience and the keywords related to management and team leadership found in the job description, calculate a match percentage. Incorporate the years of leadership experience and skills alignment as factors in this match.

Provide a detailed output that includes:
- **Candidate's Name**: State the candidate's name.
- **Direct Team Leadership Experience (in years)**: Quantify and describe this experience.
- **Direct Management Experience (in years)**: Quantify and describe this experience with specific examples.
- **Relevant Skills and Qualifications**: List skills and qualifications relevant to management and team leadership.
- **Educational Background**: Provide a brief summary.
- **Match Percentage**: Set to 70%.

Input:
- Resume Text: "{text}"
- Job Description: "{jd}"
"""

# ---------------------------------------------------------------------------
# Streamlit page
# ---------------------------------------------------------------------------
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume and Job Description for Analysis")

# The two user inputs: the resume PDF and the free-text job description.
uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if not (uploaded_file and job_description):
    # At least one input is still missing.
    st.warning("Please upload a resume and provide a job description for analysis.")
else:
    # The Analyze button is only rendered once both inputs are present.
    analyze_button = st.button("Analyze")

    if analyze_button:
        # Pull the raw text out of the PDF, then the contact details.
        resume_text = input_pdf_text(uploaded_file)
        name, email, contact = extract_contact_info(resume_text)

        # The same regex-based estimate is used for both experience columns.
        direct_team_leadership_years = extract_direct_team_leadership_years(resume_text)
        direct_management_years = direct_team_leadership_years

        # Ask Gemini for the narrative analysis and trim surrounding whitespace.
        prompt = input_prompt.format(text=resume_text, jd=job_description)
        response_text = get_gemini_response(prompt)
        response_text_clean = response_text.strip()

        # NOTE(review): the percentage is a fixed value, not derived from the
        # model response, so the score below always evaluates to "Moderate".
        match_percentage = 70

        if match_percentage >= 80:
            job_description_match_score = "High"
        elif match_percentage >= 50:
            job_description_match_score = "Moderate"
        else:
            job_description_match_score = "Low"

        # One-row table: each column holds a single-element list.
        data = {
            'Candidate_Name': [name],
            'Email': [email],
            'Contact': [contact],
            'Direct_Team_Leadership_Experience_Years': [direct_team_leadership_years],
            'Direct_Management_Experience_Years': [direct_management_years],
            'Relevant_Skills_and_Qualifications': ["Placeholder for skills"],  # not yet populated
            'Educational_Background': ["Placeholder for education"],  # not yet populated
            'Model_Response': [response_text_clean],
            'Match_Percentage': [match_percentage],
            'Job_Description_Match_Score': [job_description_match_score]
        }
        df = pd.DataFrame(data)

        st.subheader("Analysis Results")
        st.dataframe(df)

        # NOTE(review): this writes the results to a fixed path in the app's
        # working directory in addition to offering the in-memory download.
        csv_filename = "ATS_Analysis_Results.csv"
        df.to_csv(csv_filename, index=False)

        st.download_button(
            label="Download Results as CSV",
            data=df.to_csv(index=False),
            file_name=csv_filename,
            mime="text/csv"
        )