DreamStream-1 committed on
Commit
b03ad52
·
verified ·
1 Parent(s): 4cae725

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -218
app.py CHANGED
@@ -6,238 +6,309 @@ from PyPDF2 import PdfReader
6
  import docx
7
  import re
8
  import google.generativeai as genai
9
- import time
10
  import concurrent.futures
11
  from fuzzywuzzy import fuzz
 
 
 
 
12
 
13
# Load pre-trained embedding model for basic analysis
sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Configure Google API for Gemini 1.5 Flash; fail fast when the key is absent.
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("Google API key not found. Please set GOOGLE_API_KEY.")
genai.configure(api_key=api_key)

# Maximum resumes to process
MAX_RESUMES = 10

# Key leadership and management skills screened for in candidates
required_skills = [
    "strategic planning", "team management", "project management",
    "decision making", "communication", "leadership",
    "conflict resolution", "delegation", "performance management",
    "budget management", "resource allocation", "staff development",
    "change management", "risk management", "problem solving",
    "negotiation", "executive leadership", "organizational skills",
    "business development", "stakeholder management", "collaboration",
    "emotional intelligence", "coaching", "mentoring",
    "time management", "cross-functional team leadership", "innovation",
    "organizational culture", "team motivation", "employee engagement",
    "organizational design", "continuous improvement",
    "decision-making under pressure", "adaptability", "accountability",
    "team building", "succession planning", "strategic partnerships",
    "executive presence", "influencing", "visionary leadership"
]

# Helper Functions
-
72
def extract_text_from_file(file_path):
    """Extract plain text from a .txt, .pdf, or .docx file.

    Returns an empty string for unsupported extensions.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    elif ext == ".pdf":
        reader = PdfReader(file_path)
        # extract_text() may return None for image-only pages; coerce to ""
        # so that join() cannot raise TypeError.
        return "".join(page.extract_text() or "" for page in reader.pages)
    elif ext == ".docx":
        doc = docx.Document(file_path)
        return " ".join(para.text for para in doc.paragraphs)
    else:
        return ""
85
-
86
def analyze_with_gemini(resume_text, job_desc):
    """Send the resume and job description to Gemini and return its analysis text."""
    prompt = f"""
    Analyze the resume with respect to the job description.
    Resume: {resume_text}
    Job Description: {job_desc}
    Extract:
    1. Candidate Name
    2. Email Address
    3. Contact Number
    4. Relevant Skills
    5. Educational Background
    6. Team Leadership Experience (years)
    7. Management Experience (years)
    8. Management Skills (e.g. strategic planning, team management, project management, etc.)
    9. Match Percentage (leadership and management focus)
    Provide a summary of qualifications in 5 bullet points.
    """
    model = genai.GenerativeModel('gemini-1.5-flash')
    reply = model.generate_content(prompt)
    return reply.text.strip()
105
-
106
def extract_management_details(gemini_response):
    """Pull leadership years, management years, and the skills line from a Gemini reply.

    Missing fields default to 0 / 0 / "".
    """
    leadership = re.search(r"Team Leadership Experience \(years\):\s*(\d+)", gemini_response)
    management = re.search(r"Management Experience \(years\):\s*(\d+)", gemini_response)
    skills = re.search(r"Management Skills\s*[:\-]?\s*(.*?)(?=\n|$)", gemini_response)

    return (
        int(leadership.group(1)) if leadership else 0,
        int(management.group(1)) if management else 0,
        skills.group(1) if skills else "",
    )
120
-
121
def extract_candidate_details(gemini_response):
    """Extract (name, email, contact) from a Gemini reply; absent fields become "N/A"."""
    values = []
    for label in ("Candidate Name", "Email Address", "Contact Number"):
        found = re.search(rf"{label}\s*[:\-]?\s*(.*?)(?=\n|$)", gemini_response)
        values.append(found.group(1) if found else "N/A")
    return tuple(values)
135
-
136
def calculate_role_score(role_keywords):
    """Return a 0-5 seniority score by fuzzy-matching known role titles in the text."""
    role_hierarchy = {
        "CEO": 5,
        "CIO": 5,
        "Director": 4,
        "VP": 4,
        "Manager": 3,
        "Team Lead": 2,
        "Junior": 1
    }

    text = role_keywords.lower()
    matched = [score for title, score in role_hierarchy.items()
               if fuzz.partial_ratio(title.lower(), text) > 80]
    # Highest matched title wins; 0 when nothing matches.
    return max(matched, default=0)
153
 
154
def calculate_advanced_match(leadership_years, management_years, skills, required_skills, role_keywords, max_leadership_exp=10, max_management_exp=10):
    """Compute a weighted 0-100 match score.

    Weights: 35% leadership experience, 35% management experience,
    20% fuzzy skill coverage, 10% role seniority.

    Bug fix: the role hierarchy tops out at 5, so the role score is scaled
    by 20 to map it onto 0-100. The previous *100 scaling produced up to
    500, letting the role component alone push the overall match past 100%.
    """
    leadership_weight = 0.35
    management_weight = 0.35
    skills_weight = 0.2
    role_weight = 0.1

    leadership_score = min(leadership_years / max_leadership_exp, 1.0) * 100
    management_score = min(management_years / max_management_exp, 1.0) * 100

    role_score = calculate_role_score(role_keywords) * 20

    skills_matched = sum(1 for skill in required_skills if fuzz.partial_ratio(skill.lower(), skills.lower()) > 80)
    total_skills = len(required_skills)
    # Guard against an empty required-skills list (would divide by zero).
    skill_match_score = (skills_matched / total_skills) * 100 if total_skills else 0.0

    overall_match = (leadership_score * leadership_weight) + \
                    (management_score * management_weight) + \
                    (skill_match_score * skills_weight) + \
                    (role_score * role_weight)
    return round(overall_match, 2)
 
175
 
176
def process_resume(resume, job_desc, progress_callback):
    """Analyze one uploaded resume and return a result-row dict.

    Never raises: extraction or analysis failures produce a row with "N/A"
    fields and an explanatory Gemini Analysis message.
    """
    resume_text = extract_text_from_file(resume.name)

    if not resume_text.strip():
        return {
            "Resume": resume.name,
            "Candidate Name": "N/A",
            "Email": "N/A",
            "Contact": "N/A",
            # Bug fix: use the same "<value>%" string format as the success
            # path so the DataFrame column holds a single, consistent type.
            "Overall Match Percentage": "0.0%",
            "Gemini Analysis": "Failed to extract text from resume."
        }

    try:
        gemini_analysis = analyze_with_gemini(resume_text, job_desc)
        leadership_years, management_years, skills = extract_management_details(gemini_analysis)
        role_keywords = gemini_analysis.lower()
        overall_match = calculate_advanced_match(leadership_years, management_years, skills, required_skills, role_keywords)
        name, email, contact = extract_candidate_details(gemini_analysis)
    except Exception as e:
        gemini_analysis = f"Gemini analysis failed: {str(e)}"
        name, email, contact = "N/A", "N/A", "N/A"
        overall_match = 0.0

    progress_callback(1)  # Update progress for this resume

    return {
        "Resume": resume.name,
        "Candidate Name": name,
        "Email": email,
        "Contact": contact,
        "Overall Match Percentage": f"{overall_match}%",
        "Gemini Analysis": gemini_analysis
    }
210
-
211
def analyze_resumes(resumes, job_desc):
    """Analyze up to MAX_RESUMES resumes in parallel.

    Always returns a pandas DataFrame. Bug fix: the over-limit path used to
    return a bare string, which breaks the gr.DataFrame output component.
    """
    progress = gr.Progress()
    results = []

    if len(resumes) > MAX_RESUMES:
        return pd.DataFrame([{"Error": f"Cannot upload more than {MAX_RESUMES} resumes."}])

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_resume, resume, job_desc, progress.update)
            for resume in resumes
        ]
        for future in concurrent.futures.as_completed(futures):
            results.append(future.result())

    return pd.DataFrame(results)
227
-
228
def download_results(results=None):
    """Write analysis rows to a CSV under /tmp and return the file path.

    Bug fix: the original referenced an undefined module-level ``results``
    and raised NameError. Rows are now passed in explicitly (backward
    compatible: calling with no argument writes an empty CSV).
    """
    results_df = pd.DataFrame(results if results is not None else [])
    results_df.to_csv("/tmp/analysis_results.csv", index=False)
    return "/tmp/analysis_results.csv"
233
-
234
# Define Gradio Interface.
# Fixes: gr.File has no ``file=`` keyword argument; analyze_resumes returns a
# single DataFrame, so exactly one output is declared; ``live=True`` re-ran
# the expensive analysis on every keystroke and is removed so it only runs
# on submit.
iface = gr.Interface(
    fn=analyze_resumes,
    inputs=[
        gr.File(label="Upload Resumes", file_count="multiple"),
        gr.Textbox(label="Enter Job Description"),
    ],
    outputs=gr.DataFrame(label="Analysis Results"),
    flagging_mode="never",
)

iface.launch()
 
 
 
 
 
 
 
 
6
  import docx
7
  import re
8
  import google.generativeai as genai
 
9
  import concurrent.futures
10
  from fuzzywuzzy import fuzz
11
+ from typing import List, Dict, Tuple, Any
12
+ from dataclasses import dataclass
13
+ import logging
14
+ from pathlib import Path
15
 
16
# Module-wide logging: timestamped records at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
22
+
23
@dataclass
class Config:
    """Tunable application constants."""

    MAX_RESUMES: int = 10          # hard cap on resumes per batch
    MAX_LEADERSHIP_EXP: int = 10   # years at which leadership score maxes out
    MAX_MANAGEMENT_EXP: int = 10   # years at which management score maxes out
    MODEL_NAME: str = 'paraphrase-MiniLM-L6-v2'
    GEMINI_MODEL: str = 'gemini-1.5-flash'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
class ResumeAnalyzer:
    """Screens resumes against a job description for leadership/management fit.

    Text extraction runs locally (.txt/.pdf/.docx); qualitative analysis is
    delegated to Gemini; scoring combines experience, fuzzy skill matching,
    and role seniority.
    """

    def __init__(self):
        self.config = Config()
        self._initialize_models()
        self.required_skills = self._load_required_skills()
        self.role_hierarchy = self._load_role_hierarchy()

    def _initialize_models(self) -> None:
        """Initialize the embedding model and configure the Gemini API.

        Raises:
            ValueError: if GOOGLE_API_KEY is not set in the environment.
        """
        try:
            self.sentence_model = SentenceTransformer(self.config.MODEL_NAME)

            api_key = os.getenv('GOOGLE_API_KEY')
            if not api_key:
                raise ValueError("Google API key not found. Please set GOOGLE_API_KEY.")
            genai.configure(api_key=api_key)

        except Exception as e:
            logger.error(f"Failed to initialize models: {str(e)}")
            raise

    @staticmethod
    def _load_required_skills() -> List[str]:
        """Return the list of leadership and management skills screened for."""
        return [
            "strategic planning", "team management", "project management",
            "decision making", "communication", "leadership",
            "conflict resolution", "delegation", "performance management",
            "budget management", "resource allocation", "staff development",
            "change management", "risk management", "problem solving",
            "negotiation", "executive leadership", "organizational skills",
            "business development", "stakeholder management", "collaboration",
            "emotional intelligence", "coaching", "mentoring",
            "time management", "cross-functional team leadership", "innovation",
            "organizational culture", "team motivation", "employee engagement",
            "organizational design", "continuous improvement",
            "decision-making under pressure", "adaptability", "accountability",
            "team building", "succession planning", "strategic partnerships",
            "executive presence", "influencing", "visionary leadership"
        ]

    @staticmethod
    def _load_role_hierarchy() -> Dict[str, int]:
        """Return the role-title -> seniority (1..5) map used for scoring."""
        return {
            "CEO": 5, "CIO": 5, "CFO": 5, "COO": 5,
            "Director": 4, "VP": 4, "Head": 4,
            "Manager": 3, "Senior": 3,
            "Team Lead": 2, "Lead": 2,
            "Junior": 1, "Associate": 1
        }

    def extract_text_from_file(self, file_path: str) -> str:
        """Extract text from a .txt, .pdf, or .docx file; '' on any failure."""
        try:
            file_path = Path(file_path)
            if not file_path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")

            ext = file_path.suffix.lower()
            if ext == ".txt":
                return file_path.read_text(encoding='utf-8')
            elif ext == ".pdf":
                with open(file_path, 'rb') as file:
                    reader = PdfReader(file)
                    # Bug fix: extract_text() may return None for image-only
                    # pages; coerce to "" so join() cannot raise TypeError.
                    return " ".join(page.extract_text() or "" for page in reader.pages)
            elif ext == ".docx":
                doc = docx.Document(file_path)
                return " ".join(para.text for para in doc.paragraphs)
            else:
                raise ValueError(f"Unsupported file format: {ext}")
        except Exception as e:
            logger.error(f"Error extracting text from {file_path}: {str(e)}")
            return ""

    def analyze_with_gemini(self, resume_text: str, job_desc: str) -> str:
        """Ask Gemini for a structured analysis; returns its raw text reply.

        Raises on API failure so the caller can record an error row.
        """
        try:
            prompt = f"""
            Analyze the resume with respect to the job description.
            Resume: {resume_text}
            Job Description: {job_desc}

            Please provide a structured analysis with the following information:
            1. Candidate Name:
            2. Email Address:
            3. Contact Number:
            4. Relevant Skills:
            5. Educational Background:
            6. Team Leadership Experience (years):
            7. Management Experience (years):
            8. Management Skills:
            9. Match Percentage:

            Summary of Qualifications:
            """

            model = genai.GenerativeModel(self.config.GEMINI_MODEL)
            response = model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            logger.error(f"Gemini analysis failed: {str(e)}")
            raise

    def extract_management_details(self, gemini_response: str) -> Tuple[int, int, str]:
        """Parse (leadership years, management years, skills line) from the reply.

        Missing fields default to 0 / 0 / "".
        """
        try:
            patterns = {
                'leadership': r"Team Leadership Experience \(years\):\s*(\d+)",
                'management': r"Management Experience \(years\):\s*(\d+)",
                'skills': r"Management Skills\s*[:\-]?\s*(.*?)(?=\n|$)"
            }

            matches = {
                key: re.search(pattern, gemini_response)
                for key, pattern in patterns.items()
            }

            leadership_years = int(matches['leadership'].group(1)) if matches['leadership'] else 0
            management_years = int(matches['management'].group(1)) if matches['management'] else 0
            skills = matches['skills'].group(1) if matches['skills'] else ""

            return leadership_years, management_years, skills
        except Exception as e:
            logger.error(f"Error extracting management details: {str(e)}")
            return 0, 0, ""

    def calculate_role_score(self, role_keywords: str) -> int:
        """Return the highest seniority level (0-5) fuzzily matched in the text."""
        try:
            seniority_score = 0
            for keyword, score in self.role_hierarchy.items():
                if fuzz.partial_ratio(keyword.lower(), role_keywords.lower()) > 80:
                    seniority_score = max(seniority_score, score)
            return seniority_score
        except Exception as e:
            logger.error(f"Error calculating role score: {str(e)}")
            return 0

    def calculate_advanced_match(self, leadership_years: int, management_years: int,
                                 skills: str, role_keywords: str) -> float:
        """Weighted 0-100 match: 35% leadership, 35% management, 20% skills, 10% role."""
        try:
            weights = {
                'leadership': 0.35,
                'management': 0.35,
                'skills': 0.20,
                'role': 0.10
            }

            leadership_score = min(leadership_years / self.config.MAX_LEADERSHIP_EXP, 1.0) * 100
            management_score = min(management_years / self.config.MAX_MANAGEMENT_EXP, 1.0) * 100

            # The hierarchy tops out at 5, so *20 maps role seniority onto 0-100.
            role_score = self.calculate_role_score(role_keywords) * 20

            skills_matched = sum(1 for skill in self.required_skills
                                 if fuzz.partial_ratio(skill.lower(), skills.lower()) > 80)
            skill_match_score = (skills_matched / len(self.required_skills)) * 100

            overall_match = sum([
                leadership_score * weights['leadership'],
                management_score * weights['management'],
                skill_match_score * weights['skills'],
                role_score * weights['role']
            ])

            return round(overall_match, 2)
        except Exception as e:
            logger.error(f"Error calculating advanced match: {str(e)}")
            return 0.0

    def process_resume(self, resume: Any, job_desc: str,
                       progress_callback: callable) -> Dict[str, Any]:
        """Analyze one uploaded resume and return a result row (never raises)."""
        try:
            resume_text = self.extract_text_from_file(resume.name)
            if not resume_text.strip():
                return self._create_error_result(resume.name, "Failed to extract text from resume")

            gemini_analysis = self.analyze_with_gemini(resume_text, job_desc)
            leadership_years, management_years, skills = self.extract_management_details(gemini_analysis)
            overall_match = self.calculate_advanced_match(
                leadership_years, management_years, skills, gemini_analysis.lower()
            )

            result = {
                "Resume": resume.name,
                "Candidate Name": self._extract_field(gemini_analysis, "Candidate Name"),
                "Email": self._extract_field(gemini_analysis, "Email Address"),
                "Contact": self._extract_field(gemini_analysis, "Contact Number"),
                "Overall Match Percentage": f"{overall_match}%",
                "Gemini Analysis": gemini_analysis
            }

            if progress_callback:
                progress_callback(1)

            return result
        except Exception as e:
            logger.error(f"Error processing resume {resume.name}: {str(e)}")
            return self._create_error_result(resume.name, str(e))

    @staticmethod
    def _extract_field(text: str, field: str) -> str:
        """Extract a labelled field's value from the analysis text ("N/A" if absent)."""
        pattern = f"{field}\\s*[:\\-]?\\s*(.*?)(?=\\n|$)"
        match = re.search(pattern, text)
        return match.group(1) if match else "N/A"

    @staticmethod
    def _create_error_result(resume_name: str, error_message: str) -> Dict[str, str]:
        """Build a result row with the same columns as a successful analysis."""
        return {
            "Resume": resume_name,
            "Candidate Name": "N/A",
            "Email": "N/A",
            "Contact": "N/A",
            "Overall Match Percentage": "0.0%",
            "Gemini Analysis": f"Analysis failed: {error_message}"
        }

    def analyze_resumes(self, resumes: List[Any], job_desc: str) -> pd.DataFrame:
        """Analyze multiple resumes in parallel; always returns a DataFrame."""
        # Bug fix: Gradio passes None when no file is uploaded; guard before
        # calling len() to avoid a TypeError.
        if not resumes:
            return pd.DataFrame([{"Error": "No resumes uploaded."}])

        if len(resumes) > self.config.MAX_RESUMES:
            return pd.DataFrame([{
                "Error": f"Cannot process more than {self.config.MAX_RESUMES} resumes at once."
            }])

        progress = gr.Progress()

        try:
            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(self.process_resume, resume, job_desc, progress.update)
                    for resume in resumes
                ]
                results = [future.result() for future in concurrent.futures.as_completed(futures)]

            return pd.DataFrame(results)
        except Exception as e:
            logger.error(f"Error in batch resume analysis: {str(e)}")
            return pd.DataFrame([{"Error": f"Analysis failed: {str(e)}"}])
278
+
279
# Create Gradio interface
def create_interface():
    """Build and return the Gradio interface backed by a fresh ResumeAnalyzer."""
    analyzer = ResumeAnalyzer()

    resume_input = gr.File(
        label="Upload Resumes (max 10)",
        file_count="multiple"
    )
    jd_input = gr.Textbox(
        label="Enter Job Description",
        placeholder="Paste the job description here..."
    )
    results_output = gr.DataFrame(label="Analysis Results")

    return gr.Interface(
        fn=analyzer.analyze_resumes,
        inputs=[resume_input, jd_input],
        outputs=[results_output],
        title="Resume Analysis Tool",
        description="Upload resumes and a job description to analyze candidates' leadership and management potential.",
        examples=[],
        cache_examples=False,
        theme="default"
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
if __name__ == "__main__":
    # Serve on all interfaces at the default Gradio port; debug mode surfaces
    # tracebacks in the console.
    create_interface().launch(
        share=False,
        debug=True,
        server_name="0.0.0.0",
        server_port=7860,
    )