DreamStream-1 committed on
Commit
01e59a7
·
verified ·
1 Parent(s): 9221a1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -131
app.py CHANGED
@@ -1,112 +1,86 @@
1
  import gradio as gr
2
- from sentence_transformers import SentenceTransformer, util
3
- import docx
4
  import os
5
- from PyPDF2 import PdfReader
 
6
  import requests
7
- import pandas as pd
8
 
9
- # Load pre-trained model for sentence embedding
10
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
11
 
12
- # Define maximum number of resumes
13
- MAX_RESUMES = 10
14
-
15
- # Function to fetch Google API key from environment variable
16
- def get_google_api_key():
17
- api_key = os.getenv('GOOGLE_API_KEY') # Fetching the API key from environment variables
18
- if not api_key:
19
- raise ValueError("Google API key not found in environment variables.")
20
- return api_key
21
-
22
- # Function to extract text from resume (handles .txt, .pdf, .docx)
23
- def extract_text_from_resume(resume_file):
24
- file_extension = os.path.splitext(resume_file)[1].lower()
25
- if file_extension not in ['.txt', '.pdf', '.docx']:
26
- return "Unsupported file format"
27
-
28
- if file_extension == '.txt':
29
- return read_text_file(resume_file)
30
- elif file_extension == '.pdf':
31
- return read_pdf_file(resume_file)
32
- elif file_extension == '.docx':
33
- return read_docx_file(resume_file)
34
-
35
- return "Failed to read the resume text."
36
-
37
- def read_text_file(file_path):
38
- with open(file_path, 'r') as file:
39
- return file.read()
40
-
41
- def read_pdf_file(file_path):
42
- reader = PdfReader(file_path)
43
- text = ""
44
- for page in reader.pages:
45
- text += page.extract_text()
46
- return text
47
-
48
- def read_docx_file(file_path):
49
- doc = docx.Document(file_path)
50
- text = ""
51
- for para in doc.paragraphs:
52
- text += para.text
53
- return text
54
-
55
- # System prompt to extract candidate details from the resume
56
- def system_prompt_to_extract_info(resume_text):
57
- prompt = f"""
58
- Extract the following information from the resume:
59
- 1. Candidate's Full Name
60
- 2. Candidate's Email Address
61
- 3. Candidate's Contact Number
62
 
63
- Resume Text: {resume_text}
 
64
 
65
- Return the results in the following format:
66
- - Name: [Extracted Name]
67
- - Email: [Extracted Email]
68
- - Contact: [Extracted Contact Number]
69
- """
70
- return prompt
71
-
72
- # Function to extract candidate information from resume text
 
 
 
 
 
73
  def extract_entities_via_gemini(resume_text):
74
- api_key = get_google_api_key() # Fetch the API key from environment variables
75
- endpoint = "https://gemini.googleapis.com/v1/documents:analyzeEntities" # Placeholder API endpoint (adjust as necessary)
76
-
77
- headers = {
78
- "Authorization": f"Bearer {api_key}",
79
- "Content-Type": "application/json"
80
- }
81
-
82
- document = {
83
- "document": {
84
- "type": "PLAIN_TEXT",
85
- "content": resume_text
86
- }
87
- }
88
 
89
- # Send request to Gemini or another NLP API
90
- response = requests.post(endpoint, headers=headers, json=document)
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- if response.status_code != 200:
93
- return {"error": "Failed to extract entities from resume", "status_code": response.status_code, "response": response.text}
94
-
95
- # Process the response from the Gemini API (or similar NLP API)
96
- entities = response.json().get('entities', [])
97
- extracted_info = {"name": "Unknown Candidate", "email": "No Email", "contact": "No Contact"}
98
-
99
- for entity in entities:
100
- if entity['type'] == 'PERSON':
101
- extracted_info['name'] = entity['name']
102
- if entity['type'] == 'EMAIL':
103
- extracted_info['email'] = entity['name']
104
- if entity['type'] == 'PHONE_NUMBER':
105
- extracted_info['contact'] = entity['name']
106
-
107
- return extracted_info
108
 
109
- # Function to check similarity between resumes and job description
 
 
 
 
 
 
 
 
 
 
110
  def check_similarity(job_description, resume_files):
111
  results = []
112
  job_emb = model.encode(job_description, convert_to_tensor=True)
@@ -157,37 +131,24 @@ def check_similarity(job_description, resume_files):
157
  csv_file_path = save_results_to_csv(results)
158
  return results, csv_file_path
159
 
160
- # Function to save results to CSV
161
- def save_results_to_csv(results):
162
- df = pd.DataFrame(results, columns=["Resume File", "Similarity Score (%)", "Eligibility", "Candidate Name", "Leadership Experience", "Email", "Contact"])
163
- csv_file = "/tmp/results.csv"
164
- df.to_csv(csv_file, index=False)
165
- return csv_file # Return the file path
166
-
167
- # Gradio Interface Components
168
- job_desc_input = gr.File(label="Upload Job Description (TXT)", type="filepath")
169
- resumes_input = gr.Files(label="Upload Resumes (TXT, DOCX, PDF)", type="filepath")
170
-
171
- # Gradio Outputs
172
- results_output = gr.Dataframe(headers=[
173
- "Resume File",
174
- "Similarity Score (%)",
175
- "Eligibility",
176
- "Candidate Name",
177
- "Leadership Experience",
178
- "Email",
179
- "Contact"],
180
- label="Analysis Results"
181
- )
182
-
183
- # Gradio Interface
184
- interface = gr.Interface(
185
- fn=check_similarity,
186
- inputs=[job_desc_input, resumes_input],
187
- outputs=[results_output, gr.File(label="Download CSV")], # Now works properly without value
188
- title="HR Assistant - Resume Screening & Leadership Experience",
189
- description="Upload job description and resumes to screen candidates for managerial and team leadership roles and extract candidate details.",
190
- allow_flagging="never"
191
- )
192
-
193
- interface.launch()
 
1
  import gradio as gr
 
 
2
  import os
3
+ import csv
4
+ import re
5
  import requests
6
+ from sentence_transformers import SentenceTransformer, util
7
 
8
+ # Initialize Sentence-Transformer model
9
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
10
 
11
+ # Define a function to extract leadership experience from resume text
12
def extract_leadership_experience(resume_text):
    """Return the leadership-related keywords that occur in a resume.

    Matching is case-insensitive and anchored on word boundaries, so
    "led" does not match inside "misled".

    Args:
        resume_text: Plain text of the resume. ``None`` or an empty string
            is treated as a resume with no leadership experience.

    Returns:
        A comma-separated string of the matched keywords, in the order they
        are listed below, or "No leadership experience found" when nothing
        matches.
    """
    # Leadership-related keywords/phrases, in the order they are reported.
    leadership_keywords = [
        "led", "managed", "team lead", "supervised", "coordinated", "directed",
        "oversaw", "responsible for", "led a team", "executed", "mentored",
        "project manager", "leadership role", "department head", "team captain",
    ]

    if not resume_text:
        return "No leadership experience found"

    # Lower-case once so every keyword comparison is case-insensitive.
    resume_text_lower = resume_text.lower()

    # The keyword list has no duplicates, so a list (not a set) keeps the
    # output order stable from run to run.
    leadership_experience = [
        keyword
        for keyword in leadership_keywords
        if re.search(r"\b" + re.escape(keyword) + r"\b", resume_text_lower)
    ]

    if leadership_experience:
        return ", ".join(leadership_experience)
    return "No leadership experience found"
34
+
35
# Extract contact info through the (placeholder) Gemini API endpoint.
def extract_entities_via_gemini(resume_text):
    """Ask the remote endpoint for the candidate's name, email and contact.

    The URL below is a placeholder and must be replaced with the real
    endpoint. Any failure (network error, timeout, non-200 status, or a body
    that is not a JSON object) yields the default values instead of raising,
    so one bad resume does not abort the whole screening run.

    Args:
        resume_text: Plain text of the resume, sent as the ``text`` form field.

    Returns:
        A dict with the keys ``"name"``, ``"email"`` and ``"contact"``.
    """
    defaults = {
        "name": "Unknown",
        "email": "No Email",
        "contact": "No Contact",
    }

    try:
        response = requests.post(
            "https://your-gemini-api-endpoint.com",  # Replace with actual endpoint
            data={"text": resume_text},
            timeout=10,  # never let a stalled endpoint hang the request handler
        )
    except requests.RequestException:
        # DNS failure, refused connection, timeout, ...
        return defaults

    if response.status_code != 200:
        return defaults

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON.
        return defaults

    if not isinstance(data, dict):
        return defaults

    return {
        "name": data.get("name", "Unknown"),
        "email": data.get("email", "No Email"),
        "contact": data.get("contact", "No Contact"),
    }
57
 
58
+ # Function to extract text from resumes (assumes .pdf or .txt files)
59
def extract_text_from_resume(resume_file):
    """Return the text content of an uploaded resume.

    Args:
        resume_file: Either a filesystem path (``str`` / ``os.PathLike``) or
            an object exposing the path as ``.name``; both forms are
            accepted so the function works whichever one the upload widget
            hands over.

    Returns:
        The file's text for ``.txt`` files, a fixed placeholder string for
        ``.pdf`` files, and ``""`` for unsupported types, missing input, or
        files that cannot be read.
    """
    if isinstance(resume_file, (str, os.PathLike)):
        path = os.fspath(resume_file)
    else:
        path = getattr(resume_file, "name", None)

    if not isinstance(path, str) or not path:
        return ""

    # Compare the extension case-insensitively so "CV.TXT" is accepted too.
    extension = os.path.splitext(path)[1].lower()

    if extension == ".txt":
        try:
            # Decode as UTF-8 and replace undecodable bytes rather than
            # dropping the whole resume because of one stray character.
            with open(path, "r", encoding="utf-8", errors="replace") as file:
                return file.read()
        except OSError:
            # Missing or unreadable file: treat as an empty resume.
            return ""

    if extension == ".pdf":
        # TODO: real PDF text extraction is not implemented yet. Every PDF
        # currently yields this same placeholder, so its similarity score
        # is meaningless until a PDF reader is wired in.
        return "Extracted text from PDF file"

    return ""
 
 
72
 
73
+ # Function to save results to CSV
74
def save_results_to_csv(results):
    """Write the screening results to a CSV file and return its path.

    Args:
        results: Iterable of rows; each row is a sequence whose values line
            up with the header written below.

    Returns:
        The path of the CSV file that was written.
    """
    # NOTE: the path is fixed, so every call overwrites the previous file.
    csv_file_path = "/tmp/resume_results.csv"
    header = [
        "Resume Name", "Similarity Score (%)", "Eligibility", "Name",
        "Leadership Experience", "Email", "Contact",
    ]
    # Explicit UTF-8 so non-ASCII candidate names do not depend on the
    # platform's default locale encoding; newline="" is what csv requires.
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(results)
    return csv_file_path
82
+
83
+ # Function to check similarity and process resumes
84
  def check_similarity(job_description, resume_files):
85
  results = []
86
  job_emb = model.encode(job_description, convert_to_tensor=True)
 
131
  csv_file_path = save_results_to_csv(results)
132
  return results, csv_file_path
133
 
134
+ # Function to download the results as a CSV file
135
def download_results(results):
    """Write ``results`` out via ``save_results_to_csv`` and return the CSV path."""
    csv_path = save_results_to_csv(results)
    return csv_path
137
+
138
+ # Define Gradio Interface
139
with gr.Blocks() as demo:
    with gr.Row():
        job_desc_input = gr.Textbox(label="Job Description", lines=3)
        resume_input = gr.Files(label="Upload Resumes", file_count="multiple", file_types=[".pdf", ".txt"])

    results_output = gr.Dataframe(headers=["Resume Name", "Similarity Score (%)", "Eligibility", "Name", "Leadership Experience", "Email", "Contact"])

    # Button that triggers the similarity check
    check_button = gr.Button("Check Similarity")

    # Named output for the CSV. check_similarity already returns the CSV path
    # as its second value, so the component needs no callback of its own
    # (gr.File does not define a `file=` argument).
    csv_output = gr.File(label="Download CSV")

    # Wire the button: (job description, resumes) -> (results table, CSV file)
    check_button.click(
        check_similarity,
        inputs=[job_desc_input, resume_input],
        outputs=[results_output, csv_output],
    )

# Launch the Gradio interface
demo.launch()
154
+