Spaces:
Runtime error
Runtime error
Commit
·
e989038
1
Parent(s):
59eb4e0
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,662 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List, Union, Tuple, Dict
|
3 |
+
from langchain.chains.question_answering import load_qa_chain
|
4 |
+
from langchain.document_loaders import UnstructuredFileLoader
|
5 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
6 |
+
from langchain.llms import OpenAI as OpenAILLM
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
from langchain.vectorstores import FAISS
|
9 |
+
import gradio as gr
|
10 |
+
from openai import OpenAI
|
11 |
+
import seaborn as sns
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import pandas as pd
|
14 |
+
import logging
|
15 |
+
from PyPDF2 import PdfReader
|
16 |
+
import re
|
17 |
+
import plotly.graph_objects as go
|
18 |
+
import csv
|
19 |
+
|
20 |
+
# --- Logging configuration -------------------------------------------------
# All records go to a rotating-style append log file with a verbose format
# that includes timestamp, level, file, line and function name.
logging.basicConfig(
    filename='Resume_Analyzer.log',  # You can adjust the log file name here
    filemode='a',
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)
LOGGER = logging.getLogger(__name__)

# Desired level as a string; unknown values silently fall back to INFO.
log_level_env = 'INFO'  # You can adjust the log level here
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
# dict.get with a default replaces the original if/else lookup — same result.
log_level = log_level_dict.get(log_level_env, log_level_dict['INFO'])
LOGGER.setLevel(log_level)
|
42 |
+
|
43 |
+
class JobPotral:
    """Gradio-based job portal tool.

    Bundles four features behind one UI:
      * document question-answering over an uploaded file (LangChain + FAISS),
      * quick Plotly charts over an uploaded members CSV,
      * resume vs. job-description match scoring via the OpenAI chat API,
      * review sentiment splitting/counting via the OpenAI completions API.

    NOTE(review): the class name keeps the original "Potral" spelling because
    callers reference it; renaming would break the external interface.
    """

    def __init__(self) -> None:
        """
        Initialize the JobPotral object.

        Constructs the OpenAI client (the client itself reads the API key
        from the ``OPENAI_API_KEY`` environment variable) and an empty
        accumulator string used later by the review-analysis methods.
        """
        self.client = OpenAI()

        # Accumulates raw completion text; read by count_reviews() after
        # split_reviews() has populated it.
        self.answer = ""

    def get_empty_state(self) -> dict:
        """
        Get an empty state for the knowledge base.

        Returns:
        - dict: An empty state dictionary (``{"knowledge_base": None}``).
        """
        LOGGER.info("Creating Empty Dictionary...")

        return {"knowledge_base": None}

    def create_knowledge_base(self, docs: List[str]) -> FAISS:
        """
        Create a knowledge base from a set of documents.

        Args:
        - docs (list): List of documents to create a knowledge base from.

        Returns:
        - knowledge_base: The created FAISS knowledge base.

        Raises:
        - Exception: Re-raised after logging if splitting/embedding fails.
        """
        try:
            LOGGER.info("Creating Knowledge Base...")

            # Split into fixed-size chunks so embeddings stay within limits.
            text_splitter = CharacterTextSplitter(
                separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
            )
            chunks = text_splitter.split_documents(docs)

            # Create embeddings (uses the OpenAI embeddings endpoint).
            embeddings = OpenAIEmbeddings()

            # Create the FAISS vector store from the chunks.
            knowledge_base = FAISS.from_documents(chunks, embeddings)

            return knowledge_base

        except Exception as e:
            LOGGER.error(f"Error creating knowledge base: {str(e)}")
            raise

    def upload_file(self, file_obj: gr.File) -> Tuple[str, Union[str, Dict[str, FAISS]]]:
        """
        Upload a file and create a knowledge base.

        Args:
        - file_obj: File object representing the uploaded file.

        Returns:
        - tuple: (file name, ``{"knowledge_base": <FAISS store>}``) — the
          dict feeds the Gradio ``state`` component.

        Raises:
        - Exception: Re-raised after logging if loading/indexing fails.
        """
        try:
            LOGGER.info("Unstructuring Files...")

            # "fast" strategy trades extraction quality for speed.
            loader = UnstructuredFileLoader(file_obj.name, strategy="fast")

            # Load the document(s) using the file loader.
            docs = loader.load()

            # Build the searchable knowledge base from the loaded documents.
            knowledge_base = self.create_knowledge_base(docs)

            return file_obj.name, {"knowledge_base": knowledge_base}

        except Exception as e:
            LOGGER.error(f"Error uploading file: {str(e)}")
            raise

    def answer_question(self, question: str, state: Dict[str, Union[None, Dict[str, FAISS]]], chat_history) -> str:
        """
        Answer a question using the knowledge base.

        Args:
        - question (str): The question to answer.
        - state (dict): The state containing the knowledge base.
        - chat_history: Gradio chat history list; mutated in place.

        Returns:
        - tuple: ("", updated chat history) — the empty string clears the
          textbox. (Docstring fixed: the original claimed a plain ``str``.)

        Raises:
        - Exception: Re-raised after logging if retrieval or the LLM fails.
        """
        try:
            # Fixed typo in log message ("Responce" -> "Response").
            LOGGER.info("Generating Response From Model...")

            # Access the knowledge base from the state.
            knowledge_base = state["knowledge_base"]

            # Retrieve the chunks most similar to the question.
            docs = knowledge_base.similarity_search(question)

            # Initialize the OpenAI LLM wrapper.
            llm = OpenAILLM()

            # "stuff" chain: all retrieved docs are stuffed into one prompt.
            chain = load_qa_chain(llm, chain_type="stuff")

            # Run the question-answering chain on the retrieved documents.
            response = chain.run(input_documents=docs, question=question)

            # Append the question and response to the chat history.
            chat_history.append((question, response))

            # Empty string clears the input box; history updates the chatbot.
            return "", chat_history

        except Exception as e:
            LOGGER.error(f"Error answering question: {str(e)}")
            raise

    def get_graph(self, file_path: str) -> Tuple[go.Figure, go.Figure, go.Figure]:
        """
        Generate three types of charts based on data from a CSV file.

        Parameters:
        - file_path: Uploaded-file object; ``.name`` is the CSV path.
          The CSV must contain 'Domain', 'Working Time' and
          'Career Gap (years)' columns.

        Returns:
        Tuple[go.Figure, go.Figure, go.Figure]: Bar chart (members per
        domain), pie chart (working time), histogram (career gaps).

        Raises:
        - Exception: Re-raised after logging (e.g. missing columns).
        """
        try:
            LOGGER.info("Create graph for CSV file...")

            # Read data from CSV file into a DataFrame.
            df = pd.read_csv(file_path.name)

            # Chart 1: Bar chart - Number of members by domain.
            domain_counts = df['Domain'].value_counts()
            domain_fig = go.Figure(go.Bar(x=domain_counts.index, y=domain_counts, marker_color='skyblue'))
            domain_fig.update_layout(title='Number of Members by Domain', xaxis_title='Domain', yaxis_title='Number of Members')

            # Chart 2: Pie chart - Distribution of working time.
            working_time_counts = df['Working Time'].value_counts()
            working_time_fig = go.Figure(go.Pie(labels=working_time_counts.index, values=working_time_counts,
                                                pull=[0.1, 0], marker_colors=['lightcoral', 'lightskyblue']))
            working_time_fig.update_layout(title='Distribution of Working Time')

            # Chart 3: Histogram - Distribution of career gaps.
            career_gap_fig = go.Figure(go.Histogram(x=df['Career Gap (years)'], nbinsx=20, marker_color='lightgreen',
                                                    marker_line_color='black', marker_line_width=1.2))
            career_gap_fig.update_layout(title='Distribution of Career Gaps', xaxis_title='Career Gap (years)', yaxis_title='Number of Members')

            return domain_fig, working_time_fig, career_gap_fig

        except Exception as e:
            LOGGER.error(f"Error in get_graph: {str(e)}")
            raise

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """
        Extracts text from a PDF file.

        Args:
            pdf_path (str): The path to the PDF file.

        Returns:
            str: The extracted text from the PDF. Pages that fail to
            extract are logged and skipped rather than aborting the whole
            document.

        Raises:
        - Exception: Re-raised after logging if the PDF cannot be opened.
        """
        text = ''
        try:
            LOGGER.info("Extract text from pdf...")

            # Load PDF document.
            pdf = PdfReader(pdf_path)

            # Extract text from each page, accumulating into `text`.
            for page_number in range(len(pdf.pages)):

                try:
                    page = pdf.pages[page_number]

                    # Fix: PyPDF2's extract_text() can return None for
                    # image-only pages; `or ''` avoids a TypeError on `+=`.
                    text += page.extract_text() or ''
                except Exception as e:
                    LOGGER.error(f"Error extracting text from page {page_number + 1}: {e}")

            return text

        except Exception as e:
            LOGGER.error(f"Error reading PDF file: {e}")
            raise

    def matching_percentage(self, resume_path: str, job_description_path: str) -> Tuple[str, go.Figure]:
        """
        Assess the matching percentage between a resume and a job description using the OpenAI GPT-3.5-turbo model.

        Parameters:
        - resume_path: Uploaded resume file object (PDF); ``.name`` is the path.
        - job_description_path: Uploaded job-description file object (PDF).

        Returns:
        Tuple[str, go.Figure]: The model's analysis text and a pie chart of
        the matched percentage.

        Raises:
        - Exception: Re-raised after logging (extraction, API or parsing errors).
        """
        try:
            LOGGER.info("Get matching percentage...")

            # Extract text from the resume and job description PDFs.
            resume = self.extract_text_from_pdf(resume_path.name)
            job_description = self.extract_text_from_pdf(job_description_path.name)

            # Conversation prompt instructing the model to emit a fixed,
            # regex-parseable format (see get_ploty).
            conversation = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"""Given the job description and the resume, assess the matching percentage to 100 and if 100 percentage not matched mention the remaining percentage with reason. **Job Description:**{job_description}**Resume:**{resume}
**Detailed Analysis:**
the result should be in this format:
Matched Percentage: [matching percentage].
Reason : [Mention Reason and keys from Job Description and Resume get this matched percentage.].
Skills To Improve : [Mention the skills How to improve and get match the given Job Description].
Keywords : [matched key words from Job Description and Resume].
"""}
            ]

            # Call OpenAI GPT-3.5-turbo; temperature 0 for deterministic output.
            chat_completion = self.client.chat.completions.create(
                model = "gpt-3.5-turbo",
                messages = conversation,
                max_tokens=500,
                temperature=0
            )

            matched_result = chat_completion.choices[0].message.content

            # Generate a Plotly figure visualising the matched percentage.
            fig = self.get_ploty(matched_result)

            return matched_result, fig

        except Exception as e:
            LOGGER.error(f"Error in matching_percentage: {str(e)}")
            raise

    def get_ploty(self, result: str) -> go.Figure:
        """
        Extracts matched percentage from the input result and creates a pie chart using Plotly.

        Parameters:
        - result (str): The input string containing information about the matched percentage.

        Returns:
        - go.Figure: Plotly figure object representing the pie chart.

        Raises:
        - ValueError: If no percentage can be found in ``result``.
        - Exception: Re-raised after logging for any other failure.
        """
        try:
            LOGGER.info("Create Pie chart for Matched percentage...")

            # Preferred format: "Matched Percentage: NN%" (case-insensitive).
            match_percentage = re.search(r'matched percentage: (\d+)%', result, re.IGNORECASE)

            if match_percentage:
                matched_percentage = int(match_percentage.group(1))

            else:
                # Fallback: accept any "NN%" anywhere in the text.
                match_percentage = re.search(r'(\d+)%', result, re.IGNORECASE)
                if match_percentage is None:
                    # Fix: the original called .group(1) unconditionally and
                    # crashed with AttributeError when nothing matched;
                    # raise a clear, specific error instead.
                    raise ValueError(f"No percentage found in result: {result!r}")
                matched_percentage = int(match_percentage.group(1))

            # Creating a pie chart with plotly.
            labels = ['Matched', 'Not Matched']
            values = [matched_percentage, 100 - matched_percentage]

            fig = go.Figure(data=[go.Pie(labels=labels, values=values, pull=[0.1, 0])])
            fig.update_layout(title='Matched Percentage')

            return fig

        except Exception as e:
            LOGGER.error(f"Error processing result:{str(e)}")
            raise

    def count_reviews(self) -> go.Figure:
        """
        Count and visualize the distribution of positive, negative, and neutral reviews.

        Reads ``self.answer`` (populated earlier by split_reviews) and counts
        review lines under each category heading.

        Returns:
            go.Figure: Plotly bar chart of positive/negative/neutral counts.

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            LOGGER.info("Count reviews...")

            # Extracted data from the reviews (text accumulated by split_reviews).
            data = self.answer

            # Sections are separated by blank lines; first line of each
            # section is its category heading.
            sections = [section.strip() for section in data.split("\n\n")]

            positive_count = 0
            neutral_count = 0
            negative_count = 0

            # Count review lines under each category heading.
            for section in sections:
                lines = section.split('\n')

                if len(lines) > 1:
                    category = lines[0].strip()
                    reviews = lines[1:]
                    count = len(reviews)

                    # "Suggestion" reviews are treated as neutral sentiment.
                    if "Positive" in category:
                        positive_count += count
                    elif "Suggestion" in category:
                        neutral_count += count
                    elif "Negative" in category:
                        negative_count += count

            # Data for the bar graph.
            labels = ['Positive', 'Negative', 'Neutral']
            counts = [positive_count, negative_count, neutral_count]

            # Creating the bar graph using Plotly.
            fig = go.Figure(data=[go.Bar(x=labels, y=counts, marker=dict(color=['green', 'red', 'gray']))])

            fig.update_layout(title='Distribution of Reviews',
                              xaxis=dict(title='Sentiment'),
                              yaxis=dict(title='Number of Reviews'))

            return fig

        except Exception as e:
            LOGGER.error(f"Error in count_reviews: {e}")
            raise

    def csv_to_list(self, file_path: str) -> list:
        """
        Read a CSV file and convert it to a list.

        Args:
            file_path: Uploaded-file object; ``.name`` is the CSV path.

        Returns:
            list: One string per data row (header skipped); columns within
            a row are concatenated with no separator.

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            LOGGER.info("Extract CSV...")
            data_list = []

            # Open the CSV file and read its contents.
            with open(file_path.name, 'r', newline='') as csv_file:

                csv_reader = csv.reader(csv_file)

                next(csv_reader, None)  # Skip the header row

                for row in csv_reader:
                    # Convert each row to a single string and collect it.
                    data_list.append("".join(row))

            return data_list

        except Exception as e:
            LOGGER.error(f"Error in csv_to_list: {e}")
            raise

    def extract_top_reviews(self, file_path: str) -> tuple:
        """
        Extract the top suggestion, positive, and negative reviews from a CSV file.

        Args:
            file_path: Uploaded-file object pointing at the reviews CSV.

        Returns:
            tuple: (top suggestion reviews, top positive reviews,
            top negative reviews), each as a newline-joined string.

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            LOGGER.info("Extract top reviews...")

            # Number of top reviews to keep per category.
            top_count = 5

            # Split the reviews into suggestion, positive, and negative categories.
            suggestion_reviews, positive_reviews, negative_reviews = self.split_reviews(file_path)

            # Top suggestion reviews (one review per line assumed).
            reviews_list = suggestion_reviews.split("\n")
            suggest_reviews = "\n\n ".join(reviews_list[:top_count])

            # Top positive reviews.
            reviews_list = positive_reviews.split("\n")
            pos_reviews = "\n\n ".join(reviews_list[:top_count])

            # Top negative reviews.
            reviews_list = negative_reviews.split("\n")
            neg_reviews = "\n\n ".join(reviews_list[:top_count])

            return suggest_reviews, pos_reviews, neg_reviews

        except Exception as e:
            LOGGER.error(f"Error in extract_top_reviews: {e}")
            raise

    def split_reviews(self, file_path: str) -> tuple:
        """
        Split reviews into suggestion, positive, and negative categories using OpenAI API.

        Args:
            file_path: Uploaded-file object pointing at the reviews CSV.

        Returns:
            tuple: Suggestion reviews, positive reviews, and negative reviews.

        Raises:
        - Exception: Re-raised after logging (API failure, or the model's
          reply missing the expected category labels).
        """
        try:
            LOGGER.info("Classify reviews...")

            # Convert CSV file to a list of reviews.
            reviews = self.csv_to_list(file_path)

            # Construct the prompt for the OpenAI completions API.
            prompt = f"read and analyse to return suggestion reviews,postive reviews and negative reviews with label ***{reviews}***."

            # NOTE(review): text-davinci-003 is a legacy/deprecated model;
            # preserved here to keep behavior unchanged — confirm availability.
            response = self.client.completions.create(
                model="text-davinci-003",  # You can use a different engine
                prompt=prompt,
                max_tokens=200,
                temperature = 0,
            )

            # Accumulate the generated text (count_reviews reads self.answer).
            self.answer += response.choices[0].text

            # Split the generated text into the three labelled sections.
            # Assumes the model echoed the "Suggestion/Positive/Negative
            # Reviews:" labels; raises IndexError otherwise (logged below).
            suggestion_reviews = self.answer.split("Suggestion Reviews:")[1].split("Positive Reviews:")[0].strip()
            positive_reviews = self.answer.split("Positive Reviews:")[1].split("Negative Reviews:")[0].strip()
            negative_reviews = self.answer.split("Negative Reviews:")[1].strip()

            return suggestion_reviews, positive_reviews, negative_reviews

        except Exception as e:
            LOGGER.error(f"Error in split_reviews: {e}")
            raise

    def file_name(self, upload_file: str) -> str:
        """
        Get the name of the uploaded file.

        Args:
            upload_file: File object.

        Returns:
            str: File name (path).

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            # Return the file path held by the upload widget.
            return upload_file.name
        except Exception as e:
            LOGGER.error(f"Error in file_name: {e}")
            raise

    def gradio_interface(self):
        """
        Create a Gradio interface for the JobPotral.

        Builds four tabs (QA+Graph, Resume Analyzer, Reviews Analyzer, plus
        nested Chatbot/Graph tabs), wires the event handlers, and launches
        the app (blocking, debug mode).
        """
        with gr.Blocks(css="style.css", theme='freddyaboulton/test-blue') as demo:
            gr.HTML("""<center class="darkblue" text-align:center;padding:30px;'><center>
            <center><h1 class ="center" style="color:#fff">ADOPLE AI</h1></center>
            <br><center><h1 style="color:#fff">Job Potral Tool</h1></center>""")

            # QA state: holds the knowledge base built from the uploaded file.
            state = gr.State(self.get_empty_state())
            with gr.Tab("QA and Graph"):
                with gr.Column(elem_id="col-container"):
                    gr.Markdown("**Upload your file**")
                    with gr.Row(elem_id="row-flex"):
                        with gr.Column(scale=0.90, min_width=160):
                            file_output = gr.File(elem_classes="filenameshow")
                        with gr.Column(scale=0.10, min_width=160):
                            upload_button = gr.UploadButton(
                                "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx",".csv"],
                                elem_classes="filenameshow")
                with gr.Row(elem_id="col-container"):
                    with gr.Column():
                        analyse_graph = gr.Button("Analyse Graph")

                with gr.TabItem("Chatbot"):
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1, min_width=0):
                            chatbot = gr.Chatbot(label = "Resume QA")
                            msg = gr.Textbox(label = "Question")
                            clear = gr.ClearButton([msg, chatbot])

                # Analyse-graph output tab.
                with gr.TabItem("Graph"):
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            domain_graph = gr.Plot(label="Domain Graph")
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            working_time_graph = gr.Plot(label="Working Time Graph")
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            career_gap_graph = gr.Plot(label="Career Gap Graph")

            # Resume analyser tab.
            with gr.Tab("Resume Analyzer"):
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.55, min_width=150, ):
                        job_description = gr.File(label="Job Description", file_types = [".pdf",".txt"])
                    with gr.Column(scale=0.55, min_width=150):
                        resume = gr.File(label="Resume", file_types = [".pdf",".txt"])

                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.80, min_width=150):
                        analyse_btn = gr.Button("Analyse")
                    with gr.Column(scale=0.20, min_width=150):
                        clear_btn = gr.ClearButton()

                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150):
                        matched_result = gr.Textbox(label="Matched Result", lines=10)

                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150):
                        pychart = gr.Plot(label="Matching Percentage Chart")

            # Review analyser tab.
            with gr.Tab("Reviews Analyzer"):
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.90, min_width=160):
                        file_output_review = gr.File(elem_classes="filenameshow")
                    with gr.Column(scale=0.10, min_width=160):
                        upload_button_review = gr.UploadButton(
                            "Browse File",file_types=[".txt", ".pdf", ".doc", ".docx",".json",".csv"],
                            elem_classes="filenameshow")

                with gr.Row(elem_id="col-container"):
                    split_reviews_top_5_btn = gr.Button("Split TOP 5 Reviews ")

                with gr.Row(elem_id="col-container"):
                    suggested_reviews = gr.Textbox(label="Suggested Reviews")
                    postive_reviews = gr.Textbox(label="Positive Reviews")
                    negative_reviews = gr.Textbox(label="Negative Reviews")

                with gr.Row(elem_id="col-container"):
                    sentiment_graph_btn = gr.Button("Sentiment Graph")

                with gr.Row(elem_id="col-container"):
                    sentiment_graph = gr.Plot(label="Sentiment Analysis")

            # QA wiring: uploading a file builds the knowledge base into state.
            upload_button.upload(self.upload_file, upload_button, [file_output, state])

            msg.submit(self.answer_question, [msg, state, chatbot], [msg, chatbot])

            # Analyse-graph wiring.
            analyse_graph.click(self.get_graph, upload_button, [domain_graph, working_time_graph, career_gap_graph])

            # Resume-analyser wiring.
            analyse_btn.click(self.matching_percentage, [job_description, resume], [matched_result, pychart])

            # Review-analyser wiring.
            upload_button_review.upload(self.file_name, upload_button_review, file_output_review)

            sentiment_graph_btn.click(self.count_reviews, [], sentiment_graph)

            split_reviews_top_5_btn.click(self.extract_top_reviews, upload_button_review, [suggested_reviews, postive_reviews, negative_reviews])

        demo.launch(debug = True)
|
658 |
+
|
659 |
+
if __name__ == "__main__":
    # Build the tool and launch the (blocking) Gradio UI.
    JobPotral().gradio_interface()
|