Spaces:
Runtime error
Runtime error
Commit
·
e989038
1
Parent(s):
59eb4e0
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,662 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List, Union, Tuple, Dict
|
3 |
+
from langchain.chains.question_answering import load_qa_chain
|
4 |
+
from langchain.document_loaders import UnstructuredFileLoader
|
5 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
6 |
+
from langchain.llms import OpenAI as OpenAILLM
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
from langchain.vectorstores import FAISS
|
9 |
+
import gradio as gr
|
10 |
+
from openai import OpenAI
|
11 |
+
import seaborn as sns
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import pandas as pd
|
14 |
+
import logging
|
15 |
+
from PyPDF2 import PdfReader
|
16 |
+
import re
|
17 |
+
import plotly.graph_objects as go
|
18 |
+
import csv
|
19 |
+
|
20 |
+
# --- Logging configuration -------------------------------------------------
# All records go to a rotating-style append log file with a verbose format
# that includes timestamp, level, file, line and function name.
logging.basicConfig(
    filename='Resume_Analyzer.log',  # You can adjust the log file name here
    filemode='a',
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)
LOGGER = logging.getLogger(__name__)

# Desired level as a string; unknown values silently fall back to INFO.
log_level_env = 'INFO'  # You can adjust the log level here
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}
# dict.get with a default replaces the original if/else lookup — same result.
log_level = log_level_dict.get(log_level_env, log_level_dict['INFO'])
LOGGER.setLevel(log_level)
|
42 |
+
|
43 |
+
class JobPotral:
    """Gradio-based job portal tool.

    Bundles four features behind one UI:
      * document question-answering over an uploaded file (LangChain + FAISS),
      * quick Plotly charts over an uploaded members CSV,
      * resume vs. job-description match scoring via the OpenAI chat API,
      * review sentiment splitting/counting via the OpenAI completions API.

    NOTE(review): the class name keeps the original "Potral" spelling because
    callers reference it; renaming would break the external interface.
    """

    def __init__(self) -> None:
        """
        Initialize the JobPotral object.

        Constructs the OpenAI client (the client itself reads the API key
        from the ``OPENAI_API_KEY`` environment variable) and an empty
        accumulator string used later by the review-analysis methods.
        """
        self.client = OpenAI()

        # Accumulates raw completion text; read by count_reviews() after
        # split_reviews() has populated it.
        self.answer = ""

    def get_empty_state(self) -> dict:
        """
        Get an empty state for the knowledge base.

        Returns:
        - dict: An empty state dictionary (``{"knowledge_base": None}``).
        """
        LOGGER.info("Creating Empty Dictionary...")

        return {"knowledge_base": None}

    def create_knowledge_base(self, docs: List[str]) -> FAISS:
        """
        Create a knowledge base from a set of documents.

        Args:
        - docs (list): List of documents to create a knowledge base from.

        Returns:
        - knowledge_base: The created FAISS knowledge base.

        Raises:
        - Exception: Re-raised after logging if splitting/embedding fails.
        """
        try:
            LOGGER.info("Creating Knowledge Base...")

            # Split into fixed-size chunks so embeddings stay within limits.
            text_splitter = CharacterTextSplitter(
                separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
            )
            chunks = text_splitter.split_documents(docs)

            # Create embeddings (uses the OpenAI embeddings endpoint).
            embeddings = OpenAIEmbeddings()

            # Create the FAISS vector store from the chunks.
            knowledge_base = FAISS.from_documents(chunks, embeddings)

            return knowledge_base

        except Exception as e:
            LOGGER.error(f"Error creating knowledge base: {str(e)}")
            raise

    def upload_file(self, file_obj: gr.File) -> Tuple[str, Union[str, Dict[str, FAISS]]]:
        """
        Upload a file and create a knowledge base.

        Args:
        - file_obj: File object representing the uploaded file.

        Returns:
        - tuple: (file name, ``{"knowledge_base": <FAISS store>}``) — the
          dict feeds the Gradio ``state`` component.

        Raises:
        - Exception: Re-raised after logging if loading/indexing fails.
        """
        try:
            LOGGER.info("Unstructuring Files...")

            # "fast" strategy trades extraction quality for speed.
            loader = UnstructuredFileLoader(file_obj.name, strategy="fast")

            # Load the document(s) using the file loader.
            docs = loader.load()

            # Build the searchable knowledge base from the loaded documents.
            knowledge_base = self.create_knowledge_base(docs)

            return file_obj.name, {"knowledge_base": knowledge_base}

        except Exception as e:
            LOGGER.error(f"Error uploading file: {str(e)}")
            raise

    def answer_question(self, question: str, state: Dict[str, Union[None, Dict[str, FAISS]]], chat_history) -> str:
        """
        Answer a question using the knowledge base.

        Args:
        - question (str): The question to answer.
        - state (dict): The state containing the knowledge base.
        - chat_history: Gradio chat history list; mutated in place.

        Returns:
        - tuple: ("", updated chat history) — the empty string clears the
          textbox. (Docstring fixed: the original claimed a plain ``str``.)

        Raises:
        - Exception: Re-raised after logging if retrieval or the LLM fails.
        """
        try:
            # Fixed typo in log message ("Responce" -> "Response").
            LOGGER.info("Generating Response From Model...")

            # Access the knowledge base from the state.
            knowledge_base = state["knowledge_base"]

            # Retrieve the chunks most similar to the question.
            docs = knowledge_base.similarity_search(question)

            # Initialize the OpenAI LLM wrapper.
            llm = OpenAILLM()

            # "stuff" chain: all retrieved docs are stuffed into one prompt.
            chain = load_qa_chain(llm, chain_type="stuff")

            # Run the question-answering chain on the retrieved documents.
            response = chain.run(input_documents=docs, question=question)

            # Append the question and response to the chat history.
            chat_history.append((question, response))

            # Empty string clears the input box; history updates the chatbot.
            return "", chat_history

        except Exception as e:
            LOGGER.error(f"Error answering question: {str(e)}")
            raise

    def get_graph(self, file_path: str) -> Tuple[go.Figure, go.Figure, go.Figure]:
        """
        Generate three types of charts based on data from a CSV file.

        Parameters:
        - file_path: Uploaded-file object; ``.name`` is the CSV path.
          The CSV must contain 'Domain', 'Working Time' and
          'Career Gap (years)' columns.

        Returns:
        Tuple[go.Figure, go.Figure, go.Figure]: Bar chart (members per
        domain), pie chart (working time), histogram (career gaps).

        Raises:
        - Exception: Re-raised after logging (e.g. missing columns).
        """
        try:
            LOGGER.info("Create graph for CSV file...")

            # Read data from CSV file into a DataFrame.
            df = pd.read_csv(file_path.name)

            # Chart 1: Bar chart - Number of members by domain.
            domain_counts = df['Domain'].value_counts()
            domain_fig = go.Figure(go.Bar(x=domain_counts.index, y=domain_counts, marker_color='skyblue'))
            domain_fig.update_layout(title='Number of Members by Domain', xaxis_title='Domain', yaxis_title='Number of Members')

            # Chart 2: Pie chart - Distribution of working time.
            working_time_counts = df['Working Time'].value_counts()
            working_time_fig = go.Figure(go.Pie(labels=working_time_counts.index, values=working_time_counts,
                                                pull=[0.1, 0], marker_colors=['lightcoral', 'lightskyblue']))
            working_time_fig.update_layout(title='Distribution of Working Time')

            # Chart 3: Histogram - Distribution of career gaps.
            career_gap_fig = go.Figure(go.Histogram(x=df['Career Gap (years)'], nbinsx=20, marker_color='lightgreen',
                                                    marker_line_color='black', marker_line_width=1.2))
            career_gap_fig.update_layout(title='Distribution of Career Gaps', xaxis_title='Career Gap (years)', yaxis_title='Number of Members')

            return domain_fig, working_time_fig, career_gap_fig

        except Exception as e:
            LOGGER.error(f"Error in get_graph: {str(e)}")
            raise

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """
        Extracts text from a PDF file.

        Args:
            pdf_path (str): The path to the PDF file.

        Returns:
            str: The extracted text from the PDF. Pages that fail to
            extract are logged and skipped rather than aborting the whole
            document.

        Raises:
        - Exception: Re-raised after logging if the PDF cannot be opened.
        """
        text = ''
        try:
            LOGGER.info("Extract text from pdf...")

            # Load PDF document.
            pdf = PdfReader(pdf_path)

            # Extract text from each page, accumulating into `text`.
            for page_number in range(len(pdf.pages)):

                try:
                    page = pdf.pages[page_number]

                    # Fix: PyPDF2's extract_text() can return None for
                    # image-only pages; `or ''` avoids a TypeError on `+=`.
                    text += page.extract_text() or ''
                except Exception as e:
                    LOGGER.error(f"Error extracting text from page {page_number + 1}: {e}")

            return text

        except Exception as e:
            LOGGER.error(f"Error reading PDF file: {e}")
            raise

    def matching_percentage(self, resume_path: str, job_description_path: str) -> Tuple[str, go.Figure]:
        """
        Assess the matching percentage between a resume and a job description using the OpenAI GPT-3.5-turbo model.

        Parameters:
        - resume_path: Uploaded resume file object (PDF); ``.name`` is the path.
        - job_description_path: Uploaded job-description file object (PDF).

        Returns:
        Tuple[str, go.Figure]: The model's analysis text and a pie chart of
        the matched percentage.

        Raises:
        - Exception: Re-raised after logging (extraction, API or parsing errors).
        """
        try:
            LOGGER.info("Get matching percentage...")

            # Extract text from the resume and job description PDFs.
            resume = self.extract_text_from_pdf(resume_path.name)
            job_description = self.extract_text_from_pdf(job_description_path.name)

            # Conversation prompt instructing the model to emit a fixed,
            # regex-parseable format (see get_ploty).
            conversation = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"""Given the job description and the resume, assess the matching percentage to 100 and if 100 percentage not matched mention the remaining percentage with reason. **Job Description:**{job_description}**Resume:**{resume}
**Detailed Analysis:**
the result should be in this format:
Matched Percentage: [matching percentage].
Reason : [Mention Reason and keys from Job Description and Resume get this matched percentage.].
Skills To Improve : [Mention the skills How to improve and get match the given Job Description].
Keywords : [matched key words from Job Description and Resume].
"""}
            ]

            # Call OpenAI GPT-3.5-turbo; temperature 0 for deterministic output.
            chat_completion = self.client.chat.completions.create(
                model = "gpt-3.5-turbo",
                messages = conversation,
                max_tokens=500,
                temperature=0
            )

            matched_result = chat_completion.choices[0].message.content

            # Generate a Plotly figure visualising the matched percentage.
            fig = self.get_ploty(matched_result)

            return matched_result, fig

        except Exception as e:
            LOGGER.error(f"Error in matching_percentage: {str(e)}")
            raise

    def get_ploty(self, result: str) -> go.Figure:
        """
        Extracts matched percentage from the input result and creates a pie chart using Plotly.

        Parameters:
        - result (str): The input string containing information about the matched percentage.

        Returns:
        - go.Figure: Plotly figure object representing the pie chart.

        Raises:
        - ValueError: If no percentage can be found in ``result``.
        - Exception: Re-raised after logging for any other failure.
        """
        try:
            LOGGER.info("Create Pie chart for Matched percentage...")

            # Preferred format: "Matched Percentage: NN%" (case-insensitive).
            match_percentage = re.search(r'matched percentage: (\d+)%', result, re.IGNORECASE)

            if match_percentage:
                matched_percentage = int(match_percentage.group(1))

            else:
                # Fallback: accept any "NN%" anywhere in the text.
                match_percentage = re.search(r'(\d+)%', result, re.IGNORECASE)
                if match_percentage is None:
                    # Fix: the original called .group(1) unconditionally and
                    # crashed with AttributeError when nothing matched;
                    # raise a clear, specific error instead.
                    raise ValueError(f"No percentage found in result: {result!r}")
                matched_percentage = int(match_percentage.group(1))

            # Creating a pie chart with plotly.
            labels = ['Matched', 'Not Matched']
            values = [matched_percentage, 100 - matched_percentage]

            fig = go.Figure(data=[go.Pie(labels=labels, values=values, pull=[0.1, 0])])
            fig.update_layout(title='Matched Percentage')

            return fig

        except Exception as e:
            LOGGER.error(f"Error processing result:{str(e)}")
            raise

    def count_reviews(self) -> go.Figure:
        """
        Count and visualize the distribution of positive, negative, and neutral reviews.

        Reads ``self.answer`` (populated earlier by split_reviews) and counts
        review lines under each category heading.

        Returns:
            go.Figure: Plotly bar chart of positive/negative/neutral counts.

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            LOGGER.info("Count reviews...")

            # Extracted data from the reviews (text accumulated by split_reviews).
            data = self.answer

            # Sections are separated by blank lines; first line of each
            # section is its category heading.
            sections = [section.strip() for section in data.split("\n\n")]

            positive_count = 0
            neutral_count = 0
            negative_count = 0

            # Count review lines under each category heading.
            for section in sections:
                lines = section.split('\n')

                if len(lines) > 1:
                    category = lines[0].strip()
                    reviews = lines[1:]
                    count = len(reviews)

                    # "Suggestion" reviews are treated as neutral sentiment.
                    if "Positive" in category:
                        positive_count += count
                    elif "Suggestion" in category:
                        neutral_count += count
                    elif "Negative" in category:
                        negative_count += count

            # Data for the bar graph.
            labels = ['Positive', 'Negative', 'Neutral']
            counts = [positive_count, negative_count, neutral_count]

            # Creating the bar graph using Plotly.
            fig = go.Figure(data=[go.Bar(x=labels, y=counts, marker=dict(color=['green', 'red', 'gray']))])

            fig.update_layout(title='Distribution of Reviews',
                              xaxis=dict(title='Sentiment'),
                              yaxis=dict(title='Number of Reviews'))

            return fig

        except Exception as e:
            LOGGER.error(f"Error in count_reviews: {e}")
            raise

    def csv_to_list(self, file_path: str) -> list:
        """
        Read a CSV file and convert it to a list.

        Args:
            file_path: Uploaded-file object; ``.name`` is the CSV path.

        Returns:
            list: One string per data row (header skipped); columns within
            a row are concatenated with no separator.

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            LOGGER.info("Extract CSV...")
            data_list = []

            # Open the CSV file and read its contents.
            with open(file_path.name, 'r', newline='') as csv_file:

                csv_reader = csv.reader(csv_file)

                next(csv_reader, None)  # Skip the header row

                for row in csv_reader:
                    # Convert each row to a single string and collect it.
                    data_list.append("".join(row))

            return data_list

        except Exception as e:
            LOGGER.error(f"Error in csv_to_list: {e}")
            raise

    def extract_top_reviews(self, file_path: str) -> tuple:
        """
        Extract the top suggestion, positive, and negative reviews from a CSV file.

        Args:
            file_path: Uploaded-file object pointing at the reviews CSV.

        Returns:
            tuple: (top suggestion reviews, top positive reviews,
            top negative reviews), each as a newline-joined string.

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            LOGGER.info("Extract top reviews...")

            # Number of top reviews to keep per category.
            top_count = 5

            # Split the reviews into suggestion, positive, and negative categories.
            suggestion_reviews, positive_reviews, negative_reviews = self.split_reviews(file_path)

            # Top suggestion reviews (one review per line assumed).
            reviews_list = suggestion_reviews.split("\n")
            suggest_reviews = "\n\n ".join(reviews_list[:top_count])

            # Top positive reviews.
            reviews_list = positive_reviews.split("\n")
            pos_reviews = "\n\n ".join(reviews_list[:top_count])

            # Top negative reviews.
            reviews_list = negative_reviews.split("\n")
            neg_reviews = "\n\n ".join(reviews_list[:top_count])

            return suggest_reviews, pos_reviews, neg_reviews

        except Exception as e:
            LOGGER.error(f"Error in extract_top_reviews: {e}")
            raise

    def split_reviews(self, file_path: str) -> tuple:
        """
        Split reviews into suggestion, positive, and negative categories using OpenAI API.

        Args:
            file_path: Uploaded-file object pointing at the reviews CSV.

        Returns:
            tuple: Suggestion reviews, positive reviews, and negative reviews.

        Raises:
        - Exception: Re-raised after logging (API failure, or the model's
          reply missing the expected category labels).
        """
        try:
            LOGGER.info("Classify reviews...")

            # Convert CSV file to a list of reviews.
            reviews = self.csv_to_list(file_path)

            # Construct the prompt for the OpenAI completions API.
            prompt = f"read and analyse to return suggestion reviews,postive reviews and negative reviews with label ***{reviews}***."

            # NOTE(review): text-davinci-003 is a legacy/deprecated model;
            # preserved here to keep behavior unchanged — confirm availability.
            response = self.client.completions.create(
                model="text-davinci-003",  # You can use a different engine
                prompt=prompt,
                max_tokens=200,
                temperature = 0,
            )

            # Accumulate the generated text (count_reviews reads self.answer).
            self.answer += response.choices[0].text

            # Split the generated text into the three labelled sections.
            # Assumes the model echoed the "Suggestion/Positive/Negative
            # Reviews:" labels; raises IndexError otherwise (logged below).
            suggestion_reviews = self.answer.split("Suggestion Reviews:")[1].split("Positive Reviews:")[0].strip()
            positive_reviews = self.answer.split("Positive Reviews:")[1].split("Negative Reviews:")[0].strip()
            negative_reviews = self.answer.split("Negative Reviews:")[1].strip()

            return suggestion_reviews, positive_reviews, negative_reviews

        except Exception as e:
            LOGGER.error(f"Error in split_reviews: {e}")
            raise

    def file_name(self, upload_file: str) -> str:
        """
        Get the name of the uploaded file.

        Args:
            upload_file: File object.

        Returns:
            str: File name (path).

        Raises:
        - Exception: Re-raised after logging.
        """
        try:
            # Return the file path held by the upload widget.
            return upload_file.name
        except Exception as e:
            LOGGER.error(f"Error in file_name: {e}")
            raise

    def gradio_interface(self):
        """
        Create a Gradio interface for the JobPotral.

        Builds four tabs (QA+Graph, Resume Analyzer, Reviews Analyzer, plus
        nested Chatbot/Graph tabs), wires the event handlers, and launches
        the app (blocking, debug mode).
        """
        with gr.Blocks(css="style.css", theme='freddyaboulton/test-blue') as demo:
            gr.HTML("""<center class="darkblue" text-align:center;padding:30px;'><center>
            <center><h1 class ="center" style="color:#fff">ADOPLE AI</h1></center>
            <br><center><h1 style="color:#fff">Job Potral Tool</h1></center>""")

            # QA state: holds the knowledge base built from the uploaded file.
            state = gr.State(self.get_empty_state())
            with gr.Tab("QA and Graph"):
                with gr.Column(elem_id="col-container"):
                    gr.Markdown("**Upload your file**")
                    with gr.Row(elem_id="row-flex"):
                        with gr.Column(scale=0.90, min_width=160):
                            file_output = gr.File(elem_classes="filenameshow")
                        with gr.Column(scale=0.10, min_width=160):
                            upload_button = gr.UploadButton(
                                "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx",".csv"],
                                elem_classes="filenameshow")
                with gr.Row(elem_id="col-container"):
                    with gr.Column():
                        analyse_graph = gr.Button("Analyse Graph")

                with gr.TabItem("Chatbot"):
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1, min_width=0):
                            chatbot = gr.Chatbot(label = "Resume QA")
                            msg = gr.Textbox(label = "Question")
                            clear = gr.ClearButton([msg, chatbot])

                # Analyse-graph output tab.
                with gr.TabItem("Graph"):
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            domain_graph = gr.Plot(label="Domain Graph")
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            working_time_graph = gr.Plot(label="Working Time Graph")
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            career_gap_graph = gr.Plot(label="Career Gap Graph")

            # Resume analyser tab.
            with gr.Tab("Resume Analyzer"):
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.55, min_width=150, ):
                        job_description = gr.File(label="Job Description", file_types = [".pdf",".txt"])
                    with gr.Column(scale=0.55, min_width=150):
                        resume = gr.File(label="Resume", file_types = [".pdf",".txt"])

                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.80, min_width=150):
                        analyse_btn = gr.Button("Analyse")
                    with gr.Column(scale=0.20, min_width=150):
                        clear_btn = gr.ClearButton()

                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150):
                        matched_result = gr.Textbox(label="Matched Result", lines=10)

                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150):
                        pychart = gr.Plot(label="Matching Percentage Chart")

            # Review analyser tab.
            with gr.Tab("Reviews Analyzer"):
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.90, min_width=160):
                        file_output_review = gr.File(elem_classes="filenameshow")
                    with gr.Column(scale=0.10, min_width=160):
                        upload_button_review = gr.UploadButton(
                            "Browse File",file_types=[".txt", ".pdf", ".doc", ".docx",".json",".csv"],
                            elem_classes="filenameshow")

                with gr.Row(elem_id="col-container"):
                    split_reviews_top_5_btn = gr.Button("Split TOP 5 Reviews ")

                with gr.Row(elem_id="col-container"):
                    suggested_reviews = gr.Textbox(label="Suggested Reviews")
                    postive_reviews = gr.Textbox(label="Positive Reviews")
                    negative_reviews = gr.Textbox(label="Negative Reviews")

                with gr.Row(elem_id="col-container"):
                    sentiment_graph_btn = gr.Button("Sentiment Graph")

                with gr.Row(elem_id="col-container"):
                    sentiment_graph = gr.Plot(label="Sentiment Analysis")

            # QA wiring: uploading a file builds the knowledge base into state.
            upload_button.upload(self.upload_file, upload_button, [file_output, state])

            msg.submit(self.answer_question, [msg, state, chatbot], [msg, chatbot])

            # Analyse-graph wiring.
            analyse_graph.click(self.get_graph, upload_button, [domain_graph, working_time_graph, career_gap_graph])

            # Resume-analyser wiring.
            analyse_btn.click(self.matching_percentage, [job_description, resume], [matched_result, pychart])

            # Review-analyser wiring.
            upload_button_review.upload(self.file_name, upload_button_review, file_output_review)

            sentiment_graph_btn.click(self.count_reviews, [], sentiment_graph)

            split_reviews_top_5_btn.click(self.extract_top_reviews, upload_button_review, [suggested_reviews, postive_reviews, negative_reviews])

        demo.launch(debug = True)
|
658 |
+
|
659 |
+
if __name__ == "__main__":
    # Build the tool and launch the (blocking) Gradio UI.
    JobPotral().gradio_interface()
|