In [1]:
from openai import OpenAI
import os
from trulens_eval import Provider, TruLlama, FeedbackMode, Feedback, Select,  Tru
from trulens_eval.feedback import Groundedness
from trulens_eval import OpenAI as fOpenAI
import pandas as pd
import numpy as np
from tqdm import tqdm

from app import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# The key is stored under OPEN_AI_API_KEY but exposed to the rest of the
# notebook as OPENAI_API_KEY.  Copy it across only when it exists: assigning
# None into os.environ fails with an opaque "TypeError: str expected, not
# NoneType", which hides the real problem (a missing key).
_open_ai_api_key = os.getenv("OPEN_AI_API_KEY")
if _open_ai_api_key:
    os.environ["OPENAI_API_KEY"] = _open_ai_api_key
elif not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "No OpenAI API key found: set OPEN_AI_API_KEY (or OPENAI_API_KEY) "
        "in the environment or in a .env file."
    )

In [122]:
# Job posting that every resume is compared against. It is passed to
# process_pdf_and_save_job_desc and substituted into the cover-letter and
# skills-gap prompt templates further down.
job_description = """
Job: Software Engineer At Truera
Design and implement complex distributed systems, build APIs around AI/ML observability, and partner with various engineers and product managers for new products and features.
Engage in customer interactions to drive product features, review design and code, ensure high-quality deliverables, and take significant ownership of development with a pragmatic, results-driven approach.
Advocate for engineering efficiency through continuous deployment, automation, monitoring, and development of self-healing systems while maintaining scalability, availability, and latency.
Possess strong communication skills, provide mentoring, lead and foster team development, and continuously learn and grow while pushing the team and sharing knowledge.
Require a BS in Computer Science or equivalent, 4+ years in distributed data systems, expertise in Java/Python/Go, and experience with data infrastructure and modern cloud-based architectures; experience in ML and container technologies is a plus.
"""


In [152]:
def generate_skill_gap_analysis(json_data, job_description):
    """Generate a skills-gap analysis for one resume against a job posting.

    Args:
        json_data: Parsed resume as a string; substituted for every
            ``json_data`` token in the prompt template.
        job_description: Job posting text; substituted for every
            ``job_description`` token in the prompt template.

    Returns:
        The ``.text`` of the ``model_text`` response, or the string
        ``"An error occurred: <exception>"`` if anything raises.
    """
    try:
        template = load_prompt("prompts/skills_gap_prompt.txt")

        # Fill both placeholders in a single pass over the template.  Chaining
        # .replace() calls would re-scan the already-inserted job description,
        # so a posting that happens to contain the text "json_data" would get
        # the resume spliced into the middle of it.
        prompt = json_data.join(
            piece.replace("job_description", job_description)
            for piece in template.split("json_data")
        )

        response = model_text.generate_content(prompt)
        return response.text

    except Exception as e:
        # Best-effort: callers receive the error as text instead of an exception.
        return f"An error occurred: {e}"

def generate_interview_questions(json_data):
    """Generate interview questions for one resume.

    The resume string is appended directly to the interview-questions prompt
    template and the combined text is sent to ``model_text``.

    Args:
        json_data: Parsed resume as a string.

    Returns:
        The ``.text`` of the model response. Unlike the sibling generators,
        exceptions are not caught here and propagate to the caller.
    """
    template = load_prompt("prompts/interview_questions_prompt.txt")
    full_prompt = template + json_data
    return model_text.generate_content(full_prompt).text

def generate_cover_letter(json_data, job_description):
    """Generate a cover letter for one resume targeting a job posting.

    Args:
        json_data: Parsed resume as a string; substituted for every
            ``json_data`` token in the prompt template.
        job_description: Job posting text; substituted for every
            ``job_description`` token in the prompt template.

    Returns:
        The ``.text`` of the ``model_text`` response, or the string
        ``"An error occurred: <exception>"`` if anything raises.
    """
    try:
        template = load_prompt("prompts/cover_letter_prompt.txt")

        # Fill both placeholders in a single pass over the template.  Chaining
        # .replace() calls would re-scan the already-inserted job description,
        # so a posting that happens to contain the text "json_data" would get
        # the resume spliced into the middle of it.
        prompt = json_data.join(
            piece.replace("job_description", job_description)
            for piece in template.split("json_data")
        )

        response = model_text.generate_content(prompt)
        return response.text

    except Exception as e:
        # Best-effort: callers receive the error as text instead of an exception.
        return f"An error occurred: {e}"

In [141]:
resumes_folder_path = "resumes/"
json_data_resumes = []
# (file name, exception) for every PDF that could not be processed, so that
# dropped resumes are visible instead of silently disappearing.
failed_resumes = []

# Sorted so json_data_resumes has the same order on every run; os.listdir
# returns directory entries in arbitrary order.
for file_name in tqdm(sorted(os.listdir(resumes_folder_path))):
    if not file_name.lower().endswith('.pdf'):
        continue
    pdf_path = os.path.join(resumes_folder_path, file_name)

    # Read the bytes and close the file before the slow processing call.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_content = pdf_file.read()

    try:
        # Only json_data is kept; the returned image is not used in this notebook.
        image, json_data = process_pdf_and_save_job_desc(pdf_content, job_description)
    except Exception as exc:
        # Skip this resume but remember why. Catching Exception (not a bare
        # except) lets KeyboardInterrupt stop a long run.
        failed_resumes.append((file_name, exc))
        continue

    json_data_resumes.append(json_data)

if failed_resumes:
    print(f"Skipped {len(failed_resumes)} resume(s) that failed to process:")
    for failed_name, failure in failed_resumes:
        print(f"  {failed_name}: {failure!r}")

100%|██████████| 30/30 [06:43<00:00, 13.45s/it]


In [142]:
# Sanity check: show the first five entries collected from process_pdf_and_save_job_desc.
print(json_data_resumes[:5])

[' ```json\n{\n  "Education": [\n    {\n      "Institution": "San Francisco State University",\n      "Degree": "B.S.",\n      "Field of Study": "Engineering",\n      "Start Date": "September 2014",\n      "End Date": "June 2018",\n      "GPA": "3.8"\n    }\n  ],\n  "Work Experience": [\n    {\n      "Company": "Philo",\n      "Role": "Software Engineer III, Front-End",\n      "Start Date": "December 2020",\n      "End Date": "Present",\n      "Responsibilities": "Developed 37+ robust, reusable, and reliable platform components which enhanced the company platform, managed 100% of change request processing coordination with 3 other teams, provided technical advice and weigh-ins on technical decisions that impacted 6 cross-functional teams and objectives for 11+ technical teams, spearheaded 68+ group sessions to elicit complex information on requirements clarification, design sessions, code reviews, and troubleshooting issues"\n    },\n    {\n      "Company": "LegalZoom",\n      "Role": 

In [153]:
# One row per resume: the parsed resume, the exact prompt for each task and
# the model output for that task.
result_columns = [
    'input',
    'prompt_interview_questions', 'output_interview_questions',
    'prompt_cover_letter', 'output_cover_letter',
    'prompt_skill_gap_analysis', 'output_skill_gap_analysis'
]

# Loop-invariant work: read each template once and fill in the job
# description up front; only the resume differs between rows.
interview_questions_template = load_prompt("prompts/interview_questions_prompt.txt")
cover_letter_template = load_prompt("prompts/cover_letter_prompt.txt").replace("job_description", job_description)
skill_gap_analysis_template = load_prompt("prompts/skills_gap_prompt.txt").replace("job_description", job_description)

# Collect plain dicts and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows each time.
rows = []
for json_data in tqdm(json_data_resumes):
    # Rebuild the prompts so they can be stored next to the outputs.
    interview_questions_prompt = interview_questions_template + json_data
    cover_letter_prompt = cover_letter_template.replace("json_data", json_data)
    skill_gap_analysis_prompt = skill_gap_analysis_template.replace("json_data", json_data)

    rows.append({
        'input': json_data,
        'prompt_interview_questions': interview_questions_prompt,
        'output_interview_questions': generate_interview_questions(json_data),
        'prompt_cover_letter': cover_letter_prompt,
        'output_cover_letter': generate_cover_letter(json_data, job_description),
        'prompt_skill_gap_analysis': skill_gap_analysis_prompt,
        'output_skill_gap_analysis': generate_skill_gap_analysis(json_data, job_description)
    })

# Passing columns keeps the column order and yields an empty, correctly
# labelled frame when there are no resumes.
df = pd.DataFrame(rows, columns=result_columns)

100%|██████████| 27/27 [07:07<00:00, 15.84s/it]


In [144]:
# Preview the first rows of the prompts and generated outputs.
df.head()

Unnamed: 0,input,prompt_interview_questions,output_interview_questions,prompt_cover_letter,output_cover_letter,prompt_skill_gap_analysis,output_skill_gap_analysis
0,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:**\nCould you walk me t...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,- **Skill Gap: Proficiency in Advanced Data An...
1,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:** Tell me about your e...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am excited to presen...",Assume the role of an experienced career coach...,- **Skill Gap: Data Infrastructure and Modern ...
2,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:** Tell me about a scen...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,- **Skill Gap: Advanced Distributed Data Syste...
3,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1**: In your role at Duol...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am thrilled to bring...",Assume the role of an experienced career coach...,- Skill Gap: Distributed Systems Design and Im...
4,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Questions:**\n\n1. **Systems Integ...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,- Skill Gap: Cloud-Based Architectures\nBrief ...


In [145]:
# Save the current df to disk. to_csv also writes the row index as an unnamed
# first column by default.
# NOTE(review): this cell's execution count (145) is lower than that of the
# cell that builds df (153), so the file on disk may come from an earlier df -
# re-run to confirm.
df.to_csv("v2_w_o_feedback.csv")

In [154]:
# NOTE(review): this import rebinds the name `OpenAI`, which the first cell
# already imported from the `openai` package (and `fOpenAI` from trulens_eval).
# From here on `OpenAI` is the llama_index class; consider an alias to avoid
# confusion.
from llama_index.llms import OpenAI

# Client used by OpenAI_Provider (via .complete) to score the generated outputs.
llm_text = OpenAI() 


In [155]:
class OpenAI_Provider:
    """Scores generated cover letters, skills-gap analyses and interview questions.

    Each ``evaluate_*`` method fills an evaluation prompt template from
    ``prompts/`` with the original prompt (``input``) and the generated text
    (``output``), sends it to the module-level ``llm_text`` client and parses
    the reply as JSON containing a ``"score"`` field.
    """

    @staticmethod
    def load_prompt(filename):
        """Return the contents of ``filename``.

        Declared as a static method so that both ``self.load_prompt(path)`` and
        ``OpenAI_Provider.load_prompt(path)`` work. On any failure the string
        ``"Error loading prompt: <exception>"`` is returned instead of raising.
        """
        try:
            with open(filename, "r") as file:
                return file.read()
        except Exception as e:
            return f"Error loading prompt: {e}"

    def _evaluate(self, prompt_path: str, input: str, output: str) -> float:
        """Shared implementation of the ``evaluate_*`` methods."""
        template = self.load_prompt(prompt_path)
        # The template is expected to contain {input} and {output} fields.
        formatted_prompt = template.format(input=input, output=output)

        response = llm_text.complete(formatted_prompt)
        return self.interpret_response(response.text)

    def evaluate_cover_letter(self, input: str, output: str) -> float:
        """Score a generated cover letter (``output``) against its prompt (``input``)."""
        return self._evaluate("prompts/evaluate_cover_letter_prompt.txt", input, output)

    def evaluate_skills_gap(self, input: str, output: str) -> float:
        """Score a generated skills-gap analysis (``output``) against its prompt (``input``)."""
        return self._evaluate("prompts/evaluate_skills_gap_prompt.txt", input, output)

    def evaluate_interview_questions(self, input: str, output: str) -> float:
        """Score generated interview questions (``output``) against their prompt (``input``)."""
        return self._evaluate("prompts/evaluate_interview_questions_prompt.txt", input, output)

    def interpret_response(self, response_text: str) -> float:
        """Extract the score from a model reply.

        Args:
            response_text: Reply text, expected to be a JSON object with a
                ``"score"`` field, optionally wrapped in a Markdown code fence
                (a ``` line, with or without a language tag, before and after).

        Returns:
            The score truncated to an integer, or ``np.nan`` when the reply is
            not a string, is not valid JSON, is not a JSON object, or carries
            no usable numeric score. Never raises for malformed replies, so
            one bad reply cannot abort a scoring loop.
        """
        if not isinstance(response_text, str):
            return np.nan

        payload = response_text.strip()
        if payload.startswith("```"):
            # Drop the opening fence line (``` or ```json) and the closing fence.
            _, _, payload = payload.partition("\n")
            payload = payload.rstrip().rstrip("`")

        try:
            response_json = json.loads(payload)
        except json.JSONDecodeError:
            return np.nan

        # Valid JSON that is not an object (e.g. "8" or "[8]") has no .get().
        if not isinstance(response_json, dict):
            return np.nan

        try:
            # int(nan) raises ValueError, int(None) TypeError, int(inf) OverflowError.
            return int(response_json.get("score", np.nan))
        except (TypeError, ValueError, OverflowError):
            return np.nan


In [156]:
openai_provider_custom = OpenAI_Provider()

# One entry per generated artifact:
# (score column to fill, evaluator, column holding the prompt, column holding the output).
# The order matches the order in which each row is evaluated.
score_specs = [
    ('score_interview_questions', openai_provider_custom.evaluate_interview_questions,
     'prompt_interview_questions', 'output_interview_questions'),
    ('score_cover_letter', openai_provider_custom.evaluate_cover_letter,
     'prompt_cover_letter', 'output_cover_letter'),
    ('score_skill_gap_analysis', openai_provider_custom.evaluate_skills_gap,
     'prompt_skill_gap_analysis', 'output_skill_gap_analysis'),
]

# Create the score columns up front so that rows which are never scored stay NaN.
for score_column, _, _, _ in score_specs:
    df[score_column] = np.nan

# iterrows() is a generator with no length; passing total gives tqdm a real
# progress bar instead of a bare iteration counter.
for index, row in tqdm(df.iterrows(), total=len(df)):
    for score_column, evaluate, prompt_column, output_column in score_specs:
        df.at[index, score_column] = evaluate(
            input=row[prompt_column],
            output=row[output_column]
        )

27it [00:50,  1.88s/it]


In [157]:
# Display the frame with the three score columns added.
df

Unnamed: 0,input,prompt_interview_questions,output_interview_questions,prompt_cover_letter,output_cover_letter,prompt_skill_gap_analysis,output_skill_gap_analysis,score_interview_questions,score_cover_letter,score_skill_gap_analysis
0,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:** In your role at Phil...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am excited to submit...",Assume the role of an experienced career coach...,"**Skill Gap 1: Proficiency in Java, Python, or...",9.0,9.0,8.0
1,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:**\nDuring your interns...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am excited to submit...",Assume the role of an experienced career coach...,**Skill Gap 1: Absence of Experience in Data I...,9.0,8.0,8.0
2,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,Technical Question 1: Elaborate on a specific ...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nWith great excitement,...",Assume the role of an experienced career coach...,**Skill Gap 1: Advanced Data Analytics Tools**...,9.0,8.0,8.0
3,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,Technical Question 1: How did you optimize mac...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,**Skill Gaps:**\n\n* **Expertise in Java/Pytho...,9.0,8.0,9.0
4,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:**\nIn your role as an ...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,**Skill Gap 1:** Distributed Data Systems\n**B...,9.0,8.0,8.0
5,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:**\n\nTell me about a c...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI enthusiastically pre...",Assume the role of an experienced career coach...,**Skill Gaps:**\n\n* **Advanced Distributed Sy...,9.0,9.0,8.0
6,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:**\n\nYou mentioned wor...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,**Skill Gap: Distributed Data Systems Experien...,9.0,8.0,8.0
7,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,"**Technical Question 1:** At Google, you imple...",Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,**Skill Gap 1: Cloud-Based Architectures**\nBr...,9.0,8.0,8.0
8,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:**\n\nDescribe your exp...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,**Skill Gaps:**\n\n- **Distributed Data System...,9.0,8.0,8.0
9,"```json\n{\n ""Education"": [\n {\n ""I...",Assume the role of an experienced interviewer....,**Technical Question 1:** During your tenure a...,Assume the role of an expert recruiter. Create...,"Dear Hiring Manager,\n\nI am writing to expres...",Assume the role of an experienced career coach...,**Skill Gap 1: Data Infrastructure Experience*...,9.0,9.0,8.0


In [158]:
# Average score per task across all resumes. DataFrame.mean skips NaN by
# default, so replies that could not be parsed into a score are left out of
# the average rather than counted as zero.
mean_values = df[["score_interview_questions","score_cover_letter","score_skill_gap_analysis"]].mean()
mean_values

score_interview_questions    9.000000
score_cover_letter           8.333333
score_skill_gap_analysis     8.296296
dtype: float64

In [151]:
#df.to_csv("results_v2.csv")

In [129]:
# Save df to disk (the row index is written as an unnamed first column).
# NOTE(review): this cell's execution count (129) is lower than that of every
# cell that builds or scores df above, so analysis.csv on disk may not match
# the df shown in this notebook - re-run to confirm.
df.to_csv("analysis.csv")