Spaces:

DreamStream-1
/

HR-New

Sleeping

App Files Files Community

DreamStream-1 committed on Nov 14, 2024

Commit

26fdc21

verified ·

1 Parent(s): a51874e

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -96

app.py CHANGED Viewed

@@ -1,126 +1,183 @@
 import re
-import io
-import PyPDF2 as pdf
 import pandas as pd
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-from transformers import pipeline
 import google.generativeai as genai
 import streamlit as st
-# Initialize Google Gemini AI API for summarization (ensure API key is set up)
-api_key = 'YOUR_GOOGLE_API_KEY'
 genai.configure(api_key=api_key)
-# Set up NER pipelines for skill and education extraction
-skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
-education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
-# Extract text from uploaded PDF file
 def input_pdf_text(uploaded_file):
-    file_stream = io.BytesIO(uploaded_file.read())
     reader = pdf.PdfReader(file_stream)
     text = ""
     for page in reader.pages:
         text += page.extract_text()
     return text
-# Extract contact information using regular expressions (email, phone)
 def extract_contact_info(resume_text):
     email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
-    phone_match = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}", resume_text)
     email = email_match.group(0) if email_match else "Not Available"
-    phone = phone_match.group(0) if phone_match else "Not Available"
-    return email, phone
-# Extract skills using NER model or regex
-def extract_skills(resume_text):
-    ner_results = skill_extractor(resume_text)
-    skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
-    return ", ".join(skills) if skills else "Not Available"
-# Extract education details using NER model
-def extract_education(resume_text):
-    ner_results = education_extractor(resume_text)
-    education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
-    # If no direct education found, use regex patterns to capture education
-    if education_entities:
-        return ", ".join(education_entities)
-    else:
-        edu_patterns = [
-            r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech)",
-            r"(University of [A-Za-z]+.*)"
-        ]
-        education = []
-        for pattern in edu_patterns:
-            matches = re.findall(pattern, resume_text)
-            education.extend(matches)
-        return ", ".join(education) if education else "Not Available"
-# Calculate match percentage between resume and job description
-def calculate_match_percentage(resume_text, job_description):
-    documents = [resume_text, job_description]
-    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
-    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
-    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
-    return cosine_sim[0][0] * 100  # Return percentage match
-# Generate resume summary using Gemini API
-def generate_resume_summary(resume_text):
-    prompt = f"Summarize the resume below in 5 bullet points:\n\n{resume_text}"
-    model = genai.GenerativeModel('gemini-1.5-flash')
-    response = model.generate_content(prompt)
-    return response.text.strip()
-# Main function to process and analyze the resume and job description
-def analyze_resume_and_job(resume_pdf, job_description):
-    resume_text = input_pdf_text(resume_pdf)
-    # Extract candidate details
-    email, phone = extract_contact_info(resume_text)
-    skills = extract_skills(resume_text)
-    education = extract_education(resume_text)
-    # Calculate match percentage
-    match_percentage = calculate_match_percentage(resume_text, job_description)
-    # Generate summary using Gemini
-    resume_summary = generate_resume_summary(resume_text)
-    # Prepare results for display
-    result_data = {
-        'Email': email,
-        'Phone': phone,
-        'Skills': skills,
-        'Education': education,
-        'Match_Percentage': match_percentage,
-        'Resume_Summary': resume_summary
-    }
-    # Create a DataFrame for display
-    df = pd.DataFrame([result_data])
-    return df
-# Streamlit interface
-st.title("Resume & Job Description Analyzer")
-st.write("Upload your resume PDF and a job description to analyze how well the resume matches the job requirements.")
-# File uploader for resume PDF
-resume_pdf = st.file_uploader("Upload Resume (PDF)", type="pdf")
-job_description = st.text_area("Enter Job Description", height=200)
-if resume_pdf and job_description:
-    # Analyze the resume and job description
-    df = analyze_resume_and_job(resume_pdf, job_description)
-    # Display results
-    st.subheader("Analysis Results")
-    st.dataframe(df)
-    st.subheader("Detailed Resume Summary")
-    st.write(df['Resume_Summary'][0])
 else:
-    st.write("Please upload a resume and provide a job description to get started.")

 import re
+import os
 import pandas as pd
 import google.generativeai as genai
+import PyPDF2 as pdf
+import io
 import streamlit as st
+from transformers import pipeline
+# Get API Key from Hugging Face Secret
+# The key is read from the GOOGLE_API_KEY environment variable.
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+    # A missing or empty key stops the app here with an explicit message,
+    # before genai.configure is reached.
+    raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")
 genai.configure(api_key=api_key)
+# Function to get response from the Gemini model
+def get_gemini_response(input_text):
+    """Send ``input_text`` to the 'gemini-1.5-flash' model and return the reply's ``text``."""
+    gemini = genai.GenerativeModel('gemini-1.5-flash')
+    return gemini.generate_content(input_text).text
+# Function to extract text from uploaded PDF
+# Returns the text of every page concatenated in page order, with no separator between pages.
 def input_pdf_text(uploaded_file):
+    file_stream = io.BytesIO(uploaded_file.read())  # Read the content of the uploaded file
     reader = pdf.PdfReader(file_stream)
     text = ""
     for page in reader.pages:
+        # NOTE(review): if extract_text() can return None for a page, this += raises TypeError — confirm against the PyPDF2 version in use.
         text += page.extract_text()
     return text
+# Function to extract name, email, and contact from the resume text using transformers (for name)
+# Returns a (name, email, contact) tuple; each element falls back to "Not Available" when nothing is found.
 def extract_contact_info(resume_text):
+    # Using Hugging Face's transformers for NER to extract names
+    # NOTE(review): the pipeline is constructed on every call — consider building it once and reusing it.
+    nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
+    # Process the resume text for named entities
+    entities = nlp(resume_text)
+    # Keep the 'word' of every entity tagged 'B-PER'; only the first one is used as the name below.
+    # NOTE(review): presumably 'word' is a single token here, so multi-word names may be truncated — verify.
+    names = [entity['word'] for entity in entities if entity['entity'] == 'B-PER']
+    # If no name was found by NER, return "Not Available"
+    name = names[0] if names else "Not Available"
+    # Extract email using regex
+    # re.search returns only the first match, so the first email-like string in the text wins.
     email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
     email = email_match.group(0) if email_match else "Not Available"
+    # Updated regex pattern for phone number extraction (more robust)
+    # As with the email, only the first phone-like match is kept.
+    contact_match = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}", resume_text)
+    contact = contact_match.group(0) if contact_match else "Not Available"
+    return name, email, contact
+# Function to extract "<number> year(s)" mentions from the job description
+def extract_expected_years(job_description):
+    """Return every "<number> year(s)" mention found in ``job_description``.
+
+    The match is purely textual: any digits followed by an optional single
+    whitespace character and "year"/"years" are returned, regardless of
+    whether the surrounding sentence is about management or leadership.
+
+    Args:
+        job_description: Job description text to scan.
+
+    Returns:
+        List of the matched substrings in order of appearance, e.g.
+        ``["5 years", "1 year"]``; an empty list when nothing matches.
+    """
+    # Job descriptions often capitalise the unit ("5 Years"), so match
+    # case-insensitively. findall already returns a fresh list.
+    return re.findall(r"\d+\s?year[s]?", job_description, flags=re.IGNORECASE)
+# Function to extract direct team leadership experience years based on date mentions (for direct team leadership)
+def extract_direct_team_leadership_years(text, current_year=None):
+    """Sum the years of experience implied by date/duration phrases in ``text``.
+
+    Recognised phrases (case-insensitive):
+        * "from 2019 to 2022"    -> end year minus start year
+        * "for 3 years"          -> the stated number of years
+        * "since 2018"           -> ``current_year`` minus the stated year
+        * "from 2019 to present" -> ``current_year`` minus the stated year
+
+    Args:
+        text: Free text to scan, e.g. the extracted resume text.
+        current_year: Year used to close open-ended phrases; defaults to
+            the current calendar year.
+
+    Returns:
+        Total number of years as an int; 0 when no phrase matches.
+    """
+    if current_year is None:
+        from datetime import date  # local import keeps the block self-contained
+        current_year = date.today().year
+
+    total_years = 0
+
+    # Closed ranges: two capture groups, so findall yields (start, end) tuples.
+    for start, end in re.findall(r"from (\d{4}) to (\d{4})", text, flags=re.IGNORECASE):
+        # A reversed range contributes nothing rather than a negative span.
+        total_years += max(0, int(end) - int(start))
+
+    # Explicit durations: one capture group, so findall yields plain strings.
+    # They are converted whole; checking len() on them would mistake a
+    # two-digit count such as "10" for a (start, end) pair.
+    for years in re.findall(r"for (\d+) years?", text, flags=re.IGNORECASE):
+        total_years += int(years)
+
+    # Open-ended ranges also yield plain four-digit strings; they run up to
+    # current_year.
+    open_ended_patterns = (r"since (\d{4})", r"from (\d{4}) to present")
+    for pattern in open_ended_patterns:
+        for start in re.findall(pattern, text, flags=re.IGNORECASE):
+            total_years += max(0, current_year - int(start))
+
+    return total_years
+# Refined Prompt Template for Gemini API (requesting detailed text about management experience)
+# The template is filled with str.format using the {text} (resume) and {jd} (job description) placeholders.
+# NOTE(review): instruction 6 asks the model to calculate a match percentage, while the output
+# spec further down tells it to set the value to 70% — confirm which is intended.
+input_prompt = """
+Act as a sophisticated Applicant Tracking System (ATS) with expertise in evaluating resumes specifically for management and team leadership roles. Your task is to analyze the resume in relation to the job description, focusing on direct and indirect team management experience, skills, and qualifications.
+Instructions:
+1. **Candidate's Name**: Identify and state the candidate’s full name as it appears in the resume.
+2. **Direct Team Leadership Experience**: Clearly outline instances where the candidate has formally managed teams, led projects, or directly supervised team members. Quantify this experience in years wherever possible.
+3. **Direct Management Experience**: Identify instances where the candidate has taken on roles involving management responsibilities, such as directly overseeing projects, setting goals for a team, conducting performance reviews, or handling direct reports. Quantify this experience in years, and provide specific examples when available.
+4. **Relevant Skills and Qualifications**: List the candidate’s relevant skills and qualifications that align with the job description requirements. Mention specific technologies, methodologies, and leadership frameworks mentioned in the resume.
+5. **Educational Background**: Summarize the candidate’s educational qualifications relevant to the role.
+6. **Match Percentage Calculation**: Based on the candidate's experience and the keywords related to management and team leadership found in the job description, calculate a match percentage. Incorporate the years of leadership experience and skills alignment as factors in this match.
+Provide a detailed output that includes:
+- **Candidate's Name**: State the candidate's name.
+- **Direct Team Leadership Experience (in years)**: Quantify and describe this experience.
+- **Direct Management Experience (in years)**: Quantify and describe this experience with specific examples.
+- **Relevant Skills and Qualifications**: List skills and qualifications relevant to management and team leadership.
+- **Educational Background**: Provide a brief summary.
+- **Match Percentage**: Set to 70%.
+Input:
+- Resume Text: "{text}"
+- Job Description: "{jd}"
+"""
+# Streamlit interface
+st.title("Resume ATS Analysis Tool")
+st.markdown("### Upload Resume and Job Description for Analysis")
+# File uploader for resume PDF
+uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
+# Input for job description
+job_description = st.text_area("Job Description", height=200)
+# Check if both file and job description are provided
+if uploaded_file and job_description:
+    # Show Analyze button when both resume and job description are provided
+    analyze_button = st.button("Analyze")
+    if analyze_button:
+        # Extract text from the uploaded PDF
+        resume_text = input_pdf_text(uploaded_file)
+        # Extract contact info (name, email, contact)
+        name, email, contact = extract_contact_info(resume_text)
+        # Extract years of experience for Direct Team Leadership and Management
+        direct_team_leadership_years = extract_direct_team_leadership_years(resume_text)
+        # The management figure is the same number as the leadership figure, not a separate extraction.
+        direct_management_years = direct_team_leadership_years  # Reuse extraction logic for simplicity
+        # Prepare the prompt with resume and job description text
+        prompt = input_prompt.format(text=resume_text, jd=job_description)
+        # Get the response from Gemini model
+        response_text = get_gemini_response(prompt)
+        # Clean up the response to remove unnecessary whitespace or formatting
+        response_text_clean = response_text.strip()
+        # Set the match percentage to 70%
+        # NOTE(review): the value is hard-coded and not taken from the model response,
+        # so the score computed below always evaluates to "Moderate".
+        match_percentage = 70
+        # Determine the Job Description Match Score
+        # Thresholds: >= 80 is "High", >= 50 is "Moderate", anything lower is "Low".
+        job_description_match_score = "High" if match_percentage >= 80 else "Moderate" if match_percentage >= 50 else "Low"
+        # Create a DataFrame with the extracted data
+        # Each value is a one-element list, so the DataFrame has a single row for this resume.
+        data = {
+            'Candidate_Name': [name],
+            'Email': [email],
+            'Contact': [contact],
+            'Direct_Team_Leadership_Experience_Years': [direct_team_leadership_years],
+            'Direct_Management_Experience_Years': [direct_management_years],
+            'Relevant_Skills_and_Qualifications': ["Placeholder for skills"],  # To be updated with real data
+            'Educational_Background': ["Placeholder for education"],  # To be updated with real data
+            'Model_Response': [response_text_clean],
+            'Match_Percentage': [match_percentage],
+            'Job_Description_Match_Score': [job_description_match_score]
+        }
+        df = pd.DataFrame(data)
+        # Display the DataFrame as a table
+        st.subheader("Analysis Results")
+        st.dataframe(df)
+        # Save the DataFrame to a CSV file
+        # NOTE(review): this writes the CSV under a fixed relative filename; the download
+        # button below serialises the DataFrame again and does not read this file.
+        csv_filename = "ATS_Analysis_Results.csv"
+        df.to_csv(csv_filename, index=False)
+        # Button to download CSV
+        st.download_button(
+            label="Download Results as CSV",
+            data=df.to_csv(index=False),
+            file_name=csv_filename,
+            mime="text/csv"
+        )
 else:
+    st.warning("Please upload a resume and provide a job description for analysis.")