Spaces:
Sleeping
Sleeping
File size: 4,170 Bytes
67ba08f 71bc45c 67ba08f 71bc45c 7ac0a55 c0605d9 7ac0a55 71bc45c 7ac0a55 67ba08f 71bc45c 5206581 71bc45c 67ba08f 71bc45c 67ba08f c0605d9 0e4d704 c0605d9 67ba08f c0605d9 67ba08f 71bc45c 67ba08f c0605d9 67ba08f c0605d9 67ba08f 71bc45c 67ba08f 71bc45c 67ba08f 7ac0a55 919b346 7ac0a55 919b346 7ac0a55 71bc45c 7ac0a55 71bc45c 7ac0a55 71bc45c 67ba08f 7ac0a55 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pdfplumber
import re
import openpyxl
import os
from huggingface_hub import login
# Log in to the Hugging Face Hub with a user-supplied access token.
def authenticate_hf(token):
    """Authenticate against the Hugging Face Hub.

    Args:
        token: Hugging Face access token string entered in the UI.

    Returns:
        A human-readable status message — success text, or the error detail.
    """
    try:
        login(token)
    except Exception as exc:  # surface any failure to the UI as text
        return f"Error: {exc}"
    return "Authentication Successful"
# Initialize the model and tokenizer
# NOTE: these load at import time (network/disk heavy); the same instances are
# shared by every call to parse_resume below.
model_name = "Qwen/Qwen2.5-1.5B-Instruct" # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Extract all selectable text from a PDF file.
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        All page text joined together. Pages with no extractable text layer
        contribute an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() returns None for image-only pages; the previous
        # `text += page.extract_text()` raised TypeError on such pages.
        return "".join(page.extract_text() or "" for page in pdf.pages)
# Ask the LLM for each field of interest, then post-validate with regexes.
def parse_resume(text):
    """Extract name, email, phone, and skills from resume text via the LLM.

    Args:
        text: Raw resume text (as produced by extract_text_from_pdf).

    Returns:
        Dict with keys "name", "email", "phone", "skills". Email/phone are
        regex-validated and may be None when no plausible match is found.
    """
    prompts = {
        "name": "Extract the name from this resume:\n",
        "email": "Extract the email address from this resume:\n",
        "phone": "Extract the phone number from this resume:\n",
        "skills": "Extract the technical skills from this resume:\n"
    }
    results = {}
    for key, prompt in prompts.items():
        # Truncate so prompt + resume fits the model's context window.
        inputs = tokenizer(prompt + text, return_tensors="pt", truncation=True)
        # The old max_length=50000 bounded the TOTAL sequence (prompt included)
        # and far exceeded the context window; bound only the new tokens.
        outputs = model.generate(**inputs, max_new_tokens=64)
        # Slice off the echoed prompt tokens so the regexes below see only
        # the model's answer, not the resume text itself.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True)
        if key == 'email':
            # Validate email format in the model's answer
            email = re.findall(r'\S+@\S+', response)
            results[key] = email[0] if email else None
        elif key == 'phone':
            # Validate phone number format (10-15 consecutive digits)
            phone = re.findall(r'\b\d{10,15}\b', response)
            results[key] = phone[0] if phone else None
        else:
            # "name" and "skills" are taken verbatim from the model
            results[key] = response
    return results
# Persist the parsed records to a single-sheet Excel workbook.
def save_to_excel(parsed_data, output_file):
    """Write parsed resume records to an Excel file.

    Args:
        parsed_data: Iterable of dicts with "name"/"email"/"phone"/"skills"
            keys (missing keys are written as empty cells).
        output_file: Destination .xlsx path; overwritten if it exists.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["Name", "Email", "Phone", "Skills"])
    for record in parsed_data:
        # .get() tolerates a record missing a field instead of raising KeyError
        ws.append([record.get("name"), record.get("email"),
                   record.get("phone"), record.get("skills")])
    wb.save(output_file)
# Pipeline entry point wired to the Gradio button: PDFs in, Excel path out.
def process_pdfs(pdfs):
    """Extract and parse each uploaded resume, then save results to Excel.

    Args:
        pdfs: List of uploaded files. With gr.File(type="filepath") these are
            plain path strings; older Gradio versions pass objects with .name.

    Returns:
        Path of the generated Excel file ("parsed_resumes.xlsx").
    """
    parsed_data = []
    for pdf in pdfs:
        # type="filepath" yields str paths; `pdf.name` would raise
        # AttributeError on them. Accept both forms.
        path = pdf if isinstance(pdf, str) else pdf.name
        text = extract_text_from_pdf(path)
        parsed_data.append(parse_resume(text))
    output_file = "parsed_resumes.xlsx"
    save_to_excel(parsed_data, output_file)
    return output_file
# Build the Gradio UI: an authentication section followed by the resume
# uploader, then launch the app.
with gr.Blocks() as demo:
    # Hugging Face branding at the top of the page
    gr.Image("https://huggingface.co/front/assets/huggingface_logo.svg", label="Hugging Face Logo", width=150)

    # --- Authentication section ---
    gr.Markdown("### Hugging Face Authentication")
    token_box = gr.Textbox(label="Hugging Face API Token", placeholder="Enter your Hugging Face token here", type="password", value="")
    auth_btn = gr.Button("Authenticate")
    status_box = gr.Textbox(label="Authentication Status", interactive=False)
    auth_btn.click(authenticate_hf, inputs=token_box, outputs=status_box)

    # --- Resume upload and processing section ---
    gr.Markdown("### Upload PDF Resumes")
    uploads = gr.File(file_count="multiple", type="filepath")
    result_file = gr.File()
    run_btn = gr.Button("Process Resumes")
    run_btn.click(process_pdfs, inputs=uploads, outputs=result_file)

# Start the web server
demo.launch()
|