Spaces:
Sleeping
Sleeping
File size: 4,170 Bytes
67ba08f 71bc45c 67ba08f 71bc45c 7ac0a55 c0605d9 7ac0a55 71bc45c 7ac0a55 67ba08f 71bc45c 5206581 71bc45c 67ba08f 71bc45c 67ba08f c0605d9 0e4d704 c0605d9 67ba08f c0605d9 67ba08f 71bc45c 67ba08f c0605d9 67ba08f c0605d9 67ba08f 71bc45c 67ba08f 71bc45c 67ba08f 7ac0a55 919b346 7ac0a55 919b346 7ac0a55 71bc45c 7ac0a55 71bc45c 7ac0a55 71bc45c 67ba08f 7ac0a55 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pdfplumber
import re
import openpyxl
import os
from huggingface_hub import login
# Log in to the Hugging Face Hub with a user-supplied access token.
def authenticate_hf(token):
    """Authenticate against the Hugging Face Hub.

    Args:
        token: Hugging Face access token string entered in the UI.

    Returns:
        A human-readable status message — success text, or the error detail.
    """
    try:
        login(token)
    except Exception as exc:  # surface any failure to the UI as text
        return f"Error: {exc}"
    return "Authentication Successful"
# Initialize the model and tokenizer
# NOTE: these load at import time (network/disk heavy); the same instances are
# shared by every call to parse_resume below.
model_name = "Qwen/Qwen2.5-1.5B-Instruct" # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Extract all selectable text from a PDF file.
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        All page text joined together. Pages with no extractable text layer
        contribute an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() returns None for image-only pages; the previous
        # `text += page.extract_text()` raised TypeError on such pages.
        return "".join(page.extract_text() or "" for page in pdf.pages)
# Ask the LLM for each field of interest, then post-validate with regexes.
def parse_resume(text):
    """Extract name, email, phone, and skills from resume text via the LLM.

    Args:
        text: Raw resume text (as produced by extract_text_from_pdf).

    Returns:
        Dict with keys "name", "email", "phone", "skills". Email/phone are
        regex-validated and may be None when no plausible match is found.
    """
    prompts = {
        "name": "Extract the name from this resume:\n",
        "email": "Extract the email address from this resume:\n",
        "phone": "Extract the phone number from this resume:\n",
        "skills": "Extract the technical skills from this resume:\n"
    }
    results = {}
    for key, prompt in prompts.items():
        # Truncate so prompt + resume fits the model's context window.
        inputs = tokenizer(prompt + text, return_tensors="pt", truncation=True)
        # The old max_length=50000 bounded the TOTAL sequence (prompt included)
        # and far exceeded the context window; bound only the new tokens.
        outputs = model.generate(**inputs, max_new_tokens=64)
        # Slice off the echoed prompt tokens so the regexes below see only
        # the model's answer, not the resume text itself.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True)
        if key == 'email':
            # Validate email format in the model's answer
            email = re.findall(r'\S+@\S+', response)
            results[key] = email[0] if email else None
        elif key == 'phone':
            # Validate phone number format (10-15 consecutive digits)
            phone = re.findall(r'\b\d{10,15}\b', response)
            results[key] = phone[0] if phone else None
        else:
            # "name" and "skills" are taken verbatim from the model
            results[key] = response
    return results
# Persist the parsed records to a single-sheet Excel workbook.
def save_to_excel(parsed_data, output_file):
    """Write parsed resume records to an Excel file.

    Args:
        parsed_data: Iterable of dicts with "name"/"email"/"phone"/"skills"
            keys (missing keys are written as empty cells).
        output_file: Destination .xlsx path; overwritten if it exists.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(["Name", "Email", "Phone", "Skills"])
    for record in parsed_data:
        # .get() tolerates a record missing a field instead of raising KeyError
        ws.append([record.get("name"), record.get("email"),
                   record.get("phone"), record.get("skills")])
    wb.save(output_file)
# Pipeline entry point wired to the Gradio button: PDFs in, Excel path out.
def process_pdfs(pdfs):
    """Extract and parse each uploaded resume, then save results to Excel.

    Args:
        pdfs: List of uploaded files. With gr.File(type="filepath") these are
            plain path strings; older Gradio versions pass objects with .name.

    Returns:
        Path of the generated Excel file ("parsed_resumes.xlsx").
    """
    parsed_data = []
    for pdf in pdfs:
        # type="filepath" yields str paths; `pdf.name` would raise
        # AttributeError on them. Accept both forms.
        path = pdf if isinstance(pdf, str) else pdf.name
        text = extract_text_from_pdf(path)
        parsed_data.append(parse_resume(text))
    output_file = "parsed_resumes.xlsx"
    save_to_excel(parsed_data, output_file)
    return output_file
# Build the Gradio UI: an authentication section followed by the resume
# uploader, then launch the app.
with gr.Blocks() as demo:
    # Hugging Face branding at the top of the page
    gr.Image("https://huggingface.co/front/assets/huggingface_logo.svg", label="Hugging Face Logo", width=150)

    # --- Authentication section ---
    gr.Markdown("### Hugging Face Authentication")
    token_box = gr.Textbox(label="Hugging Face API Token", placeholder="Enter your Hugging Face token here", type="password", value="")
    auth_btn = gr.Button("Authenticate")
    status_box = gr.Textbox(label="Authentication Status", interactive=False)
    auth_btn.click(authenticate_hf, inputs=token_box, outputs=status_box)

    # --- Resume upload and processing section ---
    gr.Markdown("### Upload PDF Resumes")
    uploads = gr.File(file_count="multiple", type="filepath")
    result_file = gr.File()
    run_btn = gr.Button("Process Resumes")
    run_btn.click(process_pdfs, inputs=uploads, outputs=result_file)

# Start the web server
demo.launch()
|