# Hugging Face Space: resume parser — extracts name/email/phone/skills from
# uploaded PDF resumes with an LLM and exports the results to Excel.
import os
import re

import gradio as gr
import openpyxl
import pdfplumber
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Function to authenticate Hugging Face using token | |
def authenticate_hf(token): | |
try: | |
login(token) | |
return "Authentication Successful" | |
except Exception as e: | |
return f"Error: {e}" | |
# Initialize the model and tokenizer | |
model_name = "Qwen/Qwen2.5-1.5B-Instruct" # Replace with the actual model name | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
model = AutoModelForCausalLM.from_pretrained(model_name) | |
# Function to extract text from PDF | |
def extract_text_from_pdf(pdf_path): | |
with pdfplumber.open(pdf_path) as pdf: | |
text = '' | |
for page in pdf.pages: | |
text += page.extract_text() | |
return text | |
# Function to parse the resume text for name, email, phone, and skills | |
def parse_resume(text): | |
# Define the prompts for each type of information | |
prompts = { | |
"name": "Extract the name from this resume:\n", | |
"email": "Extract the email address from this resume:\n", | |
"phone": "Extract the phone number from this resume:\n", | |
"skills": "Extract the technical skills from this resume:\n" | |
} | |
results = {} | |
for key, prompt in prompts.items(): | |
# Generate model response for each field | |
inputs = tokenizer(prompt + text, return_tensors="pt") | |
outputs = model.generate(**inputs, max_length=50000) | |
response = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
if key == 'email': | |
# Use regex to validate email format | |
email = re.findall(r'\S+@\S+', response) | |
results[key] = email[0] if email else None | |
elif key == 'phone': | |
# Use regex to validate phone number format | |
phone = re.findall(r'\b\d{10,15}\b', response) | |
results[key] = phone[0] if phone else None | |
elif key == 'skills': | |
# Extract technical skills | |
results[key] = response | |
else: | |
results[key] = response | |
return results | |
# Function to save parsed data to Excel file | |
def save_to_excel(parsed_data, output_file): | |
wb = openpyxl.Workbook() | |
ws = wb.active | |
ws.append(["Name", "Email", "Phone", "Skills"]) | |
for data in parsed_data: | |
ws.append([data["name"], data["email"], data["phone"], data["skills"]]) | |
wb.save(output_file) | |
# Function to process PDF files and output an Excel file | |
def process_pdfs(pdfs): | |
parsed_data = [] | |
for pdf in pdfs: | |
# Extract text from the PDF | |
text = extract_text_from_pdf(pdf.name) | |
# Parse the text for relevant details | |
parsed_info = parse_resume(text) | |
# Add parsed information to the list | |
parsed_data.append(parsed_info) | |
# Save the parsed data to an Excel file | |
output_file = "parsed_resumes.xlsx" | |
save_to_excel(parsed_data, output_file) | |
return output_file | |
# Gradio interface setup with Hugging Face API token input | |
with gr.Blocks() as app: | |
# Adding Hugging Face logo | |
gr.Image("https://huggingface.co/front/assets/huggingface_logo.svg", label="Hugging Face Logo", width=150) | |
gr.Markdown("### Hugging Face Authentication") | |
# Input field for Hugging Face API token (blank space) | |
hf_token = gr.Textbox(label="Hugging Face API Token", placeholder="Enter your Hugging Face token here", type="password", value="") | |
login_button = gr.Button("Authenticate") | |
auth_status = gr.Textbox(label="Authentication Status", interactive=False) | |
# Authenticate Hugging Face model when button is clicked | |
login_button.click(authenticate_hf, inputs=hf_token, outputs=auth_status) | |
gr.Markdown("### Upload PDF Resumes") | |
# File input to upload resumes (use "filepath" for type) | |
pdfs_input = gr.File(file_count="multiple", type="filepath") | |
output_file = gr.File() | |
# Process the PDFs and parse them | |
process_button = gr.Button("Process Resumes") | |
process_button.click(process_pdfs, inputs=pdfs_input, outputs=output_file) | |
# Launch the app | |
app.launch() | |