# Hugging Face Space by Manojajj — "Update app.py" (commit 0e4d704, verified)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pdfplumber
import re
import openpyxl
import os
from huggingface_hub import login
# Log in to the Hugging Face Hub with a user-supplied access token.
def authenticate_hf(token):
    """Attempt a Hub login with *token*; return a human-readable status string."""
    try:
        login(token)
    except Exception as e:
        # Surface any failure to the UI as text instead of raising.
        return f"Error: {e}"
    return "Authentication Successful"
# Initialize the model and tokenizer once at import time.
# NOTE(review): this download runs before any token is entered in the UI —
# a gated model would fail to load unless credentials are already cached.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # Replace with the actual model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    ``page.extract_text()`` returns ``None`` for pages with no extractable
    text (e.g. scanned images); the original code crashed with a
    ``TypeError`` when concatenating it. Such pages now contribute "".
    """
    with pdfplumber.open(pdf_path) as pdf:
        # str.join avoids the quadratic cost of repeated += concatenation.
        return "".join(page.extract_text() or "" for page in pdf.pages)
# Function to parse the resume text for name, email, phone, and skills
def parse_resume(text):
    """Extract contact fields from resume *text* using the LLM.

    Returns a dict with keys ``name``, ``email``, ``phone`` and ``skills``.
    Email and phone are validated with regexes and set to ``None`` when no
    match is found; name and skills hold the raw model response.
    """
    prompts = {
        "name": "Extract the name from this resume:\n",
        "email": "Extract the email address from this resume:\n",
        "phone": "Extract the phone number from this resume:\n",
        "skills": "Extract the technical skills from this resume:\n"
    }
    results = {}
    for key, prompt in prompts.items():
        # Truncate to the model's context window so generate() cannot overflow.
        inputs = tokenizer(prompt + text, return_tensors="pt", truncation=True)
        # max_new_tokens bounds only the continuation; the original
        # max_length=50000 counted the prompt too and allowed runaway output.
        outputs = model.generate(**inputs, max_new_tokens=64)
        # Decode only the newly generated tokens — the raw output echoes the
        # prompt and resume, which would pollute the regex matches below.
        prompt_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        if key == 'email':
            # Use regex to validate email format
            email = re.findall(r'\S+@\S+', response)
            results[key] = email[0] if email else None
        elif key == 'phone':
            # Use regex to validate phone number format
            phone = re.findall(r'\b\d{10,15}\b', response)
            results[key] = phone[0] if phone else None
        else:
            # name / skills: keep the model's answer verbatim.
            results[key] = response
    return results
# Write the parsed resume records to an .xlsx spreadsheet, one row per resume.
def save_to_excel(parsed_data, output_file):
    """Save a list of parsed-resume dicts to *output_file* with a header row."""
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.append(["Name", "Email", "Phone", "Skills"])
    for record in parsed_data:
        sheet.append([record["name"], record["email"], record["phone"], record["skills"]])
    workbook.save(output_file)
# Function to process PDF files and output an Excel file
def process_pdfs(pdfs):
    """Parse each uploaded PDF resume and return the path of the Excel output.

    Accepts both plain string paths (what ``gr.File(type="filepath")``
    delivers) and file-like objects exposing ``.name`` — the original read
    ``pdf.name`` unconditionally, which raises AttributeError on ``str``.
    """
    parsed_data = []
    for pdf in pdfs:
        path = pdf if isinstance(pdf, str) else pdf.name
        # Extract text from the PDF, then parse it for the relevant fields.
        text = extract_text_from_pdf(path)
        parsed_data.append(parse_resume(text))
    # Save the parsed data to an Excel file
    output_file = "parsed_resumes.xlsx"
    save_to_excel(parsed_data, output_file)
    return output_file
# ---------------------------------------------------------------------------
# Gradio UI: token authentication section + PDF upload section, wired to the
# handler functions defined above.
with gr.Blocks() as app:
    # Hugging Face branding at the top of the page.
    gr.Image("https://huggingface.co/front/assets/huggingface_logo.svg", label="Hugging Face Logo", width=150)

    gr.Markdown("### Hugging Face Authentication")
    token_box = gr.Textbox(label="Hugging Face API Token", placeholder="Enter your Hugging Face token here", type="password", value="")
    auth_button = gr.Button("Authenticate")
    status_box = gr.Textbox(label="Authentication Status", interactive=False)
    # Run the Hub login when the button is clicked and show the result.
    auth_button.click(authenticate_hf, inputs=token_box, outputs=status_box)

    gr.Markdown("### Upload PDF Resumes")
    uploads = gr.File(file_count="multiple", type="filepath")
    excel_out = gr.File()
    run_button = gr.Button("Process Resumes")
    # Parse the uploaded resumes and offer the Excel file for download.
    run_button.click(process_pdfs, inputs=uploads, outputs=excel_out)

# Launch the app
app.launch()