Spaces:

Manojajj
/

Resume_parser_excel_to_excel

Sleeping

App Files Files Community

Resume_parser_excel_to_excel / app.py

Manojajj

Create app.py

dd6777e verified 3 months ago

raw

history blame contribute delete

2.55 kB

	import gradio as gr
	import torch
	from transformers import pipeline
	import pandas as pd
	import re

	# Named Entity Recognition pipeline, built once when the module is loaded
	# and reused by parse_resume for every resume.
	nlp = pipeline(
	    "ner",
	    model="dbmdz/bert-large-cased-finetuned-conll03-english",
	    framework="pt",
	)

	def parse_resume(resume_text):
	    """Extract the first phone number, first email address, and skills from a resume.

	    Args:
	        resume_text: Raw resume text. ``None`` and float NaN (what pandas
	            yields for an empty CSV cell) are treated as empty text; any
	            other non-string value is converted with ``str()``.

	    Returns:
	        A dict with the keys ``"Phone"``, ``"Email"`` and ``"Skills"``.
	        A missing phone or email is reported as ``"Not found"``; when no
	        skills are detected the value is ``"No skills found"``.
	    """
	    # Normalise the input first: re and the NER pipeline both require a str,
	    # and one blank cell must not abort a whole batch.
	    if resume_text is None:
	        resume_text = ""
	    elif isinstance(resume_text, float) and resume_text != resume_text:
	        # NaN is the only float that is not equal to itself.
	        resume_text = ""
	    elif not isinstance(resume_text, str):
	        resume_text = str(resume_text)

	    phone_pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
	    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

	    # Only the first occurrence of each is reported.
	    phone_match = re.search(phone_pattern, resume_text)
	    email_match = re.search(email_pattern, resume_text)

	    # Entities whose tag contains "MISC" are treated as skills. Blank text
	    # cannot contain entities, so the model call is skipped for it.
	    skill_words = []
	    if resume_text.strip():
	        skill_words = [
	            entity['word']
	            for entity in nlp(resume_text)
	            if 'MISC' in entity['entity']
	        ]

	    return {
	        "Phone": phone_match.group(0) if phone_match else "Not found",
	        "Email": email_match.group(0) if email_match else "Not found",
	        "Skills": ", ".join(skill_words) if skill_words else "No skills found",
	    }

	def process_resumes(csv_file):
	    """Parse every resume in an uploaded CSV and write the results to an Excel file.

	    Args:
	        csv_file: The uploaded file, given either as a path string or as an
	            object that exposes the path through its ``name`` attribute.

	    Returns:
	        The path of the generated workbook, ``"parsed_resumes.xlsx"``, which
	        holds one row of parsed fields per row of the input CSV.

	    Raises:
	        gr.Error: If no file was provided or the CSV has no ``Resume`` column.
	    """
	    if csv_file is None:
	        raise gr.Error("Error: Please upload a CSV file.")

	    # Accept both a plain path string and a file wrapper carrying ``.name``.
	    csv_path = csv_file if isinstance(csv_file, str) else csv_file.name
	    df = pd.read_csv(csv_path)

	    # The resume text is expected in a column named 'Resume'. Raising (rather
	    # than returning the message) keeps the text from being mistaken for the
	    # path of the output file.
	    if 'Resume' not in df.columns:
	        raise gr.Error("Error: The CSV file must contain a 'Resume' column.")

	    # One parsed dict per CSV row, collected into a DataFrame.
	    parsed_df = pd.DataFrame([parse_resume(text) for text in df['Resume']])

	    output_file = "parsed_resumes.xlsx"
	    parsed_df.to_excel(output_file, index=False)

	    return output_file

	# Build the Gradio UI (one CSV upload in, one Excel download out) and start it.
	# The description lists only the fields that the output workbook contains.
	gr.Interface(
	    fn=process_resumes,
	    inputs=gr.File(file_count="single", label="Upload Resume CSV"),
	    outputs=gr.File(label="Download Parsed Data (Excel)"),
	    title="AI Resume Parser",
	    description="Upload a CSV file with a 'Resume' column containing resume texts to extract Phone, Email, and Skills. The results will be saved in an Excel file."
	).launch()