Spaces:

rexoscare
/

Resume_screener

Build error

App Files Files Community

Resume_screener / app.py

rexoscare

Update app.py

638e28f over 2 years ago

raw

history blame contribute delete

No virus

2.58 kB

	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import pandas as pd
	import gradio as gr
	import pdfplumber
	import texthero as hero
	from texthero import preprocessing as ppe
	import re


	# Multilingual sentence-embedding checkpoint; the model is built once at
	# module level and reused by sent_similarity for every comparison.
	MODEL_NAME = 'sentence-transformers/paraphrase-xlm-r-multilingual-v1'
	model = SentenceTransformer(MODEL_NAME)


	def remove_special_characters(text):
	    """Return ``text`` with every character outside a-z / A-Z replaced by a space.

	    Digits, punctuation, whitespace and non-ASCII letters are each swapped
	    for a single space, so the result has the same length as the input.
	    """
	    return re.sub(r'[^a-zA-Z]', ' ', text)


	def opentxt(filepath):
	    """Load the job-description text file and return its cleaned text.

	    The file is read with undecodable bytes ignored, newlines become spaces,
	    ``www.`` links are dropped, every non-letter character is replaced by a
	    space, and the texthero pipeline (fillna, remove_urls,
	    remove_whitespace) is applied to the result.

	    Args:
	        filepath: Path of the plain-text (.txt) job description.

	    Returns:
	        A one-element pandas Series of ``str`` holding the cleaned text.
	    """
	    # Context manager so the handle is closed even if reading fails.
	    with open(filepath, errors="ignore") as handle:
	        raw_text = handle.read()
	    raw_text = raw_text.replace('\n', ' ')
	    # Drop "www." links while the dots and slashes that delimit them are
	    # still present; remove_special_characters blanks them out next.
	    raw_text = re.sub(r'www\.\S+', '', raw_text)
	    frame = pd.DataFrame([raw_text], columns=['text'])
	    frame['text'] = frame['text'].apply(remove_special_characters)
	    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
	    frame['cleaned_text'] = hero.clean(frame['text'], custom_pipeline)
	    return frame['cleaned_text'].astype(str)


	def pdftotext(filepath):
	    """Extract and clean the text of the first page of a PDF resume.

	    Only page 0 is read; later pages are not considered. Newlines become
	    spaces, ``www.`` links are dropped, every non-letter character is
	    replaced by a space, and the texthero pipeline (fillna, remove_urls,
	    remove_whitespace) is applied to the result.

	    Args:
	        filepath: Path of the PDF (.pdf) resume.

	    Returns:
	        A one-element pandas Series of ``str`` holding the cleaned text.

	    Raises:
	        IndexError: If the PDF contains no pages.
	    """
	    with pdfplumber.open(filepath) as pdf:
	        first_page = pdf.pages[0]
	        # Guard against a None result for a page without extractable text
	        # (e.g. a scanned image), which would otherwise crash on .replace.
	        page_text = first_page.extract_text(x_tolerance=3, y_tolerance=3) or ''
	    page_text = page_text.replace('\n', ' ')
	    # Drop "www." links while the dots and slashes that delimit them are
	    # still present; remove_special_characters blanks them out next.
	    page_text = re.sub(r'www\.\S+', '', page_text)
	    frame = pd.DataFrame([page_text], columns=['text'])
	    frame['text'] = frame['text'].apply(remove_special_characters)
	    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
	    frame['cleaned_text'] = hero.clean(frame['text'], custom_pipeline)
	    return frame['cleaned_text'].astype(str)


	def sent_similarity(filepath_1, filepath_2):
	    """Score how closely a resume matches a job description, as a percentage.

	    Args:
	        filepath_1: Uploaded resume; its ``.name`` is handed to ``pdftotext``.
	        filepath_2: Uploaded job description; its ``.name`` is handed to
	            ``opentxt``.

	    Returns:
	        The cosine similarity of the two sentence embeddings, multiplied by
	        100 and rounded to two decimal places.
	    """
	    resume_parts = pdftotext(filepath_1.name)
	    job_parts = opentxt(filepath_2.name)
	    # Each helper returns a sequence of strings; glue each into one sentence.
	    resume_text = ''.join(resume_parts)
	    job_text = ''.join(job_parts)
	    embeddings = model.encode([resume_text, job_text])
	    # cosine_similarity expects 2-D inputs, hence the (1, -1) reshape.
	    resume_vec = embeddings[0].reshape(1, -1)
	    job_vec = embeddings[1].reshape(1, -1)
	    score = cosine_similarity(resume_vec, job_vec)[0][0]
	    return round(score * 100, 2)


	# Upload widgets: the resume (PDF) first, then the job description (text).
	input_1 = gr.inputs.File(
	    file_count="single",
	    type="file",
	    label='Upload the Resume (.pdf)',
	    optional=False,
	)
	input_2 = gr.inputs.File(
	    file_count="single",
	    type="file",
	    label='Upload the Job Description (.txt)',
	    optional=False,
	)

	title = "Resume Screener"
	description = "Upload your resume(.pdf) and the job description(.txt) and let the sentence similarity model display the similarity percentage !!!"

	# Wire sent_similarity to the two uploads and show its score as a label.
	iface = gr.Interface(
	    fn=sent_similarity,
	    inputs=[input_1, input_2],
	    outputs="label",
	    title=title,
	    description=description,
	)

	# Start the web UI only when the file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()