Bhuvaneshvar
/

cmrit

Model card Files Files and versions Community

cmrit / cmrithackathon-master /data_loader.py

Bhuvaneshvar's picture

Upload 2116 files

6370773 verified about 2 months ago

1.17 kB

	import pandas as pd
	import re

	# Skill keywords (all lowercase) that are searched for in job descriptions.
	# Order matters: matched skills are reported in the order listed here.
	COMMON_SKILLS = [
	    'python',
	    'machine learning',
	    'data analysis',
	    'sql',
	    'deep learning',
	    'statistics',
	    'data visualization',
	    'nlp',
	    'big data',
	    'java',
	    'c++',
	    'excel',
	    'r',
	    'cloud computing',
	    'data mining',
	    'tableau',
	    'power bi',
	    'data engineering',
	    'pandas',
	    'numpy',
	    'tensorflow',
	    'keras',
	]

	def load_data(file_path):
	    """
	    Reads job postings from a CSV file and prepares them for later use.

	    Only the 'job_title', 'description_text' and 'salary_formatted' columns
	    are kept, every row with a missing value in any of them is discarded,
	    and a 'skills' column is added by applying extract_skills to each
	    description.
	    """
	    wanted_columns = ['job_title', 'description_text', 'salary_formatted']
	    complete_rows = pd.read_csv(file_path)[wanted_columns].dropna()

	    # Derive the skills column from the free-text descriptions
	    return complete_rows.assign(
	        skills=complete_rows['description_text'].apply(extract_skills)
	    )

	def _skill_pattern(skill):
	    """
	    Builds a regex that matches `skill` literally as a whole token.
	    """
	    # Escape the skill so regex metacharacters (e.g. the '+' in 'c++') are
	    # matched literally instead of being parsed as quantifiers.
	    escaped = re.escape(skill)
	    # A word boundary is only meaningful next to a word character: after
	    # the trailing '+' of 'c++' a '\b' would never match before a space or
	    # punctuation, so it is applied only on edges that are word characters.
	    prefix = r'\b' if re.match(r'\w', skill) else ''
	    suffix = r'\b' if re.search(r'\w$', skill) else ''
	    return prefix + escaped + suffix

	def extract_skills(description, skills=None):
	    """
	    Extracts a list of common skills found in the Job Description.

	    Args:
	        description: Job description text. Matching is case-insensitive
	            for lowercase skill names because the text is lowercased
	            first. Non-string values (e.g. None or NaN) yield ''.
	        skills: Optional iterable of lowercase skill names to search for.
	            Defaults to the module-level COMMON_SKILLS list.

	    Returns:
	        The matched skills joined by ', ' in the order they appear in
	        `skills`, or an empty string when nothing matches.
	    """
	    if not isinstance(description, str):
	        return ''
	    if skills is None:
	        skills = COMMON_SKILLS

	    description = description.lower()
	    skills_found = [
	        skill for skill in skills
	        if re.search(_skill_pattern(skill), description)
	    ]
	    # Joining an empty list already produces ''
	    return ', '.join(skills_found)