Spaces:

andrecatarino
/

Energy-Price-Forecasting

Runtime error

Energy-Price-Forecasting / app /utils.py

André Catarino

wip

d4c62b5 over 1 year ago

2.77 kB

	import pandas as pd
	from itertools import combinations
	import matplotlib.pyplot as plt

	def load_energy_data(file_path:str="../data/raw/energy_dataset.csv", date:str="time") -> pd.DataFrame:
	    """
	    Read the energy CSV file into a dataframe
	    :param file_path: location of the CSV file, forwarded to pandas.read_csv
	    :param date: name of the column that read_csv is asked to parse as dates
	    :return: dataframe with the contents of the CSV file
	    """
	    return pd.read_csv(file_path, parse_dates=[date])

	def load_weather_data(file_path:str="../data/raw/weather_features.csv", date:str="dt_iso") -> pd.DataFrame:
	    """
	    Read the weather CSV file into a dataframe
	    :param file_path: location of the CSV file, forwarded to pandas.read_csv
	    :param date: name of the column that read_csv is asked to parse as dates
	    :return: dataframe with the contents of the CSV file
	    """
	    return pd.read_csv(file_path, parse_dates=[date])

	def load_preprocessed_data(file_path:str="../data/processed/processed_merged_data.pkl") -> pd.DataFrame:
	    """
	    Load a pickled object from disk with pandas.read_pickle
	    :param file_path: location of the pickle file
	    :return: the unpickled object (expected to be a dataframe)
	    """
	    # NOTE: unpickling can execute arbitrary code, so file_path should only
	    # point at files produced by a trusted source.
	    return pd.read_pickle(file_path)

	def id_outliers(df:pd.DataFrame)-> pd.DataFrame:
	    """
	    Identify outliers for each column of a dataframe
	    A value is counted as an outlier when it is strictly below
	    q1 - 1.5 * iqr or strictly above q3 + 1.5 * iqr, where q1 and q3 are
	    the 0.25 and 0.75 quantiles of the column and iqr = q3 - q1.
	    :param df: dataframe; quantile() is called on every column
	        (presumably all columns are numeric - confirm against callers)
	    :return: dataframe with lower and upper bound and number of outliers,
	        indexed by the column names of df
	    """
	    result_data = []
	    for col_name in df.columns:
	        column = df[col_name]
	        q1 = column.quantile(0.25)
	        q3 = column.quantile(0.75)
	        iqr = q3 - q1
	        lower_bound = q1 - 1.5 * iqr
	        upper_bound = q3 + 1.5 * iqr
	        # Element-wise OR of the two boolean masks; summing the mask counts
	        # the outliers without materialising a filtered copy of the frame.
	        is_outlier = (column < lower_bound) | (column > upper_bound)
	        result_data.append([lower_bound, upper_bound, int(is_outlier.sum())])
	    return pd.DataFrame(
	        result_data,
	        columns=['lower_bound', 'upper_bound', 'n_outliers'],
	        index=df.columns,
	    )

	def filtered_heatmap(df:pd.DataFrame, absthreshold:int=0) -> pd.DataFrame:
	    """
	    Reduce a correlation matrix to the labels involved in a strong pair
	    A label is kept when it takes part in at least one pair of distinct
	    columns whose absolute value in the matrix reaches the threshold.
	    :param df: correlation matrix (looked up by column label on both axes)
	    :param absthreshold: absolute value threshold
	    :return: correlation matrix restricted to the kept labels, sorted
	    """
	    kept_labels = {
	        label
	        for first, second in combinations(df.columns, 2)
	        if (abs(df.loc[first, second]) >= absthreshold) and (first != second)
	        for label in (first, second)
	    }
	    ordered = sorted(kept_labels)
	    return df.loc[ordered, ordered]

	def compare_metrics(eval_df:pd.DataFrame, stat:str, metrics:list) -> None:
	    """
	    Compare the mean or standard deviation of each metric for each model
	    Draws one bar chart per metric side by side, saves the figure to
	    ../plots/metrics_<stat>.png and then shows it.
	    :param eval_df: dataframe with evaluation metrics for each model; must
	        have a 'Model' column and one column per entry of metrics. Each
	        metric cell is reduced with the chosen numpy function (presumably a
	        sequence of scores per model - confirm against callers)
	    :param stat: name of the numpy reduction to apply, e.g. 'mean' or 'std'
	    :param metrics: list of metrics to compare
	    :return: None
	    :raises ValueError: if stat does not name a callable in numpy
	    """
	    # Imported locally because the module-level imports do not provide np.
	    import numpy as np

	    # Look the reduction up by name instead of eval()-ing a formatted string.
	    reducer = getattr(np, stat, None)
	    if not callable(reducer):
	        raise ValueError(f"stat must name a numpy function such as 'mean' or 'std', got {stat!r}")

	    # squeeze=False keeps axes two-dimensional, so a single metric works too.
	    fig, axes = plt.subplots(1, len(metrics), figsize=(15, 4), squeeze=False)

	    for ax, metric in zip(axes[0], metrics):
	        heights = eval_df[metric].apply(lambda values: reducer(values))
	        ax.bar(eval_df['Model'], heights, color='skyblue')
	        ax.set_title(metric)
	        ax.set_xlabel('Model')
	        ax.set_ylabel(metric)
	        # Restyle the tick labels the bar plot created rather than
	        # overwriting them, which avoids the fixed-locator warning.
	        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
	        ax.yaxis.grid()

	    # Adjust the layout to avoid overlapping titles
	    fig.tight_layout()

	    # Save the plot
	    fig.savefig(f'../plots/metrics_{stat}.png')

	    # Show the plot
	    plt.show()