Spaces:

wnstnb
/

gamedayspx-monitor

Sleeping

App Files Files Community

gamedayspx-monitor / uni_model.py

wnstnb

updating charts

4b357c0 over 1 year ago

raw

history blame contribute delete

7.42 kB

	import numpy as np
	import warnings
	# Silence warnings (all categories, then FutureWarning explicitly).
	# NOTE(review): filters installed inside `warnings.catch_warnings()` are
	# restored when the `with` block exits, so as written these calls do not
	# affect the imports that follow — confirm whether those imports were meant
	# to sit inside this block.
	# NOTE(review): the indentation of this file was lost; the nesting of the
	# second call is reconstructed and may originally have been at module
	# level — TODO confirm.
	with warnings.catch_warnings():
	    warnings.simplefilter("ignore")
	    warnings.simplefilter(action='ignore', category=FutureWarning)

	import pandas as pd
	from sklearn.model_selection import TimeSeriesSplit
	from sklearn.metrics import mean_absolute_error
	from sklearn.linear_model import LinearRegression # Example model
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder

	import datetime
	from datetime import time, timedelta
	from tqdm import tqdm

	def _prior_exceedance_share(series):
	    """Return, per row, the share of earlier values strictly above the current one.

	    The first row has no history and yields ``None`` (it becomes NaN once the
	    list is stored in a float column).
	    """
	    shares = []
	    for i, current in tqdm(enumerate(series)):
	        historical = series.iloc[:i]
	        if len(historical) == 0:
	            # Previously handled by a bare ``except`` swallowing the
	            # ZeroDivisionError raised on the first row.
	            shares.append(None)
	            continue
	        shares.append(len(historical[historical > current]) / len(historical))
	    return shares


	def _calibrate_bounds(df_results, alpha):
	    """Return ``(uppers, lowers)`` return bounds built from earlier prediction errors.

	    For row ``i`` only rows ``[:i]`` are used: the ``1 - alpha`` quantile of the
	    non-negative errors and the ``alpha`` quantile of the negative errors are
	    added to that row's own prediction. Rows that do not yet have both a
	    non-negative and a negative historical error get ``None`` for both bounds.
	    """
	    uppers = []
	    lowers = []
	    predictions = df_results['Predicted']
	    for i, pred in tqdm(enumerate(predictions), desc='Calibrating Probas', total=len(df_results)):
	        prior = df_results.iloc[:i]
	        errors = prior['IsTrue'] - prior['Predicted']
	        positive_errors = errors[errors >= 0]
	        negative_errors = errors[errors < 0]

	        if positive_errors.empty or negative_errors.empty:
	            # A quantile of an empty sample is undefined; the original relied
	            # on a bare ``except`` to blank both bounds in this situation.
	            uppers.append(None)
	            lowers.append(None)
	            continue

	        # Anchor the band on this row's prediction. The original used
	        # ``df_results['Predicted'].iloc[-1]``, which tied every historical
	        # band to the most recent prediction; only the final row was correct.
	        uppers.append(pred + np.quantile(positive_errors, 1 - alpha))
	        lowers.append(pred + np.quantile(negative_errors, alpha))
	    return uppers, lowers


	def prep_data(df):
	    """Walk-forward fit of daily close return on the intraday return.

	    For each candle count in ``candles`` the function pulls data through
	    ``get_daily``, derives return/gap/percentile features, fits a
	    ``LinearRegression`` on ``ClosePctIntra`` with an expanding one-step-ahead
	    ``TimeSeriesSplit``, calibrates upper/lower bounds from past errors and
	    stores the outcome in ``results``, ``coefs`` and ``df_consolidated``.

	    Args:
	        df: Unused; kept so existing callers keep working.

	    Returns:
	        None. Output is written to the ``results``, ``coefs`` and
	        ``df_consolidated`` objects.

	    NOTE(review): ``now``, ``get_daily``, ``results``, ``coefs`` and
	    ``df_consolidated`` are not defined in the visible part of this file and
	    are assumed to be provided at module level — TODO confirm.
	    """
	    morning_start = datetime.datetime.combine(now.date(), time(6, 30))
	    delta = now - morning_start
	    print(delta)

	    # Only the first 30-minute candle is modelled; commented-out code in the
	    # original suggests the full range was meant to be 1..12 — TODO confirm.
	    candles = np.arange(1, 2)
	    for candle in tqdm(candles):
	        print(f'running for {str(candle)}')
	        data, _, _ = get_daily(mode='intra', periods_30m=candle)

	        df_new = data[['Open', 'High', 'Low', 'Close', 'Close30', 'Close_VIX30', 'Close_VIX', 'Close_VVIX30', 'Close_VVIX']].copy()
	        df_new['PrevClose'] = df_new['Close'].shift(1)
	        df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1
	        df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1
	        df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1
	        df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1
	        df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1
	        # Shift the EMA by one row so it is built from prior closes only.
	        df_new['EMA8'] = df_new['Close'].ewm(8).mean()
	        df_new['EMA8'] = df_new['EMA8'].shift(1)
	        df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']

	        # Target will be the day's close
	        df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1

	        # Share of earlier rows with a larger intraday / daily return.
	        df_new['IntraPercentile'] = _prior_exceedance_share(df_new['ClosePctIntra'])
	        df_new['ClosePctPercentile'] = _prior_exceedance_share(df_new['ClosePct'])

	        # Lagged daily returns, n-1 .. n-5.
	        for lag in np.arange(1, 6):
	            df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)

	        df_feats = df_new[[c for c in df_new.columns if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]]
	        df_final = df_feats.dropna()

	        X = df_final[['ClosePctIntra']]  # Feature dataset
	        y = df_final['ClosePct']  # Target dataset

	        # One fold per row after the first: train on everything before the
	        # row, predict that single row.
	        tscv = TimeSeriesSplit(n_splits=len(df_final) - 1, max_train_size=None, test_size=1)

	        # Column dtypes are the same in every fold, so select features once.
	        categorical_features = X.select_dtypes(include='object').columns
	        numeric_features = X.drop(columns=[c for c in X.columns if 'Percentile' in c]).select_dtypes(include='number').columns

	        overall_results = []
	        for train_index, test_index in tscv.split(X):
	            X_train = X.iloc[train_index]
	            X_test = X.iloc[test_index]
	            y_train = y.iloc[train_index]
	            y_test = y.iloc[test_index]

	            preprocessor = ColumnTransformer(
	                transformers=[
	                    ('numeric', RobustScaler(), numeric_features),
	                    ('categorical', OneHotEncoder(), categorical_features),
	                ])

	            # A fresh pipeline per fold; the last one is inspected below.
	            pipeline = Pipeline(steps=[
	                ('preprocessor', preprocessor),
	                ('model', LinearRegression()),
	            ])
	            pipeline.fit(X_train, y_train)
	            y_pred = pipeline.predict(X_test)

	            result_df = pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred}, index=y_test.index)
	            overall_results.append(result_df)

	        df_results = pd.concat(overall_results)

	        uppers, lowers = _calibrate_bounds(df_results, alpha=0.05)
	        df_results['Upper'] = uppers
	        df_results['Lower'] = lowers

	        # NOTE(review): relies on `data` (from get_daily) already carrying a
	        # 'PrevClose' column; only `df_new` is given one here — TODO confirm.
	        df_results = df_results.merge(data[['PrevClose']], left_index=True, right_index=True)
	        # Convert returns back into price levels relative to the prior close.
	        df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])
	        df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])
	        df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])
	        df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])

	        results[f'{str(int(candle))}'] = df_results

	        # MAE over all walk-forward predictions.
	        average_mae = mean_absolute_error(df_results['IsTrue'], df_results['Predicted'])
	        # Coefficients of the last fitted fold, largest magnitude first.
	        # NOTE(review): pairing input feature names with coef_ assumes the
	        # preprocessor keeps one output column per input, in order — true for
	        # the single numeric feature used here.
	        sorted_features = sorted(
	            zip(pipeline.feature_names_in_, pipeline.named_steps.model.coef_),
	            key=lambda x: abs(x[1]),
	            reverse=True,
	        )

	        coefs[f'{str(int(candle))}'] = pd.DataFrame(sorted_features, columns=['Feature', 'Coefficient'])

	        df_consolidated.loc[int(candle), 'MAE'] = average_mae