wnstnb committed on
Commit 4b357c0 · 1 Parent(s): ff17253

updating charts

Files changed (4)
  1. .gitignore +3 -1
  2. app.py +252 -76
  3. data_check.ipynb +510 -0
  4. uni_model.py +180 -0
.gitignore CHANGED
@@ -1,4 +1,6 @@
 /.env
 /.venv
 /hss.pem ec2-user@ec2-18-1
-/__pycache__
+/__pycache__
+appOld.py
+appOld2.py
app.py CHANGED
@@ -56,107 +56,283 @@ levels = data_daily.loc[df1.index.date, ['H1','H2','L1','L2','Open']].drop_dupli
 levels['FirstBar'] = dts

 # Plot
 import streamlit as st
 from streamlit_lightweight_charts import renderLightweightCharts

-chartOptions = [{
-    "width":800,
-    "height":400,
-    "rightPriceScale": {
-        "scaleMargins": {
-            "top": 0.2,
-            "bottom": 0.25,
         },
-        "borderVisible": True,
-    },
-    "layout": {
-        "textColor": 'white',
-        "background": {
-            "type": 'solid',
-            "color": 'black'
         },
     },
-    "grid": {
-        "vertLines": {
-            "color": "rgba(197, 203, 206, 0)"
         },
-        "horzLines": {
-            "color": "rgba(197, 203, 206, 0)"
-        }
-    }
-},
-{
-    "width":800,
-    "height":125,
-    "layout": {
-        "textColor": 'white',
-        "background": {
-            "type": 'solid',
-            "color": 'black'
         },
-    },
-    "grid": {
         "vertLines": {
-            "color": "rgba(197, 203, 206, 0)"
-        },
         "horzLines": {
-            "color": "rgba(197, 203, 206, 0)"
         }
     },
-},]
-
-seriesCandlestickChart = [{
-
-    "type": 'Candlestick',
-    "data": [
-        {"open": open,
-         "high": high,
-         "low": low,
-         "close": close,
-         "time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
-    ],
-    "options": {
-        "upColor": '#3399ff',
-        "downColor": '#ff5f5f',
-        "borderVisible": False,
-        "wickUpColor": '#3399ff',
-        "wickDownColor": '#ff5f5f',
-        "priceScaleVisible": True
     },
-    "priceScale": {
-        "scaleMargins": {
-            "top": 0.7,
-            "bottom": 0,
         }
     }
-},
-{
-    "type": 'Line',
-    "data": [{"value": value, "time":dt.timestamp()} for value, dt in zip(levels['H1'], levels['FirstBar'])],
     "options": {
-        "color": 'blue',
-        "lineWidth": 1
     }
-}]

-seriesPredictions = [{
-    "type": 'Histogram',
-    "data": [
-        { "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
-    ],
-    "options": { "color": '#26a69a' }
-}]

 renderLightweightCharts([
     {
-        "chart": chartOptions[0],
         "series": seriesCandlestickChart
     },
     {
-        "chart": chartOptions[1],
-        "series": seriesPredictions
-    },
 ], 'multipane')

 # Important levels
 df_levels = pd.DataFrame(levels[['H2','H1','Open','L1','L2']].iloc[-1]).round(2)

 levels['FirstBar'] = dts

 # Plot
+
 import streamlit as st
 from streamlit_lightweight_charts import renderLightweightCharts

+import json
+import numpy as np
+import yfinance as yf
+import pandas as pd
+
+COLOR_BULL = '#3399ff' # #26a69a
+COLOR_BEAR = '#ff5f5f' # #ef5350
+
+
+# Some data wrangling to match required format
+df = df1.copy()
+df['time'] = [dt.timestamp() for dt in df.index]
+df = df[['time','Open','High','Low','Close','CalibPredicted','Color']]
+df.columns = ['time','open','high','low','close','volume','color'] # rename columns
+# df['color'] = np.where( df['open'] > df['close'], COLOR_BEAR, COLOR_BULL) # bull or bear
+
+# export to JSON format
+# candles = json.loads(df.to_json(orient = "records"))
+candles = json.loads(json.dumps([
+    {"open": open,
+     "high": high,
+     "low": low,
+     "close": close,
+     "time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
+], indent=2))
+# volume = json.loads(df.rename(columns={"volume": "value",}).to_json(orient = "records"))
+volume = json.loads(json.dumps([
+    { "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
+], indent=2))
+
+chartMultipaneOptions = [
+    {
+        # "width": 600,
+        "height": 400,
+        "layout": {
+            "background": {
+                "type": "solid",
+                "color": 'transparent'
+            },
+            "textColor": "white"
         },
+        "grid": {
+            "vertLines": {
+                "color": "rgba(197, 203, 206, 0.25)"
+            },
+            "horzLines": {
+                "color": "rgba(197, 203, 206, 0.25)"
+            }
+        },
+        "crosshair": {
+            "mode": 0
+        },
+        "priceScale": {
+            "borderColor": "rgba(197, 203, 206, 0.8)"
+        },
+        "timeScale": {
+            "borderColor": "rgba(197, 203, 206, 0.8)",
+            "barSpacing": 15
         },
+        "watermark": {
+            "visible": True,
+            "fontSize": 48,
+            "horzAlign": 'center',
+            "vertAlign": 'center',
+            "color": 'rgba(171, 71, 188, 0.3)',
+            "text": 'AAPL - D1',
+        }
     },
+    {
+        # "width": 600,
+        "height": 100,
+        "layout": {
+            "background": {
+                "type": 'solid',
+                "color": 'transparent'
             },
+            "textColor": 'black',
         },
+        "grid": {
             "vertLines": {
+                "color": 'rgba(42, 46, 57, 0)',
+            },
             "horzLines": {
+                "color": 'rgba(42, 46, 57, 0.6)',
             }
         },
+        "timeScale": {
+            "visible": False,
+        },
+        "watermark": {
+            "visible": True,
+            "fontSize": 18,
+            "horzAlign": 'left',
+            "vertAlign": 'top',
+            "color": 'rgba(171, 71, 188, 0.7)',
+            "text": 'Volume',
+        }
     },
+    {
+        "width": 600,
+        "height": 200,
+        "layout": {
+            "background": {
+                "type": "solid",
+                "color": 'white'
+            },
+            "textColor": "black"
+        },
+        "timeScale": {
+            "visible": False,
+        },
+        "watermark": {
+            "visible": True,
+            "fontSize": 18,
+            "horzAlign": 'left',
+            "vertAlign": 'center',
+            "color": 'rgba(171, 71, 188, 0.7)',
+            "text": 'MACD',
         }
     }
+]
+
+seriesCandlestickChart = [
+    {
+        "type": 'Candlestick',
+        "data": candles,
         "options": {
+            "upColor": COLOR_BULL,
+            "downColor": COLOR_BEAR,
+            "borderVisible": False,
+            "wickUpColor": COLOR_BULL,
+            "wickDownColor": COLOR_BEAR
         }
+    }
+]

+seriesVolumeChart = [
+    {
+        "type": 'Histogram',
+        "data": volume,
+        "options": {
+            "priceFormat": {
+                "type": 'volume',
+            },
+            "priceScaleId": "" # set as an overlay setting,
+        },
+        "priceScale": {
+            "scaleMargins": {
+                "top": 0,
+                "bottom": 0,
+            },
+            "alignLabels": False
+        }
+    }
+]

 renderLightweightCharts([
     {
+        "chart": chartMultipaneOptions[0],
         "series": seriesCandlestickChart
     },
     {
+        "chart": chartMultipaneOptions[1],
+        "series": seriesVolumeChart
+    }
 ], 'multipane')
+# import streamlit as st
+# from streamlit_lightweight_charts import renderLightweightCharts
+
+# chartOptions = [{
+#     "width":800,
+#     "height":400,
+#     "rightPriceScale": {
+#         "scaleMargins": {
+#             "top": 0.2,
+#             "bottom": 0.25,
+#         },
+#         "borderVisible": False,
+#     },
+#     "overlayPriceScales": {
+#         "scaleMargins": {
+#             "top": 0.7,
+#             "bottom": 0,
+#         }
+#     },
+#     "layout": {
+#         "textColor": 'white',
+#         "background": {
+#             "type": 'solid',
+#             "color": 'black'
+#         },
+#     },
+#     "grid": {
+#         "vertLines": {
+#             "color": "rgba(197, 203, 206, 0)"
+#         },
+#         "horzLines": {
+#             "color": "rgba(197, 203, 206, 0)"
+#         }
+#     }
+# },
+# {
+#     "width":800,
+#     "height":125,
+#     "layout": {
+#         "textColor": 'white',
+#         "background": {
+#             "type": 'solid',
+#             "color": 'black'
+#         },
+#     },
+#     "grid": {
+#         "vertLines": {
+#             "color": "rgba(197, 203, 206, 0)"
+#         },
+#         "horzLines": {
+#             "color": "rgba(197, 203, 206, 0)"
+#         }
+#     },
+# },]
+
+# seriesCandlestickChart = [{
+
+#     "type": 'Candlestick',
+#     "data": [
+#         {"open": open,
+#          "high": high,
+#          "low": low,
+#          "close": close,
+#          "time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
+#     ],
+#     "options": {
+#         "upColor": '#3399ff',
+#         "downColor": '#ff5f5f',
+#         "borderVisible": False,
+#         "wickUpColor": '#3399ff',
+#         "wickDownColor": '#ff5f5f',
+#         "priceScaleVisible": True
+#     },
+#     "priceScale": {
+#         "scaleMargins": {
+#             "top": 0.7,
+#             "bottom": 0,
+#         }
+#     }
+# },
+# {
+#     "type": 'Line',
+#     "data": [{"value": value, "time":dt.timestamp()} for value, dt in zip(levels['H1'], levels['FirstBar'])],
+#     "options": {
+#         "color": 'blue',
+#         "lineWidth": 1
+#     }
+# }]
+
+# seriesPredictions = [{
+#     "type": 'Histogram',
+#     "data": [
+#         { "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
+#     ],
+#     "options": { "color": '#26a69a' }
+# }]
+
+# renderLightweightCharts([
+#     {
+#         "chart": chartOptions[0],
+#         "series": seriesCandlestickChart
+#     },
+#     {
+#         "chart": chartOptions[1],
+#         "series": seriesPredictions
+#     },
+# ], 'multipane')

 # Important levels
 df_levels = pd.DataFrame(levels[['H2','H1','Open','L1','L2']].iloc[-1]).round(2)
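
For reference, the pattern the rewritten block relies on is small: renderLightweightCharts accepts a list of panes, each a dict with "chart" options and a "series" list, plus a key string, and every series carries its data as records keyed by "time". A minimal sketch of that multipane call, with fabricated bars standing in for the app's df1 (the values and sizes below are made up for illustration; run it with streamlit run):

# Minimal multipane sketch for streamlit_lightweight_charts.
# The OHLC bars and histogram values are fabricated, not the app's data.
import datetime
from streamlit_lightweight_charts import renderLightweightCharts

base = datetime.datetime(2023, 11, 20, 9, 30)
times = [(base + datetime.timedelta(minutes=30 * i)).timestamp() for i in range(4)]
candles = [{"time": t, "open": 100 + i, "high": 101 + i, "low": 99 + i, "close": 100.5 + i}
           for i, t in enumerate(times)]
preds = [{"time": t, "value": 0.4 + 0.1 * i, "color": "#3399ff"} for i, t in enumerate(times)]

renderLightweightCharts([
    {"chart": {"height": 400}, "series": [{"type": "Candlestick", "data": candles}]},
    {"chart": {"height": 100}, "series": [{"type": "Histogram", "data": preds}]},
], 'multipane')
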
data_check.ipynb ADDED
@@ -0,0 +1,510 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import warnings\n",
+    "with warnings.catch_warnings():\n",
+    "    warnings.simplefilter(\"ignore\")\n",
+    "    warnings.simplefilter(action='ignore', category=FutureWarning)\n",
+    "\n",
+    "import pandas as pd\n",
+    "from getDailyData import get_daily\n",
+    "from sklearn.model_selection import TimeSeriesSplit\n",
+    "from sklearn.metrics import mean_absolute_error\n",
+    "from sklearn.linear_model import LinearRegression # Example model\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder\n",
+    "from lightgbm import LGBMRegressor\n",
+    "from tqdm import tqdm\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yfinance as yf\n",
+    "spx = yf.Ticker('^GSPC')\n",
+    "spx.history(start='2023-11-20', interval='1d')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "from datetime import time, timedelta\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "now = datetime.datetime.now()\n",
+    "df_consolidated = pd.DataFrame()\n",
+    "results = {}\n",
+    "coefs = {}\n",
+    "\n",
+    "morning_start = datetime.datetime.combine(now.date(), time(6, 30))\n",
+    "delta = now - morning_start\n",
+    "print(delta)\n",
+    "# candle = 1 #max(0,min((delta.total_seconds() / 60 / 30) // 1, 12))\n",
+    "# candles = np.arange(1,13)\n",
+    "candles = np.arange(1,2)\n",
+    "for candle in tqdm(candles):\n",
+    "    print(f'running for {str(candle)}')\n",
+    "    data, df_final, final_row = get_daily(mode='intra', periods_30m=candle)\n",
+    "\n",
+    "    df_new = data[['Open','High','Low','Close','Close30','Close_VIX30','Close_VIX','Close_VVIX30','Close_VVIX']].copy()\n",
+    "    df_new['PrevClose'] = df_new['Close'].shift(1)\n",
+    "    df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1\n",
+    "    df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1\n",
+    "    df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1\n",
+    "    df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1\n",
+    "    df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1\n",
+    "    df_new['EMA8'] = df_new['Close'].ewm(8).mean()\n",
+    "    df_new['EMA8'] = df_new['EMA8'].shift(1)\n",
+    "    df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']\n",
+    "\n",
+    "    # Target will be the day's close\n",
+    "    df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1\n",
+    "\n",
+    "    # Column to determine what percentile the current intra performance looks like\n",
+    "    intra_rank = []\n",
+    "    for i, pct in tqdm(enumerate(df_new['ClosePctIntra'])):\n",
+    "        try:\n",
+    "            historical = df_new['ClosePctIntra'].iloc[:i]\n",
+    "            current = df_new['ClosePctIntra'].iloc[i]\n",
+    "            perc = len(historical[historical > current]) / len(historical)\n",
+    "        except:\n",
+    "            perc = None\n",
+    "        intra_rank.append(perc)\n",
+    "\n",
+    "    df_new['IntraPercentile'] = intra_rank\n",
+    "\n",
+    "    # Column to determine what percentile the daily performance looks like\n",
+    "    daily_rank = []\n",
+    "    for i, pct in tqdm(enumerate(df_new['ClosePct'])):\n",
+    "        try:\n",
+    "            historical = df_new['ClosePct'].iloc[:i]\n",
+    "            current = df_new['ClosePct'].iloc[i]\n",
+    "            perc = len(historical[historical > current]) / len(historical)\n",
+    "        except:\n",
+    "            perc = None\n",
+    "        daily_rank.append(perc)\n",
+    "\n",
+    "    df_new['ClosePctPercentile'] = daily_rank\n",
+    "\n",
+    "    # Let's do n-5 to start just for closes\n",
+    "    lags = np.arange(1,6)\n",
+    "\n",
+    "    for lag in lags:\n",
+    "        df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)\n",
+    "        # df_new[f'ClosePctPercentile_n{str(lag)}'] = df_new['ClosePctPercentile'].shift(lag)\n",
+    "\n",
+    "\n",
+    "    df_feats = df_new[[c for c in df_new.columns if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]]\n",
+    "\n",
+    "    df_final = df_feats.dropna()\n",
+    "\n",
+    "    X = df_final[['ClosePctIntra']] # Feature dataset\n",
+    "    y = df_final['ClosePct'] # Target dataset\n",
+    "\n",
+    "    # model = LGBMRegressor(random_state=42, n_estimators=10, verbose=-1)\n",
+    "    # model = LinearRegression()\n",
+    "    # Define the column transformer for handling numeric and categorical features\n",
+    "    \n",
+    "\n",
+    "    # Fit the pipeline on the training data\n",
+    "    # pipeline.fit(X_train, y_train)\n",
+    "\n",
+    "    tscv = TimeSeriesSplit(n_splits=len(df_final)-1, max_train_size=None, test_size=1)\n",
+    "\n",
+    "    mae_scores = []\n",
+    "    overall_results = []\n",
+    "\n",
+    "    for train_index, test_index in tscv.split(X):\n",
+    "        \n",
+    "        X_train = X.iloc[train_index]\n",
+    "        X_test = X.iloc[test_index]\n",
+    "        y_train = y.iloc[train_index]\n",
+    "        y_test = y.iloc[test_index]\n",
+    "        \n",
+    "        # Select features\n",
+    "        categorical_features = X_train.select_dtypes(include='object').columns\n",
+    "        numeric_features = X_train.drop(columns=[c for c in X_train.columns if 'Percentile' in c]).select_dtypes(include='number').columns\n",
+    "\n",
+    "        # Transformers\n",
+    "        numeric_transformer = RobustScaler() # Example: StandardScaler for numeric features\n",
+    "        categorical_transformer = OneHotEncoder() # Example: OneHotEncoder for categorical features\n",
+    "\n",
+    "        # Define the pipeline steps\n",
+    "        preprocessor = ColumnTransformer(\n",
+    "            transformers=[\n",
+    "                ('numeric', numeric_transformer, numeric_features), # numeric_features is a list of numeric feature column names\n",
+    "                ('categorical', categorical_transformer, categorical_features) # categorical_features is a list of categorical feature column names\n",
+    "            ])\n",
+    "\n",
+    "        # Create the pipeline\n",
+    "        pipeline = Pipeline(steps=[\n",
+    "            ('preprocessor', preprocessor),\n",
+    "            ('model', LinearRegression())\n",
+    "        ])\n",
+    "        \n",
+    "        # Fit the model\n",
+    "        pipeline.fit(X_train, y_train)\n",
+    "\n",
+    "        # Predict\n",
+    "        y_pred = pipeline.predict(X_test)\n",
+    "\n",
+    "        # Calculate metrics\n",
+    "        # mae_scores.append(mean_absolute_error(y_test, y_pred))\n",
+    "        result_df = pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred}, index=y_test.index)\n",
+    "        overall_results.append(result_df)\n",
+    "\n",
+    "    df_results = pd.concat(overall_results)\n",
+    "\n",
+    "    uppers = []\n",
+    "    lowers = []\n",
+    "    alpha = 0.05\n",
+    "    for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):\n",
+    "        try:\n",
+    "            \n",
+    "            df_q = df_results.iloc[:i]\n",
+    "            pred = df_results['Predicted'].iloc[-1]\n",
+    "            errors = df_q['IsTrue'] - df_q['Predicted']\n",
+    "            positive_errors = errors[errors >= 0]\n",
+    "            negative_errors = errors[errors < 0]\n",
+    "\n",
+    "            # Calculate bounds\n",
+    "            upper_bound = pred + np.quantile(positive_errors, 1 - alpha)\n",
+    "            lower_bound = pred + np.quantile(negative_errors, alpha)\n",
+    "            \n",
+    "        except:\n",
+    "            upper_bound = None\n",
+    "            lower_bound = None\n",
+    "\n",
+    "        uppers.append(upper_bound)\n",
+    "        lowers.append(lower_bound)\n",
+    "\n",
+    "    df_results['Upper'] = uppers\n",
+    "    df_results['Lower'] = lowers\n",
+    "\n",
+    "    df_results = df_results.merge(data[['PrevClose']],left_index=True, right_index=True)\n",
+    "    df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])\n",
+    "    df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])\n",
+    "    df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])\n",
+    "    df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])\n",
+    "\n",
+    "    results[f'{str(int(candle))}'] = df_results\n",
+    "\n",
+    "    # Average metrics across folds\n",
+    "    average_mae = mean_absolute_error(df_results['IsTrue'], df_results['Predicted'])\n",
+    "    # sorted_features = sorted([(feat, coef) for feat, coef in zip(model.feature_name_, model.feature_importances_)], key=lambda x: abs(x[1]), reverse=True)\n",
+    "    sorted_features = sorted([(feat, coef) for feat, coef in zip(pipeline.feature_names_in_, pipeline.named_steps.model.coef_)], key=lambda x: abs(x[1]), reverse=True)\n",
+    "\n",
+    "    coefs[f'{str(int(candle))}'] = pd.DataFrame(sorted_features, columns=['Feature','Coefficient'])\n",
+    "\n",
+    "    df_consolidated.loc[int(candle), 'MAE'] = average_mae"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline.named_steps['model'].coef_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_f = pd.concat(coefs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_consolidated"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results[f'{str(candle)}'].loc['2023-10-01':, ['Pred','Actual','Up','Down']].plot();"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "coefs[f'{str(candle)}']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "d:\\Projects\\gamedayspx_lambda\\getDailyData.py:243: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()\n",
+      "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 1598.36it/s]\n",
+      "d:\\Projects\\gamedayspx_lambda\\model_intra_v2.py:11: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  df[target_column] = df[target_column].astype(bool)\n",
+      "d:\\Projects\\gamedayspx_lambda\\.venv\\lib\\site-packages\\sklearn\\base.py:465: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_10000\\2718014135.py:38: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()\n"
+     ]
+    }
+   ],
+   "source": [
+    "from getDailyData import get_daily\n",
+    "from model_intra_v2 import walk_forward_validation\n",
+    "from model_day_v2 import walk_forward_validation_seq as walk_forward_validation_daily\n",
+    "from model_regr_v2 import walk_forward_validation as walk_forward_validation_regr\n",
+    "from model_regr_v2 import calc_upper_lower\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "from dbConn import connection, engine, insert_dataframe_to_sql\n",
+    "import numpy as np\n",
+    "from datetime import time, timedelta\n",
+    "import datetime\n",
+    "from pandas.tseries.offsets import BDay\n",
+    "import holidays\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()\n",
+    "\n",
+    "periods_30m = 1\n",
+    "\n",
+    "if periods_30m > 0:\n",
+    "    data, df_final, final_row = get_daily(mode='intra', periods_30m=periods_30m)\n",
+    "    # Regression model\n",
+    "    res, _ = walk_forward_validation(df_final.drop(columns=['Target']).dropna(), 'Target_clf', 1, mode='single')\n",
+    "    regr_res, _ = walk_forward_validation_regr(df_final[['CurrentClose30toClose','ClosePct']].dropna(), 'ClosePct', 1, mode='single')\n",
+    "    df_regr_results = pd.read_sql_query(f'select * from reg_results where ModelNum = {str(periods_30m)}', con = engine)\n",
+    "    regr_pct = regr_res['Predicted'].iloc[-1]\n",
+    "    upper, lower = calc_upper_lower(regr_pct, df_regr_results, alpha=0.05)\n",
+    "\n",
+    "elif periods_30m == 0:\n",
+    "    data, df_final, final_row = get_daily()\n",
+    "    res, _, _ = walk_forward_validation_daily(df_final.dropna(), 'Target_clf', 'Target', 200, 1)\n",
+    "\n",
+    "# Get results, run calibration and pvalue \n",
+    "\n",
+    "df_results = pd.read_sql_query(f'select * from results where ModelNum = {str(periods_30m)}', con = engine)\n",
+    "\n",
+    "# Calibrate Probabilities\n",
+    "def get_quantiles(df, col_name, q):\n",
+    "    return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()\n",
+    "\n",
+    "pct = res['Predicted'].iloc[-1]\n",
+    "\n",
+    "df_q = get_quantiles(df_results, 'Predicted', 10)\n",
+    "for q in df_q.index:\n",
+    "    if q.left <= pct <= q.right:\n",
+    "        p = df_q[q]\n",
+    "\n",
+    "calib_scores = np.abs(df_results['Predicted'].iloc[:-1] - 0.5)\n",
+    "score = abs(pct - 0.5)\n",
+    "pv = np.mean(calib_scores >= score)\n",
+    "asof = datetime.datetime.combine(data.index[-1], time(9,30)) + (periods_30m * timedelta(minutes=30)) \n",
+    "\n",
+    "blob = {\n",
+    "    'Datetime': str(res.index[-1]),\n",
+    "    'IsTrue':df_final['Target_clf'].iloc[-1],\n",
+    "    'Predicted': pct,\n",
+    "    'CalibPredicted': p,\n",
+    "    'Pvalue':pv,\n",
+    "    'ModelNum':periods_30m,\n",
+    "    'AsOf':str(asof)\n",
+    "}\n",
+    "\n",
+    "# Write to DB\n",
+    "df_write = pd.DataFrame.from_dict({k:[v] for k, v in blob.items()})\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Datetime</th>\n",
+       "      <th>IsTrue</th>\n",
+       "      <th>Predicted</th>\n",
+       "      <th>CalibPredicted</th>\n",
+       "      <th>Pvalue</th>\n",
+       "      <th>ModelNum</th>\n",
+       "      <th>AsOf</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2023-11-22 00:00:00</td>\n",
+       "      <td>True</td>\n",
+       "      <td>0.712132</td>\n",
+       "      <td>0.832636</td>\n",
+       "      <td>0.404288</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-11-24 10:00:00</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       " Datetime IsTrue Predicted CalibPredicted Pvalue ModelNum \\\n",
+       "0 2023-11-22 00:00:00 True 0.712132 0.832636 0.404288 1 \n",
+       "\n",
+       " AsOf \n",
+       "0 2023-11-24 10:00:00 "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_write"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cursor = connection.cursor()\n",
+    "insert_dataframe_to_sql('results', df_write, cursor)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "if periods_30m > 0:\n",
+    "    regr_blob = {\n",
+    "        'Datetime': str(res.index[-1]),\n",
+    "        'IsTrue':df_final['ClosePct'].iloc[-1],\n",
+    "        'Predicted': regr_pct,\n",
+    "        'Upper': upper,\n",
+    "        'Lower':lower,\n",
+    "        'ModelNum':periods_30m,\n",
+    "        'AsOf':str(asof)\n",
+    "    }\n",
+    "    df_write_reg = pd.DataFrame.from_dict({k:[v] for k, v in regr_blob.items()})\n",
+    "    insert_dataframe_to_sql('reg_results', df_write_reg, cursor)\n",
+    "\n",
+    "cursor.close()\n",
+    "connection.close()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'Datetime': '2023-11-22 00:00:00',\n",
+       " 'IsTrue': 0.0005968736678840791,\n",
+       " 'Predicted': 0.00048111739459897327,\n",
+       " 'Upper': 0.02107334825815718,\n",
+       " 'Lower': -0.018127700802536933,\n",
+       " 'ModelNum': 1,\n",
+       " 'AsOf': '2023-11-24 10:00:00'}"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regr_blob"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regr_blob"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
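
The interval logic inside the long loop above is compact enough to state on its own: collect the signed residuals IsTrue - Predicted seen so far, then shift the current point forecast up by the (1 - alpha) quantile of the positive residuals and down by the alpha quantile of the negative ones. A standalone sketch with synthetic numbers (only alpha = 0.05 matches the notebook; the history values are invented):

# Standalone sketch of the error-quantile bounds used in the notebook.
# The history values are synthetic; only the method mirrors the code above.
import numpy as np
import pandas as pd

alpha = 0.05
history = pd.DataFrame({
    'IsTrue':    [0.004, -0.002, 0.001, -0.006, 0.003],   # realized returns (made up)
    'Predicted': [0.003, -0.001, 0.000, -0.004, 0.002],   # past forecasts (made up)
})
errors = history['IsTrue'] - history['Predicted']          # signed residuals
pred = 0.0005                                               # new point forecast

upper = pred + np.quantile(errors[errors >= 0], 1 - alpha)  # typical worst overshoot
lower = pred + np.quantile(errors[errors < 0], alpha)       # typical worst undershoot
print(f'{lower:.4f} <= {pred:.4f} <= {upper:.4f}')
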
uni_model.py ADDED
@@ -0,0 +1,180 @@
+import numpy as np
+import warnings
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+    warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import pandas as pd
+from sklearn.model_selection import TimeSeriesSplit
+from sklearn.metrics import mean_absolute_error
+from sklearn.linear_model import LinearRegression # Example model
+from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder
+from getDailyData import get_daily  # get_daily is called below; import was missing (matches data_check.ipynb)
+
+import datetime
+from datetime import time, timedelta
+from tqdm import tqdm
+
+def prep_data(df):
+    # now, results, coefs and df_consolidated were referenced but never defined in
+    # this file; initialized here the same way as in data_check.ipynb.
+    now = datetime.datetime.now()
+    df_consolidated = pd.DataFrame()
+    results = {}
+    coefs = {}
+
+    morning_start = datetime.datetime.combine(now.date(), time(6, 30))
+    delta = now - morning_start
+    print(delta)
+    # candle = 1 #max(0,min((delta.total_seconds() / 60 / 30) // 1, 12))
+    # candles = np.arange(1,13)
+    candles = np.arange(1,2)
+    for candle in tqdm(candles):
+        print(f'running for {str(candle)}')
+        data, df_final, final_row = get_daily(mode='intra', periods_30m=candle)
+
+        df_new = data[['Open','High','Low','Close','Close30','Close_VIX30','Close_VIX','Close_VVIX30','Close_VVIX']].copy()
+        df_new['PrevClose'] = df_new['Close'].shift(1)
+        df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1
+        df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1
+        df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1
+        df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1
+        df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1
+        df_new['EMA8'] = df_new['Close'].ewm(8).mean()
+        df_new['EMA8'] = df_new['EMA8'].shift(1)
+        df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']
+
+        # Target will be the day's close
+        df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1
+
+        # Column to determine what percentile the current intra performance looks like
+        intra_rank = []
+        for i, pct in tqdm(enumerate(df_new['ClosePctIntra'])):
+            try:
+                historical = df_new['ClosePctIntra'].iloc[:i]
+                current = df_new['ClosePctIntra'].iloc[i]
+                perc = len(historical[historical > current]) / len(historical)
+            except:
+                perc = None
+            intra_rank.append(perc)
+
+        df_new['IntraPercentile'] = intra_rank
+
+        # Column to determine what percentile the daily performance looks like
+        daily_rank = []
+        for i, pct in tqdm(enumerate(df_new['ClosePct'])):
+            try:
+                historical = df_new['ClosePct'].iloc[:i]
+                current = df_new['ClosePct'].iloc[i]
+                perc = len(historical[historical > current]) / len(historical)
+            except:
+                perc = None
+            daily_rank.append(perc)
+
+        df_new['ClosePctPercentile'] = daily_rank
+
+        # Let's do n-5 to start just for closes
+        lags = np.arange(1,6)
+
+        for lag in lags:
+            df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)
+            # df_new[f'ClosePctPercentile_n{str(lag)}'] = df_new['ClosePctPercentile'].shift(lag)
+
+
+        df_feats = df_new[[c for c in df_new.columns if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]]
+
+        df_final = df_feats.dropna()
+
+        X = df_final[['ClosePctIntra']] # Feature dataset
+        y = df_final['ClosePct'] # Target dataset
+
+        # model = LGBMRegressor(random_state=42, n_estimators=10, verbose=-1)
+        # model = LinearRegression()
+        # Define the column transformer for handling numeric and categorical features
+
+
+        # Fit the pipeline on the training data
+        # pipeline.fit(X_train, y_train)
+
+        tscv = TimeSeriesSplit(n_splits=len(df_final)-1, max_train_size=None, test_size=1)
+
+        mae_scores = []
+        overall_results = []
+
+        for train_index, test_index in tscv.split(X):
+
+            X_train = X.iloc[train_index]
+            X_test = X.iloc[test_index]
+            y_train = y.iloc[train_index]
+            y_test = y.iloc[test_index]
+
+            # Select features
+            categorical_features = X_train.select_dtypes(include='object').columns
+            numeric_features = X_train.drop(columns=[c for c in X_train.columns if 'Percentile' in c]).select_dtypes(include='number').columns
+
+            # Transformers
+            numeric_transformer = RobustScaler() # Example: StandardScaler for numeric features
+            categorical_transformer = OneHotEncoder() # Example: OneHotEncoder for categorical features
+
+            # Define the pipeline steps
+            preprocessor = ColumnTransformer(
+                transformers=[
+                    ('numeric', numeric_transformer, numeric_features), # numeric_features is a list of numeric feature column names
+                    ('categorical', categorical_transformer, categorical_features) # categorical_features is a list of categorical feature column names
+                ])
+
+            # Create the pipeline
+            pipeline = Pipeline(steps=[
+                ('preprocessor', preprocessor),
+                ('model', LinearRegression())
+            ])
+
+            # Fit the model
+            pipeline.fit(X_train, y_train)
+
+            # Predict
+            y_pred = pipeline.predict(X_test)
+
+            # Calculate metrics
+            # mae_scores.append(mean_absolute_error(y_test, y_pred))
+            result_df = pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred}, index=y_test.index)
+            overall_results.append(result_df)
+
+        df_results = pd.concat(overall_results)
+
+        uppers = []
+        lowers = []
+        alpha = 0.05
+        for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):
+            try:
+
+                df_q = df_results.iloc[:i]
+                pred = df_results['Predicted'].iloc[-1]
+                errors = df_q['IsTrue'] - df_q['Predicted']
+                positive_errors = errors[errors >= 0]
+                negative_errors = errors[errors < 0]
+
+                # Calculate bounds
+                upper_bound = pred + np.quantile(positive_errors, 1 - alpha)
+                lower_bound = pred + np.quantile(negative_errors, alpha)
+
+            except:
+                upper_bound = None
+                lower_bound = None
+
+            uppers.append(upper_bound)
+            lowers.append(lower_bound)
+
+        df_results['Upper'] = uppers
+        df_results['Lower'] = lowers
+
+        df_results = df_results.merge(data[['PrevClose']],left_index=True, right_index=True)
+        df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])
+        df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])
+        df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])
+        df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])
+
+        results[f'{str(int(candle))}'] = df_results
+
+        # Average metrics across folds
+        average_mae = mean_absolute_error(df_results['IsTrue'], df_results['Predicted'])
+        # sorted_features = sorted([(feat, coef) for feat, coef in zip(model.feature_name_, model.feature_importances_)], key=lambda x: abs(x[1]), reverse=True)
+        sorted_features = sorted([(feat, coef) for feat, coef in zip(pipeline.feature_names_in_, pipeline.named_steps.model.coef_)], key=lambda x: abs(x[1]), reverse=True)
+
+        coefs[f'{str(int(candle))}'] = pd.DataFrame(sorted_features, columns=['Feature','Coefficient'])
+
+        df_consolidated.loc[int(candle), 'MAE'] = average_mae
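
Both the notebook and this module hinge on the same cross-validation shape: TimeSeriesSplit(n_splits=len(df_final)-1, test_size=1) produces an expanding training window with exactly one held-out row per fold, so every row after the first gets an out-of-sample prediction. A tiny sketch of the splits on five synthetic rows:

# Expanding-window, one-step-ahead splits, as used by the walk-forward loop above.
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X = np.arange(5).reshape(-1, 1)  # five synthetic rows
tscv = TimeSeriesSplit(n_splits=len(X) - 1, max_train_size=None, test_size=1)
for train_idx, test_idx in tscv.split(X):
    print(train_idx, '->', test_idx)
# [0] -> [1]
# [0 1] -> [2]
# [0 1 2] -> [3]
# [0 1 2 3] -> [4]
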