Spaces:
Sleeping
Sleeping
updating charts
Browse files- .gitignore +3 -1
- app.py +252 -76
- data_check.ipynb +510 -0
- uni_model.py +180 -0
.gitignore
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
/.env
|
2 |
/.venv
|
3 |
/hss.pem ec2-user@ec2-18-1
|
4 |
-
/__pycache__
|
|
|
|
|
|
1 |
/.env
|
2 |
/.venv
|
3 |
/hss.pem ec2-user@ec2-18-1
|
4 |
+
/__pycache__
|
5 |
+
appOld.py
|
6 |
+
appOld2.py
|
app.py
CHANGED
@@ -56,107 +56,283 @@ levels = data_daily.loc[df1.index.date, ['H1','H2','L1','L2','Open']].drop_dupli
|
|
56 |
levels['FirstBar'] = dts
|
57 |
|
58 |
# Plot
|
|
|
59 |
import streamlit as st
|
60 |
from streamlit_lightweight_charts import renderLightweightCharts
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
},
|
70 |
-
"
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
},
|
79 |
-
|
80 |
-
"
|
81 |
-
|
|
|
|
|
|
|
|
|
82 |
},
|
83 |
-
|
84 |
-
"color": "rgba(197, 203, 206, 0)"
|
85 |
-
}
|
86 |
-
}
|
87 |
-
},
|
88 |
-
{
|
89 |
-
"width":800,
|
90 |
-
"height":125,
|
91 |
-
"layout": {
|
92 |
-
"textColor": 'white',
|
93 |
-
"background": {
|
94 |
-
"type": 'solid',
|
95 |
-
"color": 'black'
|
96 |
},
|
97 |
-
|
98 |
-
"grid": {
|
99 |
"vertLines": {
|
100 |
-
"color":
|
101 |
-
|
102 |
"horzLines": {
|
103 |
-
"color":
|
104 |
}
|
105 |
},
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
],
|
118 |
-
"options": {
|
119 |
-
"upColor": '#3399ff',
|
120 |
-
"downColor": '#ff5f5f',
|
121 |
-
"borderVisible": False,
|
122 |
-
"wickUpColor": '#3399ff',
|
123 |
-
"wickDownColor": '#ff5f5f',
|
124 |
-
"priceScaleVisible": True
|
125 |
},
|
126 |
-
|
127 |
-
"
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
}
|
131 |
}
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
136 |
"options": {
|
137 |
-
"
|
138 |
-
"
|
|
|
|
|
|
|
139 |
}
|
140 |
-
}
|
|
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
renderLightweightCharts([
|
151 |
{
|
152 |
-
"chart":
|
153 |
"series": seriesCandlestickChart
|
154 |
},
|
155 |
{
|
156 |
-
"chart":
|
157 |
-
"series":
|
158 |
-
}
|
159 |
], 'multipane')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
# Important levels
|
162 |
df_levels = pd.DataFrame(levels[['H2','H1','Open','L1','L2']].iloc[-1]).round(2)
|
|
|
56 |
levels['FirstBar'] = dts
|
57 |
|
58 |
# Plot
|
59 |
+
|
60 |
import streamlit as st
|
61 |
from streamlit_lightweight_charts import renderLightweightCharts
|
62 |
|
63 |
+
import json
|
64 |
+
import numpy as np
|
65 |
+
import yfinance as yf
|
66 |
+
import pandas as pd
|
67 |
+
|
68 |
+
COLOR_BULL = '#3399ff' # #26a69a
|
69 |
+
COLOR_BEAR = '#ff5f5f' # #ef5350
|
70 |
+
|
71 |
+
|
72 |
+
# Some data wrangling to match required format
|
73 |
+
df = df1.copy()
|
74 |
+
df['time'] = [dt.timestamp() for dt in df.index]
|
75 |
+
df = df[['time','Open','High','Low','Close','CalibPredicted','Color']]
|
76 |
+
df.columns = ['time','open','high','low','close','volume','color'] # rename columns
|
77 |
+
# df['color'] = np.where( df['open'] > df['close'], COLOR_BEAR, COLOR_BULL) # bull or bear
|
78 |
+
|
79 |
+
# export to JSON format
|
80 |
+
# candles = json.loads(df.to_json(orient = "records"))
|
81 |
+
candles = json.loads(json.dumps([
|
82 |
+
{"open": open,
|
83 |
+
"high": high,
|
84 |
+
"low": low,
|
85 |
+
"close": close,
|
86 |
+
"time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
|
87 |
+
], indent=2))
|
88 |
+
# volume = json.loads(df.rename(columns={"volume": "value",}).to_json(orient = "records"))
|
89 |
+
volume = json.loads(json.dumps([
|
90 |
+
{ "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
|
91 |
+
], indent=2))
|
92 |
+
|
93 |
+
chartMultipaneOptions = [
|
94 |
+
{
|
95 |
+
# "width": 600,
|
96 |
+
"height": 400,
|
97 |
+
"layout": {
|
98 |
+
"background": {
|
99 |
+
"type": "solid",
|
100 |
+
"color": 'transparent'
|
101 |
+
},
|
102 |
+
"textColor": "white"
|
103 |
},
|
104 |
+
"grid": {
|
105 |
+
"vertLines": {
|
106 |
+
"color": "rgba(197, 203, 206, 0.25)"
|
107 |
+
},
|
108 |
+
"horzLines": {
|
109 |
+
"color": "rgba(197, 203, 206, 0.25)"
|
110 |
+
}
|
111 |
+
},
|
112 |
+
"crosshair": {
|
113 |
+
"mode": 0
|
114 |
+
},
|
115 |
+
"priceScale": {
|
116 |
+
"borderColor": "rgba(197, 203, 206, 0.8)"
|
117 |
+
},
|
118 |
+
"timeScale": {
|
119 |
+
"borderColor": "rgba(197, 203, 206, 0.8)",
|
120 |
+
"barSpacing": 15
|
121 |
},
|
122 |
+
"watermark": {
|
123 |
+
"visible": True,
|
124 |
+
"fontSize": 48,
|
125 |
+
"horzAlign": 'center',
|
126 |
+
"vertAlign": 'center',
|
127 |
+
"color": 'rgba(171, 71, 188, 0.3)',
|
128 |
+
"text": 'AAPL - D1',
|
129 |
+
}
|
130 |
},
|
131 |
+
{
|
132 |
+
# "width": 600,
|
133 |
+
"height": 100,
|
134 |
+
"layout": {
|
135 |
+
"background": {
|
136 |
+
"type": 'solid',
|
137 |
+
"color": 'transparent'
|
138 |
},
|
139 |
+
"textColor": 'black',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
},
|
141 |
+
"grid": {
|
|
|
142 |
"vertLines": {
|
143 |
+
"color": 'rgba(42, 46, 57, 0)',
|
144 |
+
},
|
145 |
"horzLines": {
|
146 |
+
"color": 'rgba(42, 46, 57, 0.6)',
|
147 |
}
|
148 |
},
|
149 |
+
"timeScale": {
|
150 |
+
"visible": False,
|
151 |
+
},
|
152 |
+
"watermark": {
|
153 |
+
"visible": True,
|
154 |
+
"fontSize": 18,
|
155 |
+
"horzAlign": 'left',
|
156 |
+
"vertAlign": 'top',
|
157 |
+
"color": 'rgba(171, 71, 188, 0.7)',
|
158 |
+
"text": 'Volume',
|
159 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
},
|
161 |
+
{
|
162 |
+
"width": 600,
|
163 |
+
"height": 200,
|
164 |
+
"layout": {
|
165 |
+
"background": {
|
166 |
+
"type": "solid",
|
167 |
+
"color": 'white'
|
168 |
+
},
|
169 |
+
"textColor": "black"
|
170 |
+
},
|
171 |
+
"timeScale": {
|
172 |
+
"visible": False,
|
173 |
+
},
|
174 |
+
"watermark": {
|
175 |
+
"visible": True,
|
176 |
+
"fontSize": 18,
|
177 |
+
"horzAlign": 'left',
|
178 |
+
"vertAlign": 'center',
|
179 |
+
"color": 'rgba(171, 71, 188, 0.7)',
|
180 |
+
"text": 'MACD',
|
181 |
}
|
182 |
}
|
183 |
+
]
|
184 |
+
|
185 |
+
seriesCandlestickChart = [
|
186 |
+
{
|
187 |
+
"type": 'Candlestick',
|
188 |
+
"data": candles,
|
189 |
"options": {
|
190 |
+
"upColor": COLOR_BULL,
|
191 |
+
"downColor": COLOR_BEAR,
|
192 |
+
"borderVisible": False,
|
193 |
+
"wickUpColor": COLOR_BULL,
|
194 |
+
"wickDownColor": COLOR_BEAR
|
195 |
}
|
196 |
+
}
|
197 |
+
]
|
198 |
|
199 |
+
seriesVolumeChart = [
|
200 |
+
{
|
201 |
+
"type": 'Histogram',
|
202 |
+
"data": volume,
|
203 |
+
"options": {
|
204 |
+
"priceFormat": {
|
205 |
+
"type": 'volume',
|
206 |
+
},
|
207 |
+
"priceScaleId": "" # set as an overlay setting,
|
208 |
+
},
|
209 |
+
"priceScale": {
|
210 |
+
"scaleMargins": {
|
211 |
+
"top": 0,
|
212 |
+
"bottom": 0,
|
213 |
+
},
|
214 |
+
"alignLabels": False
|
215 |
+
}
|
216 |
+
}
|
217 |
+
]
|
218 |
|
219 |
renderLightweightCharts([
|
220 |
{
|
221 |
+
"chart": chartMultipaneOptions[0],
|
222 |
"series": seriesCandlestickChart
|
223 |
},
|
224 |
{
|
225 |
+
"chart": chartMultipaneOptions[1],
|
226 |
+
"series": seriesVolumeChart
|
227 |
+
}
|
228 |
], 'multipane')
|
229 |
+
# import streamlit as st
|
230 |
+
# from streamlit_lightweight_charts import renderLightweightCharts
|
231 |
+
|
232 |
+
# chartOptions = [{
|
233 |
+
# "width":800,
|
234 |
+
# "height":400,
|
235 |
+
# "rightPriceScale": {
|
236 |
+
# "scaleMargins": {
|
237 |
+
# "top": 0.2,
|
238 |
+
# "bottom": 0.25,
|
239 |
+
# },
|
240 |
+
# "borderVisible": False,
|
241 |
+
# },
|
242 |
+
# "overlayPriceScales": {
|
243 |
+
# "scaleMargins": {
|
244 |
+
# "top": 0.7,
|
245 |
+
# "bottom": 0,
|
246 |
+
# }
|
247 |
+
# },
|
248 |
+
# "layout": {
|
249 |
+
# "textColor": 'white',
|
250 |
+
# "background": {
|
251 |
+
# "type": 'solid',
|
252 |
+
# "color": 'black'
|
253 |
+
# },
|
254 |
+
# },
|
255 |
+
# "grid": {
|
256 |
+
# "vertLines": {
|
257 |
+
# "color": "rgba(197, 203, 206, 0)"
|
258 |
+
# },
|
259 |
+
# "horzLines": {
|
260 |
+
# "color": "rgba(197, 203, 206, 0)"
|
261 |
+
# }
|
262 |
+
# }
|
263 |
+
# },
|
264 |
+
# {
|
265 |
+
# "width":800,
|
266 |
+
# "height":125,
|
267 |
+
# "layout": {
|
268 |
+
# "textColor": 'white',
|
269 |
+
# "background": {
|
270 |
+
# "type": 'solid',
|
271 |
+
# "color": 'black'
|
272 |
+
# },
|
273 |
+
# },
|
274 |
+
# "grid": {
|
275 |
+
# "vertLines": {
|
276 |
+
# "color": "rgba(197, 203, 206, 0)"
|
277 |
+
# },
|
278 |
+
# "horzLines": {
|
279 |
+
# "color": "rgba(197, 203, 206, 0)"
|
280 |
+
# }
|
281 |
+
# },
|
282 |
+
# },]
|
283 |
+
|
284 |
+
# seriesCandlestickChart = [{
|
285 |
+
|
286 |
+
# "type": 'Candlestick',
|
287 |
+
# "data": [
|
288 |
+
# {"open": open,
|
289 |
+
# "high": high,
|
290 |
+
# "low": low,
|
291 |
+
# "close": close,
|
292 |
+
# "time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
|
293 |
+
# ],
|
294 |
+
# "options": {
|
295 |
+
# "upColor": '#3399ff',
|
296 |
+
# "downColor": '#ff5f5f',
|
297 |
+
# "borderVisible": False,
|
298 |
+
# "wickUpColor": '#3399ff',
|
299 |
+
# "wickDownColor": '#ff5f5f',
|
300 |
+
# "priceScaleVisible": True
|
301 |
+
# },
|
302 |
+
# "priceScale": {
|
303 |
+
# "scaleMargins": {
|
304 |
+
# "top": 0.7,
|
305 |
+
# "bottom": 0,
|
306 |
+
# }
|
307 |
+
# }
|
308 |
+
# },
|
309 |
+
# {
|
310 |
+
# "type": 'Line',
|
311 |
+
# "data": [{"value": value, "time":dt.timestamp()} for value, dt in zip(levels['H1'], levels['FirstBar'])],
|
312 |
+
# "options": {
|
313 |
+
# "color": 'blue',
|
314 |
+
# "lineWidth": 1
|
315 |
+
# }
|
316 |
+
# }]
|
317 |
+
|
318 |
+
# seriesPredictions = [{
|
319 |
+
# "type": 'Histogram',
|
320 |
+
# "data": [
|
321 |
+
# { "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
|
322 |
+
# ],
|
323 |
+
# "options": { "color": '#26a69a' }
|
324 |
+
# }]
|
325 |
+
|
326 |
+
# renderLightweightCharts([
|
327 |
+
# {
|
328 |
+
# "chart": chartOptions[0],
|
329 |
+
# "series": seriesCandlestickChart
|
330 |
+
# },
|
331 |
+
# {
|
332 |
+
# "chart": chartOptions[1],
|
333 |
+
# "series": seriesPredictions
|
334 |
+
# },
|
335 |
+
# ], 'multipane')
|
336 |
|
337 |
# Important levels
|
338 |
df_levels = pd.DataFrame(levels[['H2','H1','Open','L1','L2']].iloc[-1]).round(2)
|
data_check.ipynb
ADDED
@@ -0,0 +1,510 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import numpy as np\n",
|
10 |
+
"import warnings\n",
|
11 |
+
"with warnings.catch_warnings():\n",
|
12 |
+
" warnings.simplefilter(\"ignore\")\n",
|
13 |
+
" warnings.simplefilter(action='ignore', category=FutureWarning)\n",
|
14 |
+
"\n",
|
15 |
+
"import pandas as pd\n",
|
16 |
+
"from getDailyData import get_daily\n",
|
17 |
+
"from sklearn.model_selection import TimeSeriesSplit\n",
|
18 |
+
"from sklearn.metrics import mean_absolute_error\n",
|
19 |
+
"from sklearn.linear_model import LinearRegression # Example model\n",
|
20 |
+
"from sklearn.pipeline import Pipeline\n",
|
21 |
+
"from sklearn.compose import ColumnTransformer\n",
|
22 |
+
"from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder\n",
|
23 |
+
"from lightgbm import LGBMRegressor\n",
|
24 |
+
"from tqdm import tqdm\n"
|
25 |
+
]
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cell_type": "code",
|
29 |
+
"execution_count": null,
|
30 |
+
"metadata": {},
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"import yfinance as yf\n",
|
34 |
+
"spx = yf.Ticker('^GSPC')\n",
|
35 |
+
"spx.history(start='2023-11-20', interval='1d')"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": null,
|
41 |
+
"metadata": {},
|
42 |
+
"outputs": [],
|
43 |
+
"source": [
|
44 |
+
"import datetime\n",
|
45 |
+
"from datetime import time, timedelta\n",
|
46 |
+
"from tqdm import tqdm\n",
|
47 |
+
"\n",
|
48 |
+
"now = datetime.datetime.now()\n",
|
49 |
+
"df_consolidated = pd.DataFrame()\n",
|
50 |
+
"results = {}\n",
|
51 |
+
"coefs = {}\n",
|
52 |
+
"\n",
|
53 |
+
"morning_start = datetime.datetime.combine(now.date(), time(6, 30))\n",
|
54 |
+
"delta = now - morning_start\n",
|
55 |
+
"print(delta)\n",
|
56 |
+
"# candle = 1 #max(0,min((delta.total_seconds() / 60 / 30) // 1, 12))\n",
|
57 |
+
"# candles = np.arange(1,13)\n",
|
58 |
+
"candles = np.arange(1,2)\n",
|
59 |
+
"for candle in tqdm(candles):\n",
|
60 |
+
" print(f'running for {str(candle)}')\n",
|
61 |
+
" data, df_final, final_row = get_daily(mode='intra', periods_30m=candle)\n",
|
62 |
+
"\n",
|
63 |
+
" df_new = data[['Open','High','Low','Close','Close30','Close_VIX30','Close_VIX','Close_VVIX30','Close_VVIX']].copy()\n",
|
64 |
+
" df_new['PrevClose'] = df_new['Close'].shift(1)\n",
|
65 |
+
" df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1\n",
|
66 |
+
" df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1\n",
|
67 |
+
" df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1\n",
|
68 |
+
" df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1\n",
|
69 |
+
" df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1\n",
|
70 |
+
" df_new['EMA8'] = df_new['Close'].ewm(8).mean()\n",
|
71 |
+
" df_new['EMA8'] = df_new['EMA8'].shift(1)\n",
|
72 |
+
" df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']\n",
|
73 |
+
"\n",
|
74 |
+
" # Target will be the day's close\n",
|
75 |
+
" df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1\n",
|
76 |
+
"\n",
|
77 |
+
" # Column to determine what percentile the current intra performance looks like\n",
|
78 |
+
" intra_rank = []\n",
|
79 |
+
" for i, pct in tqdm(enumerate(df_new['ClosePctIntra'])):\n",
|
80 |
+
" try:\n",
|
81 |
+
" historical = df_new['ClosePctIntra'].iloc[:i]\n",
|
82 |
+
" current = df_new['ClosePctIntra'].iloc[i]\n",
|
83 |
+
" perc = len(historical[historical > current]) / len(historical)\n",
|
84 |
+
" except:\n",
|
85 |
+
" perc = None\n",
|
86 |
+
" intra_rank.append(perc)\n",
|
87 |
+
"\n",
|
88 |
+
" df_new['IntraPercentile'] = intra_rank\n",
|
89 |
+
"\n",
|
90 |
+
" # Column to determine what percentile the daily performance looks like\n",
|
91 |
+
" daily_rank = []\n",
|
92 |
+
" for i, pct in tqdm(enumerate(df_new['ClosePct'])):\n",
|
93 |
+
" try:\n",
|
94 |
+
" historical = df_new['ClosePct'].iloc[:i]\n",
|
95 |
+
" current = df_new['ClosePct'].iloc[i]\n",
|
96 |
+
" perc = len(historical[historical > current]) / len(historical)\n",
|
97 |
+
" except:\n",
|
98 |
+
" perc = None\n",
|
99 |
+
" daily_rank.append(perc)\n",
|
100 |
+
"\n",
|
101 |
+
" df_new['ClosePctPercentile'] = daily_rank\n",
|
102 |
+
"\n",
|
103 |
+
" # Let's do n-5 to start just for closes\n",
|
104 |
+
" lags = np.arange(1,6)\n",
|
105 |
+
"\n",
|
106 |
+
" for lag in lags:\n",
|
107 |
+
" df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)\n",
|
108 |
+
" # df_new[f'ClosePctPercentile_n{str(lag)}'] = df_new['ClosePctPercentile'].shift(lag)\n",
|
109 |
+
"\n",
|
110 |
+
"\n",
|
111 |
+
" df_feats = df_new[[c for c in df_new.columns if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]]\n",
|
112 |
+
"\n",
|
113 |
+
" df_final = df_feats.dropna()\n",
|
114 |
+
"\n",
|
115 |
+
" X = df_final[['ClosePctIntra']] # Feature dataset\n",
|
116 |
+
" y = df_final['ClosePct'] # Target dataset\n",
|
117 |
+
"\n",
|
118 |
+
" # model = LGBMRegressor(random_state=42, n_estimators=10, verbose=-1)\n",
|
119 |
+
" # model = LinearRegression()\n",
|
120 |
+
" # Define the column transformer for handling numeric and categorical features\n",
|
121 |
+
" \n",
|
122 |
+
"\n",
|
123 |
+
" # Fit the pipeline on the training data\n",
|
124 |
+
" # pipeline.fit(X_train, y_train)\n",
|
125 |
+
"\n",
|
126 |
+
" tscv = TimeSeriesSplit(n_splits=len(df_final)-1, max_train_size=None, test_size=1)\n",
|
127 |
+
"\n",
|
128 |
+
" mae_scores = []\n",
|
129 |
+
" overall_results = []\n",
|
130 |
+
"\n",
|
131 |
+
" for train_index, test_index in tscv.split(X):\n",
|
132 |
+
" \n",
|
133 |
+
" X_train = X.iloc[train_index]\n",
|
134 |
+
" X_test = X.iloc[test_index]\n",
|
135 |
+
" y_train = y.iloc[train_index]\n",
|
136 |
+
" y_test = y.iloc[test_index]\n",
|
137 |
+
" \n",
|
138 |
+
" # Select features\n",
|
139 |
+
" categorical_features = X_train.select_dtypes(include='object').columns\n",
|
140 |
+
" numeric_features = X_train.drop(columns=[c for c in X_train.columns if 'Percentile' in c]).select_dtypes(include='number').columns\n",
|
141 |
+
"\n",
|
142 |
+
" # Transformers\n",
|
143 |
+
" numeric_transformer = RobustScaler() # Example: StandardScaler for numeric features\n",
|
144 |
+
" categorical_transformer = OneHotEncoder() # Example: OneHotEncoder for categorical features\n",
|
145 |
+
"\n",
|
146 |
+
" # Define the pipeline steps\n",
|
147 |
+
" preprocessor = ColumnTransformer(\n",
|
148 |
+
" transformers=[\n",
|
149 |
+
" ('numeric', numeric_transformer, numeric_features), # numeric_features is a list of numeric feature column names\n",
|
150 |
+
" ('categorical', categorical_transformer, categorical_features) # categorical_features is a list of categorical feature column names\n",
|
151 |
+
" ])\n",
|
152 |
+
"\n",
|
153 |
+
" # Create the pipeline\n",
|
154 |
+
" pipeline = Pipeline(steps=[\n",
|
155 |
+
" ('preprocessor', preprocessor),\n",
|
156 |
+
" ('model', LinearRegression())\n",
|
157 |
+
" ])\n",
|
158 |
+
" \n",
|
159 |
+
" # Fit the model\n",
|
160 |
+
" pipeline.fit(X_train, y_train)\n",
|
161 |
+
"\n",
|
162 |
+
" # Predict\n",
|
163 |
+
" y_pred = pipeline.predict(X_test)\n",
|
164 |
+
"\n",
|
165 |
+
" # Calculate metrics\n",
|
166 |
+
" # mae_scores.append(mean_absolute_error(y_test, y_pred))\n",
|
167 |
+
" result_df = pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred}, index=y_test.index)\n",
|
168 |
+
" overall_results.append(result_df)\n",
|
169 |
+
"\n",
|
170 |
+
" df_results = pd.concat(overall_results)\n",
|
171 |
+
"\n",
|
172 |
+
" uppers = []\n",
|
173 |
+
" lowers = []\n",
|
174 |
+
" alpha = 0.05\n",
|
175 |
+
" for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):\n",
|
176 |
+
" try:\n",
|
177 |
+
" \n",
|
178 |
+
" df_q = df_results.iloc[:i]\n",
|
179 |
+
" pred = df_results['Predicted'].iloc[-1]\n",
|
180 |
+
" errors = df_q['IsTrue'] - df_q['Predicted']\n",
|
181 |
+
" positive_errors = errors[errors >= 0]\n",
|
182 |
+
" negative_errors = errors[errors < 0]\n",
|
183 |
+
"\n",
|
184 |
+
" # Calculate bounds\n",
|
185 |
+
" upper_bound = pred + np.quantile(positive_errors, 1 - alpha)\n",
|
186 |
+
" lower_bound = pred + np.quantile(negative_errors, alpha)\n",
|
187 |
+
" \n",
|
188 |
+
" except:\n",
|
189 |
+
" upper_bound = None\n",
|
190 |
+
" lower_bound = None\n",
|
191 |
+
"\n",
|
192 |
+
" uppers.append(upper_bound)\n",
|
193 |
+
" lowers.append(lower_bound)\n",
|
194 |
+
"\n",
|
195 |
+
" df_results['Upper'] = uppers\n",
|
196 |
+
" df_results['Lower'] = lowers\n",
|
197 |
+
"\n",
|
198 |
+
" df_results = df_results.merge(data[['PrevClose']],left_index=True, right_index=True)\n",
|
199 |
+
" df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])\n",
|
200 |
+
" df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])\n",
|
201 |
+
" df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])\n",
|
202 |
+
" df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])\n",
|
203 |
+
"\n",
|
204 |
+
" results[f'{str(int(candle))}'] = df_results\n",
|
205 |
+
"\n",
|
206 |
+
" # Average metrics across folds\n",
|
207 |
+
" average_mae = mean_absolute_error(df_results['IsTrue'], df_results['Predicted'])\n",
|
208 |
+
" # sorted_features = sorted([(feat, coef) for feat, coef in zip(model.feature_name_, model.feature_importances_)], key=lambda x: abs(x[1]), reverse=True)\n",
|
209 |
+
" sorted_features = sorted([(feat, coef) for feat, coef in zip(pipeline.feature_names_in_, pipeline.named_steps.model.coef_)], key=lambda x: abs(x[1]), reverse=True)\n",
|
210 |
+
"\n",
|
211 |
+
" coefs[f'{str(int(candle))}'] = pd.DataFrame(sorted_features, columns=['Feature','Coefficient'])\n",
|
212 |
+
"\n",
|
213 |
+
" df_consolidated.loc[int(candle), 'MAE'] = average_mae"
|
214 |
+
]
|
215 |
+
},
|
216 |
+
{
|
217 |
+
"cell_type": "code",
|
218 |
+
"execution_count": null,
|
219 |
+
"metadata": {},
|
220 |
+
"outputs": [],
|
221 |
+
"source": [
|
222 |
+
"pipeline.named_steps['model'].coef_"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"cell_type": "code",
|
227 |
+
"execution_count": null,
|
228 |
+
"metadata": {},
|
229 |
+
"outputs": [],
|
230 |
+
"source": [
|
231 |
+
"df_f = pd.concat(coefs)"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"cell_type": "code",
|
236 |
+
"execution_count": null,
|
237 |
+
"metadata": {},
|
238 |
+
"outputs": [],
|
239 |
+
"source": [
|
240 |
+
"df_consolidated"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"cell_type": "code",
|
245 |
+
"execution_count": null,
|
246 |
+
"metadata": {},
|
247 |
+
"outputs": [],
|
248 |
+
"source": [
|
249 |
+
"results[f'{str(candle)}'].loc['2023-10-01':, ['Pred','Actual','Up','Down']].plot();"
|
250 |
+
]
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"cell_type": "code",
|
254 |
+
"execution_count": null,
|
255 |
+
"metadata": {},
|
256 |
+
"outputs": [],
|
257 |
+
"source": [
|
258 |
+
"coefs[f'{str(candle)}']"
|
259 |
+
]
|
260 |
+
},
|
261 |
+
{
|
262 |
+
"cell_type": "code",
|
263 |
+
"execution_count": 3,
|
264 |
+
"metadata": {},
|
265 |
+
"outputs": [
|
266 |
+
{
|
267 |
+
"name": "stderr",
|
268 |
+
"output_type": "stream",
|
269 |
+
"text": [
|
270 |
+
"d:\\Projects\\gamedayspx_lambda\\getDailyData.py:243: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
271 |
+
" return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()\n",
|
272 |
+
"Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 1598.36it/s]\n",
|
273 |
+
"d:\\Projects\\gamedayspx_lambda\\model_intra_v2.py:11: SettingWithCopyWarning: \n",
|
274 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
275 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
276 |
+
"\n",
|
277 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
278 |
+
" df[target_column] = df[target_column].astype(bool)\n",
|
279 |
+
"d:\\Projects\\gamedayspx_lambda\\.venv\\lib\\site-packages\\sklearn\\base.py:465: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
|
280 |
+
" warnings.warn(\n",
|
281 |
+
"C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_10000\\2718014135.py:38: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
282 |
+
" return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()\n"
|
283 |
+
]
|
284 |
+
}
|
285 |
+
],
|
286 |
+
"source": [
|
287 |
+
"from getDailyData import get_daily\n",
|
288 |
+
"from model_intra_v2 import walk_forward_validation\n",
|
289 |
+
"from model_day_v2 import walk_forward_validation_seq as walk_forward_validation_daily\n",
|
290 |
+
"from model_regr_v2 import walk_forward_validation as walk_forward_validation_regr\n",
|
291 |
+
"from model_regr_v2 import calc_upper_lower\n",
|
292 |
+
"import pandas as pd\n",
|
293 |
+
"import json\n",
|
294 |
+
"from dbConn import connection, engine, insert_dataframe_to_sql\n",
|
295 |
+
"import numpy as np\n",
|
296 |
+
"from datetime import time, timedelta\n",
|
297 |
+
"import datetime\n",
|
298 |
+
"from pandas.tseries.offsets import BDay\n",
|
299 |
+
"import holidays\n",
|
300 |
+
"from dotenv import load_dotenv\n",
|
301 |
+
"load_dotenv()\n",
|
302 |
+
"\n",
|
303 |
+
"periods_30m = 1\n",
|
304 |
+
"\n",
|
305 |
+
"if periods_30m > 0:\n",
|
306 |
+
" data, df_final, final_row = get_daily(mode='intra', periods_30m=periods_30m)\n",
|
307 |
+
" # Regression model\n",
|
308 |
+
" res, _ = walk_forward_validation(df_final.drop(columns=['Target']).dropna(), 'Target_clf', 1, mode='single')\n",
|
309 |
+
" regr_res, _ = walk_forward_validation_regr(df_final[['CurrentClose30toClose','ClosePct']].dropna(), 'ClosePct', 1, mode='single')\n",
|
310 |
+
" df_regr_results = pd.read_sql_query(f'select * from reg_results where ModelNum = {str(periods_30m)}', con = engine)\n",
|
311 |
+
" regr_pct = regr_res['Predicted'].iloc[-1]\n",
|
312 |
+
" upper, lower = calc_upper_lower(regr_pct, df_regr_results, alpha=0.05)\n",
|
313 |
+
"\n",
|
314 |
+
"elif periods_30m == 0:\n",
|
315 |
+
" data, df_final, final_row = get_daily()\n",
|
316 |
+
" res, _, _ = walk_forward_validation_daily(df_final.dropna(), 'Target_clf', 'Target', 200, 1)\n",
|
317 |
+
"\n",
|
318 |
+
"# Get results, run calibration and pvalue \n",
|
319 |
+
"\n",
|
320 |
+
"df_results = pd.read_sql_query(f'select * from results where ModelNum = {str(periods_30m)}', con = engine)\n",
|
321 |
+
"\n",
|
322 |
+
"# Calibrate Probabilities\n",
|
323 |
+
"def get_quantiles(df, col_name, q):\n",
|
324 |
+
" return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()\n",
|
325 |
+
"\n",
|
326 |
+
"pct = res['Predicted'].iloc[-1]\n",
|
327 |
+
"\n",
|
328 |
+
"df_q = get_quantiles(df_results, 'Predicted', 10)\n",
|
329 |
+
"for q in df_q.index:\n",
|
330 |
+
" if q.left <= pct <= q.right:\n",
|
331 |
+
" p = df_q[q]\n",
|
332 |
+
"\n",
|
333 |
+
"calib_scores = np.abs(df_results['Predicted'].iloc[:-1] - 0.5)\n",
|
334 |
+
"score = abs(pct - 0.5)\n",
|
335 |
+
"pv = np.mean(calib_scores >= score)\n",
|
336 |
+
"asof = datetime.datetime.combine(data.index[-1], time(9,30)) + (periods_30m * timedelta(minutes=30)) \n",
|
337 |
+
"\n",
|
338 |
+
"blob = {\n",
|
339 |
+
" 'Datetime': str(res.index[-1]),\n",
|
340 |
+
" 'IsTrue':df_final['Target_clf'].iloc[-1],\n",
|
341 |
+
" 'Predicted': pct,\n",
|
342 |
+
" 'CalibPredicted': p,\n",
|
343 |
+
" 'Pvalue':pv,\n",
|
344 |
+
" 'ModelNum':periods_30m,\n",
|
345 |
+
" 'AsOf':str(asof)\n",
|
346 |
+
"}\n",
|
347 |
+
"\n",
|
348 |
+
"# Write to DB\n",
|
349 |
+
"df_write = pd.DataFrame.from_dict({k:[v] for k, v in blob.items()})\n"
|
350 |
+
]
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"cell_type": "code",
|
354 |
+
"execution_count": 4,
|
355 |
+
"metadata": {},
|
356 |
+
"outputs": [
|
357 |
+
{
|
358 |
+
"data": {
|
359 |
+
"text/html": [
|
360 |
+
"<div>\n",
|
361 |
+
"<style scoped>\n",
|
362 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
363 |
+
" vertical-align: middle;\n",
|
364 |
+
" }\n",
|
365 |
+
"\n",
|
366 |
+
" .dataframe tbody tr th {\n",
|
367 |
+
" vertical-align: top;\n",
|
368 |
+
" }\n",
|
369 |
+
"\n",
|
370 |
+
" .dataframe thead th {\n",
|
371 |
+
" text-align: right;\n",
|
372 |
+
" }\n",
|
373 |
+
"</style>\n",
|
374 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
375 |
+
" <thead>\n",
|
376 |
+
" <tr style=\"text-align: right;\">\n",
|
377 |
+
" <th></th>\n",
|
378 |
+
" <th>Datetime</th>\n",
|
379 |
+
" <th>IsTrue</th>\n",
|
380 |
+
" <th>Predicted</th>\n",
|
381 |
+
" <th>CalibPredicted</th>\n",
|
382 |
+
" <th>Pvalue</th>\n",
|
383 |
+
" <th>ModelNum</th>\n",
|
384 |
+
" <th>AsOf</th>\n",
|
385 |
+
" </tr>\n",
|
386 |
+
" </thead>\n",
|
387 |
+
" <tbody>\n",
|
388 |
+
" <tr>\n",
|
389 |
+
" <th>0</th>\n",
|
390 |
+
" <td>2023-11-22 00:00:00</td>\n",
|
391 |
+
" <td>True</td>\n",
|
392 |
+
" <td>0.712132</td>\n",
|
393 |
+
" <td>0.832636</td>\n",
|
394 |
+
" <td>0.404288</td>\n",
|
395 |
+
" <td>1</td>\n",
|
396 |
+
" <td>2023-11-24 10:00:00</td>\n",
|
397 |
+
" </tr>\n",
|
398 |
+
" </tbody>\n",
|
399 |
+
"</table>\n",
|
400 |
+
"</div>"
|
401 |
+
],
|
402 |
+
"text/plain": [
|
403 |
+
" Datetime IsTrue Predicted CalibPredicted Pvalue ModelNum \\\n",
|
404 |
+
"0 2023-11-22 00:00:00 True 0.712132 0.832636 0.404288 1 \n",
|
405 |
+
"\n",
|
406 |
+
" AsOf \n",
|
407 |
+
"0 2023-11-24 10:00:00 "
|
408 |
+
]
|
409 |
+
},
|
410 |
+
"execution_count": 4,
|
411 |
+
"metadata": {},
|
412 |
+
"output_type": "execute_result"
|
413 |
+
}
|
414 |
+
],
|
415 |
+
"source": [
|
416 |
+
"df_write"
|
417 |
+
]
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"cell_type": "code",
|
421 |
+
"execution_count": null,
|
422 |
+
"metadata": {},
|
423 |
+
"outputs": [],
|
424 |
+
"source": [
|
425 |
+
"cursor = connection.cursor()\n",
|
426 |
+
"insert_dataframe_to_sql('results', df_write, cursor)"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"cell_type": "code",
|
431 |
+
"execution_count": null,
|
432 |
+
"metadata": {},
|
433 |
+
"outputs": [],
|
434 |
+
"source": [
|
435 |
+
"\n",
|
436 |
+
"if periods_30m > 0:\n",
|
437 |
+
" regr_blob = {\n",
|
438 |
+
" 'Datetime': str(res.index[-1]),\n",
|
439 |
+
" 'IsTrue':df_final['ClosePct'].iloc[-1],\n",
|
440 |
+
" 'Predicted': regr_pct,\n",
|
441 |
+
" 'Upper': upper,\n",
|
442 |
+
" 'Lower':lower,\n",
|
443 |
+
" 'ModelNum':periods_30m,\n",
|
444 |
+
" 'AsOf':str(asof)\n",
|
445 |
+
" }\n",
|
446 |
+
" df_write_reg = pd.DataFrame.from_dict({k:[v] for k, v in regr_blob.items()})\n",
|
447 |
+
" insert_dataframe_to_sql('reg_results', df_write_reg, cursor)\n",
|
448 |
+
"\n",
|
449 |
+
"cursor.close()\n",
|
450 |
+
"connection.close()\n"
|
451 |
+
]
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"cell_type": "code",
|
455 |
+
"execution_count": 2,
|
456 |
+
"metadata": {},
|
457 |
+
"outputs": [
|
458 |
+
{
|
459 |
+
"data": {
|
460 |
+
"text/plain": [
|
461 |
+
"{'Datetime': '2023-11-22 00:00:00',\n",
|
462 |
+
" 'IsTrue': 0.0005968736678840791,\n",
|
463 |
+
" 'Predicted': 0.00048111739459897327,\n",
|
464 |
+
" 'Upper': 0.02107334825815718,\n",
|
465 |
+
" 'Lower': -0.018127700802536933,\n",
|
466 |
+
" 'ModelNum': 1,\n",
|
467 |
+
" 'AsOf': '2023-11-24 10:00:00'}"
|
468 |
+
]
|
469 |
+
},
|
470 |
+
"execution_count": 2,
|
471 |
+
"metadata": {},
|
472 |
+
"output_type": "execute_result"
|
473 |
+
}
|
474 |
+
],
|
475 |
+
"source": [
|
476 |
+
"regr_blob"
|
477 |
+
]
|
478 |
+
},
|
479 |
+
{
|
480 |
+
"cell_type": "code",
|
481 |
+
"execution_count": null,
|
482 |
+
"metadata": {},
|
483 |
+
"outputs": [],
|
484 |
+
"source": [
|
485 |
+
"regr_blob"
|
486 |
+
]
|
487 |
+
}
|
488 |
+
],
|
489 |
+
"metadata": {
|
490 |
+
"kernelspec": {
|
491 |
+
"display_name": ".venv",
|
492 |
+
"language": "python",
|
493 |
+
"name": "python3"
|
494 |
+
},
|
495 |
+
"language_info": {
|
496 |
+
"codemirror_mode": {
|
497 |
+
"name": "ipython",
|
498 |
+
"version": 3
|
499 |
+
},
|
500 |
+
"file_extension": ".py",
|
501 |
+
"mimetype": "text/x-python",
|
502 |
+
"name": "python",
|
503 |
+
"nbconvert_exporter": "python",
|
504 |
+
"pygments_lexer": "ipython3",
|
505 |
+
"version": "3.10.11"
|
506 |
+
}
|
507 |
+
},
|
508 |
+
"nbformat": 4,
|
509 |
+
"nbformat_minor": 2
|
510 |
+
}
|
uni_model.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import warnings
|
3 |
+
with warnings.catch_warnings():
|
4 |
+
warnings.simplefilter("ignore")
|
5 |
+
warnings.simplefilter(action='ignore', category=FutureWarning)
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
from sklearn.model_selection import TimeSeriesSplit
|
9 |
+
from sklearn.metrics import mean_absolute_error
|
10 |
+
from sklearn.linear_model import LinearRegression # Example model
|
11 |
+
from sklearn.pipeline import Pipeline
|
12 |
+
from sklearn.compose import ColumnTransformer
|
13 |
+
from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder
|
14 |
+
|
15 |
+
import datetime
|
16 |
+
from datetime import time, timedelta
|
17 |
+
from tqdm import tqdm
|
18 |
+
|
19 |
+
def prep_data(df, now=None):
    """Walk-forward train a per-candle linear model and calibrate prediction intervals.

    For each 30-minute candle number, pulls intraday data via ``get_daily``,
    builds intraday/lagged return features, fits a one-feature linear
    regression with an expanding-window ``TimeSeriesSplit`` (one test day per
    fold), then derives upper/lower bounds for each prediction from the
    empirical quantiles of past residuals (conformal-style calibration).

    Parameters
    ----------
    df : unused
        Kept for interface compatibility; data is fetched via ``get_daily``.
    now : datetime.datetime, optional
        Reference timestamp for reporting time since the 06:30 session start.
        Defaults to the current local time.  (Fix: ``now`` was previously an
        undefined global and raised ``NameError``.)

    Returns
    -------
    tuple
        ``(results, coefs, df_consolidated)`` where ``results`` maps model
        number -> per-day prediction DataFrame, ``coefs`` maps model number ->
        sorted coefficient DataFrame, and ``df_consolidated`` holds one MAE
        row per model number.  (Fix: these three were previously written to
        nonexistent globals.)

    NOTE(review): relies on ``get_daily``, defined elsewhere in the project —
    confirm it is in scope wherever this function is called.
    """
    if now is None:
        now = datetime.datetime.now()

    # Fix: these accumulators were referenced but never initialized.
    results = {}
    coefs = {}
    df_consolidated = pd.DataFrame(columns=['MAE'])

    morning_start = datetime.datetime.combine(now.date(), time(6, 30))
    delta = now - morning_start
    print(delta)

    # Originally intended to cover candles 1..12; currently only model 1.
    candles = np.arange(1, 2)
    for candle in tqdm(candles):
        print(f'running for {str(candle)}')
        data, df_final, final_row = get_daily(mode='intra', periods_30m=candle)

        df_new = data[['Open', 'High', 'Low', 'Close', 'Close30', 'Close_VIX30',
                       'Close_VIX', 'Close_VVIX30', 'Close_VVIX']].copy()
        df_new['PrevClose'] = df_new['Close'].shift(1)
        df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1
        df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1
        df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1
        df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1
        df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1
        # NOTE(review): ewm(8) sets com=8, not span=8 — confirm intended decay.
        df_new['EMA8'] = df_new['Close'].ewm(8).mean()
        df_new['EMA8'] = df_new['EMA8'].shift(1)  # use prior-day EMA (no lookahead)
        df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']

        # Target: full-day close-over-close return.
        df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1

        # Expanding-window percentile ranks (strictly-prior history only).
        df_new['IntraPercentile'] = _expanding_rank(df_new['ClosePctIntra'])
        df_new['ClosePctPercentile'] = _expanding_rank(df_new['ClosePct'])

        # Lagged daily returns, n-1 .. n-5.
        for lag in np.arange(1, 6):
            df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)

        feat_cols = [c for c in df_new.columns
                     if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]
        df_final = df_new[feat_cols].dropna()

        X = df_final[['ClosePctIntra']]  # single-feature design matrix
        y = df_final['ClosePct']         # target

        # Walk-forward CV: each split tests exactly one day on all prior days.
        tscv = TimeSeriesSplit(n_splits=len(df_final) - 1,
                               max_train_size=None, test_size=1)

        overall_results = []
        for train_index, test_index in tscv.split(X):
            X_train = X.iloc[train_index]
            X_test = X.iloc[test_index]
            y_train = y.iloc[train_index]
            y_test = y.iloc[test_index]

            # Percentile columns are already scale-free; exclude from scaling.
            categorical_features = X_train.select_dtypes(include='object').columns
            numeric_features = X_train.drop(
                columns=[c for c in X_train.columns if 'Percentile' in c]
            ).select_dtypes(include='number').columns

            preprocessor = ColumnTransformer(transformers=[
                ('numeric', RobustScaler(), numeric_features),
                ('categorical', OneHotEncoder(), categorical_features),
            ])

            pipeline = Pipeline(steps=[
                ('preprocessor', preprocessor),
                ('model', LinearRegression()),
            ])

            pipeline.fit(X_train, y_train)
            y_pred = pipeline.predict(X_test)

            overall_results.append(
                pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred},
                             index=y_test.index))

        df_results = pd.concat(overall_results)

        # Conformal-style interval calibration from historical residuals.
        uppers = []
        lowers = []
        alpha = 0.05
        for i, pct in tqdm(enumerate(df_results['Predicted']),
                           desc='Calibrating Probas', total=len(df_results)):
            try:
                df_q = df_results.iloc[:i]  # strictly-prior rows only
                # Fix: was ``iloc[-1]``, which anchored every row's interval
                # on the *latest* prediction; use the current row's prediction.
                pred = pct
                errors = df_q['IsTrue'] - df_q['Predicted']
                positive_errors = errors[errors >= 0]
                negative_errors = errors[errors < 0]
                upper_bound = pred + np.quantile(positive_errors, 1 - alpha)
                lower_bound = pred + np.quantile(negative_errors, alpha)
            except (IndexError, ValueError):
                # Not enough history yet to form both error tails.
                upper_bound = None
                lower_bound = None
            uppers.append(upper_bound)
            lowers.append(lower_bound)

        df_results['Upper'] = uppers
        df_results['Lower'] = lowers

        # Convert percentage predictions back to price levels.
        df_results = df_results.merge(data[['PrevClose']],
                                      left_index=True, right_index=True)
        df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])
        df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])
        df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])
        df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])

        results[f'{str(int(candle))}'] = df_results

        # Out-of-sample MAE across all walk-forward folds.
        average_mae = mean_absolute_error(df_results['IsTrue'],
                                          df_results['Predicted'])
        # Coefficients from the last-fitted fold, sorted by magnitude.
        sorted_features = sorted(
            [(feat, coef) for feat, coef in zip(pipeline.feature_names_in_,
                                                pipeline.named_steps.model.coef_)],
            key=lambda x: abs(x[1]), reverse=True)
        coefs[f'{str(int(candle))}'] = pd.DataFrame(
            sorted_features, columns=['Feature', 'Coefficient'])

        df_consolidated.loc[int(candle), 'MAE'] = average_mae

    return results, coefs, df_consolidated


def _expanding_rank(series):
    """Return, per element, the fraction of strictly-earlier values that
    exceed it (expanding percentile rank; None while history is empty)."""
    ranks = []
    for i in range(len(series)):
        try:
            historical = series.iloc[:i]
            current = series.iloc[i]
            perc = len(historical[historical > current]) / len(historical)
        except ZeroDivisionError:
            # i == 0: no history yet.
            perc = None
        ranks.append(perc)
    return ranks
|