Spaces:
Sleeping
Sleeping
updating charts
Browse files- .gitignore +3 -1
- app.py +252 -76
- data_check.ipynb +510 -0
- uni_model.py +180 -0
.gitignore
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
/.env
|
2 |
/.venv
|
3 |
/hss.pem ec2-user@ec2-18-1
|
4 |
-
/__pycache__
|
|
|
|
|
|
1 |
/.env
|
2 |
/.venv
|
3 |
/hss.pem ec2-user@ec2-18-1
|
4 |
+
/__pycache__
|
5 |
+
appOld.py
|
6 |
+
appOld2.py
|
app.py
CHANGED
@@ -56,107 +56,283 @@ levels = data_daily.loc[df1.index.date, ['H1','H2','L1','L2','Open']].drop_dupli
|
|
56 |
levels['FirstBar'] = dts
|
57 |
|
58 |
# Plot
|
|
|
59 |
import streamlit as st
|
60 |
from streamlit_lightweight_charts import renderLightweightCharts
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
},
|
70 |
-
"
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
},
|
79 |
-
|
80 |
-
"
|
81 |
-
|
|
|
|
|
|
|
|
|
82 |
},
|
83 |
-
|
84 |
-
"color": "rgba(197, 203, 206, 0)"
|
85 |
-
}
|
86 |
-
}
|
87 |
-
},
|
88 |
-
{
|
89 |
-
"width":800,
|
90 |
-
"height":125,
|
91 |
-
"layout": {
|
92 |
-
"textColor": 'white',
|
93 |
-
"background": {
|
94 |
-
"type": 'solid',
|
95 |
-
"color": 'black'
|
96 |
},
|
97 |
-
|
98 |
-
"grid": {
|
99 |
"vertLines": {
|
100 |
-
"color":
|
101 |
-
|
102 |
"horzLines": {
|
103 |
-
"color":
|
104 |
}
|
105 |
},
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
],
|
118 |
-
"options": {
|
119 |
-
"upColor": '#3399ff',
|
120 |
-
"downColor": '#ff5f5f',
|
121 |
-
"borderVisible": False,
|
122 |
-
"wickUpColor": '#3399ff',
|
123 |
-
"wickDownColor": '#ff5f5f',
|
124 |
-
"priceScaleVisible": True
|
125 |
},
|
126 |
-
|
127 |
-
"
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
}
|
131 |
}
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
136 |
"options": {
|
137 |
-
"
|
138 |
-
"
|
|
|
|
|
|
|
139 |
}
|
140 |
-
}
|
|
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
renderLightweightCharts([
|
151 |
{
|
152 |
-
"chart":
|
153 |
"series": seriesCandlestickChart
|
154 |
},
|
155 |
{
|
156 |
-
"chart":
|
157 |
-
"series":
|
158 |
-
}
|
159 |
], 'multipane')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
# Important levels
|
162 |
df_levels = pd.DataFrame(levels[['H2','H1','Open','L1','L2']].iloc[-1]).round(2)
|
|
|
56 |
levels['FirstBar'] = dts
|
57 |
|
58 |
# Plot
|
59 |
+
|
60 |
import streamlit as st
|
61 |
from streamlit_lightweight_charts import renderLightweightCharts
|
62 |
|
63 |
+
import json
|
64 |
+
import numpy as np
|
65 |
+
import yfinance as yf
|
66 |
+
import pandas as pd
|
67 |
+
|
68 |
+
COLOR_BULL = '#3399ff' # #26a69a
|
69 |
+
COLOR_BEAR = '#ff5f5f' # #ef5350
|
70 |
+
|
71 |
+
|
72 |
+
# Some data wrangling to match required format
|
73 |
+
df = df1.copy()
|
74 |
+
df['time'] = [dt.timestamp() for dt in df.index]
|
75 |
+
df = df[['time','Open','High','Low','Close','CalibPredicted','Color']]
|
76 |
+
df.columns = ['time','open','high','low','close','volume','color'] # rename columns
|
77 |
+
# df['color'] = np.where( df['open'] > df['close'], COLOR_BEAR, COLOR_BULL) # bull or bear
|
78 |
+
|
79 |
+
# export to JSON format
|
80 |
+
# candles = json.loads(df.to_json(orient = "records"))
|
81 |
+
candles = json.loads(json.dumps([
|
82 |
+
{"open": open,
|
83 |
+
"high": high,
|
84 |
+
"low": low,
|
85 |
+
"close": close,
|
86 |
+
"time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
|
87 |
+
], indent=2))
|
88 |
+
# volume = json.loads(df.rename(columns={"volume": "value",}).to_json(orient = "records"))
|
89 |
+
volume = json.loads(json.dumps([
|
90 |
+
{ "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
|
91 |
+
], indent=2))
|
92 |
+
|
93 |
+
chartMultipaneOptions = [
|
94 |
+
{
|
95 |
+
# "width": 600,
|
96 |
+
"height": 400,
|
97 |
+
"layout": {
|
98 |
+
"background": {
|
99 |
+
"type": "solid",
|
100 |
+
"color": 'transparent'
|
101 |
+
},
|
102 |
+
"textColor": "white"
|
103 |
},
|
104 |
+
"grid": {
|
105 |
+
"vertLines": {
|
106 |
+
"color": "rgba(197, 203, 206, 0.25)"
|
107 |
+
},
|
108 |
+
"horzLines": {
|
109 |
+
"color": "rgba(197, 203, 206, 0.25)"
|
110 |
+
}
|
111 |
+
},
|
112 |
+
"crosshair": {
|
113 |
+
"mode": 0
|
114 |
+
},
|
115 |
+
"priceScale": {
|
116 |
+
"borderColor": "rgba(197, 203, 206, 0.8)"
|
117 |
+
},
|
118 |
+
"timeScale": {
|
119 |
+
"borderColor": "rgba(197, 203, 206, 0.8)",
|
120 |
+
"barSpacing": 15
|
121 |
},
|
122 |
+
"watermark": {
|
123 |
+
"visible": True,
|
124 |
+
"fontSize": 48,
|
125 |
+
"horzAlign": 'center',
|
126 |
+
"vertAlign": 'center',
|
127 |
+
"color": 'rgba(171, 71, 188, 0.3)',
|
128 |
+
"text": 'AAPL - D1',
|
129 |
+
}
|
130 |
},
|
131 |
+
{
|
132 |
+
# "width": 600,
|
133 |
+
"height": 100,
|
134 |
+
"layout": {
|
135 |
+
"background": {
|
136 |
+
"type": 'solid',
|
137 |
+
"color": 'transparent'
|
138 |
},
|
139 |
+
"textColor": 'black',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
},
|
141 |
+
"grid": {
|
|
|
142 |
"vertLines": {
|
143 |
+
"color": 'rgba(42, 46, 57, 0)',
|
144 |
+
},
|
145 |
"horzLines": {
|
146 |
+
"color": 'rgba(42, 46, 57, 0.6)',
|
147 |
}
|
148 |
},
|
149 |
+
"timeScale": {
|
150 |
+
"visible": False,
|
151 |
+
},
|
152 |
+
"watermark": {
|
153 |
+
"visible": True,
|
154 |
+
"fontSize": 18,
|
155 |
+
"horzAlign": 'left',
|
156 |
+
"vertAlign": 'top',
|
157 |
+
"color": 'rgba(171, 71, 188, 0.7)',
|
158 |
+
"text": 'Volume',
|
159 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
},
|
161 |
+
{
|
162 |
+
"width": 600,
|
163 |
+
"height": 200,
|
164 |
+
"layout": {
|
165 |
+
"background": {
|
166 |
+
"type": "solid",
|
167 |
+
"color": 'white'
|
168 |
+
},
|
169 |
+
"textColor": "black"
|
170 |
+
},
|
171 |
+
"timeScale": {
|
172 |
+
"visible": False,
|
173 |
+
},
|
174 |
+
"watermark": {
|
175 |
+
"visible": True,
|
176 |
+
"fontSize": 18,
|
177 |
+
"horzAlign": 'left',
|
178 |
+
"vertAlign": 'center',
|
179 |
+
"color": 'rgba(171, 71, 188, 0.7)',
|
180 |
+
"text": 'MACD',
|
181 |
}
|
182 |
}
|
183 |
+
]
|
184 |
+
|
185 |
+
seriesCandlestickChart = [
|
186 |
+
{
|
187 |
+
"type": 'Candlestick',
|
188 |
+
"data": candles,
|
189 |
"options": {
|
190 |
+
"upColor": COLOR_BULL,
|
191 |
+
"downColor": COLOR_BEAR,
|
192 |
+
"borderVisible": False,
|
193 |
+
"wickUpColor": COLOR_BULL,
|
194 |
+
"wickDownColor": COLOR_BEAR
|
195 |
}
|
196 |
+
}
|
197 |
+
]
|
198 |
|
199 |
+
seriesVolumeChart = [
|
200 |
+
{
|
201 |
+
"type": 'Histogram',
|
202 |
+
"data": volume,
|
203 |
+
"options": {
|
204 |
+
"priceFormat": {
|
205 |
+
"type": 'volume',
|
206 |
+
},
|
207 |
+
"priceScaleId": "" # set as an overlay setting,
|
208 |
+
},
|
209 |
+
"priceScale": {
|
210 |
+
"scaleMargins": {
|
211 |
+
"top": 0,
|
212 |
+
"bottom": 0,
|
213 |
+
},
|
214 |
+
"alignLabels": False
|
215 |
+
}
|
216 |
+
}
|
217 |
+
]
|
218 |
|
219 |
renderLightweightCharts([
|
220 |
{
|
221 |
+
"chart": chartMultipaneOptions[0],
|
222 |
"series": seriesCandlestickChart
|
223 |
},
|
224 |
{
|
225 |
+
"chart": chartMultipaneOptions[1],
|
226 |
+
"series": seriesVolumeChart
|
227 |
+
}
|
228 |
], 'multipane')
|
229 |
+
# import streamlit as st
|
230 |
+
# from streamlit_lightweight_charts import renderLightweightCharts
|
231 |
+
|
232 |
+
# chartOptions = [{
|
233 |
+
# "width":800,
|
234 |
+
# "height":400,
|
235 |
+
# "rightPriceScale": {
|
236 |
+
# "scaleMargins": {
|
237 |
+
# "top": 0.2,
|
238 |
+
# "bottom": 0.25,
|
239 |
+
# },
|
240 |
+
# "borderVisible": False,
|
241 |
+
# },
|
242 |
+
# "overlayPriceScales": {
|
243 |
+
# "scaleMargins": {
|
244 |
+
# "top": 0.7,
|
245 |
+
# "bottom": 0,
|
246 |
+
# }
|
247 |
+
# },
|
248 |
+
# "layout": {
|
249 |
+
# "textColor": 'white',
|
250 |
+
# "background": {
|
251 |
+
# "type": 'solid',
|
252 |
+
# "color": 'black'
|
253 |
+
# },
|
254 |
+
# },
|
255 |
+
# "grid": {
|
256 |
+
# "vertLines": {
|
257 |
+
# "color": "rgba(197, 203, 206, 0)"
|
258 |
+
# },
|
259 |
+
# "horzLines": {
|
260 |
+
# "color": "rgba(197, 203, 206, 0)"
|
261 |
+
# }
|
262 |
+
# }
|
263 |
+
# },
|
264 |
+
# {
|
265 |
+
# "width":800,
|
266 |
+
# "height":125,
|
267 |
+
# "layout": {
|
268 |
+
# "textColor": 'white',
|
269 |
+
# "background": {
|
270 |
+
# "type": 'solid',
|
271 |
+
# "color": 'black'
|
272 |
+
# },
|
273 |
+
# },
|
274 |
+
# "grid": {
|
275 |
+
# "vertLines": {
|
276 |
+
# "color": "rgba(197, 203, 206, 0)"
|
277 |
+
# },
|
278 |
+
# "horzLines": {
|
279 |
+
# "color": "rgba(197, 203, 206, 0)"
|
280 |
+
# }
|
281 |
+
# },
|
282 |
+
# },]
|
283 |
+
|
284 |
+
# seriesCandlestickChart = [{
|
285 |
+
|
286 |
+
# "type": 'Candlestick',
|
287 |
+
# "data": [
|
288 |
+
# {"open": open,
|
289 |
+
# "high": high,
|
290 |
+
# "low": low,
|
291 |
+
# "close": close,
|
292 |
+
# "time": dt.timestamp()} for open, high, low, close, dt in zip(df1['Open'],df1['High'],df1['Low'],df1['Close'], df1.index)
|
293 |
+
# ],
|
294 |
+
# "options": {
|
295 |
+
# "upColor": '#3399ff',
|
296 |
+
# "downColor": '#ff5f5f',
|
297 |
+
# "borderVisible": False,
|
298 |
+
# "wickUpColor": '#3399ff',
|
299 |
+
# "wickDownColor": '#ff5f5f',
|
300 |
+
# "priceScaleVisible": True
|
301 |
+
# },
|
302 |
+
# "priceScale": {
|
303 |
+
# "scaleMargins": {
|
304 |
+
# "top": 0.7,
|
305 |
+
# "bottom": 0,
|
306 |
+
# }
|
307 |
+
# }
|
308 |
+
# },
|
309 |
+
# {
|
310 |
+
# "type": 'Line',
|
311 |
+
# "data": [{"value": value, "time":dt.timestamp()} for value, dt in zip(levels['H1'], levels['FirstBar'])],
|
312 |
+
# "options": {
|
313 |
+
# "color": 'blue',
|
314 |
+
# "lineWidth": 1
|
315 |
+
# }
|
316 |
+
# }]
|
317 |
+
|
318 |
+
# seriesPredictions = [{
|
319 |
+
# "type": 'Histogram',
|
320 |
+
# "data": [
|
321 |
+
# { "value": pred, "time": dt.timestamp(), "color":color } for pred, dt, color in zip(df1['CalibPredicted'], df1.index, df1['Color'])
|
322 |
+
# ],
|
323 |
+
# "options": { "color": '#26a69a' }
|
324 |
+
# }]
|
325 |
+
|
326 |
+
# renderLightweightCharts([
|
327 |
+
# {
|
328 |
+
# "chart": chartOptions[0],
|
329 |
+
# "series": seriesCandlestickChart
|
330 |
+
# },
|
331 |
+
# {
|
332 |
+
# "chart": chartOptions[1],
|
333 |
+
# "series": seriesPredictions
|
334 |
+
# },
|
335 |
+
# ], 'multipane')
|
336 |
|
337 |
# Important levels
|
338 |
df_levels = pd.DataFrame(levels[['H2','H1','Open','L1','L2']].iloc[-1]).round(2)
|
data_check.ipynb
ADDED
@@ -0,0 +1,510 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import numpy as np\n",
|
10 |
+
"import warnings\n",
|
11 |
+
"with warnings.catch_warnings():\n",
|
12 |
+
" warnings.simplefilter(\"ignore\")\n",
|
13 |
+
" warnings.simplefilter(action='ignore', category=FutureWarning)\n",
|
14 |
+
"\n",
|
15 |
+
"import pandas as pd\n",
|
16 |
+
"from getDailyData import get_daily\n",
|
17 |
+
"from sklearn.model_selection import TimeSeriesSplit\n",
|
18 |
+
"from sklearn.metrics import mean_absolute_error\n",
|
19 |
+
"from sklearn.linear_model import LinearRegression # Example model\n",
|
20 |
+
"from sklearn.pipeline import Pipeline\n",
|
21 |
+
"from sklearn.compose import ColumnTransformer\n",
|
22 |
+
"from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder\n",
|
23 |
+
"from lightgbm import LGBMRegressor\n",
|
24 |
+
"from tqdm import tqdm\n"
|
25 |
+
]
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cell_type": "code",
|
29 |
+
"execution_count": null,
|
30 |
+
"metadata": {},
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"import yfinance as yf\n",
|
34 |
+
"spx = yf.Ticker('^GSPC')\n",
|
35 |
+
"spx.history(start='2023-11-20', interval='1d')"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": null,
|
41 |
+
"metadata": {},
|
42 |
+
"outputs": [],
|
43 |
+
"source": [
|
44 |
+
"import datetime\n",
|
45 |
+
"from datetime import time, timedelta\n",
|
46 |
+
"from tqdm import tqdm\n",
|
47 |
+
"\n",
|
48 |
+
"now = datetime.datetime.now()\n",
|
49 |
+
"df_consolidated = pd.DataFrame()\n",
|
50 |
+
"results = {}\n",
|
51 |
+
"coefs = {}\n",
|
52 |
+
"\n",
|
53 |
+
"morning_start = datetime.datetime.combine(now.date(), time(6, 30))\n",
|
54 |
+
"delta = now - morning_start\n",
|
55 |
+
"print(delta)\n",
|
56 |
+
"# candle = 1 #max(0,min((delta.total_seconds() / 60 / 30) // 1, 12))\n",
|
57 |
+
"# candles = np.arange(1,13)\n",
|
58 |
+
"candles = np.arange(1,2)\n",
|
59 |
+
"for candle in tqdm(candles):\n",
|
60 |
+
" print(f'running for {str(candle)}')\n",
|
61 |
+
" data, df_final, final_row = get_daily(mode='intra', periods_30m=candle)\n",
|
62 |
+
"\n",
|
63 |
+
" df_new = data[['Open','High','Low','Close','Close30','Close_VIX30','Close_VIX','Close_VVIX30','Close_VVIX']].copy()\n",
|
64 |
+
" df_new['PrevClose'] = df_new['Close'].shift(1)\n",
|
65 |
+
" df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1\n",
|
66 |
+
" df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1\n",
|
67 |
+
" df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1\n",
|
68 |
+
" df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1\n",
|
69 |
+
" df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1\n",
|
70 |
+
" df_new['EMA8'] = df_new['Close'].ewm(8).mean()\n",
|
71 |
+
" df_new['EMA8'] = df_new['EMA8'].shift(1)\n",
|
72 |
+
" df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']\n",
|
73 |
+
"\n",
|
74 |
+
" # Target will be the day's close\n",
|
75 |
+
" df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1\n",
|
76 |
+
"\n",
|
77 |
+
" # Column to determine what percentile the current intra performance looks like\n",
|
78 |
+
" intra_rank = []\n",
|
79 |
+
" for i, pct in tqdm(enumerate(df_new['ClosePctIntra'])):\n",
|
80 |
+
" try:\n",
|
81 |
+
" historical = df_new['ClosePctIntra'].iloc[:i]\n",
|
82 |
+
" current = df_new['ClosePctIntra'].iloc[i]\n",
|
83 |
+
" perc = len(historical[historical > current]) / len(historical)\n",
|
84 |
+
" except:\n",
|
85 |
+
" perc = None\n",
|
86 |
+
" intra_rank.append(perc)\n",
|
87 |
+
"\n",
|
88 |
+
" df_new['IntraPercentile'] = intra_rank\n",
|
89 |
+
"\n",
|
90 |
+
" # Column to determine what percentile the daily performance looks like\n",
|
91 |
+
" daily_rank = []\n",
|
92 |
+
" for i, pct in tqdm(enumerate(df_new['ClosePct'])):\n",
|
93 |
+
" try:\n",
|
94 |
+
" historical = df_new['ClosePct'].iloc[:i]\n",
|
95 |
+
" current = df_new['ClosePct'].iloc[i]\n",
|
96 |
+
" perc = len(historical[historical > current]) / len(historical)\n",
|
97 |
+
" except:\n",
|
98 |
+
" perc = None\n",
|
99 |
+
" daily_rank.append(perc)\n",
|
100 |
+
"\n",
|
101 |
+
" df_new['ClosePctPercentile'] = daily_rank\n",
|
102 |
+
"\n",
|
103 |
+
" # Let's do n-5 to start just for closes\n",
|
104 |
+
" lags = np.arange(1,6)\n",
|
105 |
+
"\n",
|
106 |
+
" for lag in lags:\n",
|
107 |
+
" df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)\n",
|
108 |
+
" # df_new[f'ClosePctPercentile_n{str(lag)}'] = df_new['ClosePctPercentile'].shift(lag)\n",
|
109 |
+
"\n",
|
110 |
+
"\n",
|
111 |
+
" df_feats = df_new[[c for c in df_new.columns if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]]\n",
|
112 |
+
"\n",
|
113 |
+
" df_final = df_feats.dropna()\n",
|
114 |
+
"\n",
|
115 |
+
" X = df_final[['ClosePctIntra']] # Feature dataset\n",
|
116 |
+
" y = df_final['ClosePct'] # Target dataset\n",
|
117 |
+
"\n",
|
118 |
+
" # model = LGBMRegressor(random_state=42, n_estimators=10, verbose=-1)\n",
|
119 |
+
" # model = LinearRegression()\n",
|
120 |
+
" # Define the column transformer for handling numeric and categorical features\n",
|
121 |
+
" \n",
|
122 |
+
"\n",
|
123 |
+
" # Fit the pipeline on the training data\n",
|
124 |
+
" # pipeline.fit(X_train, y_train)\n",
|
125 |
+
"\n",
|
126 |
+
" tscv = TimeSeriesSplit(n_splits=len(df_final)-1, max_train_size=None, test_size=1)\n",
|
127 |
+
"\n",
|
128 |
+
" mae_scores = []\n",
|
129 |
+
" overall_results = []\n",
|
130 |
+
"\n",
|
131 |
+
" for train_index, test_index in tscv.split(X):\n",
|
132 |
+
" \n",
|
133 |
+
" X_train = X.iloc[train_index]\n",
|
134 |
+
" X_test = X.iloc[test_index]\n",
|
135 |
+
" y_train = y.iloc[train_index]\n",
|
136 |
+
" y_test = y.iloc[test_index]\n",
|
137 |
+
" \n",
|
138 |
+
" # Select features\n",
|
139 |
+
" categorical_features = X_train.select_dtypes(include='object').columns\n",
|
140 |
+
" numeric_features = X_train.drop(columns=[c for c in X_train.columns if 'Percentile' in c]).select_dtypes(include='number').columns\n",
|
141 |
+
"\n",
|
142 |
+
" # Transformers\n",
|
143 |
+
" numeric_transformer = RobustScaler() # Example: StandardScaler for numeric features\n",
|
144 |
+
" categorical_transformer = OneHotEncoder() # Example: OneHotEncoder for categorical features\n",
|
145 |
+
"\n",
|
146 |
+
" # Define the pipeline steps\n",
|
147 |
+
" preprocessor = ColumnTransformer(\n",
|
148 |
+
" transformers=[\n",
|
149 |
+
" ('numeric', numeric_transformer, numeric_features), # numeric_features is a list of numeric feature column names\n",
|
150 |
+
" ('categorical', categorical_transformer, categorical_features) # categorical_features is a list of categorical feature column names\n",
|
151 |
+
" ])\n",
|
152 |
+
"\n",
|
153 |
+
" # Create the pipeline\n",
|
154 |
+
" pipeline = Pipeline(steps=[\n",
|
155 |
+
" ('preprocessor', preprocessor),\n",
|
156 |
+
" ('model', LinearRegression())\n",
|
157 |
+
" ])\n",
|
158 |
+
" \n",
|
159 |
+
" # Fit the model\n",
|
160 |
+
" pipeline.fit(X_train, y_train)\n",
|
161 |
+
"\n",
|
162 |
+
" # Predict\n",
|
163 |
+
" y_pred = pipeline.predict(X_test)\n",
|
164 |
+
"\n",
|
165 |
+
" # Calculate metrics\n",
|
166 |
+
" # mae_scores.append(mean_absolute_error(y_test, y_pred))\n",
|
167 |
+
" result_df = pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred}, index=y_test.index)\n",
|
168 |
+
" overall_results.append(result_df)\n",
|
169 |
+
"\n",
|
170 |
+
" df_results = pd.concat(overall_results)\n",
|
171 |
+
"\n",
|
172 |
+
" uppers = []\n",
|
173 |
+
" lowers = []\n",
|
174 |
+
" alpha = 0.05\n",
|
175 |
+
" for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):\n",
|
176 |
+
" try:\n",
|
177 |
+
" \n",
|
178 |
+
" df_q = df_results.iloc[:i]\n",
|
179 |
+
" pred = df_results['Predicted'].iloc[-1]\n",
|
180 |
+
" errors = df_q['IsTrue'] - df_q['Predicted']\n",
|
181 |
+
" positive_errors = errors[errors >= 0]\n",
|
182 |
+
" negative_errors = errors[errors < 0]\n",
|
183 |
+
"\n",
|
184 |
+
" # Calculate bounds\n",
|
185 |
+
" upper_bound = pred + np.quantile(positive_errors, 1 - alpha)\n",
|
186 |
+
" lower_bound = pred + np.quantile(negative_errors, alpha)\n",
|
187 |
+
" \n",
|
188 |
+
" except:\n",
|
189 |
+
" upper_bound = None\n",
|
190 |
+
" lower_bound = None\n",
|
191 |
+
"\n",
|
192 |
+
" uppers.append(upper_bound)\n",
|
193 |
+
" lowers.append(lower_bound)\n",
|
194 |
+
"\n",
|
195 |
+
" df_results['Upper'] = uppers\n",
|
196 |
+
" df_results['Lower'] = lowers\n",
|
197 |
+
"\n",
|
198 |
+
" df_results = df_results.merge(data[['PrevClose']],left_index=True, right_index=True)\n",
|
199 |
+
" df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])\n",
|
200 |
+
" df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])\n",
|
201 |
+
" df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])\n",
|
202 |
+
" df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])\n",
|
203 |
+
"\n",
|
204 |
+
" results[f'{str(int(candle))}'] = df_results\n",
|
205 |
+
"\n",
|
206 |
+
" # Average metrics across folds\n",
|
207 |
+
" average_mae = mean_absolute_error(df_results['IsTrue'], df_results['Predicted'])\n",
|
208 |
+
" # sorted_features = sorted([(feat, coef) for feat, coef in zip(model.feature_name_, model.feature_importances_)], key=lambda x: abs(x[1]), reverse=True)\n",
|
209 |
+
" sorted_features = sorted([(feat, coef) for feat, coef in zip(pipeline.feature_names_in_, pipeline.named_steps.model.coef_)], key=lambda x: abs(x[1]), reverse=True)\n",
|
210 |
+
"\n",
|
211 |
+
" coefs[f'{str(int(candle))}'] = pd.DataFrame(sorted_features, columns=['Feature','Coefficient'])\n",
|
212 |
+
"\n",
|
213 |
+
" df_consolidated.loc[int(candle), 'MAE'] = average_mae"
|
214 |
+
]
|
215 |
+
},
|
216 |
+
{
|
217 |
+
"cell_type": "code",
|
218 |
+
"execution_count": null,
|
219 |
+
"metadata": {},
|
220 |
+
"outputs": [],
|
221 |
+
"source": [
|
222 |
+
"pipeline.named_steps['model'].coef_"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"cell_type": "code",
|
227 |
+
"execution_count": null,
|
228 |
+
"metadata": {},
|
229 |
+
"outputs": [],
|
230 |
+
"source": [
|
231 |
+
"df_f = pd.concat(coefs)"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"cell_type": "code",
|
236 |
+
"execution_count": null,
|
237 |
+
"metadata": {},
|
238 |
+
"outputs": [],
|
239 |
+
"source": [
|
240 |
+
"df_consolidated"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"cell_type": "code",
|
245 |
+
"execution_count": null,
|
246 |
+
"metadata": {},
|
247 |
+
"outputs": [],
|
248 |
+
"source": [
|
249 |
+
"results[f'{str(candle)}'].loc['2023-10-01':, ['Pred','Actual','Up','Down']].plot();"
|
250 |
+
]
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"cell_type": "code",
|
254 |
+
"execution_count": null,
|
255 |
+
"metadata": {},
|
256 |
+
"outputs": [],
|
257 |
+
"source": [
|
258 |
+
"coefs[f'{str(candle)}']"
|
259 |
+
]
|
260 |
+
},
|
261 |
+
{
|
262 |
+
"cell_type": "code",
|
263 |
+
"execution_count": 3,
|
264 |
+
"metadata": {},
|
265 |
+
"outputs": [
|
266 |
+
{
|
267 |
+
"name": "stderr",
|
268 |
+
"output_type": "stream",
|
269 |
+
"text": [
|
270 |
+
"d:\\Projects\\gamedayspx_lambda\\getDailyData.py:243: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
271 |
+
" return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()\n",
|
272 |
+
"Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 1598.36it/s]\n",
|
273 |
+
"d:\\Projects\\gamedayspx_lambda\\model_intra_v2.py:11: SettingWithCopyWarning: \n",
|
274 |
+
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
275 |
+
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
276 |
+
"\n",
|
277 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
278 |
+
" df[target_column] = df[target_column].astype(bool)\n",
|
279 |
+
"d:\\Projects\\gamedayspx_lambda\\.venv\\lib\\site-packages\\sklearn\\base.py:465: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
|
280 |
+
" warnings.warn(\n",
|
281 |
+
"C:\\Users\\WINSTON-ITX\\AppData\\Local\\Temp\\ipykernel_10000\\2718014135.py:38: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
282 |
+
" return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()\n"
|
283 |
+
]
|
284 |
+
}
|
285 |
+
],
|
286 |
+
"source": [
|
287 |
+
"from getDailyData import get_daily\n",
|
288 |
+
"from model_intra_v2 import walk_forward_validation\n",
|
289 |
+
"from model_day_v2 import walk_forward_validation_seq as walk_forward_validation_daily\n",
|
290 |
+
"from model_regr_v2 import walk_forward_validation as walk_forward_validation_regr\n",
|
291 |
+
"from model_regr_v2 import calc_upper_lower\n",
|
292 |
+
"import pandas as pd\n",
|
293 |
+
"import json\n",
|
294 |
+
"from dbConn import connection, engine, insert_dataframe_to_sql\n",
|
295 |
+
"import numpy as np\n",
|
296 |
+
"from datetime import time, timedelta\n",
|
297 |
+
"import datetime\n",
|
298 |
+
"from pandas.tseries.offsets import BDay\n",
|
299 |
+
"import holidays\n",
|
300 |
+
"from dotenv import load_dotenv\n",
|
301 |
+
"load_dotenv()\n",
|
302 |
+
"\n",
|
303 |
+
"periods_30m = 1\n",
|
304 |
+
"\n",
|
305 |
+
"if periods_30m > 0:\n",
|
306 |
+
" data, df_final, final_row = get_daily(mode='intra', periods_30m=periods_30m)\n",
|
307 |
+
" # Regression model\n",
|
308 |
+
" res, _ = walk_forward_validation(df_final.drop(columns=['Target']).dropna(), 'Target_clf', 1, mode='single')\n",
|
309 |
+
" regr_res, _ = walk_forward_validation_regr(df_final[['CurrentClose30toClose','ClosePct']].dropna(), 'ClosePct', 1, mode='single')\n",
|
310 |
+
" df_regr_results = pd.read_sql_query(f'select * from reg_results where ModelNum = {str(periods_30m)}', con = engine)\n",
|
311 |
+
" regr_pct = regr_res['Predicted'].iloc[-1]\n",
|
312 |
+
" upper, lower = calc_upper_lower(regr_pct, df_regr_results, alpha=0.05)\n",
|
313 |
+
"\n",
|
314 |
+
"elif periods_30m == 0:\n",
|
315 |
+
" data, df_final, final_row = get_daily()\n",
|
316 |
+
" res, _, _ = walk_forward_validation_daily(df_final.dropna(), 'Target_clf', 'Target', 200, 1)\n",
|
317 |
+
"\n",
|
318 |
+
"# Get results, run calibration and pvalue \n",
|
319 |
+
"\n",
|
320 |
+
"df_results = pd.read_sql_query(f'select * from results where ModelNum = {str(periods_30m)}', con = engine)\n",
|
321 |
+
"\n",
|
322 |
+
"# Calibrate Probabilities\n",
|
323 |
+
"def get_quantiles(df, col_name, q):\n",
|
324 |
+
" return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()\n",
|
325 |
+
"\n",
|
326 |
+
"pct = res['Predicted'].iloc[-1]\n",
|
327 |
+
"\n",
|
328 |
+
"df_q = get_quantiles(df_results, 'Predicted', 10)\n",
|
329 |
+
"for q in df_q.index:\n",
|
330 |
+
" if q.left <= pct <= q.right:\n",
|
331 |
+
" p = df_q[q]\n",
|
332 |
+
"\n",
|
333 |
+
"calib_scores = np.abs(df_results['Predicted'].iloc[:-1] - 0.5)\n",
|
334 |
+
"score = abs(pct - 0.5)\n",
|
335 |
+
"pv = np.mean(calib_scores >= score)\n",
|
336 |
+
"asof = datetime.datetime.combine(data.index[-1], time(9,30)) + (periods_30m * timedelta(minutes=30)) \n",
|
337 |
+
"\n",
|
338 |
+
"blob = {\n",
|
339 |
+
" 'Datetime': str(res.index[-1]),\n",
|
340 |
+
" 'IsTrue':df_final['Target_clf'].iloc[-1],\n",
|
341 |
+
" 'Predicted': pct,\n",
|
342 |
+
" 'CalibPredicted': p,\n",
|
343 |
+
" 'Pvalue':pv,\n",
|
344 |
+
" 'ModelNum':periods_30m,\n",
|
345 |
+
" 'AsOf':str(asof)\n",
|
346 |
+
"}\n",
|
347 |
+
"\n",
|
348 |
+
"# Write to DB\n",
|
349 |
+
"df_write = pd.DataFrame.from_dict({k:[v] for k, v in blob.items()})\n"
|
350 |
+
]
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"cell_type": "code",
|
354 |
+
"execution_count": 4,
|
355 |
+
"metadata": {},
|
356 |
+
"outputs": [
|
357 |
+
{
|
358 |
+
"data": {
|
359 |
+
"text/html": [
|
360 |
+
"<div>\n",
|
361 |
+
"<style scoped>\n",
|
362 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
363 |
+
" vertical-align: middle;\n",
|
364 |
+
" }\n",
|
365 |
+
"\n",
|
366 |
+
" .dataframe tbody tr th {\n",
|
367 |
+
" vertical-align: top;\n",
|
368 |
+
" }\n",
|
369 |
+
"\n",
|
370 |
+
" .dataframe thead th {\n",
|
371 |
+
" text-align: right;\n",
|
372 |
+
" }\n",
|
373 |
+
"</style>\n",
|
374 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
375 |
+
" <thead>\n",
|
376 |
+
" <tr style=\"text-align: right;\">\n",
|
377 |
+
" <th></th>\n",
|
378 |
+
" <th>Datetime</th>\n",
|
379 |
+
" <th>IsTrue</th>\n",
|
380 |
+
" <th>Predicted</th>\n",
|
381 |
+
" <th>CalibPredicted</th>\n",
|
382 |
+
" <th>Pvalue</th>\n",
|
383 |
+
" <th>ModelNum</th>\n",
|
384 |
+
" <th>AsOf</th>\n",
|
385 |
+
" </tr>\n",
|
386 |
+
" </thead>\n",
|
387 |
+
" <tbody>\n",
|
388 |
+
" <tr>\n",
|
389 |
+
" <th>0</th>\n",
|
390 |
+
" <td>2023-11-22 00:00:00</td>\n",
|
391 |
+
" <td>True</td>\n",
|
392 |
+
" <td>0.712132</td>\n",
|
393 |
+
" <td>0.832636</td>\n",
|
394 |
+
" <td>0.404288</td>\n",
|
395 |
+
" <td>1</td>\n",
|
396 |
+
" <td>2023-11-24 10:00:00</td>\n",
|
397 |
+
" </tr>\n",
|
398 |
+
" </tbody>\n",
|
399 |
+
"</table>\n",
|
400 |
+
"</div>"
|
401 |
+
],
|
402 |
+
"text/plain": [
|
403 |
+
" Datetime IsTrue Predicted CalibPredicted Pvalue ModelNum \\\n",
|
404 |
+
"0 2023-11-22 00:00:00 True 0.712132 0.832636 0.404288 1 \n",
|
405 |
+
"\n",
|
406 |
+
" AsOf \n",
|
407 |
+
"0 2023-11-24 10:00:00 "
|
408 |
+
]
|
409 |
+
},
|
410 |
+
"execution_count": 4,
|
411 |
+
"metadata": {},
|
412 |
+
"output_type": "execute_result"
|
413 |
+
}
|
414 |
+
],
|
415 |
+
"source": [
|
416 |
+
"df_write"
|
417 |
+
]
|
418 |
+
},
|
419 |
+
{
|
420 |
+
"cell_type": "code",
|
421 |
+
"execution_count": null,
|
422 |
+
"metadata": {},
|
423 |
+
"outputs": [],
|
424 |
+
"source": [
|
425 |
+
"cursor = connection.cursor()\n",
|
426 |
+
"insert_dataframe_to_sql('results', df_write, cursor)"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"cell_type": "code",
|
431 |
+
"execution_count": null,
|
432 |
+
"metadata": {},
|
433 |
+
"outputs": [],
|
434 |
+
"source": [
|
435 |
+
"\n",
|
436 |
+
"if periods_30m > 0:\n",
|
437 |
+
" regr_blob = {\n",
|
438 |
+
" 'Datetime': str(res.index[-1]),\n",
|
439 |
+
" 'IsTrue':df_final['ClosePct'].iloc[-1],\n",
|
440 |
+
" 'Predicted': regr_pct,\n",
|
441 |
+
" 'Upper': upper,\n",
|
442 |
+
" 'Lower':lower,\n",
|
443 |
+
" 'ModelNum':periods_30m,\n",
|
444 |
+
" 'AsOf':str(asof)\n",
|
445 |
+
" }\n",
|
446 |
+
" df_write_reg = pd.DataFrame.from_dict({k:[v] for k, v in regr_blob.items()})\n",
|
447 |
+
" insert_dataframe_to_sql('reg_results', df_write_reg, cursor)\n",
|
448 |
+
"\n",
|
449 |
+
"cursor.close()\n",
|
450 |
+
"connection.close()\n"
|
451 |
+
]
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"cell_type": "code",
|
455 |
+
"execution_count": 2,
|
456 |
+
"metadata": {},
|
457 |
+
"outputs": [
|
458 |
+
{
|
459 |
+
"data": {
|
460 |
+
"text/plain": [
|
461 |
+
"{'Datetime': '2023-11-22 00:00:00',\n",
|
462 |
+
" 'IsTrue': 0.0005968736678840791,\n",
|
463 |
+
" 'Predicted': 0.00048111739459897327,\n",
|
464 |
+
" 'Upper': 0.02107334825815718,\n",
|
465 |
+
" 'Lower': -0.018127700802536933,\n",
|
466 |
+
" 'ModelNum': 1,\n",
|
467 |
+
" 'AsOf': '2023-11-24 10:00:00'}"
|
468 |
+
]
|
469 |
+
},
|
470 |
+
"execution_count": 2,
|
471 |
+
"metadata": {},
|
472 |
+
"output_type": "execute_result"
|
473 |
+
}
|
474 |
+
],
|
475 |
+
"source": [
|
476 |
+
"regr_blob"
|
477 |
+
]
|
478 |
+
},
|
479 |
+
{
|
480 |
+
"cell_type": "code",
|
481 |
+
"execution_count": null,
|
482 |
+
"metadata": {},
|
483 |
+
"outputs": [],
|
484 |
+
"source": [
|
485 |
+
"regr_blob"
|
486 |
+
]
|
487 |
+
}
|
488 |
+
],
|
489 |
+
"metadata": {
|
490 |
+
"kernelspec": {
|
491 |
+
"display_name": ".venv",
|
492 |
+
"language": "python",
|
493 |
+
"name": "python3"
|
494 |
+
},
|
495 |
+
"language_info": {
|
496 |
+
"codemirror_mode": {
|
497 |
+
"name": "ipython",
|
498 |
+
"version": 3
|
499 |
+
},
|
500 |
+
"file_extension": ".py",
|
501 |
+
"mimetype": "text/x-python",
|
502 |
+
"name": "python",
|
503 |
+
"nbconvert_exporter": "python",
|
504 |
+
"pygments_lexer": "ipython3",
|
505 |
+
"version": "3.10.11"
|
506 |
+
}
|
507 |
+
},
|
508 |
+
"nbformat": 4,
|
509 |
+
"nbformat_minor": 2
|
510 |
+
}
|
uni_model.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import warnings
|
3 |
+
with warnings.catch_warnings():
|
4 |
+
warnings.simplefilter("ignore")
|
5 |
+
warnings.simplefilter(action='ignore', category=FutureWarning)
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
from sklearn.model_selection import TimeSeriesSplit
|
9 |
+
from sklearn.metrics import mean_absolute_error
|
10 |
+
from sklearn.linear_model import LinearRegression # Example model
|
11 |
+
from sklearn.pipeline import Pipeline
|
12 |
+
from sklearn.compose import ColumnTransformer
|
13 |
+
from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder
|
14 |
+
|
15 |
+
import datetime
|
16 |
+
from datetime import time, timedelta
|
17 |
+
from tqdm import tqdm
|
18 |
+
|
19 |
+
def prep_data(df, now=None):
    """Walk-forward train a per-candle linear model and calibrate prediction intervals.

    For each 30-minute candle number, pulls intraday data via ``get_daily``,
    builds intraday/lagged return features, fits a one-feature linear
    regression with an expanding-window ``TimeSeriesSplit`` (one test day per
    fold), then derives upper/lower bounds for each prediction from the
    empirical quantiles of past residuals (conformal-style calibration).

    Parameters
    ----------
    df : unused
        Kept for interface compatibility; data is fetched via ``get_daily``.
    now : datetime.datetime, optional
        Reference timestamp for reporting time since the 06:30 session start.
        Defaults to the current local time.  (Fix: ``now`` was previously an
        undefined global and raised ``NameError``.)

    Returns
    -------
    tuple
        ``(results, coefs, df_consolidated)`` where ``results`` maps model
        number -> per-day prediction DataFrame, ``coefs`` maps model number ->
        sorted coefficient DataFrame, and ``df_consolidated`` holds one MAE
        row per model number.  (Fix: these three were previously written to
        nonexistent globals.)

    NOTE(review): relies on ``get_daily``, defined elsewhere in the project —
    confirm it is in scope wherever this function is called.
    """
    if now is None:
        now = datetime.datetime.now()

    # Fix: these accumulators were referenced but never initialized.
    results = {}
    coefs = {}
    df_consolidated = pd.DataFrame(columns=['MAE'])

    morning_start = datetime.datetime.combine(now.date(), time(6, 30))
    delta = now - morning_start
    print(delta)

    # Originally intended to cover candles 1..12; currently only model 1.
    candles = np.arange(1, 2)
    for candle in tqdm(candles):
        print(f'running for {str(candle)}')
        data, df_final, final_row = get_daily(mode='intra', periods_30m=candle)

        df_new = data[['Open', 'High', 'Low', 'Close', 'Close30', 'Close_VIX30',
                       'Close_VIX', 'Close_VVIX30', 'Close_VVIX']].copy()
        df_new['PrevClose'] = df_new['Close'].shift(1)
        df_new['CurrentGap'] = (df_new['Open'] / df_new['PrevClose']) - 1
        df_new['ClosePctIntra'] = (df_new['Close30'] / df_new['Close'].shift(1)) - 1
        df_new['ClosePctOpenIntra'] = (df_new['Close30'] / df_new['Open']) - 1
        df_new['ClosePctVIXIntra'] = (df_new['Close_VIX30'] / df_new['Close_VIX'].shift(1)) - 1
        df_new['ClosePctVVIXIntra'] = (df_new['Close_VVIX30'] / df_new['Close_VVIX'].shift(1)) - 1
        # NOTE(review): ewm(8) sets com=8, not span=8 — confirm intended decay.
        df_new['EMA8'] = df_new['Close'].ewm(8).mean()
        df_new['EMA8'] = df_new['EMA8'].shift(1)  # use prior-day EMA (no lookahead)
        df_new['EMA8Intra'] = df_new['Close30'] > df_new['EMA8']

        # Target: full-day close-over-close return.
        df_new['ClosePct'] = (df_new['Close'] / df_new['Close'].shift(1)) - 1

        # Expanding-window percentile ranks (strictly-prior history only).
        df_new['IntraPercentile'] = _expanding_rank(df_new['ClosePctIntra'])
        df_new['ClosePctPercentile'] = _expanding_rank(df_new['ClosePct'])

        # Lagged daily returns, n-1 .. n-5.
        for lag in np.arange(1, 6):
            df_new[f'ClosePct_n{str(lag)}'] = df_new['ClosePct'].shift(lag)

        feat_cols = [c for c in df_new.columns
                     if 'ClosePct' in c or 'Intra' in c or 'Gap' in c]
        df_final = df_new[feat_cols].dropna()

        X = df_final[['ClosePctIntra']]  # single-feature design matrix
        y = df_final['ClosePct']         # target

        # Walk-forward CV: each split tests exactly one day on all prior days.
        tscv = TimeSeriesSplit(n_splits=len(df_final) - 1,
                               max_train_size=None, test_size=1)

        overall_results = []
        for train_index, test_index in tscv.split(X):
            X_train = X.iloc[train_index]
            X_test = X.iloc[test_index]
            y_train = y.iloc[train_index]
            y_test = y.iloc[test_index]

            # Percentile columns are already scale-free; exclude from scaling.
            categorical_features = X_train.select_dtypes(include='object').columns
            numeric_features = X_train.drop(
                columns=[c for c in X_train.columns if 'Percentile' in c]
            ).select_dtypes(include='number').columns

            preprocessor = ColumnTransformer(transformers=[
                ('numeric', RobustScaler(), numeric_features),
                ('categorical', OneHotEncoder(), categorical_features),
            ])

            pipeline = Pipeline(steps=[
                ('preprocessor', preprocessor),
                ('model', LinearRegression()),
            ])

            pipeline.fit(X_train, y_train)
            y_pred = pipeline.predict(X_test)

            overall_results.append(
                pd.DataFrame({'IsTrue': y_test, 'Predicted': y_pred},
                             index=y_test.index))

        df_results = pd.concat(overall_results)

        # Conformal-style interval calibration from historical residuals.
        uppers = []
        lowers = []
        alpha = 0.05
        for i, pct in tqdm(enumerate(df_results['Predicted']),
                           desc='Calibrating Probas', total=len(df_results)):
            try:
                df_q = df_results.iloc[:i]  # strictly-prior rows only
                # Fix: was ``iloc[-1]``, which anchored every row's interval
                # on the *latest* prediction; use the current row's prediction.
                pred = pct
                errors = df_q['IsTrue'] - df_q['Predicted']
                positive_errors = errors[errors >= 0]
                negative_errors = errors[errors < 0]
                upper_bound = pred + np.quantile(positive_errors, 1 - alpha)
                lower_bound = pred + np.quantile(negative_errors, alpha)
            except (IndexError, ValueError):
                # Not enough history yet to form both error tails.
                upper_bound = None
                lower_bound = None
            uppers.append(upper_bound)
            lowers.append(lower_bound)

        df_results['Upper'] = uppers
        df_results['Lower'] = lowers

        # Convert percentage predictions back to price levels.
        df_results = df_results.merge(data[['PrevClose']],
                                      left_index=True, right_index=True)
        df_results['Pred'] = df_results['PrevClose'] * (1 + df_results['Predicted'])
        df_results['Actual'] = df_results['PrevClose'] * (1 + df_results['IsTrue'])
        df_results['Up'] = df_results['PrevClose'] * (1 + df_results['Upper'])
        df_results['Down'] = df_results['PrevClose'] * (1 + df_results['Lower'])

        results[f'{str(int(candle))}'] = df_results

        # Out-of-sample MAE across all walk-forward folds.
        average_mae = mean_absolute_error(df_results['IsTrue'],
                                          df_results['Predicted'])
        # Coefficients from the last-fitted fold, sorted by magnitude.
        sorted_features = sorted(
            [(feat, coef) for feat, coef in zip(pipeline.feature_names_in_,
                                                pipeline.named_steps.model.coef_)],
            key=lambda x: abs(x[1]), reverse=True)
        coefs[f'{str(int(candle))}'] = pd.DataFrame(
            sorted_features, columns=['Feature', 'Coefficient'])

        df_consolidated.loc[int(candle), 'MAE'] = average_mae

    return results, coefs, df_consolidated


def _expanding_rank(series):
    """Return, per element, the fraction of strictly-earlier values that
    exceed it (expanding percentile rank; None while history is empty)."""
    ranks = []
    for i in range(len(series)):
        try:
            historical = series.iloc[:i]
            current = series.iloc[i]
            perc = len(historical[historical > current]) / len(historical)
        except ZeroDivisionError:
            # i == 0: no history yet.
            perc = None
        ranks.append(perc)
    return ranks
|