DB functions and auto handle time of day
Files changed:
- dbConn.py +42 -0
- getDailyData.py +1 -17
- getIntraData.py +2 -46
- lambda_function.py +66 -9
- model_day_v2.py +12 -7
- model_intra_v2.py +17 -11
dbConn.py
ADDED
@@ -0,0 +1,42 @@
+from dotenv import load_dotenv
+from sqlalchemy import create_engine
+import pandas as pd
+import os
+import MySQLdb
+load_dotenv()
+
+engine = create_engine(
+    f"mysql+mysqldb://{os.getenv('DATABASE_USERNAME')}:" \
+    f"{os.getenv('DATABASE_PASSWORD')}@{os.getenv('DATABASE_HOST')}/" \
+    f"{os.getenv('DATABASE')}?ssl_ca=ca-certificates.crt&ssl_mode=VERIFY_IDENTITY"
+)
+
+connection = MySQLdb.connect(
+    host=os.getenv("DATABASE_HOST"),
+    user=os.getenv("DATABASE_USERNAME"),
+    passwd=os.getenv("DATABASE_PASSWORD"),
+    db=os.getenv("DATABASE"),
+    autocommit=True,
+    ssl_mode="VERIFY_IDENTITY",
+    ssl={ "ca": "ca-certificates.crt" }
+)
+
+# Function to write dataframe to SQL
+def insert_dataframe_to_sql(table_name, dataframe, cursor):
+    # Prepare the SQL insert statement
+    placeholders = ', '.join(['%s'] * len(dataframe.columns))
+    columns = ', '.join(dataframe.columns)
+
+    # Prepare the ON DUPLICATE KEY UPDATE part of the query
+    update_columns = ', '.join([f"{col} = VALUES({col})" for col in dataframe.columns])
+
+    sql = f"""INSERT INTO {table_name} ({columns}) VALUES ({placeholders})
+              ON DUPLICATE KEY UPDATE {update_columns}"""
+
+    # Convert dataframe to a list of tuples, handling NaN values
+    data = [tuple(row) if not any(pd.isna(val) for val in row)
+            else tuple(None if pd.isna(val) else val for val in row)
+            for row in dataframe.values]
+
+    # Execute the SQL command for each row
+    cursor.executemany(sql, data)
getDailyData.py
CHANGED
@@ -2,11 +2,8 @@ import pandas as pd
 import numpy as np
 import yfinance as yf
 from tqdm import tqdm
-import os
 from pandas.tseries.offsets import BDay
-from sqlalchemy import create_engine
-from dotenv import load_dotenv
-load_dotenv()
+from dbConn import engine
 
 data_start_date = '2018-07-01'
 
@@ -21,13 +18,6 @@ def get_daily(mode='daily', periods_30m=None):
     vvix = yf.Ticker('^VVIX')
     spx = yf.Ticker('^GSPC')
 
-    # Grab data from db
-    engine = create_engine(
-        f"mysql+mysqldb://{os.getenv('DATABASE_USERNAME')}:" \
-        f"{os.getenv('DATABASE_PASSWORD')}@{os.getenv('DATABASE_HOST')}/" \
-        f"{os.getenv('DATABASE')}?ssl_ca=ca-certificates.crt&ssl_mode=VERIFY_IDENTITY"
-    )
-
     query = f'''SELECT
         spx.Datetime AS Datetime,
         spx.Open AS Open,
@@ -266,12 +256,6 @@ def get_daily(mode='daily', periods_30m=None):
 
     data['GreenProbas'] = probas
 
-    engine = create_engine(
-        f"mysql+mysqldb://{os.getenv('DATABASE_USERNAME')}:" \
-        f"{os.getenv('DATABASE_PASSWORD')}@{os.getenv('DATABASE_HOST')}/" \
-        f"{os.getenv('DATABASE')}?ssl_ca=ca-certificates.crt&ssl_mode=VERIFY_IDENTITY"
-    )
-
     df_releases = pd.read_sql_query('select * from releases', con=engine)
     df_releases = df_releases.set_index('Datetime')
     data = data.merge(df_releases, how = 'left', left_index=True, right_index=True)
getIntraData.py
CHANGED
@@ -1,24 +1,13 @@
 import pandas as pd
 import yfinance as yf
 import datetime
-# from datasets import load_dataset
-from sqlalchemy import create_engine
-import os
 from getDailyData import data_start_date
-from dotenv import load_dotenv
-
-# Load environment variables from the .env file
-load_dotenv()
+from dbConn import engine
 
 def get_intra(periods_30m = 1):
     '''
     Method to get historical 30 minute data and append live data to it, if exists.
     '''
-    engine = create_engine(
-        f"mysql+mysqldb://{os.getenv('DATABASE_USERNAME')}:" \
-        f"{os.getenv('DATABASE_PASSWORD')}@{os.getenv('DATABASE_HOST')}/" \
-        f"{os.getenv('DATABASE')}?ssl_ca=ca-certificates.crt&ssl_mode=VERIFY_IDENTITY"
-    )
 
     query = f'''SELECT
         spx30.Datetime AS Datetime,
@@ -44,42 +33,11 @@ def get_intra(periods_30m = 1):
         spx30.Datetime > '{data_start_date}'
 
    '''
-    # spx30 = pd.read_sql_query(f'SELECT * FROM SPX_full_30min WHERE Datetime > {data_start_date}', con=engine)
-    # vix30 = pd.read_sql_query(f'SELECT * FROM VIX_full_30min WHERE Datetime > {data_start_date}', con=engine)
-    # vvix30 = pd.read_sql_query(f'SELECT * FROM VVIX_full_30min WHERE Datetime > {data_start_date}', con=engine)
-    # dfs = []
 
     df_30m = pd.read_sql_query(sql=query, con=engine.connect())
     df_30m['Datetime'] = df_30m['Datetime'].dt.tz_localize('America/New_York')
     df_30m = df_30m.set_index('Datetime',drop=True)
 
-    # for fr in [spx30, vix30, vvix30]:
-    #     # fr['Datetime'] = fr['Datetime'].apply(lambda x: datetime.datetime.strptime(x[:-6], dt_format))
-    #     fr['Datetime'] = fr['Datetime'].dt.tz_localize('America/New_York')
-    #     fr = fr.set_index('Datetime')
-    #     fr['Open'] = pd.to_numeric(fr['Open'])
-    #     fr['High'] = pd.to_numeric(fr['High'])
-    #     fr['Low'] = pd.to_numeric(fr['Low'])
-    #     fr['Close'] = pd.to_numeric(fr['Close'])
-    #     dfs.append(fr[['Open','High','Low','Close']])
-
-    # df_30m = pd.concat(dfs, axis=1)
-
-    # df_30m.columns = [
-    #     'Open30',
-    #     'High30',
-    #     'Low30',
-    #     'Close30',
-    #     'Open_VIX30',
-    #     'High_VIX30',
-    #     'Low_VIX30',
-    #     'Close_VIX30',
-    #     'Open_VVIX30',
-    #     'High_VVIX30',
-    #     'Low_VVIX30',
-    #     'Close_VVIX30'
-    # ]
-
     # Get incremental date
     last_date = df_30m.index.date[-1]
     last_date = last_date + datetime.timedelta(days=1)
@@ -134,6 +92,4 @@ def get_intra(periods_30m = 1):
     vvix_intra = df_30m.groupby('Datetime')['VVIX30IntraPerf'].tail(1)
 
     df_intra = pd.concat([opens_intra, highs_intra, lows_intra, closes_intra, spx_intra, vix_intra, vvix_intra], axis=1)
-    return df_intra
-
-
+    return df_intra
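The "Get incremental date" step that closes the middle hunk reads the most recent stored session and then pulls only newer bars. Roughly, the pattern is as follows (a sketch only; it assumes df_30m is the history just loaded from MySQL, since the actual append code is outside the hunks shown):

# Sketch of the incremental-fetch pattern, assuming df_30m holds the
# historical 30-minute bars already loaded from the database.
import datetime
import yfinance as yf

last_date = df_30m.index.date[-1]                 # most recent stored session
start = last_date + datetime.timedelta(days=1)    # resume from the next day
live = yf.Ticker('^GSPC').history(start=start, interval='30m')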
lambda_function.py
CHANGED
@@ -1,17 +1,74 @@
 # Function should get the data and run the whole model, return a single prediction based on the time
 from getDailyData import get_daily
 from model_intra_v3 import walk_forward_validation
+from model_day_v2 import walk_forward_validation_seq as walk_forward_validation_daily
+from datetime import timedelta
+import pandas as pd
 import json
+from dbConn import connection, engine, insert_dataframe_to_sql
+import numpy as np
+import datetime
+from datetime import time
+import datetime
+from pandas.tseries.offsets import BDay
 
 def lambda_handler(periods_30m):
-
-
-
-
-
-
-
+    if periods_30m > 0:
+        data, df_final, final_row = get_daily(mode='intra', periods_30m=periods_30m)
+        res, _ = walk_forward_validation(df_final.drop(columns=['Target']).dropna(), 'Target_clf', 1, mode='single')
+
+    elif periods_30m == 0:
+        data, df_final, final_row = get_daily()
+        res, _, _ = walk_forward_validation_daily(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
+
+    # Get results, run calibration and pvalue
+    df_results = pd.read_sql_query(f'select * from results where ModelNum = {str(periods_30m)}', con = engine)
+
+    # Calibrate Probabilities
+    def get_quantiles(df, col_name, q):
+        return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()
+
+    pct = res['Predicted'].iloc[-1]
+
+    df_q = get_quantiles(df_results, 'Predicted', 10)
+    for q in df_q.index:
+        if q.left <= pct <= q.right:
+            p = df_q[q]
+
+    calib_scores = np.abs(df_results['Predicted'].iloc[:-1] - 0.5)
+    score = abs(pct - 0.5)
+    pv = np.mean(calib_scores >= score)
+    asof = datetime.combine(data.index[-1], time(9,30)) + (periods_30m * timedelta(minutes=30))
+
+    blob = {
+        'Datetime': str(res.index[-1]),
+        'IsTrue':df_final['Target_clf'].iloc[-1],
+        'Predicted': pct,
+        'CalibPredicted': p,
+        'Pvalue':pv,
+        'ModelNum':periods_30m,
+        'AsOf':str(asof)
+    }
+
+    # Write to DB
+    df_write = pd.DataFrame.from_dict({k:[v] for k, v in blob.items()})
+    cursor = connection.cursor()
+    insert_dataframe_to_sql('results', df_write, cursor)
+    cursor.close()
+    connection.close()
+
+    return json.loads(json.dumps(blob))
 
 if __name__ == '__main__':
-
-
+    # Code that, based on the time of the day, return which data/model to run
+    from datetime import datetime, time
+    now = datetime.now()
+    morning_start = datetime.combine(now.date(), time(6, 30))
+    delta = now - morning_start
+    intervals = max(1,min((delta.total_seconds() / 60 / 30) // 1, 12))
+    print(f'running for {str(intervals)}')
+    j = lambda_handler(intervals)
+    # times_list = np.arange(0,13)
+    # for i in times_list:
+    #     j = lambda_handler(i)
+    #     print(j)
model_day_v2.py
CHANGED
@@ -3,6 +3,7 @@ from tqdm import tqdm
 from sklearn import linear_model
 import lightgbm as lgb
 from dailyCols import model_cols
+import numpy as np
 
 def walk_forward_validation(df, target_column, num_training_rows, num_periods):
 
@@ -84,22 +85,26 @@ def walk_forward_validation_seq(df, target_column_clf, target_column_regr, num_training_rows, num_periods):
         return df.groupby(pd.cut(df[col_name], q))['True'].mean()
 
     greenprobas = []
-
-    for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas'):
+    pvals = []
+    for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):
         try:
-            df_q = get_quantiles(df_results.iloc[:i], 'Predicted', 10)
+            df_q = get_quantiles(df_results.iloc[:i], 'Predicted', 10)
             for q in df_q.index:
                 if q.left <= pct <= q.right:
                     p = df_q[q]
-
+
+            calib_scores = np.abs(df_results['Predicted'].iloc[:i] - 0.5)
+            score = abs(df_results['Predicted'].iloc[i] - 0.5)
+            pv = np.mean(calib_scores >= score)
         except:
             p = None
-
+            pv = None
 
         greenprobas.append(p)
-
-
+        pvals.append(pv)
+
     df_results['CalibPredicted'] = greenprobas
+    df_results['Pvalue'] = pvals
 
     return df_results, model1, model2
 
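Both models now calibrate the same way: raw probabilities are bucketed into deciles of past predictions, the bucket's historical hit rate becomes CalibPredicted, and Pvalue is the share of past predictions at least as far from 0.5 as the current one. A toy illustration on synthetic data (names and numbers are illustrative only, not model output):

# Toy illustration of decile calibration and the empirical p-value.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
hist = pd.DataFrame({'Predicted': rng.uniform(0, 1, 500)})
hist['True'] = (rng.uniform(0, 1, 500) < hist['Predicted']).astype(int)

# Historical hit rate per prediction decile = calibrated probability
df_q = hist.groupby(pd.cut(hist['Predicted'], 10))['True'].mean()

pct = 0.72                                  # current raw prediction
for q in df_q.index:
    if q.left <= pct <= q.right:
        calibrated = df_q[q]

# Share of past predictions at least this far from 0.5
pv = np.mean(np.abs(hist['Predicted'] - 0.5) >= abs(pct - 0.5))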
model_intra_v2.py
CHANGED
@@ -1,4 +1,5 @@
 import pandas as pd
+import numpy as np
 from tqdm import tqdm
 import lightgbm as lgb
 from sklearn.model_selection import TimeSeriesSplit
@@ -29,29 +30,36 @@ def walk_forward_validation(df, target_column, num_periods, mode='full'):
             predictions = model.predict_proba(X_test)[:,-1]
 
             # Create a DataFrame to store the true and predicted values
-            result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
+            result_df = pd.DataFrame({'IsTrue': y_test, 'Predicted': predictions}, index=y_test.index)
             overall_results.append(result_df)
-        df_results = pd.concat(overall_results)
 
+        df_results = pd.concat(overall_results)
+
         # Calibrate Probabilities
         def get_quantiles(df, col_name, q):
-            return df.groupby(pd.cut(df[col_name], q))['True'].mean()
+            return df.groupby(pd.cut(df[col_name], q))['IsTrue'].mean()
 
         greenprobas = []
+        pvals = []
         for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):
             try:
-                df_q = get_quantiles(df_results.iloc[:i], 'Predicted', 10)
+                df_q = get_quantiles(df_results.iloc[:i], 'Predicted', 10)
                 for q in df_q.index:
                     if q.left <= pct <= q.right:
                         p = df_q[q]
+
+                calib_scores = np.abs(df_results['Predicted'].iloc[:i] - 0.5)
+                score = abs(df_results['Predicted'].iloc[i] - 0.5)
+                pv = np.mean(calib_scores >= score)
             except:
                 p = None
+                pv = None
 
             greenprobas.append(p)
+            pvals.append(pv)
+
         df_results['CalibPredicted'] = greenprobas
-
-        return df_results, model
+        df_results['Pvalue'] = pvals
 
     elif mode == 'single':
         X_train = df.drop(target_column, axis=1).iloc[:-1]
@@ -62,13 +70,11 @@ def walk_forward_validation(df, target_column, num_periods, mode='full'):
         model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
         model.fit(X_train, y_train)
         predictions = model.predict_proba(X_test.values.reshape(1, -1))[:,-1]
-
+        df_results = pd.DataFrame({'IsTrue': y_test, 'Predicted': predictions}, index=[df.index[-1]])
 
-
+    return df_results, model
 
 
-
-
 def seq_predict_proba(df, trained_clf_model):
     clf_pred_proba = trained_clf_model.predict_proba(df[model_cols])[:,-1]
     return clf_pred_proba
|