File size: 2,113 Bytes
2b7b43f 4e07a41 2b7b43f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import polars as pl
import joblib
# Fitted model used by stuff_apply(); loaded once, at import time.
# NOTE(review): presumably a LightGBM model, judging by the file name - confirm.
model = joblib.load('stuff_model/lgbm_model_2020_2024.joblib')

# target_stats.txt holds two numbers, one per line: the mean (first line) and
# the standard deviation (second line) used to standardize the model output.
with open('stuff_model/target_stats.txt', 'r') as file:
    lines = file.readlines()
target_mean, target_std = (float(lines[idx].strip()) for idx in (0, 1))

# Columns handed to model.predict(), in exactly this order.
features = [
    'start_speed',
    'spin_rate',
    'extension',
    'az',
    'ax',
    'x0',
    'z0',
    'speed_diff',
    'az_diff',
    'ax_diff',
]
def stuff_apply(df: pl.DataFrame) -> pl.DataFrame:
    """Score every row of *df* with the stuff model and grade it per pitch type.

    The input frame is not modified; a new frame is returned.

    Args:
        df: Frame containing every column named in the module-level
            ``features`` list plus a ``pitch_type`` column.

    Returns:
        The joined frame with these columns added (or replaced):

        * ``target`` - raw ``model.predict`` output.
        * ``target_zscore`` - ``target`` standardized with the module-level
          ``target_mean`` / ``target_std``.
        * ``tj_stuff_plus`` - ``100 - 10 * target_zscore``.
        * ``pitch_grade`` - ``tj_stuff_plus`` standardized with the ``mean``
          and ``std`` columns, rescaled as ``10 * z + 50`` and clipped to
          the range 20 to 80.
        * whatever other columns ``tj_stuff_plus_pitch.csv`` carries.
    """
    scored = df.clone()

    # Rows with nulls are deliberately NOT dropped before prediction; the
    # feature columns go to the model as-is.
    raw_prediction = model.predict(scored.select(features).to_numpy())

    # Each step reads the column created by the previous one, so they have to
    # stay separate with_columns() calls.
    scored = (
        scored
        .with_columns(pl.Series(name="target", values=raw_prediction))
        .with_columns(
            ((pl.col('target') - target_mean) / target_std).alias('target_zscore')
        )
        # Sign is flipped: a target below the mean scores above 100.
        .with_columns(
            (100 - (pl.col('target_zscore') * 10)).alias('tj_stuff_plus')
        )
    )

    # Per-pitch-type reference statistics; read from disk on every call.
    # NOTE(review): the `mean` / `std` columns used below presumably come
    # from this CSV - confirm against the file.
    pitch_type_stats = pl.read_csv('stuff_model/tj_stuff_plus_pitch.csv')

    # NOTE(review): the join uses polars' default strategy (documented as
    # inner), so rows whose pitch_type is absent from the CSV would be
    # dropped - confirm that this is intended.
    graded = scored.join(
        pitch_type_stats, left_on='pitch_type', right_on='pitch_type'
    )

    # First a z-score of tj_stuff_plus against the mean/std columns, then that
    # z-score mapped onto the 20-80 scale, overwriting pitch_grade in place.
    graded = graded.with_columns(
        ((pl.col('tj_stuff_plus') - pl.col('mean')) / pl.col('std')).alias('pitch_grade')
    )
    return graded.with_columns(
        (pl.col('pitch_grade') * 10 + 50).clip(20, 80).alias('pitch_grade')
    )