|
import polars as pl |
|
import api_scraper |
|
mlb_scrape = api_scraper.MLB_Scrape() |
|
|
|
from stuff_model import * |
|
from shiny import App, reactive, ui, render |
|
from shiny.ui import h2, tags |
|
from api_scraper import MLB_Scrape |
|
import datetime |
|
from stuff_model import feature_engineering as fe |
|
from stuff_model import stuff_apply |
|
from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme |
|
# Select pytabulator's "site" theme for the tables rendered by this app.
theme.tabulator_site()

# Client used by the server to fetch the schedule and pitch-level game data.
scraper = MLB_Scrape()

# Baseline aggregates read from the 2024 parquet; the spring numbers are
# joined against this frame on (pitcher_id, pitch_type).
df_year_old_group = pl.read_parquet("pitch_data_agg_2024.parquet")

# pitcher_id -> pitcher_name for every row of the baseline frame.
pitcher_old_dict = dict(
    zip(
        df_year_old_group.get_column("pitcher_id"),
        df_year_old_group.get_column("pitcher_name"),
    )
)
|
|
|
|
|
|
|
|
|
# Introductory text shown next to the refresh button.
_INTRO_MARKDOWN = """This app generates a table which shows the 2025 Spring Training data.

* Differences are calculated based on 2024 regular season data
* If 2024 data does not exist for pitcher, 2023 Data is used
* If no difference exists, the pitch is labelled as a new pitch"""


def _pitch_threshold_column(input_id):
    """Return the 2-wide column holding the "Pitches >=" numeric input."""
    threshold_box = ui.div(
        {"class": "input-group"},
        ui.span("Pitches >=", class_="input-label"),
        ui.input_numeric(id=input_id, label='', value=1, min=1, width="100px"),
    )
    return ui.column(2, threshold_box)


def _download_column(button_id):
    """Return the 1-wide column holding a "Download Data" button."""
    button = ui.download_button(button_id, "Download Data", class_="btn-sm mb-3")
    return ui.column(1, button)


# Header row: description + refresh button on the left, credits on the right.
_header_row = ui.row(
    ui.column(
        4,
        ui.markdown(_INTRO_MARKDOWN),
        ui.input_action_button(
            "refresh",
            "Refresh Data",
            class_="btn-primary",
            width="100%",
        ),
    ),
    ui.column(
        3,
        ui.div(
            "By: ",
            ui.tags.a("@TJStats", href="https://x.com/TJStats", target="_blank"),
        ),
        ui.tags.p("Data: MLB"),
        ui.tags.p(
            ui.tags.a(
                "Support me on Patreon for more baseball content",
                href="https://www.patreon.com/TJ_Stats",
                target="_blank",
            )
        ),
    ),
)

# One tab per table; each tab is a controls row followed by its Tabulator output.
_tabs = ui.navset_tab(
    ui.nav(
        "All Pitches",
        ui.row(
            _download_column("download_all"),
            _pitch_threshold_column('pitches_all_min'),
        ),
        output_tabulator("table_all"),
    ),
    ui.nav(
        "Daily Pitches",
        ui.row(_pitch_threshold_column('pitches_daily_min')),
        output_tabulator("table_daily"),
    ),
    ui.nav(
        "tjStuff+",
        ui.row(_pitch_threshold_column('pitches_tjstuff_min')),
        output_tabulator("table_tjstuff"),
    ),
    ui.nav(
        "tjStuff+ Summary",
        ui.row(
            _download_column("download_tjsumm"),
            _pitch_threshold_column('pitches_tjsumm_min'),
        ),
        output_tabulator("table_stuff_all"),
    ),
    ui.nav(
        "tjStuff+ Team",
        # The team tab has no controls; the empty column only keeps the spacing.
        ui.row(ui.column(2)),
        output_tabulator("table_tjstuff_team"),
    ),
)

app_ui = ui.page_fluid(
    ui.card(
        ui.card_header("2025 Spring Training Pitch Data App"),
        _header_row,
        _tabs,
    )
)
|
|
|
# Per-pitch-type metrics. After the baseline join each one has a matching
# "<name>_old" column, and the helpers below add "<name>_diff" and
# "<name>_formatted".
_METRICS = (
    "start_speed",
    "max_start_speed",
    "ivb",
    "hb",
    "release_pos_z",
    "release_pos_x",
    "extension",
    "tj_stuff_plus",
)
# Column headers for _METRICS, in the same order.
_METRIC_TITLES = ("Velocity", "Max Velo", "iVB", "HB", "RelH", "RelS", "Extension", "tjStuff+")
# Widths that do not vary between tables; the remaining metrics use a per-table width.
_FIXED_METRIC_WIDTHS = {"start_speed": 100, "max_start_speed": 100, "extension": 125, "tj_stuff_plus": 100}
_PERCENT_COLS = ("pitch_percent", "rhh_percent", "lhh_percent")
# Pitch-type columns of the tjStuff+ summary table ("All" is the pitcher-level value).
_SUMMARY_COLS = ["CH", "CU", "FC", "FF", "FS", "SI", "SL", "ST", "All"]
# Columns written by the "All Pitches" CSV download.
_DOWNLOAD_COLS = [
    "pitcher_id",
    "pitcher_name",
    "pitch_type",
    "count",
    "pitch_percent",
    "rhh_percent",
    "lhh_percent",
    *_METRICS,
]


def _min_pitches(value) -> int:
    """Turn a numeric-input value into an int threshold.

    An empty input box yields ``None``; that is treated as 0 (no filtering)
    instead of letting ``int(None)`` raise inside a renderer.
    """
    return 0 if value is None else int(value)


def _col(title: str, field: str, width: int = 100, **options) -> dict:
    """Build a Tabulator column spec with a text header filter."""
    return {"title": title, "field": field, "width": width, "headerFilter": "input", **options}


def _metric_columns(movement_width: int = 100) -> list:
    """Column specs for the ``*_formatted`` metric fields.

    ``movement_width`` applies to iVB/HB/RelH/RelS; the other metrics keep
    their fixed widths.
    """
    return [
        _col(
            title,
            f"{metric}_formatted",
            _FIXED_METRIC_WIDTHS.get(metric, movement_width),
            formatter="textarea",
        )
        for title, metric in zip(_METRIC_TITLES, _METRICS)
    ]


def _signed(expr):
    """Format a numeric expression as a signed string with one decimal (e.g. "+1.2")."""
    return expr.round(1).map_elements(lambda v: f"{v:+.1f}", return_dtype=pl.Utf8)


def _usage_by_pitch_type(df_stuff: "pl.DataFrame", by_date: bool = False) -> "pl.DataFrame":
    """Aggregate pitch rows per pitcher and pitch type and join the baseline.

    Args:
        df_stuff: Pitch-level frame that already carries ``tj_stuff_plus``.
        by_date: When true, ``game_date`` is added to the grouping so there is
            one row per pitcher, pitch type and day, and usage percentages are
            computed within that day.

    Returns:
        The aggregated frame with usage percentages, the ``*_old`` baseline
        columns from ``df_year_old_group``, a ``new_pitch`` flag and a
        ``<metric>_diff`` column per metric (null when no baseline exists).
    """
    date_key = ["game_date"] if by_date else []
    pitcher_keys = ["pitcher_id", *date_key]

    grouped = df_stuff.group_by(["pitcher_id", "pitcher_name", "pitch_type", *date_key]).agg(
        pl.col("start_speed").count().alias("count"),
        pl.col("start_speed").mean().alias("start_speed"),
        pl.col("start_speed").max().alias("max_start_speed"),
        pl.col("ivb").mean().alias("ivb"),
        pl.col("hb").mean().alias("hb"),
        pl.col("release_pos_z").mean().alias("release_pos_z"),
        pl.col("release_pos_x").mean().alias("release_pos_x"),
        pl.col("extension").mean().alias("extension"),
        pl.col("tj_stuff_plus").mean().alias("tj_stuff_plus"),
        pl.col("batter_hand").eq("R").sum().alias("rhh_count"),
        pl.col("batter_hand").eq("L").sum().alias("lhh_count"),
    )

    # Usage shares: overall, and within pitches thrown to each batter hand.
    # Window sums replace the separate totals frames (and the pivot/rename on
    # batter_hand, which raised if one hand was absent from the data).
    grouped = grouped.with_columns(
        (pl.col("count") / pl.col("count").sum().over(pitcher_keys)).alias("pitch_percent"),
        (pl.col("rhh_count") / pl.col("rhh_count").sum().over(pitcher_keys)).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("lhh_count").sum().over(pitcher_keys)).alias("lhh_percent"),
    )

    merged = grouped.join(
        df_year_old_group, on=["pitcher_id", "pitch_type"], how="left", suffix="_old"
    )

    # A pitch is "new" when the pitcher is in the baseline but this pitch type is not.
    in_baseline = pl.col("pitcher_id").is_in(df_year_old_group["pitcher_id"])
    return merged.with_columns(
        [
            pl.when(pl.col("start_speed_old").is_null() & in_baseline)
            .then(pl.lit(True))
            .alias("new_pitch")
        ]
        + [
            # Null arithmetic leaves the diff null when there is no baseline value.
            (pl.col(metric) - pl.col(f"{metric}_old")).alias(f"{metric}_diff")
            for metric in _METRICS
        ]
    )


def _format_with_delta(df: "pl.DataFrame") -> "pl.DataFrame":
    """Add ``<metric>_formatted`` strings: the value plus "(±diff)" on a second line.

    Rows without a baseline value get the value followed by an empty second
    line, so cell heights stay consistent in the textarea formatter.
    """
    formatted = []
    for metric in _METRICS:
        value = pl.col(metric).round(1).cast(pl.Utf8)
        formatted.append(
            pl.when(pl.col(f"{metric}_old").is_null())
            .then(value + "\n\t")
            .otherwise(value + "\n(" + _signed(pl.col(f"{metric}_diff")) + ")")
            .alias(f"{metric}_formatted")
        )
    return df.with_columns(formatted)


def _format_percent(df: "pl.DataFrame") -> "pl.DataFrame":
    """Add ``<col>_formatted`` strings such as "12.3%" for the usage shares."""
    return df.with_columns(
        [
            (pl.col(col) * 100)
            .round(1)
            .map_elements(lambda v: f"{v:.1f}%", return_dtype=pl.Utf8)
            .alias(f"{col}_formatted")
            for col in _PERCENT_COLS
        ]
    )


def _stuff_summary(df_stuff: "pl.DataFrame") -> "pl.DataFrame":
    """Build the wide tjStuff+ summary: one row per pitcher, one column per pitch type.

    Returns:
        Frame with ``pitcher_id``, ``pitcher_name``, ``count`` (all pitches by
        the pitcher) and the columns in ``_SUMMARY_COLS``, sorted by ``All``
        descending.
    """
    per_pitch = df_stuff.group_by(["pitcher_id", "pitcher_name", "pitch_type"]).agg(
        pl.col("tj_stuff_plus").len().alias("count"),
        pl.col("tj_stuff_plus").mean(),
    )

    # Pitcher-level value: mean of the per-pitch-type means weighted by pitch count.
    # Column order matches `per_pitch` so the two frames can be stacked.
    overall = (
        per_pitch.with_columns((pl.col("tj_stuff_plus") * pl.col("count")).alias("weighted"))
        .group_by(["pitcher_id", "pitcher_name"])
        .agg(pl.col("count").sum(), pl.col("weighted").sum())
        .select(
            "pitcher_id",
            "pitcher_name",
            pl.lit("All").alias("pitch_type"),
            "count",
            (pl.col("weighted") / pl.col("count")).alias("tj_stuff_plus"),
        )
    )

    total_by_pitcher = dict(zip(overall["pitcher_id"], overall["count"]))

    wide = (
        pl.concat([per_pitch, overall])
        .pivot(on="pitch_type", index=["pitcher_id", "pitcher_name"], values="tj_stuff_plus")
        .with_columns(
            pl.col("pitcher_id").replace_strict(total_by_pitcher, default=None).alias("count")
        )
    )

    # Pitch types nobody has thrown still need a (null) column for the table;
    # typed as Float64 so later numeric operations such as round() are valid.
    wide = wide.with_columns(
        [
            pl.lit(None, dtype=pl.Float64).alias(col)
            for col in _SUMMARY_COLS
            if col not in wide.columns
        ]
    )

    # The original computed this select/sort but discarded the result.
    return wide.select(["pitcher_id", "pitcher_name", "count", *_SUMMARY_COLS]).sort(
        "All", descending=True
    )


def _to_display(df: "pl.DataFrame", team_by_pitcher: dict):
    """Convert to pandas and add ``pitcher_team`` looked up by ``pitcher_id``."""
    frame = df.to_pandas()
    frame["pitcher_team"] = frame["pitcher_id"].map(team_by_pitcher)
    return frame


def _table(frame, columns: list):
    """Wrap a pandas frame in a Tabulator with the app's standard options."""
    return Tabulator(frame, table_options=TableOptions(height=750, columns=columns))


def server(input, output, session):
    """Shiny server function: builds the reactive data pipeline and the outputs.

    Data flows ``spring_data`` -> ``spring_stuff`` -> per-table aggregations.
    The table renderers only re-run when the "Refresh Data" button fires;
    the two download handlers read the cached reactives directly.
    """

    @reactive.Calc
    def spring_data():
        """Load the stored spring-training pitches and append today's games.

        NOTE(review): this calc reads no reactive inputs, so it is evaluated
        once per session and then cached; clicking "Refresh Data" re-renders
        the tables but does not re-download. Confirm whether that is intended.
        """
        stored = pl.read_parquet(
            "hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet"
        )

        # "Today" is the local clock shifted back 8 hours -- presumably so late
        # games still count toward the same day; confirm the intended timezone.
        today = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()

        game_ids = scraper.get_schedule(
            year_input=[today.year], sport_id=[1], game_type=["S"]
        ).filter(pl.col("date") == today)["game_id"]
        df_today = scraper.get_data_df(scraper.get_data(game_ids))

        combined = pl.concat([stored, df_today]).sort("game_date", descending=True)
        return combined.filter(pl.col("start_speed") > 0)

    @reactive.Calc
    def spring_stuff():
        """Pitch-level data with tjStuff+ applied.

        Cached here so the model runs once instead of once per output.
        """
        return stuff_apply.stuff_apply(fe.feature_engineering(spring_data()))

    @reactive.Calc
    def pitcher_teams():
        """Map pitcher_id -> pitcher_team; for repeated ids the last row wins."""
        df_spring = spring_data()
        return dict(zip(df_spring["pitcher_id"], df_spring["pitcher_team"]))

    @reactive.Calc
    def ts_data():
        """Per-pitcher, per-pitch-type numbers backing the "All Pitches" download.

        Uses the same aggregation as the on-screen table, so RHH%/LHH% are the
        share of pitches thrown to that batter hand (the earlier copy had the
        two hands swapped and divided by all pitches).
        """
        return _usage_by_pitch_type(spring_stuff()).select(_DOWNLOAD_COLS)

    @reactive.Calc
    def ts_data_summ():
        """Wide tjStuff+ summary backing the "tjStuff+ Summary" download."""
        return _stuff_summary(spring_stuff())

    @session.download(filename="data.csv")
    def download_all():
        """Stream the "All Pitches" data as CSV."""
        yield ts_data().write_csv()

    @session.download(filename="data_tjstuff.csv")
    def download_tjsumm():
        """Stream the tjStuff+ summary as CSV."""
        yield ts_data_summ().write_csv()

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_all():
        """Spring totals per pitcher and pitch type, with change vs. the baseline."""
        usage = _usage_by_pitch_type(spring_stuff())
        table = (
            _format_percent(_format_with_delta(usage))
            .filter(pl.col("count") >= _min_pitches(input.pitches_all_min()))
            .sort(["pitcher_id", "count"], descending=True)
        )
        columns = [
            _col("Pitcher Name", "pitcher_name", 250, frozen=True),
            _col("Team", "pitcher_team", frozen=True),
            _col("Pitch Type", "pitch_type", 125, frozen=True),
            _col("New Pitch?", "new_pitch", 125, frozen=False),
            _col("Pitches", "count", contextMenu=True),
            _col("Pitch%", "pitch_percent_formatted"),
            _col("LHH%", "lhh_percent_formatted"),
            _col("RHH%", "rhh_percent_formatted"),
            *_metric_columns(),
        ]
        return _table(_to_display(table, pitcher_teams()), columns)

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_daily():
        """Same metrics as ``table_all`` but one row per pitcher, pitch type and day.

        The "Date" column is bound to ``game_date``, so the aggregation groups
        by it; previously the column was never produced and this tab
        duplicated "All Pitches".
        """
        usage = _usage_by_pitch_type(spring_stuff(), by_date=True)
        table = (
            _format_percent(_format_with_delta(usage))
            .filter(pl.col("count") >= _min_pitches(input.pitches_daily_min()))
            .sort(["game_date", "pitcher_id", "count"], descending=True)
            # Rendered as text so the cell content does not depend on the date dtype.
            .with_columns(pl.col("game_date").cast(pl.Utf8))
        )
        columns = [
            _col("Pitcher Name", "pitcher_name", 250, frozen=True),
            _col("Team", "pitcher_team", frozen=True),
            _col("Pitch Type", "pitch_type", 125, frozen=True),
            _col("New Pitch?", "new_pitch", 125, frozen=False),
            _col("Date", "game_date", frozen=True),
            _col("Pitches", "count"),
            _col("Pitch%", "pitch_percent_formatted"),
            _col("LHH%", "lhh_percent_formatted"),
            _col("RHH%", "rhh_percent_formatted"),
            *_metric_columns(),
        ]
        return _table(_to_display(table, pitcher_teams()), columns)

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_tjstuff():
        """Per-pitch-type table with baseline tjStuff+ and its change in separate columns."""
        usage = _usage_by_pitch_type(spring_stuff())
        table = (
            _format_percent(
                usage.with_columns(
                    [
                        pl.col(metric).round(1).cast(pl.Utf8).alias(f"{metric}_formatted")
                        for metric in _METRICS
                    ]
                    + [
                        pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8),
                        _signed(pl.col("tj_stuff_plus_diff")).alias("tj_stuff_plus_diff"),
                    ]
                )
            )
            .filter(pl.col("count") >= _min_pitches(input.pitches_tjstuff_min()))
            .sort(["pitcher_id", "count"], descending=True)
        )
        columns = [
            _col("Pitcher Name", "pitcher_name", 250, frozen=True),
            _col("Team", "pitcher_team", 90, frozen=True),
            _col("Pitch Type", "pitch_type", 125, frozen=True),
            _col("New?", "new_pitch", 125, frozen=False),
            _col("Pitches", "count"),
            _col("Pitch%", "pitch_percent_formatted"),
            _col("LHH%", "lhh_percent_formatted", 90),
            _col("RHH%", "rhh_percent_formatted", 90),
            *_metric_columns(movement_width=80),
            _col("2024 tjStuff+", "tj_stuff_plus_old", formatter="textarea"),
            _col("Δ", "tj_stuff_plus_diff", formatter="textarea"),
        ]
        return _table(_to_display(table, pitcher_teams()), columns)

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_stuff_all():
        """Wide tjStuff+ table: one row per pitcher, one rounded column per pitch type."""
        table = (
            _stuff_summary(spring_stuff())
            .with_columns(pl.col(_SUMMARY_COLS).round(0))
            .filter(pl.col("count") >= _min_pitches(input.pitches_tjsumm_min()))
            .sort(["pitcher_id", "count"], descending=True)
        )
        columns = [
            _col("Pitcher Name", "pitcher_name", 250, frozen=True),
            _col("Team", "pitcher_team", 90, frozen=True),
            _col("Pitches", "count"),
            # Pitch-type columns carry no header filter.
            *[
                {"title": col, "field": col, "width": 80, "formatter": "textarea"}
                for col in _SUMMARY_COLS
            ],
        ]
        return _table(_to_display(table, pitcher_teams()), columns)

    @output
    @render_tabulator
    @reactive.event(input.refresh)
    def table_tjstuff_team():
        """Team-level pitch count and mean tjStuff+ (rounded to a whole number)."""
        table = (
            spring_stuff()
            .group_by("pitcher_team")
            .agg(
                pl.col("start_speed").count().alias("count"),
                pl.col("tj_stuff_plus").mean().round(0).alias("tj_stuff_plus"),
            )
            .sort(["pitcher_team", "count"], descending=True)
        )
        columns = [
            _col("Team", "pitcher_team", 250, frozen=True),
            _col("Pitches", "count", 250),
            _col("tjStuff+", "tj_stuff_plus", 250, formatter="textarea"),
        ]
        return _table(table.to_pandas(), columns)
|
|
|
|
|
|
|
|
|
# Shiny application object: pairs the page layout with the server function.
app = App(ui=app_ui, server=server)
|
|