spring_training_pitching_app

Running

App Files Files Community

spring_training_pitching_app / app.py

nesticot

Update app.py

aa747ac verified about 2 months ago

raw

history blame

48.1 kB

	import polars as pl
	import api_scraper
	# NOTE(review): `mlb_scrape` is not referenced in the visible part of this file;
	# the server code uses the separate `scraper` instance created further down.
	mlb_scrape = api_scraper.MLB_Scrape()

	from stuff_model import *
	from shiny import App, reactive, ui, render
	from shiny.ui import h2, tags
	from api_scraper import MLB_Scrape
	import datetime
	from stuff_model import feature_engineering as fe
	from stuff_model import stuff_apply
	from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
	# Pick pytabulator's "site" theme (presumably applies to every table in the app - confirm in pytabulator docs).
	theme.tabulator_site()
	# Scraper instance used by `spring_data()` to fetch today's schedule and pitch data.
	scraper = MLB_Scrape()

	# 2024 aggregate loaded from a local parquet file; the server functions left-join it on
	# `pitcher_id` / `pitch_type` and read its columns with the `_old` suffix as the comparison baseline.
	df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')

	# pitcher_id -> pitcher_name lookup built from the 2024 aggregate.
	pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))




	def _download_column(button_id):
	    """Return the width-1 column wrapping a small "Download Data" button."""
	    button = ui.download_button(button_id, "Download Data", class_="btn-sm mb-3")
	    return ui.column(1, button)


	def _min_pitches_column(input_id):
	    """Return the width-2 column wrapping a "Pitches >=" numeric input (default 1, min 1)."""
	    caption = ui.span("Pitches >=", class_="input-label")
	    threshold = ui.input_numeric(id=input_id, label='', value=1, min=1, width="100px")
	    return ui.column(2, ui.div({"class": "input-group"}, caption, threshold))


	# Introductory text rendered as markdown next to the refresh button.
	_intro_markdown = """This app generates a table which shows the 2025 Spring Training data.

	* Differences are calculated based on 2024 regular season data
	* If 2024 data does not exist for pitcher, 2023 Data is used
	* If no difference exists, the pitch is labelled as a new pitch"""

	# Top row: description + refresh button on the left, credits on the right.
	_header_row = ui.row(
	    ui.column(
	        4,
	        ui.markdown(_intro_markdown),
	        ui.input_action_button(
	            "refresh",
	            "Refresh Data",
	            class_="btn-primary",
	            width="100%",
	        ),
	    ),
	    ui.column(
	        3,
	        ui.div(
	            "By: ",
	            ui.tags.a("@TJStats", href="https://x.com/TJStats", target="_blank"),
	        ),
	        ui.tags.p("Data: MLB"),
	        ui.tags.p(
	            ui.tags.a(
	                "Support me on Patreon for more baseball content",
	                href="https://www.patreon.com/TJ_Stats",
	                target="_blank",
	            )
	        ),
	    ),
	)

	# One tab per table output; each tab carries its own controls row above the table.
	_table_tabs = ui.navset_tab(
	    ui.nav(
	        "All Pitches",
	        ui.row(_download_column("download_all"), _min_pitches_column('pitches_all_min')),
	        output_tabulator("table_all"),
	    ),
	    ui.nav(
	        "Daily Pitches",
	        ui.row(_min_pitches_column('pitches_daily_min')),
	        output_tabulator("table_daily"),
	    ),
	    ui.nav(
	        "tjStuff+",
	        ui.row(_min_pitches_column('pitches_tjstuff_min')),
	        output_tabulator("table_tjstuff"),
	    ),
	    ui.nav(
	        "tjStuff+ Summary",
	        ui.row(_download_column("download_tjsumm"), _min_pitches_column('pitches_tjsumm_min')),
	        output_tabulator("table_stuff_all"),
	    ),
	    ui.nav(
	        "tjStuff+ Team",
	        # The team tab keeps an empty width-2 column as its controls row.
	        ui.row(ui.column(2)),
	        output_tabulator("table_tjstuff_team"),
	    ),
	)

	app_ui = ui.page_fluid(
	    ui.card(
	        ui.card_header("2025 Spring Training Pitch Data App"),
	        _header_row,
	        _table_tabs,
	    )
	)

	def server(input, output, session):


	@reactive.Calc
	def spring_data():
	    """Return 2025 spring pitch data: the stored parquet plus today's scraped games.

	    The stored dataset is read from the Hugging Face hub, today's spring
	    training games (sport_id 1, game_type 'S') are scraped and appended, and
	    the result is sorted by `game_date` (newest first) and restricted to rows
	    with a positive `start_speed`.
	    """
	    stored = pl.read_parquet("hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")

	    # "Today" is the server clock shifted back eight hours
	    # (presumably to approximate a US time zone from a UTC host - TODO confirm).
	    today = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()

	    schedule = scraper.get_schedule(year_input=[today.year], sport_id=[1], game_type=['S'])
	    game_ids = schedule.filter(pl.col('date') == today)['game_id']

	    # Only scrape and append when games are scheduled today; with no games there is
	    # nothing to add, and an empty scrape result is never concatenated.
	    if len(game_ids) > 0:
	        todays_pitches = scraper.get_data_df(scraper.get_data(game_ids))
	        stored = pl.concat([stored, todays_pitches])

	    return stored.sort('game_date', descending=True).filter(pl.col('start_speed') > 0)

	@reactive.Calc
	def ts_data():
	    """Aggregate spring pitch data per pitcher and pitch type (source of the CSV download).

	    Returns one row per (`pitcher_id`, `pitcher_name`, `pitch_type`) with the
	    pitch count, usage percentages and mean pitch metrics.

	    * `pitch_percent` is the pitch type's share of all the pitcher's pitches.
	    * `rhh_percent` / `lhh_percent` are the pitch type's share of the pitches
	      thrown to right- / left-handed batters, the same definition the
	      "All Pitches" table uses.

	    The 2024 baseline is not joined here because none of its columns are part
	    of the returned selection.
	    """
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(spring_data()))

	    df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
	        pl.col('start_speed').count().alias('count'),
	        pl.col('start_speed').mean().alias('start_speed'),
	        pl.col('start_speed').max().alias('max_start_speed'),
	        pl.col('ivb').mean().alias('ivb'),
	        pl.col('hb').mean().alias('hb'),
	        pl.col('release_pos_z').mean().alias('release_pos_z'),
	        pl.col('release_pos_x').mean().alias('release_pos_x'),
	        pl.col('extension').mean().alias('extension'),
	        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
	        # batter_hand == 'R' feeds the RHH count and 'L' the LHH count.
	        pl.col('batter_hand').eq('R').sum().alias('rhh_count'),
	        pl.col('batter_hand').eq('L').sum().alias('lhh_count'),
	    ])

	    # Per-pitcher totals are window sums over the grouped rows, so no extra
	    # aggregate frame or join is needed.
	    df_spring_group = df_spring_group.with_columns([
	        (pl.col('count') / pl.col('count').sum().over('pitcher_id')).alias('pitch_percent'),
	        (pl.col('rhh_count') / pl.col('rhh_count').sum().over('pitcher_id')).alias('rhh_percent'),
	        (pl.col('lhh_count') / pl.col('lhh_count').sum().over('pitcher_id')).alias('lhh_percent'),
	    ])

	    return df_spring_group.select([
	        'pitcher_id',
	        'pitcher_name',
	        'pitch_type',
	        'count',
	        'pitch_percent',
	        'rhh_percent',
	        'lhh_percent',
	        'start_speed',
	        'max_start_speed',
	        'ivb',
	        'hb',
	        'release_pos_z',
	        'release_pos_x',
	        'extension',
	        'tj_stuff_plus',
	    ])

	@reactive.Calc
	def ts_data_summ():
	    """Return a wide tjStuff+ summary: one row per pitcher, one column per pitch type.

	    Columns are `pitcher_id`, `pitcher_name`, `count` (the pitcher's total
	    pitches), the pitch types CH, CU, FC, FF, FS, SI, SL, ST, and `All` (the
	    count-weighted mean over the pitcher's pitch types). Rows are sorted by
	    `All`, highest first. Pitch types absent from the data appear as all-null
	    columns; pitch types outside the list are dropped.
	    """
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(spring_data()))

	    # Mean tjStuff+ and pitch count per pitcher and pitch type.
	    df_by_pitch = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg(
	        pl.col('tj_stuff_plus').len().alias('count'),
	        pl.col('tj_stuff_plus').mean()
	    )

	    # Count-weighted mean across pitch types, emitted as a synthetic "All" pitch type
	    # with the same column order as `df_by_pitch` so the two frames can be stacked.
	    df_overall = (
	        df_by_pitch
	        .with_columns((pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus'))
	        .group_by(['pitcher_id', 'pitcher_name'])
	        .agg(
	            pl.col('count').sum().alias('total_count'),
	            pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus'),
	        )
	        .select([
	            'pitcher_id',
	            'pitcher_name',
	            pl.lit("All").alias('pitch_type'),
	            pl.col('total_count').alias('count'),
	            (pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus'),
	        ])
	    )

	    df_small = pl.concat([df_by_pitch, df_overall])

	    # Total pitches per pitcher, re-attached after the pivot (which only carries tjStuff+ values).
	    count_dict = dict(zip(df_overall['pitcher_id'], df_overall['count']))

	    df_small_pivot = df_small.pivot(
	        index=['pitcher_id', 'pitcher_name'],
	        on='pitch_type',
	        values='tj_stuff_plus',
	    ).with_columns(
	        pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")
	    )

	    # Guarantee every expected pitch-type column exists so the select below cannot fail.
	    pitch_columns = ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']
	    for col in pitch_columns:
	        if col not in df_small_pivot.columns:
	            df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col))

	    # The ordered, sorted frame is what gets returned (polars operations are not in-place).
	    return (
	        df_small_pivot
	        .select(['pitcher_id', 'pitcher_name', 'count'] + pitch_columns)
	        .sort('All', descending=True)
	    )



	@session.download(filename="data.csv")
	def download_all():
	    """Serve the `ts_data()` frame as the CSV download `data.csv`."""
	    # `write_csv()` is called without a target and its result is yielded as the file body.
	    csv_body = ts_data().write_csv()
	    yield csv_body


	@session.download(filename="data_tjstuff.csv")
	def download_tjsumm():
	    """Serve the `ts_data_summ()` frame as the CSV download `data_tjstuff.csv`."""
	    # `write_csv()` is called without a target and its result is yielded as the file body.
	    csv_body = ts_data_summ().write_csv()
	    yield csv_body

	@output
	@render_tabulator
	@reactive.event(input.refresh)
	def table_all():
	    """Render the "All Pitches" table: spring averages per pitcher and pitch type vs. 2024.

	    Re-runs only when the `refresh` button fires. Each metric cell is the
	    spring value rounded to one decimal, followed on a second line by the
	    signed difference from the 2024 baseline in parentheses; when no baseline
	    row exists only the value is shown. `new_pitch` is True when the pitcher
	    appears in the 2024 data but this pitch type does not. Rows with fewer
	    pitches than the `pitches_all_min` input are dropped.
	    """
	    df_spring = spring_data()
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

	    df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
	        pl.col('start_speed').count().alias('count'),
	        pl.col('start_speed').mean().alias('start_speed'),
	        pl.col('start_speed').max().alias('max_start_speed'),
	        pl.col('ivb').mean().alias('ivb'),
	        pl.col('hb').mean().alias('hb'),
	        pl.col('release_pos_z').mean().alias('release_pos_z'),
	        pl.col('release_pos_x').mean().alias('release_pos_x'),
	        pl.col('extension').mean().alias('extension'),
	        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
	        pl.col('batter_hand').eq('R').sum().alias('rhh_count'),
	        pl.col('batter_hand').eq('L').sum().alias('lhh_count'),
	    ])

	    # Usage shares: overall, and within pitches thrown to each batter hand.
	    # Window sums give the per-pitcher totals directly, so a missing batter hand
	    # in the data cannot break a pivot/rename step.
	    df_spring_group = df_spring_group.with_columns([
	        (pl.col('count') / pl.col('count').sum().over('pitcher_id')).alias('pitch_percent'),
	        (pl.col('rhh_count') / pl.col('rhh_count').sum().over('pitcher_id')).alias('rhh_percent'),
	        (pl.col('lhh_count') / pl.col('lhh_count').sum().over('pitcher_id')).alias('lhh_percent'),
	    ])

	    # Baseline columns that collide with spring column names arrive with the `_old` suffix.
	    df_merge = df_spring_group.join(df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old')

	    df_merge = df_merge.with_columns(
	        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
	    )
	    df_merge = df_merge.with_columns(
	        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
	        .then(pl.lit(True))
	        .otherwise(pl.lit(None))
	        .alias('new_pitch')
	    )

	    metrics = [
	        'start_speed',
	        'max_start_speed',
	        'ivb',
	        'hb',
	        'release_pos_z',
	        'release_pos_x',
	        'extension',
	        'tj_stuff_plus',
	    ]

	    # Spring minus baseline; null when there is no baseline value.
	    df_merge = df_merge.with_columns([
	        (pl.col(metric) - pl.col(metric + '_old')).alias(metric + '_diff')
	        for metric in metrics
	    ])

	    # "value\n(+diff)" when a baseline exists, otherwise just the value
	    # (padded with "\n\t" so the cell keeps its second line).
	    df_merge = df_merge.with_columns([
	        pl.when(pl.col(metric + '_old').is_null())
	        .then(pl.col(metric).round(1).cast(pl.Utf8) + '\n\t')
	        .otherwise(
	            pl.col(metric).round(1).cast(pl.Utf8)
	            + "\n("
	            + pl.col(metric + '_diff').round(1).map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
	            + ")"
	        )
	        .alias(metric + '_formatted')
	        for metric in metrics
	    ])

	    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
	    df_merge = df_merge.with_columns([
	        (pl.col(col) * 100)
	        .round(1)
	        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
	        .alias(col + '_formatted')
	        for col in percent_cols
	    ]).sort(['pitcher_id', 'count'], descending=True)

	    columns = [
	        { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
	        { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
	        { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input","contextMenu":True},
	        { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
	    ]

	    # Fall back to the UI minimum of 1 if the numeric input yields no value
	    # (presumably None when the box is emptied - TODO confirm).
	    min_pitches = int(input.pitches_all_min() or 1)
	    df_plot = df_merge.filter(pl.col('count') >= min_pitches).to_pandas()

	    # Team is looked up from the pitch-level data because the grouped frame does not carry it.
	    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
	    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

	    return Tabulator(
	        df_plot,
	        table_options=TableOptions(
	            height=750,
	            columns=columns,
	        )
	    )


	@output
	@render_tabulator
	@reactive.event(input.refresh)
	def table_daily():
	    """Render the "Daily Pitches" table: one row per pitcher, pitch type and game date.

	    Re-runs only when the `refresh` button fires. Aggregation is done per
	    `game_date` so the table's "Date" column (field `game_date`) is populated
	    and usage percentages are per outing. Each metric cell is the day's value
	    rounded to one decimal, followed on a second line by the signed difference
	    from the 2024 baseline in parentheses; when no baseline row exists only
	    the value is shown. Rows with fewer pitches than the `pitches_daily_min`
	    input are dropped.

	    NOTE(review): grouping by date is inferred from the tab title and the
	    declared "Date" column - confirm this is the intended daily view.
	    """
	    df_spring = spring_data()
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

	    # Totals are taken per pitcher *and* day so percentages describe a single outing.
	    day_keys = ['pitcher_id', 'game_date']

	    df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type', 'game_date']).agg([
	        pl.col('start_speed').count().alias('count'),
	        pl.col('start_speed').mean().alias('start_speed'),
	        pl.col('start_speed').max().alias('max_start_speed'),
	        pl.col('ivb').mean().alias('ivb'),
	        pl.col('hb').mean().alias('hb'),
	        pl.col('release_pos_z').mean().alias('release_pos_z'),
	        pl.col('release_pos_x').mean().alias('release_pos_x'),
	        pl.col('extension').mean().alias('extension'),
	        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
	        pl.col('batter_hand').eq('R').sum().alias('rhh_count'),
	        pl.col('batter_hand').eq('L').sum().alias('lhh_count'),
	    ])

	    df_spring_group = df_spring_group.with_columns([
	        (pl.col('count') / pl.col('count').sum().over(day_keys)).alias('pitch_percent'),
	        (pl.col('rhh_count') / pl.col('rhh_count').sum().over(day_keys)).alias('rhh_percent'),
	        (pl.col('lhh_count') / pl.col('lhh_count').sum().over(day_keys)).alias('lhh_percent'),
	        # Text dates display and filter predictably in the table's "input" header filter.
	        pl.col('game_date').cast(pl.Utf8),
	    ])

	    # Baseline columns that collide with spring column names arrive with the `_old` suffix.
	    df_merge = df_spring_group.join(df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old')

	    df_merge = df_merge.with_columns(
	        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
	    )
	    df_merge = df_merge.with_columns(
	        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
	        .then(pl.lit(True))
	        .otherwise(pl.lit(None))
	        .alias('new_pitch')
	    )

	    metrics = [
	        'start_speed',
	        'max_start_speed',
	        'ivb',
	        'hb',
	        'release_pos_z',
	        'release_pos_x',
	        'extension',
	        'tj_stuff_plus',
	    ]

	    # Day's value minus baseline; null when there is no baseline value.
	    df_merge = df_merge.with_columns([
	        (pl.col(metric) - pl.col(metric + '_old')).alias(metric + '_diff')
	        for metric in metrics
	    ])

	    # "value\n(+diff)" when a baseline exists, otherwise just the value
	    # (padded with "\n\t" so the cell keeps its second line).
	    df_merge = df_merge.with_columns([
	        pl.when(pl.col(metric + '_old').is_null())
	        .then(pl.col(metric).round(1).cast(pl.Utf8) + '\n\t')
	        .otherwise(
	            pl.col(metric).round(1).cast(pl.Utf8)
	            + "\n("
	            + pl.col(metric + '_diff').round(1).map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
	            + ")"
	        )
	        .alias(metric + '_formatted')
	        for metric in metrics
	    ])

	    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
	    df_merge = df_merge.with_columns([
	        (pl.col(col) * 100)
	        .round(1)
	        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
	        .alias(col + '_formatted')
	        for col in percent_cols
	    ]).sort(['game_date', 'pitcher_id', 'count'], descending=True)

	    columns = [
	        { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
	        { "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
	        { "title": "Date", "field": "game_date", "width": 100, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
	        { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
	    ]

	    # Fall back to the UI minimum of 1 if the numeric input yields no value
	    # (presumably None when the box is emptied - TODO confirm).
	    min_pitches = int(input.pitches_daily_min() or 1)
	    df_plot = df_merge.filter(pl.col('count') >= min_pitches).to_pandas()

	    # Team is looked up from the pitch-level data because the grouped frame does not carry it.
	    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
	    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

	    return Tabulator(
	        df_plot,
	        table_options=TableOptions(
	            height=750,
	            columns=columns,
	        )
	    )

	@output
	@render_tabulator
	@reactive.event(input.refresh)
	def table_tjstuff():
	    """Render the "tjStuff+" table: spring metrics plus 2024 tjStuff+ and the change.

	    Re-runs only when the `refresh` button fires. Metric cells show the spring
	    value rounded to one decimal. Two extra columns show the 2024 tjStuff+ and
	    the signed spring-minus-2024 difference; both are empty when no baseline
	    row exists. `new_pitch` is True when the pitcher appears in the 2024 data
	    but this pitch type does not. Rows with fewer pitches than the
	    `pitches_tjstuff_min` input are dropped.
	    """
	    df_spring = spring_data()
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

	    df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
	        pl.col('start_speed').count().alias('count'),
	        pl.col('start_speed').mean().alias('start_speed'),
	        pl.col('start_speed').max().alias('max_start_speed'),
	        pl.col('ivb').mean().alias('ivb'),
	        pl.col('hb').mean().alias('hb'),
	        pl.col('release_pos_z').mean().alias('release_pos_z'),
	        pl.col('release_pos_x').mean().alias('release_pos_x'),
	        pl.col('extension').mean().alias('extension'),
	        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
	        pl.col('batter_hand').eq('R').sum().alias('rhh_count'),
	        pl.col('batter_hand').eq('L').sum().alias('lhh_count'),
	    ])

	    # Usage shares: overall, and within pitches thrown to each batter hand.
	    df_spring_group = df_spring_group.with_columns([
	        (pl.col('count') / pl.col('count').sum().over('pitcher_id')).alias('pitch_percent'),
	        (pl.col('rhh_count') / pl.col('rhh_count').sum().over('pitcher_id')).alias('rhh_percent'),
	        (pl.col('lhh_count') / pl.col('lhh_count').sum().over('pitcher_id')).alias('lhh_percent'),
	    ])

	    # Baseline columns that collide with spring column names arrive with the `_old` suffix.
	    df_merge = df_spring_group.join(df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old')

	    df_merge = df_merge.with_columns(
	        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
	    )
	    df_merge = df_merge.with_columns(
	        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
	        .then(pl.lit(True))
	        .otherwise(pl.lit(None))
	        .alias('new_pitch')
	    )

	    metrics = [
	        'start_speed',
	        'max_start_speed',
	        'ivb',
	        'hb',
	        'release_pos_z',
	        'release_pos_x',
	        'extension',
	        'tj_stuff_plus',
	    ]

	    # Spring minus baseline; null when there is no baseline value.
	    df_merge = df_merge.with_columns([
	        (pl.col(metric) - pl.col(metric + '_old')).alias(metric + '_diff')
	        for metric in metrics
	    ])

	    # This table shows plain rounded values; the change gets its own column below.
	    df_merge = df_merge.with_columns([
	        pl.col(metric).round(1).cast(pl.Utf8).alias(metric + '_formatted')
	        for metric in metrics
	    ])

	    df_merge = df_merge.with_columns([
	        pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"),
	        pl.col("tj_stuff_plus_diff").round(1)
	        .map_elements(lambda x: f"{x:+.1f}", return_dtype=pl.Utf8)
	        .alias("tj_stuff_plus_diff"),
	    ])

	    percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
	    df_merge = df_merge.with_columns([
	        (pl.col(col) * 100)
	        .round(1)
	        .map_elements(lambda x: f"{x:.1f}%", return_dtype=pl.Utf8)
	        .alias(col + '_formatted')
	        for col in percent_cols
	    ])

	    # Every entry must be a plain dict separated by commas only; any stray operator
	    # between entries would be applied to the following dict and raise at render time.
	    columns = [
	        { "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Team", "field": "pitcher_team", "width": 90, "headerFilter":"input" ,"frozen":True,},
	        { "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
	        { "title": "New?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
	        { "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
	        { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
	        { "title": "LHH%", "field": "lhh_percent_formatted", "width": 90, "headerFilter":"input"},
	        { "title": "RHH%", "field": "rhh_percent_formatted", "width": 90, "headerFilter":"input"},
	        { "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "iVB", "field": "ivb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "HB", "field": "hb_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "RelH", "field": "release_pos_z_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "RelS", "field": "release_pos_x_formatted", "width": 80, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "headerFilter":"input", "formatter":"textarea" },
	        { "title": "Δ", "field": "tj_stuff_plus_diff", "width": 100, "headerFilter":"input", "formatter":"textarea" }
	    ]

	    # Fall back to the UI minimum of 1 if the numeric input yields no value
	    # (presumably None when the box is emptied - TODO confirm).
	    min_pitches = int(input.pitches_tjstuff_min() or 1)
	    df_plot = (
	        df_merge
	        .filter(pl.col('count') >= min_pitches)
	        .sort(['pitcher_id', 'count'], descending=True)
	        .to_pandas()
	    )

	    # Team is looked up from the pitch-level data because the grouped frame does not carry it.
	    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
	    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

	    return Tabulator(
	        df_plot,
	        table_options=TableOptions(
	            height=750,
	            columns=columns,
	        )
	    )

	@output
	@render_tabulator
	@reactive.event(input.refresh)
	def table_stuff_all():
	    """Render the per-pitcher tjStuff+ summary table.

	    Re-runs when the "refresh" input fires. The spring data is scored
	    with the stuff model, averaged per pitcher and pitch type, and pivoted
	    so that each pitch type becomes a column. An extra "All" column holds
	    the pitch-count-weighted average across every pitch type the pitcher
	    threw.

	    Returns:
	        Tabulator: one row per pitcher, limited to pitchers whose total
	        pitch count is at least ``input.pitches_tjsumm_min()``.
	    """
	    pitch_cols = ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']

	    df_spring = spring_data()
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

	    # Mean tjStuff+ and number of pitches per pitcher / pitch type.
	    df_agg_pitch = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg(
	        pl.col('tj_stuff_plus').len().alias('count'),
	        pl.col('tj_stuff_plus').mean()
	    )

	    # Pitch-count-weighted average across pitch types, emitted as an "All"
	    # pseudo pitch type with the same column layout as df_agg_pitch so the
	    # two frames can be concatenated.
	    df_weighted_avg = (
	        df_agg_pitch
	        .with_columns(
	            (pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus')
	        )
	        .group_by(['pitcher_id', 'pitcher_name'])
	        .agg(
	            pl.col('count').sum().alias('total_count'),
	            pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus')
	        )
	        .with_columns(
	            (pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus'),
	            pl.lit("All").alias('pitch_type')
	        )
	        .select([
	            'pitcher_id',
	            'pitcher_name',
	            'pitch_type',
	            pl.col('total_count').alias('count'),
	            'tj_stuff_plus'
	        ])
	    )

	    df_small = pl.concat([df_agg_pitch, df_weighted_avg])

	    # Total pitches per pitcher come from the "All" rows; filter once.
	    df_all_rows = df_small.filter(pl.col('pitch_type') == 'All')
	    count_dict = dict(zip(df_all_rows['pitcher_id'], df_all_rows['count']))

	    # `on=` replaces the `columns=` keyword that polars deprecated in 1.0.
	    df_small_pivot = df_small.pivot(
	        on='pitch_type',
	        index=['pitcher_id', 'pitcher_name'],
	        values='tj_stuff_plus'
	    ).with_columns(
	        pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")
	    )

	    # Pitch types nobody has thrown yet are absent after the pivot. Add
	    # them as typed Float64 nulls (not untyped Null) so the numeric
	    # rounding below is valid for every column.
	    missing_cols = [col for col in pitch_cols if col not in df_small_pivot.columns]
	    if missing_cols:
	        df_small_pivot = df_small_pivot.with_columns([
	            pl.lit(None, dtype=pl.Float64).alias(col) for col in missing_cols
	        ])

	    df_small_pivot = df_small_pivot.with_columns([
	        pl.col(col).round(0).alias(col) for col in pitch_cols
	    ])

	    df_small_pivot = df_small_pivot.filter(pl.col('count') >= int(input.pitches_tjsumm_min()))

	    df_plot = df_small_pivot.sort(['pitcher_id', 'count'], descending=True).to_pandas()

	    # Attach each pitcher's team from the raw spring data.
	    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
	    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

	    columns = [
	        {"title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter": "input", "frozen": True},
	        {"title": "Team", "field": "pitcher_team", "width": 90, "headerFilter": "input", "frozen": True},
	        {"title": "Pitches", "field": "count", "width": 100, "headerFilter": "input"},
	    ] + [
	        {"title": col, "field": col, "width": 80, "formatter": "textarea"}
	        for col in pitch_cols
	    ]

	    return Tabulator(
	        df_plot,
	        table_options=TableOptions(
	            height=750,
	            columns=columns,
	        ),
	    )


	@output
	@render_tabulator
	@reactive.event(input.refresh)
	def table_tjstuff_team():
	    """Render the team-level tjStuff+ table.

	    Re-runs when the "refresh" input fires. The spring data is scored
	    with the stuff model and aggregated to one row per ``pitcher_team``.

	    Returns:
	        Tabulator: displays team, pitch count and rounded mean tjStuff+.
	        The underlying frame also carries the velocity, movement, release
	        and batter-handedness aggregates, which are not displayed.
	    """
	    df_spring = spring_data()
	    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

	    df_spring_group = df_spring_stuff.group_by(['pitcher_team']).agg([
	        pl.col('start_speed').count().alias('count'),
	        pl.col('start_speed').mean().alias('start_speed'),
	        pl.col('start_speed').max().alias('max_start_speed'),
	        pl.col('ivb').mean().alias('ivb'),
	        pl.col('hb').mean().alias('hb'),
	        pl.col('release_pos_z').mean().alias('release_pos_z'),
	        pl.col('release_pos_x').mean().alias('release_pos_x'),
	        pl.col('extension').mean().alias('extension'),
	        pl.col('tj_stuff_plus').mean().round(0).alias('tj_stuff_plus'),
	        # Right-handed hitters are batter_hand == 'R' and left-handed are
	        # 'L'; the two labels were previously swapped.
	        pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count().alias('rhh_count'),
	        pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count().alias('lhh_count')
	    ])

	    columns = [
	        {"title": "Team", "field": "pitcher_team", "width": 250, "headerFilter": "input", "frozen": True},
	        {"title": "Pitches", "field": "count", "width": 250, "headerFilter": "input"},
	        {"title": "tjStuff+", "field": "tj_stuff_plus", "width": 250, "headerFilter": "input", "formatter": "textarea"},
	    ]

	    df_plot = df_spring_group.sort(['pitcher_team', 'count'], descending=True).to_pandas()

	    return Tabulator(
	        df_plot,
	        table_options=TableOptions(
	            height=750,
	            columns=columns,
	        )
	    )




	# Shiny application object, built from the `app_ui` layout and the `server` function.
	app = App(app_ui, server)