spring_training_pitching_app

Running

App Files Community

nesticot committed on Mar 5

Commit

4a633c2

verified ·

1 Parent(s): 7b9b295

Update app.py

Browse files

Files changed (1) hide show

app.py +177 -94

app.py CHANGED Viewed

@@ -70,7 +70,9 @@ app_ui = ui.page_fluid(
                 output_tabulator("table_tjstuff")
             ),
             ui.nav("tjStuff+ Summary",
                 output_tabulator("table_stuff_all")
                 )
         )
@@ -81,8 +83,8 @@ def server(input, output, session):
     @reactive.Calc
-    def ts_data():
         import polars as pl
         df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
@@ -101,11 +103,16 @@ def server(input, output, session):
         df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
@@ -178,37 +185,180 @@ def server(input, output, session):
         return df_merge
-    @session.download(filename="data.csv")
-    def download_all():
-        yield ts_data().write_csv()
-    @output
-    @render_tabulator
-    @reactive.event(input.refresh)
-    def table_all():
         import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
@@ -357,32 +507,11 @@ def server(input, output, session):
     @reactive.event(input.refresh)
     def table_daily():
-        import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        import datetime
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
@@ -531,32 +660,11 @@ def server(input, output, session):
     @reactive.event(input.refresh)
     def table_tjstuff():
-        import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        import datetime
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
@@ -706,36 +814,11 @@ def server(input, output, session):
     @reactive.event(input.refresh)
     def table_stuff_all():
-        import polars as pl
-        df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
-        date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
-        print(datetime.datetime.now())
-        date_str = date.strftime('%Y-%m-%d')
-        # Initialize the scraper
-        game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
-                    .filter(pl.col('date') == date)['game_id'])
-        data = scraper.get_data(game_list_input)
-        df = scraper.get_data_df(data)
-        df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
-        # Update "KC" and "SV" to "CU"
-        df_spring = df_spring.with_columns(
-            pl.when(pl.col("pitch_type").is_in(["KC", "SV"]))
-            .then(pl.lit("CU"))
-            .otherwise(pl.col("pitch_type"))
-            .alias("pitch_type")
-        )
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
-        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))

                 output_tabulator("table_tjstuff")
             ),
             ui.nav("tjStuff+ Summary",
+                ui.download_button("download_tjsumm", "Download Data", class_="btn-sm mb-3"),
                 output_tabulator("table_stuff_all")
                 )
         )
     @reactive.Calc
+    def spring_data():
         import polars as pl
         df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
         df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
+        return df_spring
+    @reactive.Calc
+    def ts_data():
+        df_spring = spring_data()
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
         return df_merge
+    @reactive.Calc
+    def ts_data():
+        df_spring = spring_data()
+        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
+        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
         import polars as pl
+        # Compute total pitches for each pitcher
+        df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
+            pl.col("start_speed").count().alias("pitcher_total")
+        )
+        df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
+            pl.col('start_speed').count().alias('count'),
+            pl.col('start_speed').mean().alias('start_speed'),
+            pl.col('start_speed').max().alias('max_start_speed'),
+            pl.col('ivb').mean().alias('ivb'),
+            pl.col('hb').mean().alias('hb'),
+            pl.col('release_pos_z').mean().alias('release_pos_z'),
+            pl.col('release_pos_x').mean().alias('release_pos_x'),
+            pl.col('extension').mean().alias('extension'),
+            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
+            (pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
+        ])
+        # Join total pitches per pitcher to the grouped DataFrame on pitcher_id
+        df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")
+        # Now calculate the pitch percent for each pitcher/pitch_type combination
+        df_spring_group = df_spring_group.with_columns(
+            (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
+        )
+        # Optionally, if you want the percentage of left/right-handed batters within the group:
+        df_spring_group = df_spring_group.with_columns([
+            (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
+            (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
+        ])
+        df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitch_type'],how='left',suffix='_old')
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
+        )
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
+            .then(pl.lit(True))
+            .otherwise(pl.lit(None))
+            .alias("new_pitch")
+        )
+        df_merge = df_merge.select([
+            'pitcher_id',
+            'pitcher_name',
+            'pitch_type',
+            'count',
+            'pitch_percent',
+            'rhh_percent',
+            'lhh_percent',
+            'start_speed',
+            'max_start_speed',
+            'ivb',
+            'hb',
+            'release_pos_z',
+            'release_pos_x',
+            'extension',
+            'tj_stuff_plus',
+        ])
+        return df_merge
+    @reactive.Calc
+    def ts_data_summ():
+        df_spring = spring_data()
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
+        # Aggregate tj_stuff_plus (pitch count and mean) by pitcher and pitch type
+        df_agg_2024_pitch = df_spring_stuff.group_by(['pitcher_id','pitcher_name', 'pitch_type']).agg(
+            pl.col('tj_stuff_plus').len().alias('count'),
+            pl.col('tj_stuff_plus').mean()
+        )
+        # Calculate the weighted average of 'tj_stuff_plus' for each pitcher
+        df_weighted_avg = df_agg_2024_pitch.with_columns(
+            (pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus')
+        ).group_by(['pitcher_id', 'pitcher_name']).agg(
+            pl.col('count').sum().alias('total_count'),
+            pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus')
+        ).with_columns(
+            (pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus')
+        ).select(['pitcher_id', 'pitcher_name', 'tj_stuff_plus', 'total_count'])
+        # Add the 'pitch_type' column with value "All"
+        df_weighted_avg = df_weighted_avg.with_columns(
+            pl.lit("All").alias('pitch_type')
+        )
+        # Select and rename columns to match the original DataFrame
+        df_weighted_avg = df_weighted_avg.select([
+            'pitcher_id',
+            'pitcher_name',
+            'pitch_type',
+            pl.col('total_count').alias('count'),
+            'tj_stuff_plus'
+        ])
+        # Concatenate the new rows with the original DataFrame
+        df_small = pl.concat([df_agg_2024_pitch, df_weighted_avg])
+        df_game_count = df_spring_stuff.group_by(['pitcher_id']).agg(
+            (((pl.col('game_id').count())).alias('pitches')/((pl.col('game_id').n_unique()))).alias('pitches_per_game'),
+        )
+        count_dict = dict(zip(df_small.filter(pl.col('pitch_type')=='All')['pitcher_id'],
+                                df_small.filter(pl.col('pitch_type')=='All')['count']))
+        # Pivot to one row per pitcher with one tj_stuff_plus column per pitch type, then attach each pitcher's total pitch count
+        df_small_pivot = (df_small.pivot(index=['pitcher_id','pitcher_name'],
+                        columns='pitch_type',
+                        values='tj_stuff_plus').with_columns(
+                        pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")))
+        # Ensure every expected pitch-type column exists; create any missing one filled with None
+        for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']:
+            if col not in df_small_pivot.columns:
+                df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col))
+        df_small_pivot.select(['pitcher_id','pitcher_name','count','CH','CU','FC','FF','FS','SI','SL','ST','All']).sort('All',descending=True)#.head(10)#.write_clipboard()
+        return df_small_pivot
+    @session.download(filename="data.csv")
+    def download_all():
+        yield ts_data().write_csv()
+    @session.download(filename="data_tjstuff.csv")
+    def download_tjsumm():
+        yield ts_data_summ().write_csv()
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)
+    def table_all():
+        df_spring = spring_data()
+        # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
+        # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
     @reactive.event(input.refresh)
     def table_daily():
+        df_spring = spring_data()
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
     @reactive.event(input.refresh)
     def table_tjstuff():
+        df_spring = spring_data()
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
     @reactive.event(input.refresh)
     def table_stuff_all():
+        df_spring = spring_data()
         # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
         # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))