nesticot committed on
Commit
4a633c2
·
verified ·
1 Parent(s): 7b9b295

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -94
app.py CHANGED
@@ -70,7 +70,9 @@ app_ui = ui.page_fluid(
70
  output_tabulator("table_tjstuff")
71
  ),
72
  ui.nav("tjStuff+ Summary",
 
73
  output_tabulator("table_stuff_all")
 
74
  )
75
 
76
  )
@@ -81,8 +83,8 @@ def server(input, output, session):
81
 
82
 
83
  @reactive.Calc
84
- def ts_data():
85
-
86
  import polars as pl
87
  df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
88
 
@@ -101,11 +103,16 @@ def server(input, output, session):
101
 
102
  df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
103
 
 
 
 
 
104
 
 
105
 
106
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
107
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
108
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
109
 
110
 
111
 
@@ -178,37 +185,180 @@ def server(input, output, session):
178
 
179
  return df_merge
180
 
181
- @session.download(filename="data.csv")
182
- def download_all():
183
- yield ts_data().write_csv()
184
- @output
185
- @render_tabulator
186
- @reactive.event(input.refresh)
187
- def table_all():
 
 
188
 
189
  import polars as pl
190
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
191
 
 
 
 
 
192
 
193
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
194
- print(datetime.datetime.now())
195
- date_str = date.strftime('%Y-%m-%d')
196
- # Initialize the scraper
 
 
 
 
 
 
 
 
 
197
 
 
 
198
 
199
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
200
- .filter(pl.col('date') == date)['game_id'])
 
 
201
 
202
- data = scraper.get_data(game_list_input)
203
- df = scraper.get_data_df(data)
 
 
 
204
 
205
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
206
 
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
210
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
211
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
 
214
 
@@ -357,32 +507,11 @@ def server(input, output, session):
357
  @reactive.event(input.refresh)
358
  def table_daily():
359
 
360
- import polars as pl
361
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
362
-
363
-
364
- import datetime
365
-
366
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
367
- print(datetime.datetime.now())
368
-
369
- date_str = date.strftime('%Y-%m-%d')
370
- # Initialize the scraper
371
-
372
-
373
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
374
- .filter(pl.col('date') == date)['game_id'])
375
-
376
- data = scraper.get_data(game_list_input)
377
- df = scraper.get_data_df(data)
378
-
379
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
380
-
381
-
382
 
383
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
384
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
385
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
386
 
387
 
388
 
@@ -531,32 +660,11 @@ def server(input, output, session):
531
  @reactive.event(input.refresh)
532
  def table_tjstuff():
533
 
534
- import polars as pl
535
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
536
-
537
-
538
- import datetime
539
-
540
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
541
- print(datetime.datetime.now())
542
-
543
- date_str = date.strftime('%Y-%m-%d')
544
- # Initialize the scraper
545
-
546
-
547
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
548
- .filter(pl.col('date') == date)['game_id'])
549
-
550
- data = scraper.get_data(game_list_input)
551
- df = scraper.get_data_df(data)
552
-
553
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
554
-
555
-
556
 
557
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
558
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
559
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
560
 
561
 
562
 
@@ -706,36 +814,11 @@ def server(input, output, session):
706
  @reactive.event(input.refresh)
707
  def table_stuff_all():
708
 
709
- import polars as pl
710
- df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
711
-
712
-
713
- date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date()
714
- print(datetime.datetime.now())
715
- date_str = date.strftime('%Y-%m-%d')
716
- # Initialize the scraper
717
-
718
-
719
- game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
720
- .filter(pl.col('date') == date)['game_id'])
721
-
722
- data = scraper.get_data(game_list_input)
723
- df = scraper.get_data_df(data)
724
-
725
- df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
726
-
727
- # Update "KC" and "SV" to "CU"
728
- df_spring = df_spring.with_columns(
729
- pl.when(pl.col("pitch_type").is_in(["KC", "SV"]))
730
- .then(pl.lit("CU"))
731
- .otherwise(pl.col("pitch_type"))
732
- .alias("pitch_type")
733
- )
734
-
735
 
736
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
737
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
738
- df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
739
 
740
 
741
 
 
70
  output_tabulator("table_tjstuff")
71
  ),
72
  ui.nav("tjStuff+ Summary",
73
+ ui.download_button("download_tjsumm", "Download Data", class_="btn-sm mb-3"),
74
  output_tabulator("table_stuff_all")
75
+
76
  )
77
 
78
  )
 
83
 
84
 
85
  @reactive.Calc
86
+ def spring_data():
87
+
88
  import polars as pl
89
  df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
90
 
 
103
 
104
  df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
105
 
106
+ return df_spring
107
+
108
+ @reactive.Calc
109
+ def ts_data():
110
 
111
+ df_spring = spring_data()
112
 
113
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
114
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
115
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
116
 
117
 
118
 
 
185
 
186
  return df_merge
187
 
188
@reactive.Calc
def ts_data():
    """Build the per-pitcher, per-pitch-type summary of the spring data.

    Takes the frame returned by ``spring_data()``, runs it through
    ``fe.feature_engineering`` and ``stuff_apply.stuff_apply``, aggregates by
    (pitcher_id, pitcher_name, pitch_type), and adds usage shares.

    Returns:
        A polars DataFrame with one row per pitcher/pitch type holding the
        count, usage shares, velocity, movement, release, extension and
        ``tj_stuff_plus`` columns selected at the end of the function.
    """
    import polars as pl

    df_spring = spring_data()
    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

    # Total pitches thrown by each pitcher; used as the denominator of every
    # share computed below.
    df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
        pl.col("start_speed").count().alias("pitcher_total")
    )

    df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
        pl.col('start_speed').count().alias('count'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('start_speed').max().alias('max_start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('release_pos_z').mean().alias('release_pos_z'),
        pl.col('release_pos_x').mean().alias('release_pos_x'),
        pl.col('extension').mean().alias('extension'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        # Pitches to right-handed batters feed rhh_*, left-handed feed lhh_*.
        # The two filters were previously attached to the opposite labels.
        (pl.col('start_speed').filter(pl.col('batter_hand') == 'R').count()).alias('rhh_count'),
        (pl.col('start_speed').filter(pl.col('batter_hand') == 'L').count()).alias('lhh_count'),
    ])

    # Attach each pitcher's total to every one of their pitch-type rows.
    df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")

    # Share of the pitcher's total pitches that this pitch type represents.
    df_spring_group = df_spring_group.with_columns(
        (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
    )

    # Handedness shares use the pitcher's total pitches as the denominator,
    # not the per-pitch-type count.
    df_spring_group = df_spring_group.with_columns([
        (pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
        (pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent"),
    ])

    # NOTE(review): df_year_old_group is not defined in this function; it is
    # expected to come from an enclosing scope - confirm it is available.
    df_merge = df_spring_group.join(
        df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old'
    )

    df_merge = df_merge.with_columns(
        pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
    )

    # True when the pitcher appears in the old data but this pitch type does
    # not; null otherwise. Neither helper column is part of the final select.
    df_merge = df_merge.with_columns(
        pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
        .then(pl.lit(True))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    df_merge = df_merge.select([
        'pitcher_id',
        'pitcher_name',
        'pitch_type',
        'count',
        'pitch_percent',
        'rhh_percent',
        'lhh_percent',
        'start_speed',
        'max_start_speed',
        'ivb',
        'hb',
        'release_pos_z',
        'release_pos_x',
        'extension',
        'tj_stuff_plus',
    ])

    return df_merge
266
+
267
+ @reactive.Calc
268
+ def ts_data_summ():
269
+
270
+ df_spring = spring_data()
271
 
272
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
273
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
274
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
275
+
276
+
277
+
278
+
279
+
280
+
281
+
282
+ # Aggregate tj_stuff_plus by pitcher_id and year
283
+ df_agg_2024_pitch = df_spring_stuff.group_by(['pitcher_id','pitcher_name', 'pitch_type']).agg(
284
+ pl.col('tj_stuff_plus').len().alias('count'),
285
+ pl.col('tj_stuff_plus').mean()
286
+ )
287
+
288
+ # Calculate the weighted average of 'tj_stuff_plus' for each pitcher
289
+ df_weighted_avg = df_agg_2024_pitch.with_columns(
290
+ (pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus')
291
+ ).group_by(['pitcher_id', 'pitcher_name']).agg(
292
+ pl.col('count').sum().alias('total_count'),
293
+ pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus')
294
+ ).with_columns(
295
+ (pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus')
296
+ ).select(['pitcher_id', 'pitcher_name', 'tj_stuff_plus', 'total_count'])
297
+
298
+ # Add the 'pitch_type' column with value "All"
299
+ df_weighted_avg = df_weighted_avg.with_columns(
300
+ pl.lit("All").alias('pitch_type')
301
+ )
302
+
303
+ # Select and rename columns to match the original DataFrame
304
+ df_weighted_avg = df_weighted_avg.select([
305
+ 'pitcher_id',
306
+ 'pitcher_name',
307
+
308
+ 'pitch_type',
309
+ pl.col('total_count').alias('count'),
310
+ 'tj_stuff_plus'
311
+ ])
312
+
313
+ # Concatenate the new rows with the original DataFrame
314
+ df_small = pl.concat([df_agg_2024_pitch, df_weighted_avg])
315
+
316
+ df_game_count = df_spring_stuff.group_by(['pitcher_id']).agg(
317
+
318
+ (((pl.col('game_id').count())).alias('pitches')/((pl.col('game_id').n_unique()))).alias('pitches_per_game'),
319
+ )
320
+
321
+
322
+
323
+ count_dict = dict(zip(df_small.filter(pl.col('pitch_type')=='All')['pitcher_id'],
324
+ df_small.filter(pl.col('pitch_type')=='All')['count']))
325
+ # Check if 'FS' column exists, if not create it and fill with None
326
+
327
+ df_small_pivot = (df_small.pivot(index=['pitcher_id','pitcher_name'],
328
+ columns='pitch_type',
329
+ values='tj_stuff_plus').with_columns(
330
+ pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")))
331
+
332
+ # Check if 'FS' column exists, if not create it and fill with None
333
+ for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']:
334
+ if col not in df_small_pivot.columns:
335
+ df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col))
336
+
337
+ df_small_pivot.select(['pitcher_id','pitcher_name','count','CH','CU','FC','FF','FS','SI','SL','ST','All']).sort('All',descending=True)#.head(10)#.write_clipboard()
338
+
339
+ return df_small_pivot
340
+
341
+
342
+
343
@session.download(filename="data.csv")
def download_all():
    """Serve the ``ts_data()`` table as the ``data.csv`` download."""
    # write_csv() is called without a target, so its return value is the payload.
    csv_payload = ts_data().write_csv()
    yield csv_payload
346
+
347
+
348
@session.download(filename="data_tjstuff.csv")
def download_tjsumm():
    """Serve the ``ts_data_summ()`` table as the ``data_tjstuff.csv`` download."""
    # write_csv() is called without a target, so its return value is the payload.
    csv_payload = ts_data_summ().write_csv()
    yield csv_payload
351
+
352
+ @output
353
+ @render_tabulator
354
+ @reactive.event(input.refresh)
355
+ def table_all():
356
+
357
+ df_spring = spring_data()
358
+
359
+ # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
360
+ # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
361
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
362
 
363
 
364
 
 
507
  @reactive.event(input.refresh)
508
  def table_daily():
509
 
510
+ df_spring = spring_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
513
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
514
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
515
 
516
 
517
 
 
660
  @reactive.event(input.refresh)
661
  def table_tjstuff():
662
 
663
+ df_spring = spring_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
664
 
665
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
666
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
667
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
668
 
669
 
670
 
 
814
  @reactive.event(input.refresh)
815
  def table_stuff_all():
816
 
817
+ df_spring = spring_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
 
819
  # df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
820
  # df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
821
+ df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
822
 
823
 
824