nesticot committed on
Commit
3b0dddc
·
verified ·
1 Parent(s): 9ed231a

Update batting_update.py

Browse files
Files changed (1) hide show
  1. batting_update.py +630 -622
batting_update.py CHANGED
@@ -1,623 +1,631 @@
1
- import pandas as pd
2
- import numpy as np
3
- import joblib
4
- import math
5
- import pickle
6
-
7
# Pre-trained estimators, deserialised once when the module is imported.
# The barrel model file is read a single time; ``loaded_model`` is kept as an
# alias of ``barrel_model`` so any code still using the older name keeps working.
barrel_model = joblib.load('joblib_model/barrel_model.joblib')
loaded_model = barrel_model
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
14
-
15
-
16
def percentile(n):
    """Build a NaN-ignoring percentile function for use as an aggregator.

    Args:
        n: Percentile to compute, passed straight to ``np.nanpercentile``.

    Returns:
        A one-argument callable named ``percentile_<n>`` that returns the
        n-th percentile of its array-like argument, skipping NaN values.
    """
    def percentile_(x):
        return np.nanpercentile(x, n)

    # A distinct name per percentile keeps aggregated outputs distinguishable.
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_
21
-
22
-
23
def df_update(df=pd.DataFrame()):
    """Add per-pitch batting indicator and value columns to *df*.

    The frame is modified in place and also returned.  It must already hold
    the columns read below: ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``y``,
    ``px``, ``pz``, ``event_type``, ``play_description``, ``play_code``,
    ``launch_speed``, ``launch_angle``, ``is_swing``, ``is_pitch``,
    ``pitch_type`` and ``trajectory``.

    Module-level models (``px_model``, ``in_zone_model``, ``barrel_model``,
    ``attack_zone_model``, ``xwoba_model``) are only called when at least one
    row has the inputs they need, so a model is never asked to predict on an
    empty selection.

    Args:
        df: Pitch-level DataFrame.

    Returns:
        The same DataFrame with the derived columns added.
    """
    # A strike-zone edge of exactly 0 is treated as "not recorded".
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # Positive zone codes below 10 count as in-zone; non-positive or missing
    # codes leave the flag unknown (NaN).
    df['in_zone'] = [z < 10 if z > 0 else np.nan for z in df['zone']]

    # Impute plate location from the x/y coordinates where px/pz are missing.
    px_missing = df['x'].notna() & df['px'].isna()
    if px_missing.any():
        df.loc[px_missing, 'px'] = px_model.predict(df.loc[px_missing, ['x']])
        pz_missing = df['y'].notna() & df['pz'].isna()
        if pz_missing.any():
            # NOTE(review): the original applies px_model (plus a 3.2 offset)
            # to the y coordinate although pz_model is loaded and unused.
            # Kept as is; confirm whether pz_model was intended here.
            df.loc[pz_missing, 'pz'] = (
                px_model.predict(df.loc[pz_missing, ['y']]) + 3.2
            )

    # Fill unknown in-zone flags from the model where all four inputs exist.
    zone_unknown = (
        df['px'].notna() & df['in_zone'].isna() & df['sz_top'].notna()
    )
    if zone_unknown.any():
        print('We found missing data')
        fillable = zone_unknown & df['pz'].notna() & df['sz_bot'].notna()
        if fillable.any():
            zone_inputs = df.loc[fillable, ['px', 'pz', 'sz_top', 'sz_bot']]
            df.loc[fillable, 'in_zone'] = in_zone_model.predict(
                zone_inputs.values
            )

    hit_codes = ['single', 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    events = df['event_type']

    # Boolean event flags.
    df['hits'] = events.isin(hit_codes)
    df['ab'] = events.isin(ab_codes)
    df['on_base'] = events.isin(obp_true_codes)
    df['obp'] = events.isin(obp_codes)
    df['bip'] = df['play_description'].isin(bip_codes)

    # Denominator flag for batted-ball rates: rows with a launch speed.
    df['bip_div'] = df['launch_speed'].notna()

    # Barrel prediction only where both launch measurements are present.
    batted = df['launch_speed'].notna() & df['launch_angle'].notna()
    df['barrel'] = np.nan
    if batted.any():
        df.loc[batted, 'barrel'] = barrel_model.predict(
            df.loc[batted, ['launch_speed', 'launch_angle']]
        )

    # As in the original: a missing measurement yields 0, a qualifying one
    # yields 1, and any other value falls through to the NaN default.
    df['sweet_spot'] = np.select(
        [df['launch_angle'].isna(),
         (df['launch_angle'] >= 8) & (df['launch_angle'] <= 32)],
        [False, True],
        default=np.nan,
    )
    df['hard_hit'] = np.select(
        [df['launch_speed'].isna(), df['launch_speed'] >= 94.5],
        [False, True],
        default=np.nan,
    )

    # Total bases per hit type; NaN for every other event.
    total_bases = {'single': 1, 'double': 2, 'triple': 3, 'home_run': 4}
    df['tb'] = events.map(total_bases).astype(float)

    # wOBA value per event: listed outs score 0, reach-base events carry a
    # weight, and events outside the table stay NaN.
    woba_values = dict.fromkeys(
        ['strikeout', 'field_out', 'sac_fly', 'force_out',
         'grounded_into_double_play', 'fielders_choice', 'field_error',
         'sac_bunt', 'double_play', 'fielders_choice_out',
         'strikeout_double_play', 'sac_fly_double_play', 'other_out'],
        0,
    )
    woba_values.update({
        'walk': 0.696,
        'hit_by_pitch': 0.726,
        'single': 0.883,
        'double': 1.244,
        'triple': 1.569,
        'home_run': 2.004,
    })
    df['woba'] = events.map(woba_values).astype(float)

    # 1 for every event that has a wOBA value, NaN otherwise.
    df['woba_codes'] = np.where(events.isin(list(woba_values)), 1.0, np.nan)
    df['woba_contact'] = df['woba'].where(df['bip'])

    # Swing / whiff counters derived from the play code.
    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype(int)
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df['swings'] = df['is_swing'].eq(True).astype(int)

    in_zone = df['in_zone'].eq(True)
    off_zone = df['in_zone'].eq(False)
    swung = df['swings'] == 1
    made_contact = df['whiffs'] == 0

    df['out_zone'] = off_zone
    df['zone_swing'] = in_zone & swung
    df['zone_contact'] = in_zone & swung & made_contact
    df['ozone_swing'] = off_zone & swung
    df['ozone_contact'] = off_zone & swung & made_contact

    # Any event whose name contains 'strikeout' counts as a strikeout.
    strikeout_events = [
        e for e in events.dropna().unique() if 'strikeout' in e
    ]
    df['k'] = events.isin(strikeout_events)
    df['bb'] = events.isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A row closes a plate appearance when it carries an event name.
    df['pa'] = [1 if isinstance(e, str) else 0 for e in events]
    # Plain truthiness, as in the original (NaN is truthy and counts as 1).
    df['pitches'] = [1 if p else 0 for p in df['is_pitch']]

    pitch_cat = {'FA': 'Fastball',
                 'FF': 'Fastball',
                 'FT': 'Fastball',
                 'FC': 'Fastball',
                 'FS': 'Off-Speed',
                 'FO': 'Off-Speed',
                 'SI': 'Fastball',
                 'ST': 'Breaking',
                 'SL': 'Breaking',
                 'CU': 'Breaking',
                 'KC': 'Breaking',
                 'SC': 'Off-Speed',
                 'GY': 'Off-Speed',
                 'SV': 'Breaking',
                 'CS': 'Breaking',
                 'CH': 'Off-Speed',
                 'KN': 'Off-Speed',
                 'EP': 'Breaking',
                 'UN': np.nan,
                 'IN': np.nan,
                 'PO': np.nan,
                 'AB': np.nan,
                 'AS': np.nan,
                 'NP': np.nan}
    df['pitch_category'] = df['pitch_type'].map(pitch_cat).fillna('Unknown')
    # Constant key so the whole frame can be grouped into a single row.
    df['average'] = 'average'

    # Fold bunt trajectories into their base type; empty strings become NaN.
    for raw, canonical in (('bunt_popup', 'popup'),
                           ('bunt_grounder', 'ground_ball'),
                           ('', np.nan),
                           ('bunt_line_drive', 'line_drive')):
        df.loc[df['trajectory'] == raw, 'trajectory'] = canonical

    # One indicator column per expected trajectory.  Built explicitly so the
    # result does not depend on which trajectories happen to be present.
    for kind in ('fly_ball', 'ground_ball', 'line_drive', 'popup'):
        df['trajectory_' + kind] = df['trajectory'] == kind

    # Attack-zone class for pitches with a full location / strike-zone record.
    zone_features = ['px', 'pz', 'sz_top', 'sz_bot']
    located = df[zone_features].notna().all(axis=1)
    df['attack_zone'] = np.nan
    if located.any():
        df.loc[located, 'attack_zone'] = attack_zone_model.predict(
            df.loc[located, zone_features]
        )

    for code, label in enumerate(('heart', 'shadow', 'chase', 'waste')):
        df[label] = df['attack_zone'] == code
    for code, label in enumerate(('heart', 'shadow', 'chase', 'waste')):
        df[label + '_swing'] = (df['attack_zone'] == code) & swung

    # Expected wOBA on contact: class probabilities weighted by wOBA values.
    xwoba_weights = np.array([0, 0.883, 1.244, 1.569, 2.004])
    df['xwoba'] = np.nan
    df['xwoba_contact'] = np.nan
    if batted.any():
        outcome_probs = xwoba_model.predict_proba(
            df.loc[batted, ['launch_angle', 'launch_speed']]
        )
        expected = (outcome_probs * xwoba_weights).sum(axis=1)
        df.loc[batted, 'xwoba'] = expected
        df.loc[batted, 'xwoba_contact'] = expected

    # Walks, hit-by-pitches and strikeouts get fixed values that override any
    # model output.  Applied regardless of whether batted balls exist so that
    # xwoba stays consistent with xwoba_codes below.
    df.loc[events.isin(['walk']), 'xwoba'] = 0.696
    df.loc[events.isin(['hit_by_pitch']), 'xwoba'] = 0.726
    df.loc[events.isin(['strikeout', 'strikeout_double_play']), 'xwoba'] = 0

    # 1 for every row that contributes to xwOBA, NaN otherwise.
    counted = batted | events.isin(
        ['walk', 'hit_by_pitch', 'strikeout', 'strikeout_double_play']
    )
    df['xwoba_codes'] = np.nan
    df.loc[counted, 'xwoba_codes'] = 1
    return df
304
-
305
def _nanpercentile_90(values):
    """Return the 90th percentile of *values*, ignoring NaN entries."""
    return np.nanpercentile(values, 90)


# Output column -> (source column, aggregation).  Shared by every summary
# function below so the three groupings cannot drift apart.
_BATTING_AGGREGATIONS = {
    'pa': ('pa', 'sum'),
    'ab': ('ab', 'sum'),
    'obp_pa': ('obp', 'sum'),
    'hits': ('hits', 'sum'),
    'on_base': ('on_base', 'sum'),
    'k': ('k', 'sum'),
    'bb': ('bb', 'sum'),
    'bb_minus_k': ('bb_minus_k', 'sum'),
    'csw': ('csw', 'sum'),
    'bip': ('bip', 'sum'),
    'bip_div': ('bip_div', 'sum'),
    'tb': ('tb', 'sum'),
    'woba': ('woba', 'sum'),
    'woba_contact': ('woba_contact', 'sum'),
    'xwoba': ('xwoba', 'sum'),
    'xwoba_contact': ('xwoba_contact', 'sum'),
    'woba_codes': ('woba_codes', 'sum'),
    'xwoba_codes': ('xwoba_codes', 'sum'),
    'hard_hit': ('hard_hit', 'sum'),
    'barrel': ('barrel', 'sum'),
    'sweet_spot': ('sweet_spot', 'sum'),
    'max_launch_speed': ('launch_speed', 'max'),
    'launch_speed_90': ('launch_speed', _nanpercentile_90),
    'launch_speed': ('launch_speed', 'mean'),
    'launch_angle': ('launch_angle', 'mean'),
    'pitches': ('is_pitch', 'sum'),
    'swings': ('swings', 'sum'),
    'in_zone': ('in_zone', 'sum'),
    'out_zone': ('out_zone', 'sum'),
    'whiffs': ('whiffs', 'sum'),
    'zone_swing': ('zone_swing', 'sum'),
    'zone_contact': ('zone_contact', 'sum'),
    'ozone_swing': ('ozone_swing', 'sum'),
    'ozone_contact': ('ozone_contact', 'sum'),
    'ground_ball': ('trajectory_ground_ball', 'sum'),
    'line_drive': ('trajectory_line_drive', 'sum'),
    'fly_ball': ('trajectory_fly_ball', 'sum'),
    'pop_up': ('trajectory_popup', 'sum'),
    'attack_zone': ('attack_zone', 'count'),
    'heart': ('heart', 'sum'),
    'shadow': ('shadow', 'sum'),
    'chase': ('chase', 'sum'),
    'waste': ('waste', 'sum'),
    'heart_swing': ('heart_swing', 'sum'),
    'shadow_swing': ('shadow_swing', 'sum'),
    'chase_swing': ('chase_swing', 'sum'),
    'waste_swing': ('waste_swing', 'sum'),
}


def _aggregate_batting(df, group_keys):
    """Group *df* by *group_keys* and apply the shared aggregation table."""
    return df.groupby(group_keys).agg(**_BATTING_AGGREGATIONS).reset_index()


def _safe_ratio(numerator, denominator):
    """Divide two Series element-wise, giving NaN where the denominator is 0."""
    num = numerator.astype('float64')
    den = denominator.astype('float64')
    return (num / den).where(den != 0)


def _add_rate_columns(summary, include_trajectory_rates):
    """Add the derived rate columns to *summary* in place.

    Every rate is numerator / denominator and NaN when the denominator is 0.
    The four batted-ball trajectory rates are added only when
    *include_trajectory_rates* is true.
    """
    def add(name, numerator, denominator):
        summary[name] = _safe_ratio(summary[numerator], summary[denominator])

    add('avg', 'hits', 'ab')
    add('obp', 'on_base', 'obp_pa')
    add('slg', 'tb', 'ab')
    summary['ops'] = summary['obp'] + summary['slg']

    for name, numerator, denominator in (
            ('k_percent', 'k', 'pa'),
            ('bb_percent', 'bb', 'pa'),
            ('bb_minus_k_percent', 'bb_minus_k', 'pa'),
            ('bb_over_k_percent', 'bb', 'k'),
            ('csw_percent', 'csw', 'pitches'),
            ('sweet_spot_percent', 'sweet_spot', 'bip_div'),
            ('woba_percent', 'woba', 'woba_codes'),
            ('woba_percent_contact', 'woba_contact', 'bip'),
            ('hard_hit_percent', 'hard_hit', 'bip_div'),
            ('barrel_percent', 'barrel', 'bip_div'),
            ('zone_contact_percent', 'zone_contact', 'zone_swing'),
            ('zone_swing_percent', 'zone_swing', 'in_zone'),
            ('zone_percent', 'in_zone', 'pitches'),
    ):
        add(name, numerator, denominator)

    # Chase rate divides by pitches that were not in the zone.
    pitches_not_in_zone = (
        summary['pitches'].astype('float64')
        - summary['in_zone'].astype('float64')
    )
    summary['chase_percent'] = _safe_ratio(
        summary['ozone_swing'], pitches_not_in_zone
    )

    for name, numerator, denominator in (
            ('chase_contact', 'ozone_contact', 'ozone_swing'),
            ('swing_percent', 'swings', 'pitches'),
            ('whiff_rate', 'whiffs', 'swings'),
            ('swstr_rate', 'whiffs', 'pitches'),
    ):
        add(name, numerator, denominator)

    if include_trajectory_rates:
        for name, numerator in (
                ('ground_ball_percent', 'ground_ball'),
                ('line_drive_percent', 'line_drive'),
                ('fly_ball_percent', 'fly_ball'),
                ('pop_up_percent', 'pop_up'),
        ):
            add(name, numerator, 'bip')

    for name, numerator, denominator in (
            ('heart_zone_percent', 'heart', 'attack_zone'),
            ('shadow_zone_percent', 'shadow', 'attack_zone'),
            ('chase_zone_percent', 'chase', 'attack_zone'),
            ('waste_zone_percent', 'waste', 'attack_zone'),
            ('heart_zone_swing_percent', 'heart_swing', 'heart'),
            ('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
            ('chase_zone_swing_percent', 'chase_swing', 'chase'),
            ('waste_zone_swing_percent', 'waste_swing', 'waste'),
            ('xwoba_percent', 'xwoba', 'xwoba_codes'),
            ('xwoba_percent_contact', 'xwoba_contact', 'bip'),
    ):
        add(name, numerator, denominator)


def df_update_summ(df=pd.DataFrame()):
    """Aggregate pitch-level rows into one row per batter.

    Args:
        df: Pitch-level frame holding ``batter_id``, ``batter_name`` and the
            source columns named in the shared aggregation table.

    Returns:
        A DataFrame grouped by ``batter_id`` and ``batter_name`` with the
        index reset.
    """
    return _aggregate_batting(df, ['batter_id', 'batter_name'])


def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate pitch-level rows by the ``average`` column.

    Args:
        df: Pitch-level frame holding ``average`` and the source columns named
            in the shared aggregation table.

    Returns:
        A DataFrame with one row per distinct ``average`` value, index reset.
    """
    return _aggregate_batting(df, ['average'])


def df_summ_changes(df_summ=pd.DataFrame()):
    """Add rate statistics to an aggregated summary.

    The rate columns are added to *df_summ* in place.  Each rate is NaN when
    its denominator is 0.

    Args:
        df_summ: Summary frame holding the aggregated count columns.

    Returns:
        The summary with rows whose ``bip`` is NaN dropped.
    """
    _add_rate_columns(df_summ, include_trajectory_rates=True)
    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ


def df_summ_filter_out(df_summ=pd.DataFrame(), batter_select=0):
    """Select one batter and rank them against a plate-appearance-matched pool.

    The pool keeps rows whose ``pa`` is at least the selected batter's ``pa``
    rounded down to a multiple of 10, capped at 500.

    Args:
        df_summ: Summary frame with a MultiIndex whose level 0 identifies the
            batter (required by the ``xs(..., level=0)`` lookups).
        batter_select: Level-0 index value of the batter to extract.

    Returns:
        A tuple ``(pool, pool_percentiles, player_rows, player_percentiles)``.

    Raises:
        TypeError: If the selected batter does not map to exactly one row.
    """
    df_summ_player = df_summ.xs(batter_select, level=0)
    player_pa = df_summ_player['pa']
    if len(player_pa) != 1:
        raise TypeError(
            'expected exactly one row for batter %r, found %d'
            % (batter_select, len(player_pa))
        )
    # Take the scalar explicitly instead of relying on implicit
    # Series-to-float conversion inside math.floor.
    pa_threshold = min(math.floor(player_pa.iloc[0] / 10) * 10, 500)

    df_summ_filter = df_summ[df_summ['pa'] >= pa_threshold]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter, df_summ_filter_pct, df_summ_player, df_summ_player_pct


def df_summ_batter_pitch_up(df=pd.DataFrame()):
    """Aggregate pitch-level rows per batter and pitch category, with rates.

    Rows with a missing ``pitch_category`` are dropped before grouping.
    ``woba_contact`` and ``xwoba_contact`` aggregate their own source columns,
    the same as the other summary functions.  The batted-ball trajectory rates
    are not added here.

    Args:
        df: Pitch-level frame holding ``batter_id``, ``batter_name``,
            ``pitch_category`` and the source columns named in the shared
            aggregation table.

    Returns:
        A DataFrame with one row per batter and pitch category, including the
        rate columns, with NaN ``bip`` replaced by 0.
    """
    df_summ_batter_pitch = _aggregate_batting(
        df.dropna(subset=['pitch_category']),
        ['batter_id', 'batter_name', 'pitch_category'],
    )
    _add_rate_columns(df_summ_batter_pitch, include_trajectory_rates=False)
    df_summ_batter_pitch['bip'] = df_summ_batter_pitch['bip'].fillna(0)
    return df_summ_batter_pitch
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ import math
5
+ import pickle
6
+
7
# Pre-trained estimators, deserialised once when the module is imported.
# The barrel model file is read a single time; ``loaded_model`` is kept as an
# alias of ``barrel_model`` so any code still using the older name keeps working.
barrel_model = joblib.load('joblib_model/barrel_model.joblib')
loaded_model = barrel_model
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
14
+
15
+
16
def percentile(n):
    """Build a NaN-ignoring percentile function for use as an aggregator.

    Args:
        n: Percentile to compute, passed straight to ``np.nanpercentile``.

    Returns:
        A one-argument callable named ``percentile_<n>`` that returns the
        n-th percentile of its array-like argument, skipping NaN values.
    """
    def percentile_(x):
        return np.nanpercentile(x, n)

    # A distinct name per percentile keeps aggregated outputs distinguishable.
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_
21
+
22
+
23
def df_update(df=pd.DataFrame()):
    """Add per-pitch indicator and value columns used by the batting summaries.

    The frame is modified in place and also returned. The columns read are
    ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``,
    ``event_type``, ``play_description``, ``play_code``, ``is_swing``,
    ``is_pitch``, ``pitch_type``, ``trajectory``, ``launch_speed`` and
    ``launch_angle``.

    Every model prediction is skipped when no row qualifies, so a frame with
    no tracked pitches or batted balls no longer hands an empty array to an
    estimator.

    Args:
        df: Pitch-level DataFrame (one row per pitch/event).

    Returns:
        The same DataFrame with the derived columns added.
    """
    # A strike-zone edge of exactly 0 is treated as missing.
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # Positive zone codes below 10 count as in-zone; anything else is unknown.
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Back-fill plate location from the x/y coordinates where px/pz are absent.
    need_px = df['x'].notna() & df['px'].isna()
    if need_px.any():
        df.loc[need_px, 'px'] = px_model.predict(df.loc[need_px, ['x']])
        need_pz = df['y'].notna() & df['pz'].isna()
        if need_pz.any():
            # NOTE(review): pz is estimated with px_model plus a 3.2 offset
            # although pz_model is loaded - kept as-is, confirm it is intended.
            df.loc[need_pz, 'pz'] = px_model.predict(df.loc[need_pz, ['y']]) + 3.2

    # Classify in/out of zone with the model where the zone code is missing.
    zone_unknown = df['px'].notna() & df['in_zone'].isna() & df['sz_top'].notna()
    if zone_unknown.any():
        print('We found missing data')
        zone_predictable = zone_unknown & df['pz'].notna() & df['sz_bot'].notna()
        if zone_predictable.any():
            df.loc[zone_predictable, 'in_zone'] = in_zone_model.predict(
                df.loc[zone_predictable, ['px', 'pz', 'sz_top', 'sz_bot']].values)

    hit_codes = ['single',
                 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    # Boolean event flags.
    df['hits'] = df['event_type'].isin(hit_codes)
    df['ab'] = df['event_type'].isin(ab_codes)
    df['on_base'] = df['event_type'].isin(obp_true_codes)
    df['obp'] = df['event_type'].isin(obp_codes)
    df['bip'] = df['play_description'].isin(bip_codes)

    # Denominator for batted-ball rates: rows that have an exit velocity.
    df['bip_div'] = ~df['launch_speed'].isna()

    df['barrel'] = np.nan
    batted = df['launch_speed'].notna() & df['launch_angle'].notna()
    if batted.any():
        df.loc[batted, 'barrel'] = barrel_model.predict(
            df.loc[batted, ['launch_speed', 'launch_angle']])

    # Missing launch data -> 0, launch angle within [8, 32] -> 1, otherwise NaN.
    df['sweet_spot'] = np.select(
        [df['launch_angle'].isna(),
         (df['launch_angle'] >= 8) & (df['launch_angle'] <= 32)],
        [False, True], default=np.nan)

    # Missing exit velocity -> 0, exit velocity >= 94.5 -> 1, otherwise NaN.
    df['hard_hit'] = np.select(
        [df['launch_speed'].isna(),
         df['launch_speed'] >= 94.5],
        [False, True], default=np.nan)

    # Total bases per hit type.
    base_values = [('single', 1), ('double', 2), ('triple', 3), ('home_run', 4)]
    df['tb'] = np.select(
        [df['event_type'] == event for event, _ in base_values],
        [bases for _, bases in base_values],
        default=np.nan)

    # wOBA value of each plate-appearance outcome.
    woba_out_events = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                       'grounded_into_double_play', 'fielders_choice', 'field_error',
                       'sac_bunt', 'double_play', 'fielders_choice_out',
                       'strikeout_double_play',
                       'sac_fly_double_play', 'other_out']
    woba_values = [('walk', 0.696), ('hit_by_pitch', 0.726), ('single', 0.883),
                   ('double', 1.244), ('triple', 1.569), ('home_run', 2.004)]
    df['woba'] = np.select(
        [df['event_type'].isin(woba_out_events)]
        + [df['event_type'] == event for event, _ in woba_values],
        [0] + [value for _, value in woba_values],
        default=np.nan)

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']

    # 1 for events that count toward the wOBA denominator, NaN otherwise.
    df['woba_codes'] = np.where(df['event_type'].isin(woba_codes), 1, np.nan)

    # wOBA restricted to balls in play.
    df['woba_contact'] = df['woba'].where(df['bip'] == 1)

    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype(int)
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df['swings'] = [1 if x == True else 0 for x in df.is_swing]

    df['out_zone'] = df.in_zone == False
    df['zone_swing'] = (df.in_zone == True) & (df.swings == 1)
    df['zone_contact'] = (df.in_zone == True) & (df.swings == 1) & (df.whiffs == 0)
    df['ozone_swing'] = (df.in_zone == False) & (df.swings == 1)
    df['ozone_contact'] = (df.in_zone == False) & (df.swings == 1) & (df.whiffs == 0)

    # Any event type containing "strikeout" counts as a strikeout.
    strikeout_events = [event for event in df['event_type'].dropna().unique()
                        if 'strikeout' in event]
    df['k'] = df['event_type'].isin(strikeout_events)
    df['bb'] = df['event_type'].isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A plate appearance ends on the row that carries a string event type.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in df.event_type]
    df['pitches'] = [1 if x else 0 for x in df.is_pitch]

    df.loc[df['launch_speed'].isna(), 'barrel'] = np.nan

    # Codes mapped to NaN end up as 'Unknown' through the fillna below.
    pitch_cat = {'FA': 'Fastball',
                 'FF': 'Fastball',
                 'FT': 'Fastball',
                 'FC': 'Fastball',
                 'FS': 'Off-Speed',
                 'FO': 'Off-Speed',
                 'SI': 'Fastball',
                 'ST': 'Breaking',
                 'SL': 'Breaking',
                 'CU': 'Breaking',
                 'KC': 'Breaking',
                 'SC': 'Off-Speed',
                 'GY': 'Off-Speed',
                 'SV': 'Breaking',
                 'CS': 'Breaking',
                 'CH': 'Off-Speed',
                 'KN': 'Off-Speed',
                 'EP': 'Breaking',
                 'UN': np.nan,
                 'IN': np.nan,
                 'PO': np.nan,
                 'AB': np.nan,
                 'AS': np.nan,
                 'NP': np.nan}
    df['pitch_category'] = df['pitch_type'].map(pitch_cat).fillna('Unknown')
    # Constant key so the whole frame can be grouped into a single "average" row.
    df['average'] = 'average'

    # Fold bunt trajectories into the regular ones; an empty string is missing.
    for raw, merged in (('bunt_popup', 'popup'),
                        ('bunt_grounder', 'ground_ball'),
                        ('', np.nan),
                        ('bunt_line_drive', 'line_drive')):
        df.loc[df['trajectory'] == raw, 'trajectory'] = merged

    # Align the dummies by name so a trajectory that never occurs in this frame
    # becomes an all-zero column instead of breaking the four-column assignment.
    trajectory_cols = ['trajectory_fly_ball', 'trajectory_ground_ball',
                       'trajectory_line_drive', 'trajectory_popup']
    trajectory_dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    df[trajectory_cols] = trajectory_dummies.reindex(columns=trajectory_cols,
                                                     fill_value=0)

    df['attack_zone'] = np.nan
    located = df[['px', 'pz', 'sz_top', 'sz_bot']].notna().all(axis=1)
    if located.any():
        df.loc[located, 'attack_zone'] = attack_zone_model.predict(
            df.loc[located, ['px', 'pz', 'sz_top', 'sz_bot']])

    for code, zone_name in enumerate(['heart', 'shadow', 'chase', 'waste']):
        df[zone_name] = df['attack_zone'] == code
    for code, zone_name in enumerate(['heart', 'shadow', 'chase', 'waste']):
        df[zone_name + '_swing'] = (df['attack_zone'] == code) & (df['swings'] == 1)

    df['xwoba'] = np.nan
    df['xwoba_contact'] = np.nan

    tracked = df[['launch_angle', 'launch_speed']].notna().all(axis=1)
    if tracked.any():
        # Expected wOBA = class probabilities weighted by the wOBA value of
        # each class (weights listed in the model's class order as used before).
        outcome_proba = xwoba_model.predict_proba(
            df.loc[tracked, ['launch_angle', 'launch_speed']])
        expected_woba = (outcome_proba * [0, 0.883, 1.244, 1.569, 2.004]).sum(axis=1)
        df.loc[tracked, 'xwoba'] = expected_woba

    # Outcomes without contact get their fixed wOBA value.
    df.loc[df['event_type'].isin(['walk']), 'xwoba'] = 0.696
    df.loc[df['event_type'].isin(['hit_by_pitch']), 'xwoba'] = 0.726
    df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'xwoba'] = 0

    if tracked.any():
        # Contact-only xwOBA keeps the model value even on rows overridden above.
        df.loc[tracked, 'xwoba_contact'] = expected_woba

    # 1 for every row that contributes to the xwOBA denominator: tracked batted
    # balls, walks, hit-by-pitches and strikeouts.
    df['xwoba_codes'] = np.nan
    df.loc[tracked, 'xwoba_codes'] = 1
    df.loc[df['event_type'].isin(['walk']), 'xwoba_codes'] = 1
    df.loc[df['event_type'].isin(['hit_by_pitch']), 'xwoba_codes'] = 1
    df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'xwoba_codes'] = 1
    return df
304
+
305
def df_update_summ(df=pd.DataFrame()):
    """Aggregate the per-pitch frame into one row of counting stats per batter.

    Args:
        df: Pitch-level frame carrying the columns produced by ``df_update``
            plus ``batter_id`` and ``batter_name``.

    Returns:
        DataFrame with ``batter_id`` and ``batter_name`` as regular columns
        followed by the aggregated columns in the order listed below.
    """
    # A bare string means "sum the column of the same name"; a tuple spells
    # out (output column, source column, aggregation).
    layout = [
        'pa', 'ab', ('obp_pa', 'obp', 'sum'), 'hits', 'on_base', 'k', 'bb',
        'bb_minus_k', 'csw', 'bip', 'bip_div', 'tb', 'woba', 'woba_contact',
        'xwoba', 'xwoba_contact', 'woba_codes', 'xwoba_codes', 'hard_hit',
        'barrel', 'sweet_spot',
        ('max_launch_speed', 'launch_speed', 'max'),
        ('launch_speed_90', 'launch_speed', percentile(90)),
        ('launch_speed', 'launch_speed', 'mean'),
        ('launch_angle', 'launch_angle', 'mean'),
        ('pitches', 'is_pitch', 'sum'),
        'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
        'zone_contact', 'ozone_swing', 'ozone_contact',
        ('ground_ball', 'trajectory_ground_ball', 'sum'),
        ('line_drive', 'trajectory_line_drive', 'sum'),
        ('fly_ball', 'trajectory_fly_ball', 'sum'),
        ('pop_up', 'trajectory_popup', 'sum'),
        ('attack_zone', 'attack_zone', 'count'),
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]

    named_aggs = {}
    for entry in layout:
        if isinstance(entry, str):
            named_aggs[entry] = (entry, 'sum')
        else:
            target, source, how = entry
            named_aggs[target] = (source, how)

    grouped = df.groupby(['batter_id', 'batter_name'])
    df_summ = grouped.agg(**named_aggs).reset_index()
    return df_summ
356
+
357
def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate the per-pitch frame into counting stats grouped by ``average``.

    Uses the same aggregations as the per-batter summary, but groups on the
    ``average`` column instead of the batter columns.

    Args:
        df: Pitch-level frame carrying the columns produced by ``df_update``.

    Returns:
        DataFrame with ``average`` as a regular column followed by the
        aggregated columns in the order listed below.
    """
    # A bare string means "sum the column of the same name"; a tuple spells
    # out (output column, source column, aggregation).
    layout = [
        'pa', 'ab', ('obp_pa', 'obp', 'sum'), 'hits', 'on_base', 'k', 'bb',
        'bb_minus_k', 'csw', 'bip', 'bip_div', 'tb', 'woba', 'woba_contact',
        'xwoba', 'xwoba_contact', 'woba_codes', 'xwoba_codes', 'hard_hit',
        'barrel', 'sweet_spot',
        ('max_launch_speed', 'launch_speed', 'max'),
        ('launch_speed_90', 'launch_speed', percentile(90)),
        ('launch_speed', 'launch_speed', 'mean'),
        ('launch_angle', 'launch_angle', 'mean'),
        ('pitches', 'is_pitch', 'sum'),
        'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
        'zone_contact', 'ozone_swing', 'ozone_contact',
        ('ground_ball', 'trajectory_ground_ball', 'sum'),
        ('line_drive', 'trajectory_line_drive', 'sum'),
        ('fly_ball', 'trajectory_fly_ball', 'sum'),
        ('pop_up', 'trajectory_popup', 'sum'),
        ('attack_zone', 'attack_zone', 'count'),
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]

    named_aggs = {}
    for entry in layout:
        if isinstance(entry, str):
            named_aggs[entry] = (entry, 'sum')
        else:
            target, source, how = entry
            named_aggs[target] = (source, how)

    df_summ_avg = df.groupby(['average']).agg(**named_aggs).reset_index()
    return df_summ_avg
412
+
413
def df_summ_changes(df_summ=pd.DataFrame()):
    """Add rate statistics to a summary of counting stats.

    Each rate is ``numerator / denominator`` and is NaN wherever the
    denominator is zero (or, for the pitch-based zone/swing/swinging-strike
    rates, not positive). The computation is vectorized and aligned on the
    index, so it works for any index, not only the default ``0..n-1``.

    The new columns are added to ``df_summ`` in place.

    Args:
        df_summ: Summary frame holding the counting columns used below.

    Returns:
        ``df_summ`` with the rate columns added and rows whose ``bip`` is
        missing dropped.
    """
    def rate(numerator, denominator, require_positive=False):
        # Blank out unusable denominators before dividing so nothing is ever
        # divided by zero, then normalize the result to float.
        usable = denominator > 0 if require_positive else denominator != 0
        return (numerator / denominator.where(usable)).astype(float)

    def add_rates(triples):
        for target, num_col, den_col in triples:
            df_summ[target] = rate(df_summ[num_col], df_summ[den_col])

    add_rates([
        ('avg', 'hits', 'ab'),
        ('obp', 'on_base', 'obp_pa'),
        ('slg', 'tb', 'ab'),
    ])
    df_summ['ops'] = df_summ['obp'] + df_summ['slg']

    add_rates([
        ('k_percent', 'k', 'pa'),
        ('bb_percent', 'bb', 'pa'),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa'),
        ('bb_over_k_percent', 'bb', 'k'),
        ('csw_percent', 'csw', 'pitches'),
        ('sweet_spot_percent', 'sweet_spot', 'bip_div'),
        ('woba_percent', 'woba', 'woba_codes'),
        ('woba_percent_contact', 'woba_contact', 'bip'),
        ('hard_hit_percent', 'hard_hit', 'bip_div'),
        ('barrel_percent', 'barrel', 'bip_div'),
        ('zone_contact_percent', 'zone_contact', 'zone_swing'),
        ('zone_swing_percent', 'zone_swing', 'in_zone'),
    ])

    df_summ['zone_percent'] = rate(df_summ['in_zone'], df_summ['pitches'],
                                   require_positive=True)
    # Chase rate: swings outside the zone over pitches outside the zone.
    df_summ['chase_percent'] = rate(df_summ['ozone_swing'],
                                    df_summ['pitches'] - df_summ['in_zone'])
    df_summ['chase_contact'] = rate(df_summ['ozone_contact'], df_summ['ozone_swing'])
    df_summ['swing_percent'] = rate(df_summ['swings'], df_summ['pitches'],
                                    require_positive=True)
    df_summ['whiff_rate'] = rate(df_summ['whiffs'], df_summ['swings'])
    df_summ['swstr_rate'] = rate(df_summ['whiffs'], df_summ['pitches'],
                                 require_positive=True)

    add_rates([
        ('ground_ball_percent', 'ground_ball', 'bip'),
        ('line_drive_percent', 'line_drive', 'bip'),
        ('fly_ball_percent', 'fly_ball', 'bip'),
        ('pop_up_percent', 'pop_up', 'bip'),
        ('heart_zone_percent', 'heart', 'attack_zone'),
        ('shadow_zone_percent', 'shadow', 'attack_zone'),
        ('chase_zone_percent', 'chase', 'attack_zone'),
        ('waste_zone_percent', 'waste', 'attack_zone'),
        ('heart_zone_swing_percent', 'heart_swing', 'heart'),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
        ('chase_zone_swing_percent', 'chase_swing', 'chase'),
        ('waste_zone_swing_percent', 'waste_swing', 'waste'),
        ('xwoba_percent', 'xwoba', 'xwoba_codes'),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip'),
    ])

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
491
+
492
def df_summ_filter_out(df_summ=pd.DataFrame(),batter_select = 0,date_min=0):
    """Filter a summary to qualified batters and rank the selected batter.

    The plate-appearance cut-off is the smallest of: the selected batter's
    ``pa`` rounded down to a multiple of 10, 500, and 20 PA per full week
    elapsed between ``date_min`` and today.

    Args:
        df_summ: Summary frame whose first index level identifies the batter
            and which has a ``pa`` column.
        batter_select: Key of the batter in index level 0.
        date_min: Start date as ``datetime.date``; ``datetime.datetime`` and
            pandas ``Timestamp`` values are accepted and reduced to their date.

    Returns:
        Tuple ``(df_summ_filter, df_summ_filter_pct, df_summ_player,
        df_summ_player_pct)``: the qualified rows, their percentile ranks,
        the selected batter's rows and the selected batter's percentile ranks.
    """
    import datetime

    # ``date - datetime`` raises TypeError, so reduce datetimes to plain dates.
    start_day = date_min.date() if isinstance(date_min, datetime.datetime) else date_min
    weeks_elapsed = (datetime.date.today() - start_day).days // 7

    df_summ_player = df_summ.xs(batter_select, level=0)
    # Take the scalar explicitly instead of relying on the implicit
    # single-element Series -> float conversion inside math.floor.
    player_pa = df_summ_player['pa'].iloc[0]
    pa_cutoff = min(math.floor(player_pa / 10) * 10, 500, weeks_elapsed * 20)

    df_summ_filter = df_summ[df_summ['pa'] >= pa_cutoff]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter, df_summ_filter_pct, df_summ_player, df_summ_player_pct
506
+
507
def df_summ_batter_pitch_up(df=pd.DataFrame()):
    """Summarize counting and rate stats per batter and pitch category.

    Rows are grouped by ``batter_id``, ``batter_name`` and ``pitch_category``
    (rows with a missing category are dropped first). ``woba_contact`` and
    ``xwoba_contact`` are summed from their own source columns, matching the
    per-batter and league-average summaries.

    Each rate is ``numerator / denominator`` and is NaN wherever the
    denominator is zero.

    Args:
        df: Pitch-level frame carrying the columns produced by ``df_update``
            plus ``batter_id`` and ``batter_name``.

    Returns:
        DataFrame with one row per batter/pitch-category combination holding
        the counting columns followed by the rate columns.
    """
    # A bare string means "sum the column of the same name"; a tuple spells
    # out (output column, source column, aggregation).
    layout = [
        'pa', 'ab', ('obp_pa', 'obp', 'sum'), 'hits', 'on_base', 'k', 'bb',
        'bb_minus_k', 'csw', 'bip', 'bip_div', 'tb', 'woba', 'woba_contact',
        'xwoba', 'xwoba_contact', 'woba_codes', 'xwoba_codes', 'hard_hit',
        'barrel', 'sweet_spot',
        ('max_launch_speed', 'launch_speed', 'max'),
        ('launch_speed_90', 'launch_speed', percentile(90)),
        ('launch_speed', 'launch_speed', 'mean'),
        ('launch_angle', 'launch_angle', 'mean'),
        ('pitches', 'is_pitch', 'sum'),
        'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
        'zone_contact', 'ozone_swing', 'ozone_contact',
        ('ground_ball', 'trajectory_ground_ball', 'sum'),
        ('line_drive', 'trajectory_line_drive', 'sum'),
        ('fly_ball', 'trajectory_fly_ball', 'sum'),
        ('pop_up', 'trajectory_popup', 'sum'),
        ('attack_zone', 'attack_zone', 'count'),
        'heart', 'shadow', 'chase', 'waste',
        'heart_swing', 'shadow_swing', 'chase_swing', 'waste_swing',
    ]
    named_aggs = {}
    for entry in layout:
        if isinstance(entry, str):
            named_aggs[entry] = (entry, 'sum')
        else:
            target, source, how = entry
            named_aggs[target] = (source, how)

    df_summ_batter_pitch = (
        df.dropna(subset=['pitch_category'])
        .groupby(['batter_id', 'batter_name', 'pitch_category'])
        .agg(**named_aggs)
        .reset_index()
    )

    def rate(numerator, denominator):
        # Blank out zero denominators before dividing so nothing is ever
        # divided by zero, then normalize the result to float.
        return (numerator / denominator.where(denominator != 0)).astype(float)

    def add_rates(triples):
        for target, num_col, den_col in triples:
            df_summ_batter_pitch[target] = rate(df_summ_batter_pitch[num_col],
                                                df_summ_batter_pitch[den_col])

    add_rates([
        ('avg', 'hits', 'ab'),
        ('obp', 'on_base', 'obp_pa'),
        ('slg', 'tb', 'ab'),
    ])
    df_summ_batter_pitch['ops'] = df_summ_batter_pitch['obp'] + df_summ_batter_pitch['slg']

    add_rates([
        ('k_percent', 'k', 'pa'),
        ('bb_percent', 'bb', 'pa'),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa'),
        ('bb_over_k_percent', 'bb', 'k'),
        ('csw_percent', 'csw', 'pitches'),
        ('sweet_spot_percent', 'sweet_spot', 'bip_div'),
        ('woba_percent', 'woba', 'woba_codes'),
        ('woba_percent_contact', 'woba_contact', 'bip'),
        ('hard_hit_percent', 'hard_hit', 'bip_div'),
        ('barrel_percent', 'barrel', 'bip_div'),
        ('zone_contact_percent', 'zone_contact', 'zone_swing'),
        ('zone_swing_percent', 'zone_swing', 'in_zone'),
        ('zone_percent', 'in_zone', 'pitches'),
    ])

    # Chase rate: swings outside the zone over pitches outside the zone.
    df_summ_batter_pitch['chase_percent'] = rate(
        df_summ_batter_pitch['ozone_swing'],
        df_summ_batter_pitch['pitches'] - df_summ_batter_pitch['in_zone'])

    add_rates([
        ('chase_contact', 'ozone_contact', 'ozone_swing'),
        ('swing_percent', 'swings', 'pitches'),
        ('whiff_rate', 'whiffs', 'swings'),
        ('swstr_rate', 'whiffs', 'pitches'),
        ('heart_zone_percent', 'heart', 'attack_zone'),
        ('shadow_zone_percent', 'shadow', 'attack_zone'),
        ('chase_zone_percent', 'chase', 'attack_zone'),
        ('waste_zone_percent', 'waste', 'attack_zone'),
        ('heart_zone_swing_percent', 'heart_swing', 'heart'),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
        ('chase_zone_swing_percent', 'chase_swing', 'chase'),
        ('waste_zone_swing_percent', 'waste_swing', 'waste'),
        ('xwoba_percent', 'xwoba', 'xwoba_codes'),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip'),
    ])

    df_summ_batter_pitch['bip'] = df_summ_batter_pitch['bip'].fillna(0)

    return df_summ_batter_pitch