Upload stuff_model/feature_engineering.py with huggingface_hub
Browse files
stuff_model/feature_engineering.py
CHANGED
@@ -7,6 +7,29 @@ def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
|
|
7 |
pl.col('game_date').str.slice(0, 4).alias('year')
|
8 |
)
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
df = df.with_columns([
|
11 |
|
12 |
(-(pl.col('vy0')**2 - (2 * pl.col('ay') * (pl.col('y0') - 17/12)))**0.5).alias('vy_f'),
|
@@ -69,7 +92,7 @@ def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
|
|
69 |
df_agg = df_agg.unique(subset=['pitcher_id', 'year'], keep='first')
|
70 |
|
71 |
# Join the aggregated data with the main DataFrame
|
72 |
-
df = df.join(df_agg, on=['pitcher_id', 'year'])
|
73 |
|
74 |
# If no fastball, use the fastest pitch for avg_fastball_speed
|
75 |
df = df.with_columns(
|
@@ -90,7 +113,7 @@ def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
|
|
90 |
# If no fastball, use the fastest pitch for avg_fastball_ax
|
91 |
df = df.with_columns(
|
92 |
pl.when(pl.col('avg_fastball_ax').is_null())
|
93 |
-
.then(pl.col('ax').max().over('
|
94 |
.otherwise(pl.col('avg_fastball_ax'))
|
95 |
.alias('avg_fastball_ax')
|
96 |
)
|
@@ -113,27 +136,8 @@ def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
|
|
113 |
pl.lit('All').alias('all')
|
114 |
])
|
115 |
|
116 |
-
# Calculate mound_to_release as 60.5 - extension
|
117 |
-
df = df.with_columns([
|
118 |
-
(60.5 - df["extension"]).alias("release_pos_y")
|
119 |
-
])
|
120 |
-
|
121 |
-
# Calculate delta time (Δt)
|
122 |
-
delta_t = (df["release_pos_y"] - df["y0"]) / df["vy0"]
|
123 |
-
# print((df["vx0"] * delta_t + 0.5 * df["ax"] * delta_t ** 2))
|
124 |
-
# Corrected back-calculation of release_pos_x and release_pos_z
|
125 |
|
126 |
-
|
127 |
-
df = df.with_columns(
|
128 |
-
pl.when(pl.col('pitcher_hand')== 'R')
|
129 |
-
.then(df["x0"] - df["vx0"] * delta_t - 0.5 * df["ax"] * delta_t ** 2)
|
130 |
-
.otherwise(df["x0"] + df["vx0"] * delta_t - 0.5 * df["ax"] * delta_t ** 2)
|
131 |
-
.alias('release_pos_x')
|
132 |
-
)
|
133 |
|
134 |
-
df = df.with_columns([
|
135 |
-
(df["z0"] + df["vz0"] * delta_t + 0.5 * df["az"] * delta_t ** 2).alias("release_pos_z")
|
136 |
-
])
|
137 |
|
138 |
|
139 |
|
|
|
7 |
pl.col('game_date').str.slice(0, 4).alias('year')
|
8 |
)
|
9 |
|
10 |
+
# Calculate release_pos_y as 60.5 - extension
|
11 |
+
df = df.with_columns([
|
12 |
+
(60.5 - df["extension"]).alias("release_pos_y")
|
13 |
+
])
|
14 |
+
|
15 |
+
# Calculate delta time (Δt)
|
16 |
+
delta_t = (df["release_pos_y"] - df["y0"]) / df["vy0"]
|
17 |
+
# print((df["vx0"] * delta_t + 0.5 * df["ax"] * delta_t ** 2))
|
18 |
+
# Corrected back-calculation of release_pos_x and release_pos_z
|
19 |
+
|
20 |
+
|
21 |
+
df = df.with_columns(
|
22 |
+
pl.when(pl.col('pitcher_hand')== 'R')
|
23 |
+
.then((df["x0"] + df["vx0"] * delta_t + 0.5 * df["ax"] * delta_t ** 2)*-1)
|
24 |
+
.otherwise(df["x0"] + df["vx0"] * delta_t + 0.5 * df["ax"] * delta_t ** 2)
|
25 |
+
.alias('release_pos_x')
|
26 |
+
)
|
27 |
+
|
28 |
+
df = df.with_columns([
|
29 |
+
(df["z0"] + df["vz0"] * delta_t + 0.5 * df["az"] * delta_t ** 2).alias("release_pos_z")
|
30 |
+
])
|
31 |
+
|
32 |
+
|
33 |
df = df.with_columns([
|
34 |
|
35 |
(-(pl.col('vy0')**2 - (2 * pl.col('ay') * (pl.col('y0') - 17/12)))**0.5).alias('vy_f'),
|
|
|
92 |
df_agg = df_agg.unique(subset=['pitcher_id', 'year'], keep='first')
|
93 |
|
94 |
# Join the aggregated data with the main DataFrame
|
95 |
+
df = df.join(df_agg, on=['pitcher_id', 'year'],how='left')
|
96 |
|
97 |
# If no fastball, use the fastest pitch for avg_fastball_speed
|
98 |
df = df.with_columns(
|
|
|
113 |
# If avg_fastball_ax is null (no fastball), fall back to the maximum ax per pitcher_id
|
114 |
df = df.with_columns(
|
115 |
pl.when(pl.col('avg_fastball_ax').is_null())
|
116 |
+
.then(pl.col('ax').max().over('pitcher_id'))
|
117 |
.otherwise(pl.col('avg_fastball_ax'))
|
118 |
.alias('avg_fastball_ax')
|
119 |
)
|
|
|
136 |
pl.lit('All').alias('all')
|
137 |
])
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
|
|
|
|
|
|
141 |
|
142 |
|
143 |
|