Spaces:
Runtime error
Runtime error
feat(formula1_databases): add performant `TelemetryAnalysisWithWeather` view
Browse files- notebooks/formula1_databases.py +48 -11
notebooks/formula1_databases.py
CHANGED
@@ -106,6 +106,7 @@ class FastF1ToSQL:
|
|
106 |
CREATE TABLE IF NOT EXISTS Telemetry (
|
107 |
telemetry_id INTEGER PRIMARY KEY,
|
108 |
lap_id INTEGER,
|
|
|
109 |
speed_in_km REAL,
|
110 |
RPM INTEGER,
|
111 |
gear_number INTEGER,
|
@@ -117,7 +118,8 @@ class FastF1ToSQL:
|
|
117 |
z_position REAL,
|
118 |
is_off_track BOOLEAN,
|
119 |
datetime DATETIME,
|
120 |
-
FOREIGN KEY (lap_id) REFERENCES Laps(lap_id)
|
|
|
121 |
);
|
122 |
|
123 |
CREATE INDEX IF NOT EXISTS idx_laps_driver_name ON Laps(driver_name);
|
@@ -187,6 +189,7 @@ class FastF1ToSQL:
|
|
187 |
placeholders = ', '.join(['?' for _ in event_data])
|
188 |
query = f"INSERT OR REPLACE INTO Event ({columns}) VALUES ({placeholders})"
|
189 |
self.cursor.execute(query, list(event_data.values()))
|
|
|
190 |
|
191 |
def insert_session(self, session: Session) -> None:
|
192 |
"""
|
@@ -197,7 +200,7 @@ class FastF1ToSQL:
|
|
197 |
"""
|
198 |
session_data: dict[str, Any] = {
|
199 |
# Assuming this is called right after insert_event
|
200 |
-
'event_id': self.
|
201 |
'track_id': self.get_or_create_track(session.event.Location, session.event.Country),
|
202 |
'session_type': session.name,
|
203 |
'date': str(session.date),
|
@@ -266,6 +269,7 @@ class FastF1ToSQL:
|
|
266 |
placeholders = ':' + ', :'.join(lap_data.keys())
|
267 |
query = f"INSERT INTO Laps ({columns}) VALUES ({placeholders})"
|
268 |
self.cursor.execute(query, lap_data)
|
|
|
269 |
|
270 |
def insert_telemetry(self, session: Session) -> None:
|
271 |
"""
|
@@ -279,16 +283,31 @@ class FastF1ToSQL:
|
|
279 |
|
280 |
for driver in session.drivers:
|
281 |
laps_per_driver = session.laps.pick_driver(driver)
|
|
|
|
|
282 |
|
283 |
for _, lap in laps_per_driver.iterrows():
|
284 |
lap_number = lap['LapNumber']
|
285 |
telemetry = lap.get_telemetry()
|
286 |
telemetry['datetime'] = self._session_start_date + \
|
287 |
-
telemetry['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
-
|
|
|
|
|
|
|
|
|
290 |
telemetry_data: dict[str, Any] = {
|
291 |
-
'lap_id':
|
|
|
292 |
'speed_in_km': sample['Speed'],
|
293 |
'RPM': sample['RPM'],
|
294 |
'gear_number': sample['nGear'],
|
@@ -360,7 +379,7 @@ class FastF1ToSQL:
|
|
360 |
"INSERT INTO Tracks (track_name, country) VALUES (?, ?)", (track_name, country))
|
361 |
return self.cursor.lastrowid or 0
|
362 |
|
363 |
-
def
|
364 |
"""
|
365 |
Get the lap_id for a given driver and time.
|
366 |
|
@@ -372,14 +391,27 @@ class FastF1ToSQL:
|
|
372 |
Returns:
|
373 |
int: The lap_id of the found lap.
|
374 |
"""
|
375 |
-
|
376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
377 |
|
378 |
if self._session_id is None:
|
379 |
raise ValueError("No ID was generated")
|
380 |
|
381 |
self.cursor.execute("SELECT lap_id FROM Laps WHERE session_id = ? AND driver_name = ? AND lap_number = ?",
|
382 |
-
(self._session_id,
|
383 |
return self.cursor.fetchone()[0]
|
384 |
|
385 |
def create_data_analysis_views(self) -> None:
|
@@ -477,8 +509,13 @@ class FastF1ToSQL:
|
|
477 |
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
|
478 |
GROUP BY e.event_id, s.session_id;
|
479 |
|
480 |
-
-- 5. Telemetry Analysis with Weather
|
481 |
CREATE VIEW IF NOT EXISTS TelemetryAnalysisWithWeather AS
|
|
|
|
|
|
|
|
|
|
|
482 |
SELECT
|
483 |
l.lap_id,
|
484 |
l.driver_name,
|
@@ -502,7 +539,7 @@ class FastF1ToSQL:
|
|
502 |
JOIN Sessions s ON l.session_id = s.session_id
|
503 |
JOIN Tracks t ON s.track_id = t.track_id
|
504 |
JOIN Event e ON s.event_id = e.event_id
|
505 |
-
JOIN
|
506 |
LEFT JOIN Weather w ON s.session_id = w.session_id
|
507 |
AND tel.datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
|
508 |
GROUP BY l.lap_id;
|
|
|
106 |
CREATE TABLE IF NOT EXISTS Telemetry (
|
107 |
telemetry_id INTEGER PRIMARY KEY,
|
108 |
lap_id INTEGER,
|
109 |
+
driver_name TEXT NOT NULL,
|
110 |
speed_in_km REAL,
|
111 |
RPM INTEGER,
|
112 |
gear_number INTEGER,
|
|
|
118 |
z_position REAL,
|
119 |
is_off_track BOOLEAN,
|
120 |
datetime DATETIME,
|
121 |
+
FOREIGN KEY (lap_id) REFERENCES Laps(lap_id),
|
122 |
+
FOREIGN KEY (driver_name) REFERENCES Drivers(driver_name)
|
123 |
);
|
124 |
|
125 |
CREATE INDEX IF NOT EXISTS idx_laps_driver_name ON Laps(driver_name);
|
|
|
189 |
placeholders = ', '.join(['?' for _ in event_data])
|
190 |
query = f"INSERT OR REPLACE INTO Event ({columns}) VALUES ({placeholders})"
|
191 |
self.cursor.execute(query, list(event_data.values()))
|
192 |
+
self._event_id = self.cursor.lastrowid
|
193 |
|
194 |
def insert_session(self, session: Session) -> None:
|
195 |
"""
|
|
|
200 |
"""
|
201 |
session_data: dict[str, Any] = {
|
202 |
# Assuming this is called right after insert_event
|
203 |
+
'event_id': self._event_id,
|
204 |
'track_id': self.get_or_create_track(session.event.Location, session.event.Country),
|
205 |
'session_type': session.name,
|
206 |
'date': str(session.date),
|
|
|
269 |
placeholders = ':' + ', :'.join(lap_data.keys())
|
270 |
query = f"INSERT INTO Laps ({columns}) VALUES ({placeholders})"
|
271 |
self.cursor.execute(query, lap_data)
|
272 |
+
self.conn.commit()
|
273 |
|
274 |
def insert_telemetry(self, session: Session) -> None:
|
275 |
"""
|
|
|
283 |
|
284 |
for driver in session.drivers:
|
285 |
laps_per_driver = session.laps.pick_driver(driver)
|
286 |
+
driver_name = session.get_driver(driver)['Abbreviation']
|
287 |
+
console.print(f"> Processing telemetry for driver: {driver_name}")
|
288 |
|
289 |
for _, lap in laps_per_driver.iterrows():
|
290 |
lap_number = lap['LapNumber']
|
291 |
telemetry = lap.get_telemetry()
|
292 |
telemetry['datetime'] = self._session_start_date + \
|
293 |
+
telemetry['SessionTime']
|
294 |
+
|
295 |
+
# Sort telemetry data by datetime
|
296 |
+
telemetry_sorted = telemetry.sort_values('datetime')
|
297 |
+
|
298 |
+
# Floor 'datetime' to 0.1-second bins (the bin width is hard-coded in the lambda below)
|
299 |
+
telemetry_sorted['floored_datetime'] = telemetry_sorted['datetime'].apply(
|
300 |
+
lambda x: x.floor(f'{0.1}s')
|
301 |
+
)
|
302 |
|
303 |
+
# Keep only the first occurrence for each floored_datetime
|
304 |
+
telemetry_unique = telemetry_sorted.groupby(
|
305 |
+
'floored_datetime', as_index=False).first()
|
306 |
+
|
307 |
+
for _, sample in telemetry_unique.iterrows():
|
308 |
telemetry_data: dict[str, Any] = {
|
309 |
+
'lap_id': self.__get_lap_id(session, driver_name, sample['datetime']),
|
310 |
+
'driver_name': driver_name,
|
311 |
'speed_in_km': sample['Speed'],
|
312 |
'RPM': sample['RPM'],
|
313 |
'gear_number': sample['nGear'],
|
|
|
379 |
"INSERT INTO Tracks (track_name, country) VALUES (?, ?)", (track_name, country))
|
380 |
return self.cursor.lastrowid or 0
|
381 |
|
382 |
+
def __get_lap_id(self, session: Session, driver_name: str, time: datetime) -> int:
|
383 |
"""
|
384 |
Get the lap_id for a given driver and time.
|
385 |
|
|
|
391 |
Returns:
|
392 |
int: The lap_id of the found lap.
|
393 |
"""
|
394 |
+
|
395 |
+
laps = session.laps.pick_driver(driver_name).copy()
|
396 |
+
# Convert LapStartDate to pd.Timestamp for proper comparison
|
397 |
+
laps['LapStartTime'] = pd.to_datetime(laps['LapStartDate'])
|
398 |
+
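# Find the lap whose LapStartTime is <= time and whose next lap's LapStartTime is > time. NOTE(review): the driver's last lap has no next lap (shift(-1) yields a missing value), so it appears never to match - confirm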
|
399 |
+
matching_laps = laps.loc[(laps['LapStartTime'] <= time) & (
|
400 |
+
laps['LapStartTime'].shift(-1) > time)]
|
401 |
+
|
402 |
+
if matching_laps.empty:
|
403 |
+
# Handle the case when no matching lap is found
|
404 |
+
print(
|
405 |
+
f"No matching lap found for driver {driver_name} at time {time}")
|
406 |
+
return 999 # or some default value, or raise a custom exception
|
407 |
+
|
408 |
+
lap = matching_laps.iloc[0]
|
409 |
|
410 |
if self._session_id is None:
|
411 |
raise ValueError("No ID was generated")
|
412 |
|
413 |
self.cursor.execute("SELECT lap_id FROM Laps WHERE session_id = ? AND driver_name = ? AND lap_number = ?",
|
414 |
+
(self._session_id, driver_name, lap['LapNumber']))
|
415 |
return self.cursor.fetchone()[0]
|
416 |
|
417 |
def create_data_analysis_views(self) -> None:
|
|
|
509 |
AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
|
510 |
GROUP BY e.event_id, s.session_id;
|
511 |
|
512 |
+
-- 5. Telemetry Analysis with Weather (Optimized: uses a random sample of at most 100 telemetry rows per lap)
|
513 |
CREATE VIEW IF NOT EXISTS TelemetryAnalysisWithWeather AS
|
514 |
+
WITH SampledTelemetry AS (
|
515 |
+
SELECT *,
|
516 |
+
ROW_NUMBER() OVER (PARTITION BY lap_id ORDER BY RANDOM()) as rn
|
517 |
+
FROM Telemetry
|
518 |
+
)
|
519 |
SELECT
|
520 |
l.lap_id,
|
521 |
l.driver_name,
|
|
|
539 |
JOIN Sessions s ON l.session_id = s.session_id
|
540 |
JOIN Tracks t ON s.track_id = t.track_id
|
541 |
JOIN Event e ON s.event_id = e.event_id
|
542 |
+
JOIN SampledTelemetry tel ON l.lap_id = tel.lap_id AND tel.rn <= 100
|
543 |
LEFT JOIN Weather w ON s.session_id = w.session_id
|
544 |
AND tel.datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
|
545 |
GROUP BY l.lap_id;
|