Draichi committed on
Commit
6f2a622
·
unverified ·
1 Parent(s): 6eb3b0c

feat(formula1_databases): add performant `TelemetryAnalysisWithWeather` view

Browse files
Files changed (1) hide show
  1. notebooks/formula1_databases.py +48 -11
notebooks/formula1_databases.py CHANGED
@@ -106,6 +106,7 @@ class FastF1ToSQL:
106
  CREATE TABLE IF NOT EXISTS Telemetry (
107
  telemetry_id INTEGER PRIMARY KEY,
108
  lap_id INTEGER,
 
109
  speed_in_km REAL,
110
  RPM INTEGER,
111
  gear_number INTEGER,
@@ -117,7 +118,8 @@ class FastF1ToSQL:
117
  z_position REAL,
118
  is_off_track BOOLEAN,
119
  datetime DATETIME,
120
- FOREIGN KEY (lap_id) REFERENCES Laps(lap_id)
 
121
  );
122
 
123
  CREATE INDEX IF NOT EXISTS idx_laps_driver_name ON Laps(driver_name);
@@ -187,6 +189,7 @@ class FastF1ToSQL:
187
  placeholders = ', '.join(['?' for _ in event_data])
188
  query = f"INSERT OR REPLACE INTO Event ({columns}) VALUES ({placeholders})"
189
  self.cursor.execute(query, list(event_data.values()))
 
190
 
191
  def insert_session(self, session: Session) -> None:
192
  """
@@ -197,7 +200,7 @@ class FastF1ToSQL:
197
  """
198
  session_data: dict[str, Any] = {
199
  # Assuming this is called right after insert_event
200
- 'event_id': self.cursor.lastrowid,
201
  'track_id': self.get_or_create_track(session.event.Location, session.event.Country),
202
  'session_type': session.name,
203
  'date': str(session.date),
@@ -266,6 +269,7 @@ class FastF1ToSQL:
266
  placeholders = ':' + ', :'.join(lap_data.keys())
267
  query = f"INSERT INTO Laps ({columns}) VALUES ({placeholders})"
268
  self.cursor.execute(query, lap_data)
 
269
 
270
  def insert_telemetry(self, session: Session) -> None:
271
  """
@@ -279,16 +283,31 @@ class FastF1ToSQL:
279
 
280
  for driver in session.drivers:
281
  laps_per_driver = session.laps.pick_driver(driver)
 
 
282
 
283
  for _, lap in laps_per_driver.iterrows():
284
  lap_number = lap['LapNumber']
285
  telemetry = lap.get_telemetry()
286
  telemetry['datetime'] = self._session_start_date + \
287
- telemetry['Time']
 
 
 
 
 
 
 
 
288
 
289
- for _, sample in telemetry.iterrows():
 
 
 
 
290
  telemetry_data: dict[str, Any] = {
291
- 'lap_id': lap_number,
 
292
  'speed_in_km': sample['Speed'],
293
  'RPM': sample['RPM'],
294
  'gear_number': sample['nGear'],
@@ -360,7 +379,7 @@ class FastF1ToSQL:
360
  "INSERT INTO Tracks (track_name, country) VALUES (?, ?)", (track_name, country))
361
  return self.cursor.lastrowid or 0
362
 
363
- def get_lap_id(self, session: Session, driver: str, time: datetime) -> int:
364
  """
365
  Get the lap_id for a given driver and time.
366
 
@@ -372,14 +391,27 @@ class FastF1ToSQL:
372
  Returns:
373
  int: The lap_id of the found lap.
374
  """
375
- laps = session.laps.pick_driver(driver)
376
- lap = laps.loc[laps['LapStartTime'] <= time].iloc[-1]
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
  if self._session_id is None:
379
  raise ValueError("No ID was generated")
380
 
381
  self.cursor.execute("SELECT lap_id FROM Laps WHERE session_id = ? AND driver_name = ? AND lap_number = ?",
382
- (self._session_id, driver, lap['LapNumber']))
383
  return self.cursor.fetchone()[0]
384
 
385
  def create_data_analysis_views(self) -> None:
@@ -477,8 +509,13 @@ class FastF1ToSQL:
477
  AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
478
  GROUP BY e.event_id, s.session_id;
479
 
480
- -- 5. Telemetry Analysis with Weather
481
  CREATE VIEW IF NOT EXISTS TelemetryAnalysisWithWeather AS
 
 
 
 
 
482
  SELECT
483
  l.lap_id,
484
  l.driver_name,
@@ -502,7 +539,7 @@ class FastF1ToSQL:
502
  JOIN Sessions s ON l.session_id = s.session_id
503
  JOIN Tracks t ON s.track_id = t.track_id
504
  JOIN Event e ON s.event_id = e.event_id
505
- JOIN Telemetry tel ON l.lap_id = tel.lap_id
506
  LEFT JOIN Weather w ON s.session_id = w.session_id
507
  AND tel.datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
508
  GROUP BY l.lap_id;
 
106
  CREATE TABLE IF NOT EXISTS Telemetry (
107
  telemetry_id INTEGER PRIMARY KEY,
108
  lap_id INTEGER,
109
+ driver_name TEXT NOT NULL,
110
  speed_in_km REAL,
111
  RPM INTEGER,
112
  gear_number INTEGER,
 
118
  z_position REAL,
119
  is_off_track BOOLEAN,
120
  datetime DATETIME,
121
+ FOREIGN KEY (lap_id) REFERENCES Laps(lap_id),
122
+ FOREIGN KEY (driver_name) REFERENCES Drivers(driver_name)
123
  );
124
 
125
  CREATE INDEX IF NOT EXISTS idx_laps_driver_name ON Laps(driver_name);
 
189
  placeholders = ', '.join(['?' for _ in event_data])
190
  query = f"INSERT OR REPLACE INTO Event ({columns}) VALUES ({placeholders})"
191
  self.cursor.execute(query, list(event_data.values()))
192
+ self._event_id = self.cursor.lastrowid
193
 
194
  def insert_session(self, session: Session) -> None:
195
  """
 
200
  """
201
  session_data: dict[str, Any] = {
202
  # Assuming this is called right after insert_event
203
+ 'event_id': self._event_id,
204
  'track_id': self.get_or_create_track(session.event.Location, session.event.Country),
205
  'session_type': session.name,
206
  'date': str(session.date),
 
269
  placeholders = ':' + ', :'.join(lap_data.keys())
270
  query = f"INSERT INTO Laps ({columns}) VALUES ({placeholders})"
271
  self.cursor.execute(query, lap_data)
272
+ self.conn.commit()
273
 
274
  def insert_telemetry(self, session: Session) -> None:
275
  """
 
283
 
284
  for driver in session.drivers:
285
  laps_per_driver = session.laps.pick_driver(driver)
286
+ driver_name = session.get_driver(driver)['Abbreviation']
287
+ console.print(f"> Processing telemetry for driver: {driver_name}")
288
 
289
  for _, lap in laps_per_driver.iterrows():
290
  lap_number = lap['LapNumber']
291
  telemetry = lap.get_telemetry()
292
  telemetry['datetime'] = self._session_start_date + \
293
+ telemetry['SessionTime']
294
+
295
+ # Sort telemetry data by datetime
296
+ telemetry_sorted = telemetry.sort_values('datetime')
297
+
298
+ # Floor the 'datetime' to the specified decimal of a second
299
+ telemetry_sorted['floored_datetime'] = telemetry_sorted['datetime'].apply(
300
+ lambda x: x.floor(f'{0.1}s')
301
+ )
302
 
303
+ # Keep only the first occurrence for each floored_datetime
304
+ telemetry_unique = telemetry_sorted.groupby(
305
+ 'floored_datetime', as_index=False).first()
306
+
307
+ for _, sample in telemetry_unique.iterrows():
308
  telemetry_data: dict[str, Any] = {
309
+ 'lap_id': self.__get_lap_id(session, driver_name, sample['datetime']),
310
+ 'driver_name': driver_name,
311
  'speed_in_km': sample['Speed'],
312
  'RPM': sample['RPM'],
313
  'gear_number': sample['nGear'],
 
379
  "INSERT INTO Tracks (track_name, country) VALUES (?, ?)", (track_name, country))
380
  return self.cursor.lastrowid or 0
381
 
382
+ def __get_lap_id(self, session: Session, driver_name: str, time: datetime) -> int:
383
  """
384
  Get the lap_id for a given driver and time.
385
 
 
391
  Returns:
392
  int: The lap_id of the found lap.
393
  """
394
+
395
+ laps = session.laps.pick_driver(driver_name).copy()
396
+ # Convert LapStartDate to pd.Timestamp for proper comparison
397
+ laps['LapStartTime'] = pd.to_datetime(laps['LapStartDate'])
398
+ # Find the lap where the given time falls between LapStartTime and LapStartTime of the next lap
399
+ matching_laps = laps.loc[(laps['LapStartTime'] <= time) & (
400
+ laps['LapStartTime'].shift(-1) > time)]
401
+
402
+ if matching_laps.empty:
403
+ # Handle the case when no matching lap is found
404
+ print(
405
+ f"No matching lap found for driver {driver_name} at time {time}")
406
+ return 999 # or some default value, or raise a custom exception
407
+
408
+ lap = matching_laps.iloc[0]
409
 
410
  if self._session_id is None:
411
  raise ValueError("No ID was generated")
412
 
413
  self.cursor.execute("SELECT lap_id FROM Laps WHERE session_id = ? AND driver_name = ? AND lap_number = ?",
414
+ (self._session_id, driver_name, lap['LapNumber']))
415
  return self.cursor.fetchone()[0]
416
 
417
  def create_data_analysis_views(self) -> None:
 
509
  AND l.lap_start_time_in_datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
510
  GROUP BY e.event_id, s.session_id;
511
 
512
+ -- 5. Telemetry Analysis with Weather (Optimized)
513
  CREATE VIEW IF NOT EXISTS TelemetryAnalysisWithWeather AS
514
+ WITH SampledTelemetry AS (
515
+ SELECT *,
516
+ ROW_NUMBER() OVER (PARTITION BY lap_id ORDER BY RANDOM()) as rn
517
+ FROM Telemetry
518
+ )
519
  SELECT
520
  l.lap_id,
521
  l.driver_name,
 
539
  JOIN Sessions s ON l.session_id = s.session_id
540
  JOIN Tracks t ON s.track_id = t.track_id
541
  JOIN Event e ON s.event_id = e.event_id
542
+ JOIN SampledTelemetry tel ON l.lap_id = tel.lap_id AND tel.rn <= 100
543
  LEFT JOIN Weather w ON s.session_id = w.session_id
544
  AND tel.datetime BETWEEN w.datetime AND datetime(w.datetime, '+1 minutes')
545
  GROUP BY l.lap_id;