|
from typing import Dict, List |
|
|
|
from collections import defaultdict |
|
|
|
from lightning.pytorch.callbacks import Callback |
|
|
|
from relik.common.log import get_logger |
|
|
|
import os |
|
|
|
logger = get_logger() |
|
|
|
class ShuffleTrainCallback(Callback):
    """Shuffle the lines of the training data file on disk between epochs.

    At the end of every ``shuffle_every``-th training epoch the file at
    ``data_path`` is passed through the external ``shuf`` utility and the
    shuffled output replaces the original file.

    Args:
        shuffle_every: Shuffle when ``(current_epoch + 1)`` is a multiple of
            this value.
        data_path: Path of the file whose lines are shuffled in place. When
            ``None`` the callback logs a warning and does nothing.
    """

    def __init__(self, shuffle_every: int = 1, data_path: str = None):
        super().__init__()
        self.shuffle_every = shuffle_every
        self.data_path = data_path

    def on_train_epoch_end(self, trainer, pl_module):
        """Shuffle the training file if the finished epoch is due for it.

        A failed shuffle is logged and leaves the original file untouched;
        it never raises, so shuffling stays best-effort.

        Args:
            trainer: Object exposing ``current_epoch`` (the index of the
                epoch that just ended).
            pl_module: Unused; part of the callback hook signature.
        """
        # Local import so the block does not depend on a file-level import.
        import subprocess

        if (trainer.current_epoch + 1) % self.shuffle_every != 0:
            return

        if self.data_path is None:
            # Without a path there is nothing to shuffle; interpolating None
            # into a command would create a stray file literally named "None".
            logger.warning("No data_path provided, skipping train dataset shuffle")
            return

        logger.info("Shuffling train dataset")

        # The previous shell-based version expanded a leading "~"; keep that.
        data_path = os.path.expanduser(os.fspath(self.data_path))
        shuffled_path = f"{data_path}.shuf"

        try:
            with open(shuffled_path, "wb") as shuffled_file:
                # Argument list (no shell) so spaces or metacharacters in the
                # path cannot break or inject commands; "--" guards against
                # paths that start with a dash.
                subprocess.run(
                    ["shuf", "--", data_path],
                    stdout=shuffled_file,
                    check=True,
                )
        except (OSError, subprocess.CalledProcessError) as err:
            # Never move a partial/empty output over the real data.
            logger.error(f"Failed to shuffle train dataset {data_path}: {err}")
            try:
                os.remove(shuffled_path)
            except OSError:
                pass
            return

        # Swap the shuffled file in only after shuf succeeded.
        os.replace(shuffled_path, data_path)