Spaces:

bhimrazy
/

diabetic-retinopathy-detection

Running

App Files Files Community

bhimrazy committed on Mar 28

Commit

c118196

•

1 Parent(s): e5d6e03

Refactors dataset and datamodules

Browse files

Files changed (2) hide show

src/data_module.py +117 -0
src/dataset.py +1 -79

src/data_module.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import lightning as L
+import numpy as np
+import torch
+from sklearn.utils.class_weight import compute_class_weight
+from torch.utils.data import DataLoader, WeightedRandomSampler
+from torchvision.transforms import v2 as T
+from src.dataset import DRDataset
+class DRDataModule(L.LightningDataModule):
+    """Lightning data module wrapping the train/val ``DRDataset`` splits.
+
+    Builds one ``DRDataset`` per CSV, applies augmentation to the training
+    split only, and can optionally counter class imbalance either by exposing
+    balanced class weights (``self.class_weights``) or by drawing training
+    samples through a ``WeightedRandomSampler``. The two options are mutually
+    exclusive.
+
+    Args:
+        train_csv_path: Path of the training CSV handed to ``DRDataset``.
+        val_csv_path: Path of the validation CSV handed to ``DRDataset``.
+        image_size: Side length of the square images produced by ``T.Resize``.
+        batch_size: Batch size used by both dataloaders.
+        num_workers: ``num_workers`` value used by both dataloaders.
+        use_class_weighting: When True, ``setup`` stores balanced class
+            weights in ``self.class_weights``; otherwise it stores ``None``.
+        use_weighted_sampler: When True, ``train_dataloader`` samples with a
+            ``WeightedRandomSampler`` instead of plain shuffling.
+
+    Raises:
+        ValueError: If ``use_class_weighting`` and ``use_weighted_sampler``
+            are both True.
+    """
+
+    def __init__(
+        self,
+        train_csv_path,
+        val_csv_path,
+        image_size: int = 224,
+        batch_size: int = 8,
+        num_workers: int = 4,
+        use_class_weighting: bool = False,
+        use_weighted_sampler: bool = False,
+    ):
+        super().__init__()
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        # Ensure mutual exclusivity between use_class_weighting and use_weighted_sampler
+        if use_class_weighting and use_weighted_sampler:
+            raise ValueError(
+                "use_class_weighting and use_weighted_sampler cannot both be True"
+            )
+        self.train_csv_path = train_csv_path
+        self.val_csv_path = val_csv_path
+        self.use_class_weighting = use_class_weighting
+        self.use_weighted_sampler = use_weighted_sampler
+        # Training pipeline: resize, light geometric/colour augmentation, then
+        # conversion to scaled float32 and per-channel normalisation.
+        # NOTE(review): mean/std look like the usual ImageNet statistics;
+        # confirm they match the backbone's pre-training.
+        self.train_transform = T.Compose(
+            [
+                T.Resize((image_size, image_size), antialias=True),
+                T.RandomAffine(degrees=10, translate=(0.01, 0.01), scale=(0.99, 1.01)),
+                T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.01),
+                T.RandomHorizontalFlip(p=0.5),
+                T.ToDtype(torch.float32, scale=True),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        )
+        # Validation pipeline: same resize/conversion/normalisation, but no
+        # random augmentation.
+        self.val_transform = T.Compose(
+            [
+                T.Resize((image_size, image_size), antialias=True),
+                T.ToDtype(torch.float32, scale=True),
+                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+            ]
+        )
+
+    def setup(self, stage=None):
+        """Create the train/val datasets and derive class statistics.
+
+        ``stage`` is accepted but not used: both datasets are always built.
+        Sets ``self.num_classes`` to the number of distinct labels found in
+        the training dataset and ``self.class_weights`` to the balanced class
+        weights (or ``None`` when class weighting is disabled).
+        """
+        self.train_dataset = DRDataset(
+            self.train_csv_path, transform=self.train_transform
+        )
+        self.val_dataset = DRDataset(self.val_csv_path, transform=self.val_transform)
+        # assumes DRDataset.labels is a tensor (it is converted with .numpy())
+        labels = self.train_dataset.labels.numpy()
+        # Counted from the training labels only, so a class that never occurs
+        # in the training CSV is not included.
+        self.num_classes = len(np.unique(labels))
+        self.class_weights = (
+            self._compute_class_weights(labels) if self.use_class_weighting else None
+        )
+
+    def train_dataloader(self):
+        """Return the training DataLoader.
+
+        Uses the weighted sampler when ``use_weighted_sampler`` is set;
+        otherwise shuffles the dataset.
+        """
+        sampler = None
+        if self.use_weighted_sampler:
+            sampler = self._get_weighted_sampler(self.train_dataset.labels.numpy())
+        return DataLoader(
+            self.train_dataset,
+            batch_size=self.batch_size,
+            sampler=sampler,
+            # A sampler already randomises the order; shuffle only without one.
+            shuffle=sampler is None,
+            num_workers=self.num_workers,
+        )
+
+    def val_dataloader(self):
+        """Return the validation DataLoader (no sampler, no shuffling)."""
+        return DataLoader(
+            self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers
+        )
+
+    def _compute_class_weights(self, labels):
+        """Return scikit-learn "balanced" class weights as a float32 tensor.
+
+        The tensor has one entry per distinct value in ``labels``, ordered as
+        in ``np.unique(labels)``.
+        """
+        class_weights = compute_class_weight(
+            class_weight="balanced", classes=np.unique(labels), y=labels
+        )
+        return torch.tensor(class_weights, dtype=torch.float32)
+
+    def _get_weighted_sampler(self, labels: np.ndarray) -> WeightedRandomSampler:
+        """Return a WeightedRandomSampler that balances classes.
+
+        Each sample is weighted by the inverse of its class frequency, so the
+        weights tensor holds one weight per sample, not one per class.
+        Have a look at this post for an example: https://discuss.pytorch.org/t/how-to-handle-imbalanced-classes/11264/2
+        https://www.maskaravivek.com/post/pytorch-weighted-random-sampler/
+
+        Args:
+            labels: 1-D array with the class label of every training sample.
+
+        Returns:
+            A sampler drawing ``len(labels)`` samples with replacement.
+        """
+        # ``inverse`` maps every sample to the position of its class in the
+        # sorted unique labels, so the per-class counts are looked up by
+        # position rather than by raw label value. This stays correct when the
+        # labels are not exactly 0..K-1 (e.g. a class absent from the CSV).
+        _, inverse, counts = np.unique(
+            labels, return_inverse=True, return_counts=True
+        )
+        samples_weight = torch.from_numpy(1.0 / counts[inverse])
+        return WeightedRandomSampler(
+            weights=samples_weight, num_samples=len(labels), replacement=True
+        )

src/dataset.py CHANGED Viewed

@@ -1,13 +1,9 @@
 import os
-import lightning as L
-import numpy as np
 import pandas as pd
 import torch
-from sklearn.utils.class_weight import compute_class_weight
-from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
 from torchvision.io import read_image
-from torchvision.transforms import v2 as T
 class DRDataset(Dataset):
@@ -68,77 +64,3 @@ class DRDataset(Dataset):
         return image, label
-class DRDataModule(L.LightningDataModule):
-    def __init__(self, batch_size: int = 8, num_workers: int = 4):
-        super().__init__()
-        self.batch_size = batch_size
-        self.num_workers = num_workers
-        # Define the transformations
-        self.train_transform = T.Compose(
-            [
-                T.Resize((224, 224), antialias=True),
-                T.RandomAffine(degrees=10, translate=(0.01, 0.01), scale=(0.99, 1.01)),
-                T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.01),
-                T.RandomHorizontalFlip(p=0.5),
-                T.ToDtype(torch.float32, scale=True),
-                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-            ]
-        )
-        self.val_transform = T.Compose(
-            [
-                T.Resize((224, 224), antialias=True),
-                T.ToDtype(torch.float32, scale=True),
-                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-            ]
-        )
-        self.num_classes = 5
-    def setup(self, stage=None):
-        self.train_dataset = DRDataset("data/train.csv", transform=self.train_transform)
-        self.val_dataset = DRDataset("data/val.csv", transform=self.val_transform)
-        # compute class weights
-        labels = self.train_dataset.labels.numpy()
-        self.class_weights = None # self.compute_class_weights(labels)
-    def train_dataloader(self):
-        return DataLoader(
-            self.train_dataset,
-            batch_size=self.batch_size,
-            sampler=self._get_weighted_sampler(self.train_dataset.labels.numpy()),
-            # shuffle=True,
-            num_workers=self.num_workers,
-        )
-    def val_dataloader(self):
-        return DataLoader(
-            self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers
-        )
-    def compute_class_weights(self, labels):
-        class_weights = compute_class_weight(
-            class_weight="balanced", classes=np.unique(labels), y=labels
-        )
-        return torch.tensor(class_weights, dtype=torch.float32)
-    def _get_weighted_sampler(self, labels: np.ndarray) -> WeightedRandomSampler:
-        """Returns a WeightedRandomSampler based on class weights.
-        The weights tensor should contain a weight for each sample, not the class weights.
-        Have a look at this post for an example: https://discuss.pytorch.org/t/how-to-handle-imbalanced-classes/11264/2
-        https://www.maskaravivek.com/post/pytorch-weighted-random-sampler/
-        """
-        class_sample_count = np.array([len(np.where(labels == label)[0]) for label in np.unique(labels)])
-        weight = 1. / class_sample_count
-        samples_weight = np.array([weight[label] for label in labels])
-        samples_weight = torch.from_numpy(samples_weight)
-        # class_weights = compute_class_weight("balanced", classes=np.unique(labels), y=labels)
-        # class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
-        return WeightedRandomSampler(weights=samples_weight, num_samples=len(labels), replacement=True)

 import os
 import pandas as pd
 import torch
+from torch.utils.data import Dataset
 from torchvision.io import read_image
 class DRDataset(Dataset):
         return image, label