Spaces:

bhimrazy
/

diabetic-retinopathy-detection

Running

App Files Community

bhimrazy committed on Mar 24, 2024

Commit

23fa981

1 Parent(s): 60c474e

Add DRDataset and DRDataModule classes

Browse files

Files changed (2) hide show

src/dataset.py +123 -0
src/model.py +69 -0

src/dataset.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import os
+import lightning as L
+import numpy as np
+import pandas as pd
+import torch
+from sklearn.utils.class_weight import compute_class_weight
+from torch.utils.data import DataLoader, Dataset
+from torchvision.io import read_image
+from torchvision.transforms import v2 as T
class DRDataset(Dataset):
    """Image-classification dataset backed by a CSV manifest.

    The CSV must contain an ``image_path`` column (paths to image files) and a
    ``label`` column (class indices). The manifest is read and every listed
    path is checked for existence when the dataset is constructed.

    Args:
        csv_path: Path to the CSV manifest.
        transform: Optional callable applied to each loaded image.

    Raises:
        FileNotFoundError: If the CSV file or any listed image is missing.
        ValueError: If a required column is absent from the CSV.
    """

    # Upper bound on how many missing paths are spelled out in the error
    # message; the remainder is summarised as a count.
    MAX_REPORTED_INVALID_PATHS = 10

    def __init__(self, csv_path: str, transform=None):
        self.csv_path = csv_path
        self.transform = transform
        self.image_paths, self.labels = self.load_csv_data()

    def load_csv_data(self):
        """Read and validate the manifest at ``self.csv_path``.

        Returns:
            A ``(image_paths, labels)`` tuple: a list of paths and a
            ``torch.LongTensor`` holding the values of the ``label`` column.

        Raises:
            FileNotFoundError: If the CSV file or any listed image is missing.
            ValueError: If ``image_path`` or ``label`` is not a column.
        """
        if not os.path.isfile(self.csv_path):
            raise FileNotFoundError(f"CSV file '{self.csv_path}' not found.")

        data = pd.read_csv(self.csv_path)
        if "image_path" not in data.columns or "label" not in data.columns:
            raise ValueError("CSV file must contain 'image_path' and 'label' columns.")

        image_paths = data["image_path"].tolist()
        labels = data["label"].tolist()

        # Empty CSV cells come back from pandas as float NaN; handing those to
        # os.path.isfile raises TypeError, so report them as invalid instead.
        invalid_image_paths = [
            img_path
            for img_path in image_paths
            if not (isinstance(img_path, str) and os.path.isfile(img_path))
        ]
        if invalid_image_paths:
            shown = invalid_image_paths[: self.MAX_REPORTED_INVALID_PATHS]
            message = f"Invalid image paths found: {shown}"
            hidden = len(invalid_image_paths) - len(shown)
            if hidden > 0:
                message += f" ... and {hidden} more"
            raise FileNotFoundError(message)

        return image_paths, torch.LongTensor(labels)

    def __len__(self):
        """Return the number of samples listed in the manifest."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            An ``(image, label)`` tuple: the result of ``read_image`` on the
            stored path (after ``transform`` when one was given) and the
            matching entry of the label tensor.

        Raises:
            IOError: If the image cannot be read.
            RuntimeError: If the transform fails on the image.
        """
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        try:
            image = read_image(image_path)
        except Exception as e:
            raise IOError(f"Error loading image at path '{image_path}': {e}") from e

        # Compare against None explicitly: a valid transform object may define
        # __len__/__bool__ and evaluate as falsy.
        if self.transform is not None:
            try:
                image = self.transform(image)
            except Exception as e:
                raise RuntimeError(
                    f"Error applying transformations to image at path '{image_path}': {e}"
                ) from e

        return image, label
class DRDataModule(L.LightningDataModule):
    """Lightning data module providing the train/validation ``DRDataset`` loaders.

    Args:
        batch_size: Batch size used by both dataloaders.
        num_workers: Number of worker processes used by both dataloaders.
        train_csv: CSV manifest for the training split.
        val_csv: CSV manifest for the validation split.

    Attributes set by ``setup``:
        train_dataset, val_dataset: The two ``DRDataset`` instances.
        class_weights: Float32 tensor with ``num_classes`` entries computed
            from the training labels.
    """

    def __init__(
        self,
        batch_size: int = 8,
        num_workers: int = 4,
        train_csv: str = "data/train.csv",
        val_csv: str = "data/val.csv",
    ):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.train_csv = train_csv
        self.val_csv = val_csv

        # The two pipelines differ only by the random flip used for training.
        self.train_transform = self._build_transform(augment=True)
        self.val_transform = self._build_transform(augment=False)

        self.num_classes = 5

    @staticmethod
    def _build_transform(augment: bool):
        """Compose resize -> (optional horizontal flip) -> float scaling -> normalisation."""
        steps = [T.Resize((224, 224), antialias=True)]
        if augment:
            steps.append(T.RandomHorizontalFlip(p=0.5))
        steps.extend(
            [
                T.ToDtype(torch.float32, scale=True),
                # Per-channel statistics; these are the ImageNet values
                # commonly paired with torchvision pretrained backbones.
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )
        return T.Compose(steps)

    def setup(self, stage=None):
        """Build both datasets and derive class weights from the training labels.

        ``stage`` is accepted for Lightning's hook signature but not used: both
        splits are always created.
        """
        self.train_dataset = DRDataset(self.train_csv, transform=self.train_transform)
        self.val_dataset = DRDataset(self.val_csv, transform=self.val_transform)

        labels = self.train_dataset.labels.numpy()
        self.class_weights = self.compute_class_weights(labels)

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the validation dataloader (no shuffling)."""
        return DataLoader(
            self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers
        )

    def compute_class_weights(self, labels):
        """Return "balanced" class weights as a tensor of length ``num_classes``.

        Classes that occur in ``labels`` get scikit-learn's balanced weight.
        Classes that do not occur keep a neutral weight of 1.0, so the result
        always has one entry per class. Sizing the vector by the classes that
        happen to be present would yield a tensor that a ``num_classes``-way
        weighted loss cannot use.

        Args:
            labels: Array-like of integer class indices in ``[0, num_classes)``.

        Returns:
            Float32 tensor of shape ``(num_classes,)``.

        Raises:
            ValueError: If a label falls outside ``[0, num_classes)``.
        """
        labels = np.asarray(labels)
        weights = np.ones(self.num_classes, dtype=np.float64)

        present = np.unique(labels)
        if present.size:
            if present.min() < 0 or present.max() >= self.num_classes:
                raise ValueError(
                    f"Labels must lie in [0, {self.num_classes}); "
                    f"found values {present.tolist()}."
                )
            weights[present.astype(np.intp)] = compute_class_weight(
                class_weight="balanced", classes=present, y=labels
            )

        return torch.tensor(weights, dtype=torch.float32)

src/model.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import lightning as L
+import torch
+from torch import nn
+from torchmetrics.functional import accuracy
+from torchvision import models
class DRModel(L.LightningModule):
    """Transfer-learning classifier built on a pretrained DenseNet-169.

    All pretrained parameters are frozen; only the replacement classifier
    head (Linear -> ReLU -> Dropout -> Linear) is trainable.

    Args:
        num_classes: Number of output classes.
        learning_rate: Learning rate passed to Adam.
        class_weights: Optional per-class weight tensor forwarded to
            ``nn.CrossEntropyLoss``; ``None`` means unweighted.
    """

    def __init__(
        self, num_classes: int, learning_rate: float = 2e-4, class_weights=None
    ):
        super().__init__()
        # Must run inside __init__: it records the constructor arguments.
        self.save_hyperparameters()
        self.num_classes = num_classes
        self.learning_rate = learning_rate

        self.model = models.densenet169(weights=models.DenseNet169_Weights.DEFAULT)

        # Freeze everything that exists at this point. The head is swapped in
        # afterwards, so its freshly created layers stay trainable.
        for param in self.model.parameters():
            param.requires_grad = False

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Sequential(
            nn.Linear(in_features, in_features // 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(in_features // 2, num_classes),
        )

        self.criterion = nn.CrossEntropyLoss(weight=class_weights)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.model(x)

    def _shared_step(self, batch):
        """Run the network on ``batch`` and return ``(logits, loss, targets)``."""
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        return logits, loss, y

    def training_step(self, batch, batch_idx=None):
        """Compute and log the training loss for one batch.

        ``batch_idx`` is optional so that both ``training_step(batch)`` and the
        two-argument form used by ``validation_step`` are accepted.
        """
        _, loss, _ = self._shared_step(batch)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log validation loss and multiclass accuracy for one batch."""
        logits, loss, y = self._shared_step(batch)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y, task="multiclass", num_classes=self.num_classes)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)

    def configure_optimizers(self):
        """Return Adam plus a cosine-annealing schedule stepped once per epoch."""
        optimizer = torch.optim.Adam(
            self.parameters(), lr=self.learning_rate, weight_decay=1e-4
        )
        # T_max=10 is the number of scheduler steps (epochs, given the
        # "epoch" interval below) from the initial LR down to the minimum.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "epoch",
                # NOTE(review): "monitor" appears to matter only for
                # metric-driven schedulers such as ReduceLROnPlateau; kept
                # as-is here - confirm against the Lightning docs.
                "monitor": "val_loss",
            },
        }