Soutrik committed on
Commit
de7d21e
1 Parent(s): b0bdbcf

new train model

configs/callbacks/early_stopping.yaml CHANGED
@@ -1,7 +1,7 @@
 # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html
 
 early_stopping:
-  _target_: pytorch_lightning.callbacks.EarlyStopping
+  _target_: lightning.pytorch.callbacks.EarlyStopping
   monitor: val_loss # quantity to be monitored, must be specified !!!
   min_delta: 0. # minimum change in the monitored quantity to qualify as an improvement
   patience: 3 # number of checks with no improvement after which training will be stopped
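Note: this commit moves every callback `_target_` from the legacy `pytorch_lightning` namespace to the unified `lightning.pytorch` package (the same one-line fix repeats in the checkpoint, summary, and progress-bar configs below). That keeps the callbacks consistent with the `import lightning as L` style used in the source files; mixing the two namespaces can produce callbacks the `lightning` Trainer does not recognize as its own. A minimal sketch of how Hydra turns this config into a callback instance, with values mirroring the file above:

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Config node mirroring configs/callbacks/early_stopping.yaml after this commit
cfg = OmegaConf.create(
    {
        "_target_": "lightning.pytorch.callbacks.EarlyStopping",
        "monitor": "val_loss",
        "min_delta": 0.0,
        "patience": 3,
    }
)
early_stopping = instantiate(cfg)  # -> lightning.pytorch.callbacks.EarlyStopping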
configs/callbacks/model_checkpoint.yaml CHANGED
@@ -1,7 +1,7 @@
 # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
 
 model_checkpoint:
-  _target_: pytorch_lightning.callbacks.ModelCheckpoint
+  _target_: lightning.pytorch.callbacks.ModelCheckpoint
   dirpath: null # directory to save the model file
   filename: best-checkpoint # checkpoint filename
   monitor: val_loss # name of the logged metric which determines when model is improving
configs/callbacks/rich_model_summary.yaml CHANGED
@@ -1,3 +1,4 @@
+# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html
 rich_model_summary:
-  _target_: pytorch_lightning.callbacks.RichModelSummary
+  _target_: lightning.pytorch.callbacks.RichModelSummary
   max_depth: 1
configs/callbacks/rich_progress_bar.yaml CHANGED
@@ -1,3 +1,4 @@
+# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichProgressBar.html
 rich_progress_bar:
-  _target_: pytorch_lightning.callbacks.RichProgressBar
+  _target_: lightning.pytorch.callbacks.RichProgressBar
   refresh_rate: 1
configs/experiment/catdog_experiment.yaml CHANGED
@@ -18,28 +18,28 @@ seed: 42
 name: "catdog_experiment"
 
 data:
-  dataset: "cats_and_dogs_filtered"
-  batch_size: 32
+  data_dir: "cats_and_dogs_filtered"
+  batch_size: 64
   num_workers: 8
   pin_memory: True
   image_size: 224
 
 model:
-  lr: 1e-3
+  lr: 5e-5
   weight_decay: 1e-5
-  factor: 0.1
-  patience: 10
+  factor: 0.5
+  patience: 5
   min_lr: 1e-6
   num_classes: 2
   patch_size: 16
-  embed_dim: 64
-  depth: 6
-  num_heads: 2
-  mlp_ratio: 3
+  embed_dim: 256
+  depth: 4
+  num_heads: 4
+  mlp_ratio: 4
 
 trainer:
   min_epochs: 1
-  max_epochs: 10
+  max_epochs: 6
 
 callbacks:
   model_checkpoint:
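The experiment now trains a wider but shallower ViT (embed_dim 256, depth 4) with a lower learning rate (5e-5 vs 1e-3), a gentler scheduler (factor 0.5, patience 5), double the batch size, and fewer epochs; `dataset:` is renamed to `data_dir:` so the key lines up with the datamodule signature changed later in this commit. A sketch of inspecting the composed overrides via Hydra's compose API (the primary config name `train` is an assumption; substitute the repo's actual root config):

from hydra import compose, initialize

with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="train", overrides=["experiment=catdog_experiment"])

print(cfg.data.batch_size)   # 64 — the experiment value, not the data-group default
print(cfg.model.embed_dim)   # 256 — overrides the 128 set in the model config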
configs/model/catdog_classifier.yaml CHANGED
@@ -3,13 +3,13 @@
 _target_: src.models.catdog_model.ViTTinyClassifier
 
 # model params
-img_size: 160
+img_size: ${data.image_size}
 patch_size: 16
 num_classes: 2
-embed_dim: 64
+embed_dim: 128
 depth: 6
-num_heads: 2
-mlp_ratio: 3.0
+num_heads: 4
+mlp_ratio: 4
 pre_norm: False
 
 # optimizer params
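Replacing the hard-coded `img_size: 160` with the `${data.image_size}` interpolation ties the model's input resolution to the datamodule's resize target (224), removing a silent mismatch between a 160-pixel model and 224-pixel batches. How the interpolation resolves, in a self-contained OmegaConf sketch:

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "data": {"image_size": 224},
        "model": {"img_size": "${data.image_size}"},
    }
)
assert cfg.model.img_size == 224  # resolved against data.image_size on access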
configs/trainer/default.yaml CHANGED
@@ -17,3 +17,5 @@ deterministic: True
 # Log every N steps in training and validation
 log_every_n_steps: 10
 fast_dev_run: False
+
+gradient_clip_val: 1.0
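The new `gradient_clip_val: 1.0` caps the global gradient norm each step, a common stabilizer for transformer training, especially at the larger batch size set above. Since the trainer config is instantiated directly into the Trainer, the YAML key maps one-to-one onto the Trainer argument; a minimal equivalent (other flags omitted):

import lightning as L

trainer = L.Trainer(
    min_epochs=1,
    max_epochs=6,
    log_every_n_steps=10,
    gradient_clip_val=1.0,  # clip gradient norm to 1.0 before each optimizer step
)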
docker-compose.yaml CHANGED
@@ -5,7 +5,7 @@ services:
     build:
       context: .
     command: |
-      python -m src.train experiment=catdog_experiment ++task_name=train ++train=True ++test=False && \
+      python -m src.train_new experiment=catdog_experiment ++task_name=train ++train=True ++test=False && \
       touch /app/checkpoints/train_done.flag
     volumes:
       - ./data:/app/data
@@ -25,7 +25,7 @@ services:
     build:
       context: .
     command: |
-      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.train experiment=catdog_experiment ++task_name=eval ++train=False ++test=True'
+      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.train_new experiment=catdog_experiment ++task_name=eval ++train=False ++test=True'
     volumes:
       - ./data:/app/data
       - ./checkpoints:/app/checkpoints
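Both services now launch `src.train_new` instead of `src.train`; the eval service still blocks until the train service touches `train_done.flag`. The polling gate is equivalent to this hypothetical Python loop (illustration only, not code from the repo):

import time
from pathlib import Path

flag = Path("/app/checkpoints/train_done.flag")  # written by the train service
while not flag.exists():
    time.sleep(10)  # poll every 10 seconds, matching the sh one-liner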
src/datamodules/catdog_datamodule.py CHANGED
@@ -14,7 +14,7 @@ class CatDogImageDataModule(L.LightningDataModule):
 
     def __init__(
         self,
-        data_root: Union[str, Path] = "data",
+        root_dir: Union[str, Path] = "data",
         data_dir: Union[str, Path] = "cats_and_dogs_filtered",
         batch_size: int = 32,
         num_workers: int = 4,
@@ -24,7 +24,7 @@ class CatDogImageDataModule(L.LightningDataModule):
         url: str = "https://download.pytorch.org/tutorials/cats_and_dogs_filtered.zip",
     ):
         super().__init__()
-        self.data_root = Path(data_root)
+        self.root_dir = Path(root_dir)
         self.data_dir = data_dir
         self.batch_size = batch_size
         self.num_workers = num_workers
@@ -40,11 +40,11 @@ class CatDogImageDataModule(L.LightningDataModule):
 
     def prepare_data(self):
         """Download the dataset if it doesn't exist."""
-        self.dataset_path = self.data_root / self.data_dir
+        self.dataset_path = self.root_dir / self.data_dir
         if not self.dataset_path.exists():
             logger.info("Downloading and extracting dataset.")
             download_and_extract_archive(
-                url=self.url, download_root=self.data_root, remove_finished=True
+                url=self.url, download_root=self.root_dir, remove_finished=True
             )
             logger.info("Download completed.")
@@ -56,11 +56,9 @@ class CatDogImageDataModule(L.LightningDataModule):
         train_transform = transforms.Compose(
             [
                 transforms.Resize((self.image_size, self.image_size)),
-                transforms.RandomHorizontalFlip(0.1),
-                transforms.RandomRotation(10),
-                transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
-                transforms.RandomAutocontrast(0.1),
-                transforms.RandomAdjustSharpness(2, 0.1),
+                transforms.RandomHorizontalFlip(0.5),  # Flip probability increased
+                transforms.RandomRotation(5),  # Reduced rotation for stability
+                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                 transforms.ToTensor(),
                 transforms.Normalize(
                     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
@@ -134,7 +132,7 @@ if __name__ == "__main__":
     def test_datamodule(cfg: DictConfig):
         logger.info(f"Config:\n{OmegaConf.to_yaml(cfg)}")
         datamodule = CatDogImageDataModule(
-            data_root=cfg.paths.data_dir,
+            root_dir=cfg.data.root_dir,
            data_dir=cfg.data.data_dir,
            batch_size=cfg.data.batch_size,
            num_workers=cfg.data.num_workers,
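The `data_root` parameter is renamed to `root_dir` and carried through `prepare_data` and the `__main__` harness (which now reads `cfg.data.root_dir`, so the data config group is assumed to expose that key). The augmentation stack is also slimmed down to flip, small rotation, and color jitter. A usage sketch with the new signature, using defaults from this file (keyword names past `num_workers` are assumed to match the config keys):

from src.datamodules.catdog_datamodule import CatDogImageDataModule

dm = CatDogImageDataModule(
    root_dir="data",                     # was data_root before this commit
    data_dir="cats_and_dogs_filtered",
    batch_size=64,
    num_workers=8,
    image_size=224,
)
dm.prepare_data()            # downloads and extracts the archive if missing
dm.setup(stage="fit")
train_loader = dm.train_dataloader()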
src/models/catdog_model.py CHANGED
@@ -1,15 +1,16 @@
 import lightning as L
-import torch
-from torch import nn, optim
-from torchmetrics import Accuracy, Precision, Recall, F1Score
+import torch.nn.functional as F
+from torch import optim
+from torchmetrics.classification import Accuracy, F1Score
 from timm.models import VisionTransformer
+import torch
 
 
 class ViTTinyClassifier(L.LightningModule):
     def __init__(
         self,
         img_size: int = 224,
-        num_classes: int = 2,  # Should be 2 for binary classification
+        num_classes: int = 2,  # Binary classification with two classes
         embed_dim: int = 64,
         depth: int = 6,
         num_heads: int = 2,
@@ -25,7 +26,7 @@ class ViTTinyClassifier(L.LightningModule):
         super().__init__()
         self.save_hyperparameters()
 
-        # Create ViT model
+        # Vision Transformer model initialization
         self.model = VisionTransformer(
             img_size=img_size,
             patch_size=patch_size,
@@ -35,51 +36,40 @@ class ViTTinyClassifier(L.LightningModule):
             depth=depth,
             num_heads=num_heads,
             mlp_ratio=mlp_ratio,
-            qkv_bias=False,
+            qkv_bias=True,
             pre_norm=pre_norm,
             global_pool="token",
         )
 
-        # Metrics for binary classification
-        metrics = {
-            "acc": Accuracy(task="binary"),
-            "precision": Precision(task="binary"),
-            "recall": Recall(task="binary"),
-            "f1": F1Score(task="binary"),
-        }
-
-        # Initialize metrics for each stage
-        self.train_metrics = nn.ModuleDict(
-            {name: metric.clone() for name, metric in metrics.items()}
-        )
-        self.val_metrics = nn.ModuleDict(
-            {name: metric.clone() for name, metric in metrics.items()}
-        )
-        self.test_metrics = nn.ModuleDict(
-            {name: metric.clone() for name, metric in metrics.items()}
-        )
-
-        # Loss function
-        self.criterion = nn.CrossEntropyLoss()
+        # Define accuracy and F1 metrics for binary classification
+        self.train_acc = Accuracy(task="binary")
+        self.val_acc = Accuracy(task="binary")
+        self.test_acc = Accuracy(task="binary")
+
+        self.train_f1 = F1Score(task="binary")
+        self.val_f1 = F1Score(task="binary")
+        self.test_f1 = F1Score(task="binary")
 
     def forward(self, x):
         return self.model(x)
 
-    def _shared_step(self, batch, stage: str):
+    def _shared_step(self, batch, stage):
         x, y = batch
-        logits = self(x)
-        loss = self.criterion(logits, y)
-        preds = logits.argmax(dim=1)
-
-        # Get appropriate metric dictionary based on stage
-        metrics = getattr(self, f"{stage}_metrics")
-        metric_logs = {
-            f"{stage}_{name}": metric(preds, y) for name, metric in metrics.items()
-        }
-
-        # Log metrics
-        self.log(f"{stage}_loss", loss, prog_bar=True)
-        self.log_dict(metric_logs, prog_bar=True, on_step=False, on_epoch=True)
+        logits = self(x)  # Model output shape: [batch_size, num_classes]
+        loss = F.cross_entropy(logits, y)  # Cross-entropy for binary classification
+        preds = torch.argmax(logits, dim=1)  # Predicted class (0 or 1)
+
+        # Update and log metrics
+        acc = getattr(self, f"{stage}_acc")
+        f1 = getattr(self, f"{stage}_f1")
+        acc(preds, y)
+        f1(preds, y)
+
+        # Logging of metrics and loss
+        self.log(f"{stage}_loss", loss, prog_bar=True, on_epoch=True)
+        self.log(f"{stage}_acc", acc, prog_bar=True, on_epoch=True)
+        self.log(f"{stage}_f1", f1, prog_bar=True, on_epoch=True)
         return loss
 
     def training_step(self, batch, batch_idx):
@@ -100,6 +90,7 @@ class ViTTinyClassifier(L.LightningModule):
 
         scheduler = optim.lr_scheduler.ReduceLROnPlateau(
             optimizer,
+            mode="min",
             factor=self.hparams.factor,
             patience=self.hparams.patience,
             min_lr=self.hparams.min_lr,
@@ -113,3 +104,8 @@ class ViTTinyClassifier(L.LightningModule):
                 "interval": "epoch",
             },
         }
+
+
+if __name__ == "__main__":
+    model = ViTTinyClassifier()
+    print(model)
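The metrics refactor swaps the `nn.ModuleDict` of four cloned metrics for explicit per-stage `Accuracy`/`F1Score` attributes and logs the metric objects directly, letting torchmetrics handle epoch-level aggregation; `qkv_bias` flips to True, the scheduler gets an explicit `mode="min"` to match `val_loss` monitoring, and a `__main__` guard adds a quick smoke test. A slightly fuller hypothetical check that also verifies the output shape:

import torch
from src.models.catdog_model import ViTTinyClassifier

model = ViTTinyClassifier(img_size=224, num_classes=2)
x = torch.randn(4, 3, 224, 224)    # fake batch of four RGB images
logits = model(x)
assert logits.shape == (4, 2)      # one logit per class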
src/train_new.py CHANGED
@@ -160,7 +160,7 @@ def setup_run_trainer(cfg: DictConfig):
     # Set up callbacks, loggers, and Trainer
     callbacks = instantiate_callbacks(cfg.callbacks)
     logger.info(f"Callbacks: {callbacks}")
-    loggers = instantiate_loggers(cfg.loggers)
+    loggers = instantiate_loggers(cfg.logger)
     logger.info(f"Loggers: {loggers}")
     trainer: L.Trainer = hydra.utils.instantiate(
         cfg.trainer, callbacks=callbacks, logger=loggers
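The one-line fix reads logger configs from `cfg.logger`, matching the Hydra config group name; the old `cfg.loggers` key would not exist on the composed config and would fail at runtime. `instantiate_loggers` itself is not shown in this diff; a plausible sketch of such a helper, in the style of common lightning-hydra templates (assumed, not the repo's actual code):

from typing import List

import hydra
from lightning.pytorch.loggers import Logger
from omegaconf import DictConfig


def instantiate_loggers(logger_cfg: DictConfig) -> List[Logger]:
    """Instantiate every config node that carries a _target_."""
    loggers: List[Logger] = []
    if not logger_cfg:
        return loggers
    for _, lg_conf in logger_cfg.items():
        if isinstance(lg_conf, DictConfig) and "_target_" in lg_conf:
            loggers.append(hydra.utils.instantiate(lg_conf))
    return loggers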