Anthony Miyaguchi committed
Commit a0583df
1 Parent(s): 867532a

Remove lightning dependency from submission

evaluate/data.py CHANGED
@@ -1,10 +1,9 @@
 from pathlib import Path
 
 import pandas as pd
-import pytorch_lightning as pl
 import torch
 from PIL import Image
-from torch.utils.data import DataLoader, Dataset
+from torch.utils.data import Dataset
 from torchvision.transforms import v2
 from transformers import AutoImageProcessor, AutoModel
 
@@ -40,30 +39,3 @@ class ImageDataset(Dataset):
         img = Image.open(image_path).convert("RGB")
         img = v2.ToTensor()(img)
         return {"features": img, "observation_id": row.observation_id}
-
-
-class InferenceDataModel(pl.LightningDataModule):
-    def __init__(
-        self,
-        metadata_path,
-        images_root_path,
-        batch_size=32,
-    ):
-        super().__init__()
-        self.metadata_path = metadata_path
-        self.images_root_path = images_root_path
-        self.batch_size = batch_size
-
-    def setup(self, stage=None):
-        self.dataloader = DataLoader(
-            ImageDataset(self.metadata_path, self.images_root_path),
-            batch_size=self.batch_size,
-            shuffle=False,
-            num_workers=4,
-        )
-
-    def predict_dataloader(self):
-        transform = v2.Compose([TransformDino("facebook/dinov2-base")])
-        for batch in self.dataloader:
-            batch = transform(batch)
-            yield batch
evaluate/data_lightning.py ADDED
@@ -0,0 +1,31 @@
+import pytorch_lightning as pl
+from torch.utils.data import DataLoader
+from torchvision.transforms import v2
+from .data import ImageDataset, TransformDino
+
+
+class InferenceDataModel(pl.LightningDataModule):
+    def __init__(
+        self,
+        metadata_path,
+        images_root_path,
+        batch_size=32,
+    ):
+        super().__init__()
+        self.metadata_path = metadata_path
+        self.images_root_path = images_root_path
+        self.batch_size = batch_size
+
+    def setup(self, stage=None):
+        self.dataloader = DataLoader(
+            ImageDataset(self.metadata_path, self.images_root_path),
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=4,
+        )
+
+    def predict_dataloader(self):
+        transform = v2.Compose([TransformDino("facebook/dinov2-base")])
+        for batch in self.dataloader:
+            batch = transform(batch)
+            yield batch
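For orientation, a minimal usage sketch (not part of this commit) of the Lightning data module that now lives in evaluate/data_lightning.py; it assumes the evaluate directory is importable as a package, and the metadata and image paths are placeholders:

# Hypothetical usage sketch; "metadata.csv" and "images/" are placeholder paths.
from evaluate.data_lightning import InferenceDataModel

dm = InferenceDataModel(metadata_path="metadata.csv", images_root_path="images/", batch_size=32)
dm.setup()
for batch in dm.predict_dataloader():
    features = batch["features"]              # features after the TransformDino step
    observation_ids = batch["observation_id"]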
evaluate/{model.py → model_lightning.py} RENAMED
File without changes
evaluate/submission.py CHANGED
@@ -1,11 +1,21 @@
-import zipfile
-
 import pandas as pd
 import torch
-from pytorch_lightning import Trainer
+from torch import nn
+from torch.utils.data import DataLoader
+from torchvision.transforms import v2
 
-from .data import InferenceDataModel
-from .model import LinearClassifier
+from .data import ImageDataset, TransformDino
+
+
+class LinearClassifier(nn.Module):
+    def __init__(self, num_features, num_classes):
+        super().__init__()
+        self.num_features = num_features
+        self.num_classes = num_classes
+        self.model = nn.Linear(num_features, num_classes)
+
+    def forward(self, x):
+        return torch.log_softmax(self.model(x), dim=1)
 
 
 def make_submission(
@@ -14,17 +24,22 @@ def make_submission(
     output_csv_path="./submission.csv",
     images_root_path="/tmp/data/private_testset",
 ):
-    model = LinearClassifier.load_from_checkpoint(model_path)
-    dm = InferenceDataModel(
-        metadata_path=test_metadata, images_root_path=images_root_path
-    )
-    trainer = Trainer(
-        accelerator="gpu" if torch.cuda.is_available() else "cpu",
+    checkpoint = torch.load(model_path)
+    hparams = checkpoint["hyper_parameters"]
+    model = LinearClassifier(hparams["num_features"], hparams["num_classes"])
+    model.load_state_dict(checkpoint["state_dict"])
+
+    transform = v2.Compose([TransformDino("facebook/dinov2-base")])
+    dataloader = DataLoader(
+        ImageDataset(test_metadata, images_root_path), batch_size=32, num_workers=4
     )
-    predictions = trainer.predict(model, datamodule=dm)
     rows = []
-    for batch in predictions:
-        for observation_id, class_id in zip(batch["observation_id"], batch["class_id"]):
+    for batch in dataloader:
+        batch = transform(batch)
+        observation_ids = batch["observation_id"]
+        logits = model(batch["features"])
+        class_ids = torch.argmax(logits, dim=1)
+        for observation_id, class_id in zip(observation_ids, class_ids):
             row = {"observation_id": int(observation_id), "class_id": int(class_id)}
             rows.append(row)
     submission_df = pd.DataFrame(rows)
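As a usage note, a minimal sketch (not part of this commit) of calling the now Lightning-free entry point; it assumes the evaluate directory is importable as a package, and the checkpoint and metadata paths are placeholders. The checkpoint is still expected to carry the Lightning-style "hyper_parameters" and "state_dict" keys that the new loading code reads.

# Hypothetical invocation; pytorch_lightning does not need to be installed for this path.
from evaluate.submission import make_submission

make_submission(
    model_path="model.ckpt",                       # placeholder checkpoint path
    test_metadata="test_metadata.csv",             # placeholder metadata path
    output_csv_path="./submission.csv",
    images_root_path="/tmp/data/private_testset",
)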
evaluate/test_evaluate.py CHANGED
@@ -5,8 +5,9 @@ import pytest
 import torch
 from pytorch_lightning import Trainer
 
-from .data import ImageDataset, InferenceDataModel
-from .model import LinearClassifier
+from .data import ImageDataset
+from .data_lightning import InferenceDataModel
+from .model_lightning import LinearClassifier as LightningLinearClassifier
 from .submission import make_submission
 
 
@@ -43,7 +44,7 @@ def metadata(tmp_path, images_root):
 @pytest.fixture
 def model_checkpoint(tmp_path, metadata, images_root):
     model_checkpoint = tmp_path / "model.ckpt"
-    model = LinearClassifier(768, 10)
+    model = LightningLinearClassifier(768, 10)
     trainer = Trainer(max_epochs=1, fast_dev_run=True)
     dm = TestingInferenceDataModel(metadata, images_root)
     trainer.fit(model, dm)
@@ -69,7 +70,7 @@ def test_inference_datamodel(images_root, metadata):
 
 
 def test_model_checkpoint(model_checkpoint):
-    model = LinearClassifier.load_from_checkpoint(model_checkpoint)
+    model = LightningLinearClassifier.load_from_checkpoint(model_checkpoint)
     assert model
 
 