diff --git a/configs/computer/a100.yaml b/configs/computer/a100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b04a38b8241a9ebae448eac00b2f10c40200edd --- /dev/null +++ b/configs/computer/a100.yaml @@ -0,0 +1,8 @@ +devices: 1 +progress_bar_refresh_rate: 2 +num_workers: 8 +sync_batchnorm: False +accelerator: gpu +precision: 32 +strategy: auto +num_nodes: 1 diff --git a/configs/computer/cluster-node-a100.yaml b/configs/computer/cluster-node-a100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09742d7d495526cc0cdc60e7cc8c41b0383424f2 --- /dev/null +++ b/configs/computer/cluster-node-a100.yaml @@ -0,0 +1,8 @@ +devices: 8 +num_workers: 8 +progress_bar_refresh_rate: 2 +sync_batchnorm: True +accelerator: gpu +precision: 32 +strategy: ddp +num_nodes: 1 diff --git a/configs/computer/cluster-node-v100.yaml b/configs/computer/cluster-node-v100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b5f41cbff1d1de8c6c24cdd4ef54001fa5d6211 --- /dev/null +++ b/configs/computer/cluster-node-v100.yaml @@ -0,0 +1,8 @@ +devices: 4 +num_workers: 10 +progress_bar_refresh_rate: 2 +sync_batchnorm: True +accelerator: gpu +precision: 32 +strategy: ddp +num_nodes: 1 diff --git a/configs/computer/cpu.yaml b/configs/computer/cpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9460ab763f7f68612288dc50c9bd5934d5d14d4f --- /dev/null +++ b/configs/computer/cpu.yaml @@ -0,0 +1,8 @@ +devices: null +num_workers: 0 +progress_bar_refresh_rate: 2 +sync_batchnorm: False +accelerator: cpu +precision: 32 +strategy: auto +num_nodes: null diff --git a/configs/computer/v100.yaml b/configs/computer/v100.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8251dedb4026eaeb77286e3f750a1abe8862d261 --- /dev/null +++ b/configs/computer/v100.yaml @@ -0,0 +1,8 @@ +devices: 1 +num_workers: 10 +progress_bar_refresh_rate: 2 +sync_batchnorm: False +accelerator: gpu +precision: 32 +strategy: auto +num_nodes: 1 diff --git a/configs/config.yaml b/configs/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffc1405d94a9bb4e260820e40a85ea5f8c32155e --- /dev/null +++ b/configs/config.yaml @@ -0,0 +1,89 @@ +defaults: + - model: default + - computer: v100 + - dataset: osv5m + - _self_ + - exp: ??? 
+ +model: + val_metrics: + _target_: metrics.distance_based.HaversineMetrics + acc_radiuses: + - 1 + - 25 + - 200 + - 750 + - 2500 + acc_area: [] + aux_data: ${aux_data} + test_metrics: + _target_: metrics.distance_based.HaversineMetrics + acc_radiuses: + - 1 + - 25 + - 200 + - 750 + - 2500 + acc_area: ${areas} + aux_data: ${aux_data} + +datamodule: + _target_: data.datamodule.ImageDataModule + train_dataset: ${dataset.train_dataset} + val_dataset: ${dataset.val_dataset} + test_dataset: ${dataset.test_dataset} + global_batch_size: ${dataset.global_batch_size} + num_workers: ${computer.num_workers} + num_nodes: ${computer.num_nodes} + num_devices: ${computer.devices} + val_proportion: 0.1 + +trainer: + _target_: pytorch_lightning.Trainer + devices: ${computer.devices} + accelerator: ${computer.accelerator} + strategy: ${computer.strategy} + num_nodes: ${computer.num_nodes} + precision: ${computer.precision} + max_epochs: ${max_epochs} + +logger: + _target_: pytorch_lightning.loggers.WandbLogger + save_dir: ${root_dir} + name: ${experiment_name} + project: plonk + log_model: False + offline: False + entity: imaginelab + +checkpoints: + _target_: pytorch_lightning.callbacks.ModelCheckpoint + dirpath: ${root_dir}/checkpoints/${experiment_name} + filename: 'epoch_{epoch}' + monitor: val/loss + save_last: True + save_top_k: 0 + every_n_epochs: 1 + +progress_bar: + _target_: pytorch_lightning.callbacks.TQDMProgressBar + refresh_rate: ${computer.progress_bar_refresh_rate} + +aux_data: [] +max_epochs: 100 +data_dir: ${root_dir}/datasets +root_dir: ${hydra:runtime.cwd} +experiment_name: ${dataset.name}__${model.name} +mode: train # change that to eval to do the testing +num_classes: 0 +areas: ['country', 'region', 'sub-region', 'city'] +class_name: null +streetclip: False +blur: False +text_tuning: False + +hydra: + run: + dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}/${experiment_name} + job: + chdir: true diff --git a/configs/dataset/baselines/im2gps.yaml b/configs/dataset/baselines/im2gps.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bd956b848db55eb4420328451dc1a1e208e1d44 --- /dev/null +++ b/configs/dataset/baselines/im2gps.yaml @@ -0,0 +1,16 @@ +dataset: + name: im2gps + global_batch_size: 512 + test_dataset: + _partial_: true + _target_: data.data.Baseline + path: ${data_dir}/baselines/im2gps + which: 'im2gps' + transforms: ${dataset.test_transform} +datamodule: + _target_: data.datamodule.BaselineDataModule + test_dataset: ${dataset.test_dataset} + global_batch_size: ${dataset.global_batch_size} + num_workers: ${computer.num_workers} + num_nodes: ${computer.num_nodes} + num_devices: ${computer.devices} \ No newline at end of file diff --git a/configs/dataset/baselines/im2gps3k.yaml b/configs/dataset/baselines/im2gps3k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a5bc31cd92f7d5b8e654e2029ab85fd3704765b --- /dev/null +++ b/configs/dataset/baselines/im2gps3k.yaml @@ -0,0 +1,16 @@ +dataset: + name: im2gps3k + global_batch_size: 512 + test_dataset: + _partial_: true + _target_: data.data.Baseline + path: ${data_dir}/baselines/im2gps3k + which: 'im2gps3k' + transforms: ${dataset.test_transform} +datamodule: + _target_: data.datamodule.BaselineDataModule + test_dataset: ${dataset.test_dataset} + global_batch_size: ${dataset.global_batch_size} + num_workers: ${computer.num_workers} + num_nodes: ${computer.num_nodes} + num_devices: ${computer.devices} \ No newline at end of file diff --git a/configs/dataset/baselines/yfcc4k.yaml 
b/configs/dataset/baselines/yfcc4k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65ef8274be9c538f9d871e50e4fda473bd98bb35 --- /dev/null +++ b/configs/dataset/baselines/yfcc4k.yaml @@ -0,0 +1,16 @@ +dataset: + name: yfcc4k + global_batch_size: 512 + test_dataset: + _partial_: true + _target_: data.data.Baseline + path: ${data_dir}/baselines/yfcc4k + which: 'yfcc4k' + transforms: ${dataset.test_transform} +datamodule: + _target_: data.datamodule.BaselineDataModule + test_dataset: ${dataset.test_dataset} + global_batch_size: ${dataset.global_batch_size} + num_workers: ${computer.num_workers} + num_nodes: ${computer.num_nodes} + num_devices: ${computer.devices} \ No newline at end of file diff --git a/configs/dataset/osv5m.yaml b/configs/dataset/osv5m.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90d2626a83be312502fd8bf82f83357c405b0181 --- /dev/null +++ b/configs/dataset/osv5m.yaml @@ -0,0 +1,46 @@ +defaults: + - train_transform: fast_clip + - test_transform: fast_clip + - _self_ + +name: osv5m +global_batch_size: 256 + +train_dataset: + _partial_: true + _target_: data.data.osv5m + path: ${data_dir}/osv5m/ + split: train + class_name: ${class_name} + transforms: ${dataset.train_transform} + aux_data: ${aux_data} + is_baseline: ${is_baseline} + areas: ${areas} + streetclip: ${streetclip} + blur: ${blur} + +val_dataset: + _partial_: true + _target_: data.data.osv5m + path: ${data_dir}/osv5m/ + split: val + class_name: ${class_name} + transforms: ${dataset.test_transform} + aux_data: ${aux_data} + is_baseline: ${is_baseline} + areas: ${areas} + streetclip: ${streetclip} + blur: ${blur} + +test_dataset: + _partial_: true + _target_: data.data.osv5m + path: ${data_dir}/osv5m/ + split: test + class_name: ${class_name} + transforms: ${dataset.test_transform} + aux_data: ${aux_data} + is_baseline: ${is_baseline} + areas: ${areas} + streetclip: ${streetclip} + blur: ${blur} diff --git a/configs/dataset/osv5m_contrastive.yaml b/configs/dataset/osv5m_contrastive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78d154f823ce670328972e9364d4163a8f16cd97 --- /dev/null +++ b/configs/dataset/osv5m_contrastive.yaml @@ -0,0 +1,34 @@ +defaults: + - train_transform: fast_clip + - test_transform: fast_clip + - _self_ + +name: osv5m +global_batch_size: 256 + +train_dataset: + _partial_: true + _target_: data.data.Contrastiveosv5m + path: ${data_dir}/osv5m/ + split: train + class_name: ${class_name} + transforms: ${dataset.train_transform} + blur: ${blur} + +val_dataset: + _partial_: true + _target_: data.data.Contrastiveosv5m + path: ${data_dir}/osv5m/ + split: val + class_name: ${class_name} + transforms: ${dataset.test_transform} + blur: ${blur} + +test_dataset: + _partial_: true + _target_: data.data.Contrastiveosv5m + path: ${data_dir}/osv5m/ + split: test + class_name: ${class_name} + transforms: ${dataset.test_transform} + blur: ${blur} diff --git a/configs/dataset/osv5m_contrastive_best.yaml b/configs/dataset/osv5m_contrastive_best.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb9fc4bae3b46442844f0032ed41261de3dfc8e2 --- /dev/null +++ b/configs/dataset/osv5m_contrastive_best.yaml @@ -0,0 +1,37 @@ +defaults: + - train_transform: fast_clip + - test_transform: fast_clip + - _self_ + +name: osv5m +global_batch_size: 256 + +train_dataset: + _partial_: true + _target_: data.data.Contrastiveosv5m + path: ${data_dir}/osv5m/ + split: train + class_name: ${class_name} + transforms: ${dataset.train_transform} + 
class_name2: 'unique_region' + blur: ${blur} + +val_dataset: + _partial_: true + _target_: data.data.Contrastiveosv5m + path: ${data_dir}/osv5m/ + split: val + class_name: ${class_name} + transforms: ${dataset.test_transform} + class_name2: 'unique_region' + blur: ${blur} + +test_dataset: + _partial_: true + _target_: data.data.Contrastiveosv5m + path: ${data_dir}/osv5m/ + split: test + class_name: ${class_name} + transforms: ${dataset.test_transform} + class_name2: 'unique_region' + blur: ${blur} \ No newline at end of file diff --git a/configs/dataset/osv5m_text_contrastive.yaml b/configs/dataset/osv5m_text_contrastive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..407f3fd6d8f6f7b3076b753b95304d0dba95953d --- /dev/null +++ b/configs/dataset/osv5m_text_contrastive.yaml @@ -0,0 +1,34 @@ +defaults: + - train_transform: fast_clip + - test_transform: fast_clip + - _self_ + +name: osv5m +global_batch_size: 256 + +train_dataset: + _partial_: true + _target_: data.data.TextContrastiveosv5m + path: ${data_dir}/osv5m/ + split: train + class_name: ${class_name} + transforms: ${dataset.train_transform} + blur: ${blur} + +val_dataset: + _partial_: true + _target_: data.data.TextContrastiveosv5m + path: ${data_dir}/osv5m/ + split: val + class_name: ${class_name} + transforms: ${dataset.test_transform} + blur: ${blur} + +test_dataset: + _partial_: true + _target_: data.data.TextContrastiveosv5m + path: ${data_dir}/osv5m/ + split: test + class_name: ${class_name} + transforms: ${dataset.test_transform} + blur: ${blur} diff --git a/configs/dataset/test_transform/center_crop.yaml b/configs/dataset/test_transform/center_crop.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee4cbad1b36738048774feedb11a83a616fd222c --- /dev/null +++ b/configs/dataset/test_transform/center_crop.yaml @@ -0,0 +1,12 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.ToTensor + - _target_: utils.image_processing.CenterCrop + ratio: "1:1" + - _target_: torchvision.transforms.Resize + size: ${dataset.img_resolution} + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.Normalize + mean: 0.5 + std: 0.5 diff --git a/configs/dataset/test_transform/clip.yaml b/configs/dataset/test_transform/clip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b95064021c7b01515892be99de05a5da6fbbb10 --- /dev/null +++ b/configs/dataset/test_transform/clip.yaml @@ -0,0 +1,2 @@ +_target_: data.transforms.ClipTransform +split: val diff --git a/configs/dataset/test_transform/fast_clip.yaml b/configs/dataset/test_transform/fast_clip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c59fc7d9b1e9ab7a9ac93a0fb95f33c5f009b05 --- /dev/null +++ b/configs/dataset/test_transform/fast_clip.yaml @@ -0,0 +1,12 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.Resize + size: 224 + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.CenterCrop + size: 224 + - _target_: torchvision.transforms.ToTensor + - _target_: torchvision.transforms.Normalize + mean: [0.48145466, 0.4578275, 0.40821073] + std: [0.26862954, 0.26130258, 0.27577711] diff --git a/configs/dataset/test_transform/fast_resnet.yaml b/configs/dataset/test_transform/fast_resnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c4e0c6eddd05ba839d8c44aaa85ac9e62da2ba7 --- /dev/null +++ b/configs/dataset/test_transform/fast_resnet.yaml @@ -0,0 +1,12 @@ +_target_: 
torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.Resize + size: 224 + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.CenterCrop + size: 224 + - _target_: torchvision.transforms.ToTensor + - _target_: torchvision.transforms.Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] \ No newline at end of file diff --git a/configs/dataset/test_transform/none.yaml b/configs/dataset/test_transform/none.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35a7d36b94c3f9c42e595bbcd7742190ec42058d --- /dev/null +++ b/configs/dataset/test_transform/none.yaml @@ -0,0 +1,6 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.ToTensor + - _target_: torchvision.transforms.Normalize + mean: 0.5 + std: 0.5 diff --git a/configs/dataset/train_transform/augmentation.yaml b/configs/dataset/train_transform/augmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a44879cd406d15cbc0b31070f00d062290457f9 --- /dev/null +++ b/configs/dataset/train_transform/augmentation.yaml @@ -0,0 +1,85 @@ +_target_: data.augmentation.ImageAugmentation +names: "standard_augmentation,geometric_augmentation,clip_transform" + +# always apply clip_transform at the end +clip_transform: + _target_: torchvision.transforms.Compose + transforms: + - _target_: torchvision.transforms.Resize + size: 224 + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.CenterCrop + size: 224 + - _target_: torchvision.transforms.ToTensor + - _target_: torchvision.transforms.Normalize + mean: [0.48145466, 0.4578275, 0.40821073] + std: [0.26862954, 0.26130258, 0.27577711] + +standard_augmentation: + _target_: data.augmentation.StandardAugmentation + # by default, we apply all augmentation methods + names: "brightness,contrast,sharpness,color,blur,gaussian_noise" + + # random PIL brightness + brightness: + _target_: data.augmentation.PillowBrightness + p: 0.2 + factor_interval: [0.5, 1.5] + + # random PIL contrast + contrast: + _target_: data.augmentation.PillowContrast + p: 0.2 + factor_interval: [0.3, 3] + + # random PIL sharpness + sharpness: + _target_: data.augmentation.PillowSharpness + p: 0.2 + factor_interval: [0.5, 30.0] + + # random PIL color + color: + _target_: data.augmentation.PillowColor + p: 0.2 + factor_interval: [0.0, 2.0] + + # random PIL blur + blur: + _target_: data.augmentation.PillowBlur + p: 0.2 + factor_interval: [1, 2] + + # random numpy gaussian noise + gaussian_noise: + _target_: data.augmentation.NumpyGaussianNoise + p: 0.2 + factor_interval: [0.1, 0.04] + +geometric_augmentation: + _target_: data.augmentation.GeometricAugmentation + # by default, we apply all augmentation methods + names: "random_rotation,random_resized_crop,random_horizontal_flip" + + # random rotation + random_rotation: + _target_: torchvision.transforms.RandomRotation + degrees: [-15, 15] + + # random crop + random_resized_crop: + _target_: torchvision.transforms.RandomResizedCrop + scale: [0.5, 1.0] + ratio: [0.9, 1.1] + size: 224 + + # random horizontal flip + random_horizontal_flip: + _target_: torchvision.transforms.RandomHorizontalFlip + p: 0.5 + + # random vertical flip + random_vertical_flip: + _target_: torchvision.transforms.RandomVerticalFlip + p: 0.5 diff --git a/configs/dataset/train_transform/center_crop.yaml b/configs/dataset/train_transform/center_crop.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f29b4f6055d2df6491a90206318f8a0bb4b836b ---
/dev/null +++ b/configs/dataset/train_transform/center_crop.yaml @@ -0,0 +1,14 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.ToTensor + - _target_: utils.image_processing.CenterCrop + ratio: "1:1" + - _target_: torchvision.transforms.Resize + size: ${dataset.img_resolution} + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.RandomHorizontalFlip + p: 0.5 + - _target_: torchvision.transforms.Normalize + mean: 0.5 + std: 0.5 diff --git a/configs/dataset/train_transform/clip.yaml b/configs/dataset/train_transform/clip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b95064021c7b01515892be99de05a5da6fbbb10 --- /dev/null +++ b/configs/dataset/train_transform/clip.yaml @@ -0,0 +1,2 @@ +_target_: data.transforms.ClipTransform +split: val diff --git a/configs/dataset/train_transform/fast_clip.yaml b/configs/dataset/train_transform/fast_clip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c59fc7d9b1e9ab7a9ac93a0fb95f33c5f009b05 --- /dev/null +++ b/configs/dataset/train_transform/fast_clip.yaml @@ -0,0 +1,12 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.Resize + size: 224 + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.CenterCrop + size: 224 + - _target_: torchvision.transforms.ToTensor + - _target_: torchvision.transforms.Normalize + mean: [0.48145466, 0.4578275, 0.40821073] + std: [0.26862954, 0.26130258, 0.27577711] diff --git a/configs/dataset/train_transform/fast_resnet.yaml b/configs/dataset/train_transform/fast_resnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c4e0c6eddd05ba839d8c44aaa85ac9e62da2ba7 --- /dev/null +++ b/configs/dataset/train_transform/fast_resnet.yaml @@ -0,0 +1,12 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.Resize + size: 224 + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.CenterCrop + size: 224 + - _target_: torchvision.transforms.ToTensor + - _target_: torchvision.transforms.Normalize + mean: [0.485 ,0.456 ,0.406] + std: [0.229, 0.224, 0.225] \ No newline at end of file diff --git a/configs/dataset/train_transform/none.yaml b/configs/dataset/train_transform/none.yaml new file mode 100644 index 0000000000000000000000000000000000000000..235b00288f91bf207e8b21feae6a93dba7cd9120 --- /dev/null +++ b/configs/dataset/train_transform/none.yaml @@ -0,0 +1,7 @@ +_target_: torchvision.transforms.Compose +transforms: + - _target_: torchvision.transforms.Resize + size: 224 + interpolation: 3 + antialias: true + - _target_: torchvision.transforms.ToTensor diff --git a/configs/exp/DinoV2.yaml b/configs/exp/DinoV2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc244acb79f20a52a05eb8b9684c6c038d932bfa --- /dev/null +++ b/configs/exp/DinoV2.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: dinov2_vitl14 + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/ResNet.yaml b/configs/exp/ResNet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fae93795a757e831ad75809950080698b5085c76 --- /dev/null +++ b/configs/exp/ResNet.yaml @@ -0,0 +1,21 @@ +# @package _global_ + +defaults: + - override /model: regression + - 
override /dataset/test_transform: fast_resnet + - override /dataset/train_transform: fast_resnet + - override /model.network.mid: mlp_resnet + - override /model/network/backbone: ResNet50 + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/base_model.yaml b/configs/exp/base_model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c526707801a66df05fa11ceec9bd5347cf50406 --- /dev/null +++ b/configs/exp/base_model.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: openclip_B_32 + - _self_ + +model: + name: base_model + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/best_model.yaml b/configs/exp/best_model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a67b491f3e47020257a8eea6ca6367828dc26d2d --- /dev/null +++ b/configs/exp/best_model.yaml @@ -0,0 +1,25 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive_best + - override /model: hybrid + - override /model/network: best_backbone + - override /model/network/backbone: clip_L_14_DataComp + - override /model/network/mid: mlp_hybrid + - override /model/loss: best_model + - _self_ + +class_name: 'quadtree_10_1000' +is_baseline: false +max_epochs: 30 + +model: + name: best_model + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/classification_area.yaml b/configs/exp/classification_area.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad4fa7f2fe4f68cedf40bbc26064238c861f1ac0 --- /dev/null +++ b/configs/exp/classification_area.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - _self_ + +class_name: 'area' +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/classification_cell.yaml b/configs/exp/classification_cell.yaml new file mode 100644 index 0000000000000000000000000000000000000000..060116385fbde15ada31b94612b9200ab6abeb2b --- /dev/null +++ b/configs/exp/classification_cell.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - _self_ + +class_name: quadtree_10_1000 +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/classification_cell_hier.yaml b/configs/exp/classification_cell_hier.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60d9b4d08b48fc1dcbec89149f3ff2726b31dc0c --- /dev/null +++ b/configs/exp/classification_cell_hier.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - override /model/loss: cls_hier_quad + - _self_ + +class_name: quadtree_10_1000 +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git 
a/configs/exp/classification_city.yaml b/configs/exp/classification_city.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb4ffa233649b95e1b32ec63cd2ee36f5665c6ed --- /dev/null +++ b/configs/exp/classification_city.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - _self_ + +class_name: 'city' +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/classification_city_hier.yaml b/configs/exp/classification_city_hier.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6239da3a6759956df37cb2839a8ba8b9661adfc4 --- /dev/null +++ b/configs/exp/classification_city_hier.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - override /model/loss: cls_hier + - _self_ + +class_name: 'city' +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/classification_country.yaml b/configs/exp/classification_country.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5eba63e8fbcb6301b42929206d34570aa0659058 --- /dev/null +++ b/configs/exp/classification_country.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - _self_ + +class_name: 'country' +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/classification_region copy.yaml b/configs/exp/classification_region copy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b47d706b4c757d6f552221909e7fa135b9dfed96 --- /dev/null +++ b/configs/exp/classification_region copy.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - _self_ + +class_name: 'region' +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/classification_region.yaml b/configs/exp/classification_region.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b47d706b4c757d6f552221909e7fa135b9dfed96 --- /dev/null +++ b/configs/exp/classification_region.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: classification + - override /model/network/backbone: openclip_B_32 + - _self_ + +class_name: 'region' +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 15 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/clip_L_14_DataComp.yaml b/configs/exp/clip_L_14_DataComp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20c0d9805e90352b1611a75cb5ee2c4f59fe6d3b --- /dev/null +++ b/configs/exp/clip_L_14_DataComp.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: clip_L_14_DataComp + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/clip_L_14_Laion.yaml 
b/configs/exp/clip_L_14_Laion.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50015a29e6646b7d1067d76c0eacd35b9050083d --- /dev/null +++ b/configs/exp/clip_L_14_Laion.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: openclip_L_14 + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/clip_L_14_OpenAI.yaml b/configs/exp/clip_L_14_OpenAI.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b71c7caab5f2c92b23b9eb4f5bed061ec3b74f2d --- /dev/null +++ b/configs/exp/clip_L_14_OpenAI.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: clip_L_14 + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/clip_bigG_14_Laion.yaml b/configs/exp/clip_bigG_14_Laion.yaml new file mode 100644 index 0000000000000000000000000000000000000000..340cd07e004084e8fb577d97bfb8326367804dd9 --- /dev/null +++ b/configs/exp/clip_bigG_14_Laion.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: openclip_bigG_14 + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/contrastive_area.yaml b/configs/exp/contrastive_area.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92f34df2ad3af9c64827337993be9163c137e994 --- /dev/null +++ b/configs/exp/contrastive_area.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive + - override /model: regression + - override /model/network: contrastive_unfrozen_backbone + - override /model/network/backbone: openclip_B_32 + - override /model/loss: contrastive + - _self_ + +model: + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +class_name: area +is_baseline: false +max_epochs: 30 diff --git a/configs/exp/contrastive_cell.yaml b/configs/exp/contrastive_cell.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c5be9e29a2227b55839af173c2b7761a12488fc --- /dev/null +++ b/configs/exp/contrastive_cell.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive + - override /model: regression + - override /model/network: contrastive_unfrozen_backbone + - override /model/network/backbone: openclip_B_32 + - override /model/loss: contrastive + - _self_ + +model: + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +class_name: quadtree_10_1000 +is_baseline: false +max_epochs: 30 diff --git a/configs/exp/contrastive_city.yaml b/configs/exp/contrastive_city.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c9524b16486736eef6a4b7a606d8d74f783e900 --- /dev/null +++ b/configs/exp/contrastive_city.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive + - override /model: regression + - override /model/network: contrastive_unfrozen_backbone + - override /model/network/backbone: openclip_B_32 + - override /model/loss: contrastive + - _self_ + +model: + 
optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +class_name: city +is_baseline: false +max_epochs: 30 diff --git a/configs/exp/contrastive_country.yaml b/configs/exp/contrastive_country.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f973f805d406c249ff25f1d3e14525e60eaf891 --- /dev/null +++ b/configs/exp/contrastive_country.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive + - override /model: regression + - override /model/network: contrastive_unfrozen_backbone + - override /model/network/backbone: openclip_B_32 + - override /model/loss: contrastive + - _self_ + +model: + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +class_name: country +is_baseline: false +max_epochs: 30 diff --git a/configs/exp/contrastive_region.yaml b/configs/exp/contrastive_region.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e469d19d359d33436f8f27d131c36fc574a83c32 --- /dev/null +++ b/configs/exp/contrastive_region.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive + - override /model: regression + - override /model/network: contrastive_unfrozen_backbone + - override /model/network/backbone: openclip_B_32 + - override /model/loss: contrastive + - _self_ + +model: + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +class_name: region +is_baseline: false +max_epochs: 30 diff --git a/configs/exp/contrastive_text.yaml b/configs/exp/contrastive_text.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f881237de30a6a5ded481ea2e25b14924955a759 --- /dev/null +++ b/configs/exp/contrastive_text.yaml @@ -0,0 +1,22 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_text_contrastive + - override /model: text_tuning + - override /model/network/backbone: openclip_B_32 + - _self_ + +model: + network: + backbone: + instance: + _target_: models.networks.backbones.CLIPText + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +class_name: city +text_tuning: True +max_epochs: 30 diff --git a/configs/exp/eval_best_model.yaml b/configs/exp/eval_best_model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5d0ac9c8234c42858b2f9c20b224e0b6b98ff99 --- /dev/null +++ b/configs/exp/eval_best_model.yaml @@ -0,0 +1,29 @@ +# @package _global_ + +defaults: + - override /dataset: osv5m_contrastive_best + - override /model: hybrid + - override /model/network: best_backbone + - override /model/network/backbone: clip_L_14_DataComp + - override /model/network/mid: mlp_hybrid + - _self_ + +class_name: 'quadtree_10_1000' +is_baseline: false +max_epochs: 30 +mode: 'eval' + +model: + name: best_model + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + network: + head: + instance: + quadtree_path: ${root_dir}/quadtree_10_1000.csv + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/fine_tuning.yaml b/configs/exp/fine_tuning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9582b23461c899929ee1209c6f5e671c6e90c600 --- /dev/null +++ b/configs/exp/fine_tuning.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network: unfrozen_backbone + - override /model/network/backbone: openclip_B_32 + - _self_ + +model: + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +is_baseline: false +max_epochs: 30 + +dataset: 
+ global_batch_size: 2048 diff --git a/configs/exp/hybrid.yaml b/configs/exp/hybrid.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a646b76a616b38bd6919311bbf005f7b7de5a172 --- /dev/null +++ b/configs/exp/hybrid.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /model: hybrid + - override /model/network/backbone: openclip_B_32 + - override /model/network/mid: mlp_hybrid + - _self_ + +class_name: 'quadtree_10_1000' +is_baseline: false +max_epochs: 30 + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/last_block.yaml b/configs/exp/last_block.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2cbcfab4cbd0db7689d7849644557b3c8187f1d --- /dev/null +++ b/configs/exp/last_block.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network: last_block_backbone + - override /model/network/backbone: openclip_B_32 + - _self_ + +model: + optimizer: + optim: + lr: 2e-4 + weight_decay: 0.0001 + backbone_lr: 2e-5 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/lora-32.yaml b/configs/exp/lora-32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9c651eccfcfc2d54bb7f972c5b3e749a0483dcb --- /dev/null +++ b/configs/exp/lora-32.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network: lora_backbone + - override /model/network/backbone: openclip_B_32 + - _self_ + +is_baseline: false + +lora_r: 32 +lora_alpha: 256 +lora_dropout: 0.1 +lora_bias: lora_only +max_epochs: 30 + +dataset: + global_batch_size: 2048 diff --git a/configs/exp/metaclip.yaml b/configs/exp/metaclip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c2504e9d496d12b3361a5aa94c9611c8b100414 --- /dev/null +++ b/configs/exp/metaclip.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: metaclip + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/random.yaml b/configs/exp/random.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdf7a50d0ab1212491766736f1f8a39d02a5d2e8 --- /dev/null +++ b/configs/exp/random.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +defaults: + - override /model: random + - _self_ + +class_name: 'country' +is_baseline: false +max_epochs: 1 +mode: eval diff --git a/configs/exp/reg_sincos.yaml b/configs/exp/reg_sincos.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de3f1e19ebdc1c6079a6c45d590af307a4626cc1 --- /dev/null +++ b/configs/exp/reg_sincos.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +defaults: + - override /model: regression + - override /model/network/backbone: openclip_B_32 + - override /model/network/head: regression_angle + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/exp/streetclip.yaml b/configs/exp/streetclip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b47889d5a887478526d7ad026a95d4b153f5f07 --- /dev/null +++ b/configs/exp/streetclip.yaml @@ -0,0 +1,19 @@ +# @package 
_global_ + +defaults: + - override /model: regression + - override /model/network/backbone: streetclip + - _self_ + +model: + optimizer: + optim: + lr: 0.0002 + weight_decay: 0.0001 + +is_baseline: false +max_epochs: 30 +streetclip: True + +dataset: + global_batch_size: 2048 \ No newline at end of file diff --git a/configs/model/baselines.yaml b/configs/model/baselines.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af46e96c9564a4ec554e2b57be192e327a08a643 --- /dev/null +++ b/configs/model/baselines.yaml @@ -0,0 +1,10 @@ +defaults: + - optimizer: none + - lr_scheduler: none + - network: baselines + - loss: mix + - _self_ + +name: Baseline +aux_data: ${aux_data} +text_tuning: ${text_tuning} diff --git a/configs/model/classification.yaml b/configs/model/classification.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1646ea39a79bccb878a70c6e62cf9cc5543e9fcc --- /dev/null +++ b/configs/model/classification.yaml @@ -0,0 +1,11 @@ +defaults: + - optimizer: adam + - lr_scheduler: none + - network: frozen_backbone + - loss: cls + - override network/head: classification + - _self_ + +name: Classification +aux_data: ${aux_data} +text_tuning: ${text_tuning} diff --git a/configs/model/hybrid.yaml b/configs/model/hybrid.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65401813d48a739db759610bac8e00c7f75f1794 --- /dev/null +++ b/configs/model/hybrid.yaml @@ -0,0 +1,10 @@ +defaults: + - optimizer: adam + - lr_scheduler: none + - network: hybrid_frozen_backbone + - loss: hybrid + - override network/head: hybrid + - _self_ + +name: Hybrid +text_tuning: ${text_tuning} diff --git a/configs/model/hybrid_sharedreg.yaml b/configs/model/hybrid_sharedreg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc17ba5c4e6ad5f1ed2198996b3c9abc146606c4 --- /dev/null +++ b/configs/model/hybrid_sharedreg.yaml @@ -0,0 +1,10 @@ +defaults: + - optimizer: adam + - lr_scheduler: none + - network: hybrid_frozen_backbone + - loss: hybrid + - override network/head: hybrid_sharedreg + - _self_ + +name: SharedHybrid +text_tuning: ${text_tuning} diff --git a/configs/model/loss/aux_loss.yaml b/configs/model/loss/aux_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04f7a4539c641db26bd9a21d1cc5f9396b22debe --- /dev/null +++ b/configs/model/loss/aux_loss.yaml @@ -0,0 +1,11 @@ +_target_: models.losses.Losses +mix: { + haversine : 0.0, + L1 : 1.0, + land_cover: 1.0, + drive_side: 1.0, + climate: 1.0, + soil: 1.0, + dist_sea: 1.0, +} +aux_data: ${aux_data} diff --git a/configs/model/loss/best_model.yaml b/configs/model/loss/best_model.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02ae7172aaf89f57f0e8aaf766557b2f129c9c18 --- /dev/null +++ b/configs/model/loss/best_model.yaml @@ -0,0 +1,8 @@ +_target_: models.losses.Losses +mix: { + region_mil : 1.0, + hier_quad : 1.0, + l2_hybrid : 1.0, +} +path: ${data_dir} +num_devices: ${computer.devices} diff --git a/configs/model/loss/cls.yaml b/configs/model/loss/cls.yaml new file mode 100644 index 0000000000000000000000000000000000000000..552d89f149cc38f63a17ff1a515ee43ff9f46ecd --- /dev/null +++ b/configs/model/loss/cls.yaml @@ -0,0 +1,4 @@ +_target_: models.losses.Losses +mix: { + CrossEntropy : 1.0, +} diff --git a/configs/model/loss/cls_hier.yaml b/configs/model/loss/cls_hier.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca9b5f0f2212bfa79b23891cd1b8504455668cce --- /dev/null +++ b/configs/model/loss/cls_hier.yaml @@ 
-0,0 +1,5 @@ +_target_: models.losses.Losses +mix: { + hierarchical : 1.0, +} +path: ${data_dir} diff --git a/configs/model/loss/cls_hier_quad.yaml b/configs/model/loss/cls_hier_quad.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b6e4678f20bcaa4e9671698451c264189a6f36c --- /dev/null +++ b/configs/model/loss/cls_hier_quad.yaml @@ -0,0 +1,5 @@ +_target_: models.losses.Losses +mix: { + hier_quad : 1.0, +} +path: ${data_dir} diff --git a/configs/model/loss/contrastive.yaml b/configs/model/loss/contrastive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a09d89956af3cf4ff240c6959d2034308c14e61 --- /dev/null +++ b/configs/model/loss/contrastive.yaml @@ -0,0 +1,7 @@ +_target_: models.losses.Losses +mix: { + MIL-NCE : 1.0, + #infoNCE : 1.0, + L1 : 1.0, +} +num_devices: ${computer.devices} diff --git a/configs/model/loss/contrastive_only.yaml b/configs/model/loss/contrastive_only.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4d973e32fa4106b6c9cf78a68ce8268e5dfb7c9 --- /dev/null +++ b/configs/model/loss/contrastive_only.yaml @@ -0,0 +1,6 @@ +_target_: models.losses.Losses +mix: { + MIL-NCE : 1.0, + #L1 : 1.0, +} +num_devices: ${computer.devices} diff --git a/configs/model/loss/geoguessr.yaml b/configs/model/loss/geoguessr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f40223f6cbae7047f8dc9f571f08a9d3b119e25 --- /dev/null +++ b/configs/model/loss/geoguessr.yaml @@ -0,0 +1,4 @@ +_target_: models.losses.Losses +mix: { + geoguessr : 1.0, +} diff --git a/configs/model/loss/hybrid.yaml b/configs/model/loss/hybrid.yaml new file mode 100644 index 0000000000000000000000000000000000000000..189584e8943cbeb2d2f7e957cd15bec13991a864 --- /dev/null +++ b/configs/model/loss/hybrid.yaml @@ -0,0 +1,6 @@ +_target_: models.losses.Losses +mix: { + crossentropy: 1.0, + #l1 : 1.0, + l2_hybrid : 1.0, +} diff --git a/configs/model/loss/mix.yaml b/configs/model/loss/mix.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e52a9dda0238b95e134927c78a4c40490dc8d87 --- /dev/null +++ b/configs/model/loss/mix.yaml @@ -0,0 +1,5 @@ +_target_: models.losses.Losses +mix: { + crossentropy: 1.0, + l1 : 1.0, +} diff --git a/configs/model/loss/reg.yaml b/configs/model/loss/reg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9acdfbded1d94cbea20bf7b51d13ae7500a302df --- /dev/null +++ b/configs/model/loss/reg.yaml @@ -0,0 +1,5 @@ +_target_: models.losses.Losses +mix: { + haversine : 0.0, + L1 : 1.0, +} diff --git a/configs/model/loss/text_tuning.yaml b/configs/model/loss/text_tuning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..259d09ba92574a969f494b0787dd3460eb51ee9e --- /dev/null +++ b/configs/model/loss/text_tuning.yaml @@ -0,0 +1,6 @@ +_target_: models.losses.Losses +mix: { + Text-NCE : 1.0, + L1 : 1.0, +} +num_devices: ${computer.devices} diff --git a/configs/model/lr_scheduler/none.yaml b/configs/model/lr_scheduler/none.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3332f01b219d660201225466878cd314302e263 --- /dev/null +++ b/configs/model/lr_scheduler/none.yaml @@ -0,0 +1,3 @@ +_partial_: true +_target_: utils.lr_scheduler.WarmupLR +warmup_steps: 0 diff --git a/configs/model/lr_scheduler/warmup.yaml b/configs/model/lr_scheduler/warmup.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c5fb91b182270b9ad3947aac405f413af092fba --- /dev/null +++ b/configs/model/lr_scheduler/warmup.yaml @@ -0,0 +1,3 @@ 
+_partial_: true +_target_: utils.lr_scheduler.WarmupLR +warmup_steps: 20000 diff --git a/configs/model/lr_scheduler/warmup_cosine_decay.yaml b/configs/model/lr_scheduler/warmup_cosine_decay.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17658f3c6db1c3197be06e79768341e223e549fd --- /dev/null +++ b/configs/model/lr_scheduler/warmup_cosine_decay.yaml @@ -0,0 +1,4 @@ +_partial_: true +_target_: utils.lr_scheduler.WarmupCosineDecayLR +warmup_steps: 20000 +total_steps: ${trainer.max_steps} diff --git a/configs/model/multi.yaml b/configs/model/multi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b17304cb927da46f78e498702d913bb24e560917 --- /dev/null +++ b/configs/model/multi.yaml @@ -0,0 +1,9 @@ +defaults: + - optimizer: adam + - lr_scheduler: none + - network: multi_task + - loss: aux_loss + - _self_ + +name: Multi_task +text_tuning: ${text_tuning} diff --git a/configs/model/network/backbone/ResNet50.yaml b/configs/model/network/backbone/ResNet50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84d03e71ef40798dcf07da2dc97268749dd85e30 --- /dev/null +++ b/configs/model/network/backbone/ResNet50.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.ResNet + path: microsoft/resnet-50 + +output_dim: 2048 \ No newline at end of file diff --git a/configs/model/network/backbone/clip_B_32.yaml b/configs/model/network/backbone/clip_B_32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e9dad30a064a3ca219e85c39e73430b91d70ea4 --- /dev/null +++ b/configs/model/network/backbone/clip_B_32.yaml @@ -0,0 +1,6 @@ +instance: + _target_: models.networks.backbones.CLIP + path: openai/clip-vit-base-patch32 + + +output_dim: 768 diff --git a/configs/model/network/backbone/clip_L_14.yaml b/configs/model/network/backbone/clip_L_14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fb45ebc159a7c62fb3d1fcfc858e677bfec92df --- /dev/null +++ b/configs/model/network/backbone/clip_L_14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: openai/clip-vit-large-patch14 + +output_dim: 1024 diff --git a/configs/model/network/backbone/clip_L_14_DataComp.yaml b/configs/model/network/backbone/clip_L_14_DataComp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9785bd6540a55282072028944f67c15534aa49c2 --- /dev/null +++ b/configs/model/network/backbone/clip_L_14_DataComp.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K + +output_dim: 1024 diff --git a/configs/model/network/backbone/dinov2_vitb14.yaml b/configs/model/network/backbone/dinov2_vitb14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6e948cacc56de16738a9f76ac523c8a446f7231 --- /dev/null +++ b/configs/model/network/backbone/dinov2_vitb14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.DINOv2 + tag: dinov2_vitb14 + +output_dim: 768 diff --git a/configs/model/network/backbone/dinov2_vitg14.yaml b/configs/model/network/backbone/dinov2_vitg14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2bec9f74c484025b94600ff15da5244698f6aba --- /dev/null +++ b/configs/model/network/backbone/dinov2_vitg14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.DINOv2 + tag: dinov2_vitg14 + +output_dim: 1536 diff --git a/configs/model/network/backbone/dinov2_vitl14.yaml b/configs/model/network/backbone/dinov2_vitl14.yaml new file mode 
100644 index 0000000000000000000000000000000000000000..8a562f1cc1f0937970697235221d15c51f33270c --- /dev/null +++ b/configs/model/network/backbone/dinov2_vitl14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.DINOv2 + tag: dinov2_vitl14 + +output_dim: 1024 diff --git a/configs/model/network/backbone/dinov2_vits14.yaml b/configs/model/network/backbone/dinov2_vits14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b2d2d4525826dc0986f5d403569398097b0bb0f --- /dev/null +++ b/configs/model/network/backbone/dinov2_vits14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.DINOv2 + tag: dinov2_vits14 + +output_dim: 384 diff --git a/configs/model/network/backbone/identity.yaml b/configs/model/network/backbone/identity.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad8a48eb52d85f0d680e0f04bfbe8feb65bfd8f7 --- /dev/null +++ b/configs/model/network/backbone/identity.yaml @@ -0,0 +1,2 @@ +instance: + _target_: torch.nn.Identity diff --git a/configs/model/network/backbone/metaclip.yaml b/configs/model/network/backbone/metaclip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb1ded120198c9038c8df4d98823d4402cdefecf --- /dev/null +++ b/configs/model/network/backbone/metaclip.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: facebook/metaclip-l14-fullcc2.5b + +output_dim: 1024 diff --git a/configs/model/network/backbone/openclip_B_32.yaml b/configs/model/network/backbone/openclip_B_32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9e2c7a15785f11c68653e7c5cce864f056679e1 --- /dev/null +++ b/configs/model/network/backbone/openclip_B_32.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: laion/CLIP-ViT-B-32-laion2B-s34B-b79K + +output_dim: 768 diff --git a/configs/model/network/backbone/openclip_H_14.yaml b/configs/model/network/backbone/openclip_H_14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69d510cc19b72921701414f4e076257831898f84 --- /dev/null +++ b/configs/model/network/backbone/openclip_H_14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K + +output_dim: 1280 diff --git a/configs/model/network/backbone/openclip_L_14.yaml b/configs/model/network/backbone/openclip_L_14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf01cdab4ffdc3ca9241eecd4e727262b5001af9 --- /dev/null +++ b/configs/model/network/backbone/openclip_L_14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: laion/CLIP-ViT-L-14-laion2B-s32B-b82K + +output_dim: 1024 diff --git a/configs/model/network/backbone/openclip_bigG_14.yaml b/configs/model/network/backbone/openclip_bigG_14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2fe60509d7da3834811b7be6be04eef3e2225ff3 --- /dev/null +++ b/configs/model/network/backbone/openclip_bigG_14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: laion/CLIP-ViT-bigG-14-laion2B-39B-b160k + +output_dim: 1664 diff --git a/configs/model/network/backbone/openclip_g_14.yaml b/configs/model/network/backbone/openclip_g_14.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d96c4f9a8a4d3b141f3dcdf5b65946a6c333f0f0 --- /dev/null +++ b/configs/model/network/backbone/openclip_g_14.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: 
laion/CLIP-ViT-g-14-laion2B-s12B-b42K + +output_dim: 1408 diff --git a/configs/model/network/backbone/scratch_B_32.yaml b/configs/model/network/backbone/scratch_B_32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e79f78928ca01421afcd4f7abadcbc9e42b444fb --- /dev/null +++ b/configs/model/network/backbone/scratch_B_32.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.CLIP + path: '' + +output_dim: 768 diff --git a/configs/model/network/backbone/streetclip.yaml b/configs/model/network/backbone/streetclip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ed65d14a3ed446323ddf53d4f0e36b1e1be726e --- /dev/null +++ b/configs/model/network/backbone/streetclip.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.StreetCLIP + path: geolocal/StreetCLIP + +output_dim: 768 diff --git a/configs/model/network/baselines.yaml b/configs/model/network/baselines.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe376147cad601300e67106022c8e8df7e79d4e4 --- /dev/null +++ b/configs/model/network/baselines.yaml @@ -0,0 +1,8 @@ +defaults: + - head: id_to_gps + +instance: + _target_: models.networks.network.NoFeatureBackbone + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/network/best_backbone.yaml b/configs/model/network/best_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5b40524577606842d785f0a5713420faccadf662 --- /dev/null +++ b/configs/model/network/best_backbone.yaml @@ -0,0 +1,14 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: models.networks.network.ContrastiveHybridUnFrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + mode: ${mode} + +class_name: ${class_name} +root_dir: ${root_dir} diff --git a/configs/model/network/contrastive_frozen_backbone.yaml b/configs/model/network/contrastive_frozen_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2307bbc0b2d0acad41a70a4961eeaceddb08c305 --- /dev/null +++ b/configs/model/network/contrastive_frozen_backbone.yaml @@ -0,0 +1,13 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: models.networks.network.ContrastiveFrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + mode: ${mode} + +class_name: ${class_name} diff --git a/configs/model/network/contrastive_unfrozen_backbone.yaml b/configs/model/network/contrastive_unfrozen_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11698012a5b4a8bf263c152401ac56b26897a0db --- /dev/null +++ b/configs/model/network/contrastive_unfrozen_backbone.yaml @@ -0,0 +1,13 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: models.networks.network.ContrastiveUnFrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + mode: ${mode} + +class_name: ${class_name} diff --git a/configs/model/network/frozen_backbone.yaml b/configs/model/network/frozen_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b76f49694a4452f3f27c2f5832df8952cbd54ab --- /dev/null +++ b/configs/model/network/frozen_backbone.yaml @@ -0,0 +1,12 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: 
models.networks.network.FrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/network/head/classification.yaml b/configs/model/network/head/classification.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ab1ba7e92b0e2dc93a631f12ae7f692d98fdfa4 --- /dev/null +++ b/configs/model/network/head/classification.yaml @@ -0,0 +1,7 @@ +target_key: label +final_dim: ${num_classes} +instance: + _target_: models.networks.heads.classification.ClassificationHead + id_to_gps: + _target_: models.networks.heads.id_to_gps.IdToGPS + id_to_gps: ${data_dir}/index_to_gps_unique_${class_name}.pt diff --git a/configs/model/network/head/hybrid.yaml b/configs/model/network/head/hybrid.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aba5cedcf2811f56da364d77facce878189c18a3 --- /dev/null +++ b/configs/model/network/head/hybrid.yaml @@ -0,0 +1,8 @@ +target_key: label +final_dim: ${eval:'${num_classes}*3'} +instance: + _target_: models.networks.heads.hybrid.HybridHeadCentroid + final_dim: ${num_classes} + use_tanh: true + scale_tanh: 1.2 + quadtree_path: ${data_dir}/${class_name}.csv diff --git a/configs/model/network/head/hybrid_sharedreg.yaml b/configs/model/network/head/hybrid_sharedreg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..abd9a10de73c6cd5f96c6854d15bab1cadc20b57 --- /dev/null +++ b/configs/model/network/head/hybrid_sharedreg.yaml @@ -0,0 +1,7 @@ +final_dim: ${eval:'${num_classes}+2'} + +instance: + _target_: models.networks.heads.hybrid.SharedHybridHead + +defaults: + - hybrid diff --git a/configs/model/network/head/id_to_gps.yaml b/configs/model/network/head/id_to_gps.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e0855ed242021b42f23bf8d5c64449c01da3ae5 --- /dev/null +++ b/configs/model/network/head/id_to_gps.yaml @@ -0,0 +1,5 @@ +target_key: gps +final_dim: ${num_classes} +instance: + _target_: models.networks.heads.id_to_gps.IdToGPS + id_to_gps: ${data_dir}/index_to_gps_${class_name}.pt diff --git a/configs/model/network/head/multi_task.yaml b/configs/model/network/head/multi_task.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72b006d74e58be16fb5c2b18b415029e5bd32477 --- /dev/null +++ b/configs/model/network/head/multi_task.yaml @@ -0,0 +1,6 @@ +target_key: gps +final_dim: 2 +instance: + _target_: models.networks.heads.auxilliary.AuxHead + aux_data: ${aux_data} + use_tanh: true diff --git a/configs/model/network/head/random.yaml b/configs/model/network/head/random.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e09984cae75539d697d8f714e2960b6facbcef4 --- /dev/null +++ b/configs/model/network/head/random.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.heads.random.Random + num_output: 2 + +target_key: gps diff --git a/configs/model/network/head/random_class.yaml b/configs/model/network/head/random_class.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d40579b0070c3c598137e4917a15df127eaeba41 --- /dev/null +++ b/configs/model/network/head/random_class.yaml @@ -0,0 +1,3 @@ +instance: + _target_: models.models.networks.random.Random + num_output: ${num_classes} diff --git a/configs/model/network/head/random_coords.yaml b/configs/model/network/head/random_coords.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3a6912ed80ae5524b49375c25a2daf476ed08c1a --- /dev/null +++ 
b/configs/model/network/head/random_coords.yaml @@ -0,0 +1,3 @@ +instance: + _target_: models.networks.heads.random.RandomCoords + coords_path: ${dataset.train_dataset.path}/train/train.csv diff --git a/configs/model/network/head/regression.yaml b/configs/model/network/head/regression.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bbc1bfc389e799c57d2ce090e2e3d6aeab776192 --- /dev/null +++ b/configs/model/network/head/regression.yaml @@ -0,0 +1,5 @@ +target_key: gps +final_dim: 2 +instance: + _target_: models.networks.heads.regression.RegressionHead + use_tanh: true diff --git a/configs/model/network/head/regression_angle.yaml b/configs/model/network/head/regression_angle.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1edd6c5794619a35403d772d3f53882c9a3ff178 --- /dev/null +++ b/configs/model/network/head/regression_angle.yaml @@ -0,0 +1,4 @@ +target_key: gps +final_dim: 4 +instance: + _target_: models.networks.heads.regression.RegressionHeadAngle diff --git a/configs/model/network/hybrid_frozen_backbone.yaml b/configs/model/network/hybrid_frozen_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9adbe44c892691383a53ff131c4eb9f6db2fe0c --- /dev/null +++ b/configs/model/network/hybrid_frozen_backbone.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.network.HybridFrozenBackbone + +defaults: + - frozen_backbone diff --git a/configs/model/network/hybrid_unfrozen_backbone.yaml b/configs/model/network/hybrid_unfrozen_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa65dd02caafa3c6bffa5c59de728336518eaa74 --- /dev/null +++ b/configs/model/network/hybrid_unfrozen_backbone.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.network.HybridUnfrozenBackbone + +defaults: + - unfrozen_backbone diff --git a/configs/model/network/last_block_backbone.yaml b/configs/model/network/last_block_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1b7ac56a0adbee20fc0632e62319f49b8b3a7d1 --- /dev/null +++ b/configs/model/network/last_block_backbone.yaml @@ -0,0 +1,12 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: models.networks.network.UnfrozenPartBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/network/lora_backbone.yaml b/configs/model/network/lora_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf87dcc4d6d16d4d3d6d666ee92f505b5a8224a6 --- /dev/null +++ b/configs/model/network/lora_backbone.yaml @@ -0,0 +1,16 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: models.networks.network.LoraBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + r: ${lora_r} + alpha: ${lora_alpha} + dropout: ${lora_dropout} + bias: ${lora_bias} + +class_name: ${class_name} diff --git a/configs/model/network/mid/activation/gelu.yaml b/configs/model/network/mid/activation/gelu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38f605b3505ed6bc61101a38585cc79b9a915f4a --- /dev/null +++ b/configs/model/network/mid/activation/gelu.yaml @@ -0,0 +1,2 @@ +_target_: torch.nn.GELU +_partial_: true diff --git a/configs/model/network/mid/activation/relu.yaml b/configs/model/network/mid/activation/relu.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..f2018f12ef077af7ca12794130b47e003e100011 --- /dev/null +++ b/configs/model/network/mid/activation/relu.yaml @@ -0,0 +1,2 @@ +_target_: torch.nn.ReLU +_partial_: true diff --git a/configs/model/network/mid/identity.yaml b/configs/model/network/mid/identity.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5994f7f8f2cc87d962fe2d8ef348b5c823478dd --- /dev/null +++ b/configs/model/network/mid/identity.yaml @@ -0,0 +1,2 @@ +instance: + _target_: models.networks.mlp.Identity \ No newline at end of file diff --git a/configs/model/network/mid/mlp.yaml b/configs/model/network/mid/mlp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc1526e6fd55413aee194d3dff758b062637cd4b --- /dev/null +++ b/configs/model/network/mid/mlp.yaml @@ -0,0 +1,13 @@ +defaults: + - activation: gelu + - norm: groupnorm #instance_1d + +instance: + _target_: models.networks.mlp.MLP + initial_dim: ${model.network.backbone.output_dim} + hidden_dim: + - ${model.network.backbone.output_dim} + - 64 + final_dim: ${model.network.head.final_dim} + norm: ${model.network.mid.norm} + activation: ${model.network.mid.activation} diff --git a/configs/model/network/mid/mlp_classif.yaml b/configs/model/network/mid/mlp_classif.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af2a68746947e5c3bfcc789986738d31ce373fe9 --- /dev/null +++ b/configs/model/network/mid/mlp_classif.yaml @@ -0,0 +1,13 @@ +defaults: + - activation: gelu + - norm: groupnorm #instance_1d + +instance: + _target_: models.networks.mlp.MLP + initial_dim: ${model.network.backbone.output_dim} + hidden_dim: + - ${model.network.backbone.output_dim} + - 512 + final_dim: ${model.network.head.final_dim} + norm: ${model.network.mid.norm} + activation: ${model.network.mid.activation} diff --git a/configs/model/network/mid/mlp_hybrid.yaml b/configs/model/network/mid/mlp_hybrid.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3b2a0e5b8d9d1ee3aa756055ce6cb1c7cb9d78f --- /dev/null +++ b/configs/model/network/mid/mlp_hybrid.yaml @@ -0,0 +1,13 @@ +defaults: + - activation: gelu + - norm: groupnorm #instance_1d + +instance: + _target_: models.networks.mlp.MLPCentroid + initial_dim: ${model.network.backbone.output_dim} + hidden_dim: + - ${model.network.backbone.output_dim} + - 512 + final_dim: ${model.network.head.final_dim} + norm: ${model.network.mid.norm} + activation: ${model.network.mid.activation} \ No newline at end of file diff --git a/configs/model/network/mid/mlp_multi.yaml b/configs/model/network/mid/mlp_multi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c95d3d0ce657fc9582a7d1e1709654cde65e7298 --- /dev/null +++ b/configs/model/network/mid/mlp_multi.yaml @@ -0,0 +1,14 @@ +defaults: + - activation: gelu + - norm: identity + +instance: + _target_: models.networks.mlp.MLP + initial_dim: ${model.network.backbone.output_dim} + hidden_dim: + - ${model.network.backbone.output_dim} + - 64 + final_dim: ${model.network.head.final_dim} + norm: ${model.network.mid.norm} + activation: ${model.network.mid.activation} + aux_data: ${aux_data} diff --git a/configs/model/network/mid/mlp_resnet.yaml b/configs/model/network/mid/mlp_resnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f5470e0172b165d18415e04e2e5af6c0342bc57 --- /dev/null +++ b/configs/model/network/mid/mlp_resnet.yaml @@ -0,0 +1,13 @@ +defaults: + - activation: gelu + - norm: groupnorm #instance_1d + +instance: + _target_: 
models.networks.mlp.MLPResNet + initial_dim: ${model.network.backbone.output_dim} + hidden_dim: + - ${model.network.backbone.output_dim} + - 64 + final_dim: ${model.network.head.final_dim} + norm: ${model.network.mid.norm} + activation: ${model.network.mid.activation} diff --git a/configs/model/network/mid/norm/batchnorm.yaml b/configs/model/network/mid/norm/batchnorm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6981245907923e7eb25a69396ea0ac1ac33303f1 --- /dev/null +++ b/configs/model/network/mid/norm/batchnorm.yaml @@ -0,0 +1,2 @@ +_target_: torch.nn.BatchNorm1d +_partial_: true diff --git a/configs/model/network/mid/norm/groupnorm.yaml b/configs/model/network/mid/norm/groupnorm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cda2cd2679a59c0b622f1a23a0a604e2c48d5b8 --- /dev/null +++ b/configs/model/network/mid/norm/groupnorm.yaml @@ -0,0 +1,2 @@ +_target_: torch.nn.GroupNorm +_partial_: true diff --git a/configs/model/network/mid/norm/identity.yaml b/configs/model/network/mid/norm/identity.yaml new file mode 100644 index 0000000000000000000000000000000000000000..085370029c933d49a07de9bc621ab39e00b0d569 --- /dev/null +++ b/configs/model/network/mid/norm/identity.yaml @@ -0,0 +1,2 @@ +_target_: torch.nn.Identity +_partial_: true diff --git a/configs/model/network/mid/norm/instance_1d.yaml b/configs/model/network/mid/norm/instance_1d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb9a092eff21a89a5b2bb5dc4b10f683b23c2cf6 --- /dev/null +++ b/configs/model/network/mid/norm/instance_1d.yaml @@ -0,0 +1,2 @@ +_target_: torch.nn.InstanceNorm1d +_partial_: true diff --git a/configs/model/network/multi_task.yaml b/configs/model/network/multi_task.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5d36204f53fcd45cd02be3d7c2112cc46f16617 --- /dev/null +++ b/configs/model/network/multi_task.yaml @@ -0,0 +1,12 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp_multi + - head: multi_task + +instance: + _target_: models.networks.network.UnfrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/network/random.yaml b/configs/model/network/random.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71cae4f78356df38be5601c92ee6f4944ac4d39f --- /dev/null +++ b/configs/model/network/random.yaml @@ -0,0 +1,8 @@ +defaults: + - head: random + +instance: + _target_: models.networks.network.NoFeatureBackbone + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/network/text_contrastive.yaml b/configs/model/network/text_contrastive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2f965fd7c9b5c3a943676e4e6ef8da3c0c29ded --- /dev/null +++ b/configs/model/network/text_contrastive.yaml @@ -0,0 +1,12 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - head: regression + +instance: + _target_: models.networks.network.TextContrastiveUnFrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/network/unfrozen_backbone.yaml b/configs/model/network/unfrozen_backbone.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e2fe0e56cf7499a859e6363036f5b4fb76b8e7b8 --- /dev/null +++ b/configs/model/network/unfrozen_backbone.yaml @@ -0,0 +1,12 @@ +defaults: + - backbone: openclip_B_32 + - mid: mlp + - 
head: regression + +instance: + _target_: models.networks.network.UnfrozenBackbone + backbone : ${model.network.backbone} + mid: ${model.network.mid} + head: ${model.network.head} + +class_name: ${class_name} diff --git a/configs/model/optimizer/adam.yaml b/configs/model/optimizer/adam.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1626e276138a0b4a4226017f63e98cde141347a7 --- /dev/null +++ b/configs/model/optimizer/adam.yaml @@ -0,0 +1,12 @@ +optim: + _target_: torch.optim.Adam + lr: 1e-3 + betas: [0.9, 0.999] + weight_decay: 0.01 + +exclude_ln_and_biases_from_weight_decay: False +lora_lr: 1e-4 +backbone_lr: 5e-6 +last_block_lr: 5e-5 +unfreeze_lr: False +diff_backbone_last: False diff --git a/configs/model/optimizer/adamw.yaml b/configs/model/optimizer/adamw.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c50743c7edb140cf001b63d114e64c73b1168e1 --- /dev/null +++ b/configs/model/optimizer/adamw.yaml @@ -0,0 +1,10 @@ +optim: + _target_: torch.optim.AdamW + lr: 1e-3 + betas: [0.9, 0.999] + weight_decay: 0.01 + +exclude_ln_and_biases_from_weight_decay: False +lora_lr: 1e-4 +backbone_lr: 2e-5 +unfreeze_lr: False diff --git a/configs/model/optimizer/lamb.yaml b/configs/model/optimizer/lamb.yaml new file mode 100644 index 0000000000000000000000000000000000000000..112a1a78d889236af324edaa354ddd201764fc29 --- /dev/null +++ b/configs/model/optimizer/lamb.yaml @@ -0,0 +1,10 @@ +optim: + _target_: utils.optimizers.Lamb + lr: 1e-3 + betas: [0.9, 0.999] + weight_decay: 0.01 + +exclude_ln_and_biases_from_weight_decay: False +lora_lr: 1e-4 +backbone_lr: 2e-5 +unfreeze_lr: False diff --git a/configs/model/optimizer/none.yaml b/configs/model/optimizer/none.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8fcbba7bdb72a56b34dbaf07b6d8cfb0fe2c62a --- /dev/null +++ b/configs/model/optimizer/none.yaml @@ -0,0 +1,7 @@ +optim: + _target_: models.misc.DoNothingOptimizer + +exclude_ln_and_biases_from_weight_decay: false +lora_lr: 1e-4 +backbone_lr: 2e-5 +unfreeze_lr: False diff --git a/configs/model/random.yaml b/configs/model/random.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcb1a5b0f7e384f49429607ecfc60c5c69ff7e39 --- /dev/null +++ b/configs/model/random.yaml @@ -0,0 +1,10 @@ +defaults: + - optimizer: none + - lr_scheduler: none + - network: random + - loss: mix + - _self_ + +name: Random +aux_data: ${aux_data} +text_tuning: ${text_tuning} diff --git a/configs/model/regression.yaml b/configs/model/regression.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0b829190c551727910db2c913ff96e9fd1745337 --- /dev/null +++ b/configs/model/regression.yaml @@ -0,0 +1,10 @@ +defaults: + - optimizer: adam + - lr_scheduler: none + - network: frozen_backbone + - loss: reg + - _self_ + +name: Regression +aux_data: ${aux_data} +text_tuning: ${text_tuning} diff --git a/configs/model/text_network/clip.yaml b/configs/model/text_network/clip.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f02c2e858c69ba47a81f0db4a0ce917809c463f9 --- /dev/null +++ b/configs/model/text_network/clip.yaml @@ -0,0 +1,5 @@ +instance: + _target_: models.networks.backbones.TextEncoder + path: ${model.network.backbone.instance.path} + +class_name: ${class_name} diff --git a/configs/model/text_tuning.yaml b/configs/model/text_tuning.yaml new file mode 100644 index 0000000000000000000000000000000000000000..333606496b928dade37fa518e848d3a6fce3f14d --- /dev/null +++ b/configs/model/text_tuning.yaml @@ 
-0,0 +1,11 @@ +defaults: + - optimizer: adam + - lr_scheduler: none + - network: text_contrastive + - text_network: clip + - loss: text_tuning + - _self_ + +name: Regression +aux_data: ${aux_data} +text_tuning: ${text_tuning} diff --git a/configs/scripts/enrich-metadata-quadtree.yaml b/configs/scripts/enrich-metadata-quadtree.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acf366673d0976b7d15a5a1bc0bde34093680532 --- /dev/null +++ b/configs/scripts/enrich-metadata-quadtree.yaml @@ -0,0 +1,4 @@ +data_dir: ??? +depth: 10 +do_split: 1000 +overwrite_csv: False diff --git a/configs/scripts/preprocess.yaml b/configs/scripts/preprocess.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed72893008db792810a56a0587cdeb322746fb0e --- /dev/null +++ b/configs/scripts/preprocess.yaml @@ -0,0 +1,4 @@ +data_dir: ??? +depth: 10 +do_split: 1000 +overwrite_csv: True diff --git a/data/__init__.py b/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/augmentation.py b/data/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..b1802a335705184f6709ee4a2ec1425b6175758a --- /dev/null +++ b/data/augmentation.py @@ -0,0 +1,223 @@ +""" +Adapted from https://github.com/nv-nguyen/template-pose/blob/main/src/utils/augmentation.py +""" + +from torchvision import transforms +from PIL import ImageEnhance, ImageFilter, Image +import numpy as np +import random +import logging +from torchvision.transforms import RandomResizedCrop, ToTensor + + +class PillowRGBAugmentation: + def __init__(self, pillow_fn, p, factor_interval): + self._pillow_fn = pillow_fn + self.p = p + self.factor_interval = factor_interval + + def __call__(self, PIL_image): + if random.random() <= self.p: + factor = random.uniform(*self.factor_interval) + if PIL_image.mode != "RGB": + logging.warning( + f"Error when applying data aug, image mode: {PIL_image.mode}" + ) + PIL_image = PIL_image.convert("RGB") + logging.warning(f"Converted image to {PIL_image.mode}") + PIL_image = (self._pillow_fn(PIL_image).enhance(factor=factor)).convert( + "RGB" + ) + return PIL_image + + +class PillowSharpness(PillowRGBAugmentation): + def __init__( + self, + p=0.3, + factor_interval=(0, 40.0), + ): + super().__init__( + pillow_fn=ImageEnhance.Sharpness, + p=p, + factor_interval=factor_interval, + ) + + +class PillowContrast(PillowRGBAugmentation): + def __init__( + self, + p=0.3, + factor_interval=(0.5, 1.6), + ): + super().__init__( + pillow_fn=ImageEnhance.Contrast, + p=p, + factor_interval=factor_interval, + ) + + +class PillowBrightness(PillowRGBAugmentation): + def __init__( + self, + p=0.5, + factor_interval=(0.5, 2.0), + ): + super().__init__( + pillow_fn=ImageEnhance.Brightness, + p=p, + factor_interval=factor_interval, + ) + + +class PillowColor(PillowRGBAugmentation): + def __init__( + self, + p=1, + factor_interval=(0.0, 20.0), + ): + super().__init__( + pillow_fn=ImageEnhance.Color, + p=p, + factor_interval=factor_interval, + ) + + +class PillowBlur: + def __init__(self, p=0.4, factor_interval=(1, 3)): + self.p = p + self.k = random.randint(*factor_interval) + + def __call__(self, PIL_image): + if random.random() <= self.p: + PIL_image = PIL_image.filter(ImageFilter.GaussianBlur(self.k)) + return PIL_image + + +class NumpyGaussianNoise: + def __init__(self, p, factor_interval=(0.01, 0.3)): + self.noise_ratio = random.uniform(*factor_interval) + self.p = p + + def __call__(self, img): + if
random.random() <= self.p: + img = np.copy(img) + noisesigma = random.uniform(0, self.noise_ratio) + gauss = np.random.normal(0, noisesigma, img.shape) * 255 + img = img + gauss + + img[img > 255] = 255 + img[img < 0] = 0 + return Image.fromarray(np.uint8(img)) + + +class StandardAugmentation: + def __init__( + self, names, brightness, contrast, sharpness, color, blur, gaussian_noise + ): + self.brightness = brightness + self.contrast = contrast + self.sharpness = sharpness + self.color = color + self.blur = blur + self.gaussian_noise = gaussian_noise + + # define a dictionary of augmentation functions to be applied + self.names = names.split(",") + self.augmentations = { + "brightness": self.brightness, + "contrast": self.contrast, + "sharpness": self.sharpness, + "color": self.color, + "blur": self.blur, + "gaussian_noise": self.gaussian_noise, + } + + def __call__(self, img): + for name in self.names: + img = self.augmentations[name](img) + return img + + +class GeometricAugmentation: + def __init__( + self, + names, + random_resized_crop, + random_horizontal_flip, + random_vertical_flip, + random_rotation, + ): + self.random_resized_crop = random_resized_crop + self.random_horizontal_flip = random_horizontal_flip + self.random_vertical_flip = random_vertical_flip + self.random_rotation = random_rotation + self.names = names.split(",") + + self.augmentations = { + "random_resized_crop": self.random_resized_crop, + "random_horizontal_flip": self.random_horizontal_flip, + "random_vertical_flip": self.random_vertical_flip, + "random_rotation": self.random_rotation, + } + + def __call__(self, img): + for name in self.names: + img = self.augmentations[name](img) + return img + + +class ImageAugmentation: + def __init__( + self, names, clip_transform, standard_augmentation, geometric_augmentation + ): + self.clip_transform = clip_transform + self.standard_augmentation = standard_augmentation + self.geometric_augmentation = geometric_augmentation + self.names = names.split(",") + self.transforms = { + "clip_transform": self.clip_transform, + "standard_augmentation": self.standard_augmentation, + "geometric_augmentation": self.geometric_augmentation, + } + print(f"Image augmentation: {self.names}") + + def __call__(self, img): + for name in self.names: + img = self.transforms[name](img) + return img + + +if __name__ == "__main__": + # sanity check + import glob + import torchvision.transforms as transforms + from torchvision.utils import save_image + from omegaconf import DictConfig, OmegaConf + from hydra.utils import instantiate + import torch + from PIL import Image + + augmentation_config = OmegaConf.load( + "./configs/dataset/train_transform/augmentation.yaml" + ) + augmentation_config.names = "standard_augmentation,geometric_augmentation" + augmentation_transform = instantiate(augmentation_config) + img_paths = glob.glob("./datasets/osv5m/test/images/*.jpg") + + num_try = 20 + num_try_per_image = 8 + num_imgs = 8 + + for idx in range(num_try): + imgs = [] + for idx_img in range(num_imgs): + img = Image.open(img_paths[idx_img]) + for idx_try in range(num_try_per_image): + if idx_try == 0: + imgs.append(ToTensor()(img.resize((224, 224)))) + img_aug = augmentation_transform(img.copy()) + img_aug = ToTensor()(img_aug) + imgs.append(img_aug) + imgs = torch.stack(imgs) + save_image(imgs, f"augmentation_{idx:03d}.png", nrow=9) diff --git a/data/data.py b/data/data.py new file mode 100644 index 0000000000000000000000000000000000000000..f0cb0316cd067ba2e034557ee755e797681042e2 --- /dev/null +++ 
b/data/data.py @@ -0,0 +1,711 @@ +import numpy as np +import pandas as pd +import torch +import random + +from os.path import join +from os.path import isfile +from PIL import Image +from sklearn.model_selection import train_test_split +from torch.utils.data import Dataset +from torchvision.transforms import ( + Compose, + RandomCrop, + CenterCrop, + RandomHorizontalFlip, + ToTensor, +) +import time +from torchvision.transforms import GaussianBlur +from torchvision import transforms + +def normalize(lat, lon): + """Used to put all lat lon inside ±90 and ±180.""" + lat = (lat + 90) % 360 - 90 + if lat > 90: + lat = 180 - lat + lon += 180 + lon = (lon + 180) % 360 - 180 + return lat, lon + + +def collate_fn(batch): + """Collate function for the dataloader. + Args: + batch (list): list of dictionaries with keys "img", "gps", "idx" and optionally "label" + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + keys = list(batch[0].keys()) + if "weight" in batch[0].keys(): + keys.remove("weight") + output = {} + for key in [ + "idx", + "unique_country", + "unique_region", + "unique_sub-region", + "unique_city", + "img_idx", + "text", + ]: + if key in keys: + idx = [x[key] for x in batch] + output[key] = idx + keys.remove(key) + for key in keys: + if not ("text" in key): + output[key] = torch.stack([x[key] for x in batch]) + return output + + +def collate_fn_streetclip(batch): + """Collate function for the dataloader. + Args: + batch (list): list of dictionaries with keys "img", "gps", "idx" and optionally "label" + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + keys = list(batch[0].keys()) + if "weight" in batch[0].keys(): + keys.remove("weight") + output = {} + for key in [ + "idx", + "unique_country", + "unique_region", + "unique_sub-region", + "unique_city", + "img_idx", + "img", + "text", + ]: + if key in keys: + idx = [x[key] for x in batch] + output[key] = idx + keys.remove(key) + for key in keys: + if not ("text" in key): + output[key] = torch.stack([x[key] for x in batch]) + return output + + +def collate_fn_denstity(batch): + """Collate function for the dataloader. + Args: + batch (list): list of dictionaries with keys "img", "gps", "idx" and optionally "label" + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + keys = list(batch[0].keys()) + if "weight" in batch[0].keys(): + keys.remove("weight") + # Sample indices based on the weights + weights = np.array([x["weight"] for x in batch]) + normalized_weights = weights / np.sum(weights) + sampled_indices = np.random.choice( + len(batch), size=len(batch), p=normalized_weights, replace=True + ) + output = {} + for key in [ + "idx", + "unique_country", + "unique_region", + "unique_sub-region", + "unique_city", + "img_idx", + "text", + ]: + if key in keys: + idx = [batch[i][key] for i in sampled_indices] + output[key] = idx + keys.remove(key) + for key in keys: + if not ("text" in key): + output[key] = torch.stack([batch[i][key] for i in sampled_indices]) + return output + + +def collate_fn_streetclip_denstity(batch): + """Collate function for the dataloader. 
+ Args: + batch (list): list of dictionaries with keys "img", "gps", "idx" and optionally "label" + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + keys = list(batch[0].keys()) + if "weight" in batch[0].keys(): + keys.remove("weight") + # Sample indices based on the weights + weights = np.array([x["weight"] for x in batch]) + normalized_weights = weights / np.sum(weights) + sampled_indices = np.random.choice( + len(batch), size=len(batch), p=normalized_weights, replace=True + ) + output = {} + for key in [ + "idx", + "unique_country", + "unique_region", + "unique_sub-region", + "unique_city", + "img_idx", + "img", + "text", + ]: + if key in keys: + idx = [batch[i][key] for i in sampled_indices] + output[key] = idx + keys.remove(key) + for key in keys: + if not ("text" in key): + output[key] = torch.stack([batch[i][key] for i in sampled_indices]) + return output + + +def collate_fn_contrastive(batch): + """Collate function for the dataloader. + Args: + batch (list): list of dictionaries with keys "img", "gps", "idx" and optionally "label" + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + output = collate_fn(batch) + pos_img = torch.stack([x["pos_img"] for x in batch]) + output["pos_img"] = pos_img + return output + + +def collate_fn_contrastive_density(batch): + """Collate function for the dataloader. + Args: + batch (list): list of dictionaries with keys "img", "gps", "idx" and optionally "label" + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + keys = list(batch[0].keys()) + if "weight" in batch[0].keys(): + keys.remove("weight") + # Sample indices based on the weights + weights = np.array([x["weight"] for x in batch]) + normalized_weights = weights / np.sum(weights) + sampled_indices = np.random.choice( + len(batch), size=len(batch), p=normalized_weights, replace=True + ) + output = {} + for key in [ + "idx", + "unique_country", + "unique_region", + "unique_sub-region", + "unique_city", + "img_idx", + ]: + if key in keys: + idx = [batch[i][key] for i in sampled_indices] + output[key] = idx + keys.remove(key) + for key in keys: + if not ("text" in key): + output[key] = torch.stack([batch[i][key] for i in sampled_indices]) + return output + + +class osv5m(Dataset): + csv_dtype = {"category": str, "country": str, "city": str} # Don't remove. + + def __init__( + self, + path, + transforms, + split="train", + class_name=None, + aux_data=[], + is_baseline=False, + areas=["country", "region", "sub-region", "city"], + streetclip=False, + suff="", + blur=False + ): + """Initializes the dataset. + Args: + path (str): path to the dataset + transforms (torchvision.transforms): transforms to apply to the images + split (str): split to use (train, val, test) + class_name (str): category to use (e.g. 
"city") + aux_data (list of str): auxilliary datas to use + areas (list of str): regions to perform accuracy + streetclip (bool): if the model is streetclip, do not use transform + suff (str): suffix of test csv + blur (bool): blur bottom of images or not + """ + self.suff = suff + self.path = path + self.aux = len(aux_data) > 0 + self.aux_list = aux_data + self.split = split + if split == "select": + self.df = self.load_split(split) + split = "test" + else: + self.df = self.load_split(split) + self.split = split + self.image_folder = join( + path, + 'images', + ("train" if split == "val" else split), + ) + + self.dict_names = {} + for root, _, files in os.walk(self.image_folder): + for file in files: + self.dict_names[file] = os.path.join(root, file) + + self.is_baseline = is_baseline + if self.aux: + self.aux_data = {} + for col in self.aux_list: + if col in ["land_cover", "climate", "soil"]: + self.aux_data[col] = pd.get_dummies(self.df[col], dtype=float) + if col == "climate": + for i in range(31): + if not (i in list(self.aux_data[col].columns)): + self.aux_data[col][i] = 0 + desired_order = [i for i in range(31)] + desired_order.remove(20) + self.aux_data[col] = self.aux_data[col][desired_order] + else: + self.aux_data[col] = self.df[col].apply(lambda x: [x]) + + self.areas = ["_".join(["unique", area]) for area in areas] + if class_name is None: + self.class_name = class_name + elif "quadtree" in class_name: + self.class_name = class_name + else: + self.class_name = "_".join(["unique", class_name]) + ex = self.extract_classes(self.class_name) + self.df = self.df[ + ["id", "latitude", "longitude", "weight"] + self.areas + ex + ].fillna("NaN") + if self.class_name in self.areas: + self.df.columns = list(self.df.columns)[:-1] + [self.class_name + "_2"] + self.transforms = transforms + self.collate_fn = collate_fn + self.collate_fn_density = collate_fn_denstity + self.blur = blur + self.streetclip = streetclip + if self.streetclip: + self.collate_fn = collate_fn_streetclip + self.collate_fn_density = collate_fn_streetclip_denstity + + def load_split(self, split): + """Returns a new dataset with the given split.""" + start_time = time.time() + if split == "test": + df = pd.read_csv(join(self.path, "test.csv"), dtype=self.csv_dtype) + # extract coord + longitude = df["longitude"].values + latitude = df["latitude"].values + # Create bins + num_bins = 100 + lon_bins = np.linspace(longitude.min(), longitude.max(), num_bins) + lat_bins = np.linspace(latitude.min(), latitude.max(), num_bins) + # compute density and weights + hist, _, _ = np.histogram2d(longitude, latitude, bins=[lon_bins, lat_bins]) + weights = 1.0 / np.power(hist[df["lon_bin"], df["lat_bin"]], 0.75) + normalized_weights = weights / np.sum(weights) + df["weight"] = normalized_weights + return df + elif split == "select": + df = pd.read_csv( + join(self.path, "select.csv"), dtype=self.csv_dtype + ) + # extract coord + longitude = df["longitude"].values + latitude = df["latitude"].values + # Create bins + num_bins = 100 + lon_bins = np.linspace(longitude.min(), longitude.max(), num_bins) + lat_bins = np.linspace(latitude.min(), latitude.max(), num_bins) + # compute density and weights + hist, _, _ = np.histogram2d(longitude, latitude, bins=[lon_bins, lat_bins]) + weights = 1.0 / np.power(hist[df["lon_bin"], df["lat_bin"]], 0.75) + normalized_weights = weights / np.sum(weights) + df["weight"] = normalized_weights + return df + else: + if len(self.suff) == 0: + df = pd.read_csv( + join(self.path, "train.csv"), dtype=self.csv_dtype 
+ ) + else: + df = pd.read_csv( + join(self.path, "train" + "_" + self.suff + ".csv"), + dtype=self.csv_dtype, + ) + + # extract coord + longitude = df["longitude"].values + latitude = df["latitude"].values + # Create bins + num_bins = 100 + lon_bins = np.linspace(longitude.min(), longitude.max(), num_bins) + lat_bins = np.linspace(latitude.min(), latitude.max(), num_bins) + # compute density and weights + hist, _, _ = np.histogram2d(longitude, latitude, bins=[lon_bins, lat_bins]) + weights = 1.0 / np.power(hist[df["lon_bin"], df["lat_bin"]], 0.75) + normalized_weights = weights / np.sum(weights) + df["weight"] = normalized_weights + + test_df = df.sample( + n=int(0.1 * len(df)), + weights=normalized_weights, + replace=False, + random_state=42, + ) + + end_time = time.time() + print(f"Loading {split} dataset took {(end_time - start_time):.2f} seconds") + + if split == "val": + return test_df + else: + return df.drop(test_df.index) + + def extract_classes(self, tag=None): + """Extracts the categories from the dataset.""" + if tag is None: + self.has_labels = False + return [] + splits = ["train", "test"] if self.is_baseline else ["train"] + # splits = ["train", "test"] + print(f"Loading categories from {splits}") + + # concatenate all categories from relevant splits to find the unique ones. + self.categories = sorted( + pd.concat( + [ + pd.read_csv(join(self.path, f"{split}.csv"))[tag] + for split in splits + ] + ) + .fillna("NaN") + .unique() + .tolist() + ) + + if "NaN" in self.categories: + self.categories.remove("NaN") + if self.split != "test": + self.df = self.df.dropna(subset=[tag]) + # compute the total number of categories - this name is fixed and will be used as a lookup during init + self.num_classes = len(self.categories) + + # create a mapping from category to index + self.category_to_index = { + category: i for i, category in enumerate(self.categories) + } + self.has_labels = True + return [tag] + + def __getitem__(self, i): + """Returns an item from the dataset. 
+ Args: + i (int): index of the item + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + x = list(self.df.iloc[i]) # id, latitude, longitude, {category} + if self.streetclip: + img = Image.open(self.dict_names[f"{int(x[0])}.jpg"]) + elif self.blur: + img = transforms.ToTensor()(Image.open(self.dict_names[f"{int(x[0])}.jpg"])) + u = GaussianBlur(kernel_size = 13, sigma=2.0) + bottom_part = img[:, -14:, :].unsqueeze(0) + blurred_bottom = u(bottom_part) + img[:, -14:, :] = blurred_bottom.squeeze() + img = self.transforms(transforms.ToPILImage()(img)) + else: + img = self.transforms( + Image.open(self.dict_names[f"{int(x[0])}.jpg"]) + ) + + lat, lon = normalize(x[1], x[2]) + gps = torch.FloatTensor([np.radians(lat), np.radians(lon)]).squeeze(0) + + output = { + "img": img, + "gps": gps, + "idx": i, + "img_idx": int(x[0]), + "weight": x[3], + } + + for count, area in enumerate(self.areas): + output[area] = x[ + count + 4 + ] #'country': x[3], 'region': x[4], 'sub-region': x[5], 'city': x[6]} + + if self.has_labels: + if x[-1] in self.categories: + output["label"] = torch.LongTensor( + [self.category_to_index[x[-1]]] + ).squeeze(-1) + else: + output["label"] = torch.LongTensor([-1]).squeeze(-1) + if self.aux: + for col in self.aux_list: + output[col] = torch.FloatTensor(self.aux_data[col].iloc[i]) + return output + + def __len__(self): + return len(self.df) + + +class Contrastiveosv5m(osv5m): + def __init__( + self, + path, + transforms, + split="train", + class_name=None, + aux_data=[], + class_name2=None, + blur=False, + ): + """ + class_name2 (str): if not None, contrastive learning is done on this class rather than the one used for classification + """ + super().__init__( + path, + transforms, + split=split, + class_name=class_name, + aux_data=aux_data, + blur=blur, + ) + self.add_label = False + if not(class_name2 is None) and split != 'test' and split != 'select': + self.add_label = True + self.class_name = class_name2 + self.extract_classes_contrastive(tag=class_name2) + self.df = self.df.reset_index(drop=True) + self.dict_classes = { + value: indices.tolist() + for value, indices in self.df.groupby(self.class_name).groups.items() + } + self.collate_fn = collate_fn_contrastive + self.random_crop = RandomCrop(224) # use when no positive image is available + + def sample_positive(self, i): + """ + sample a positive image from the same class (city, country) if one is available, + otherwise apply a different crop to the same image + """ + x = self.df.iloc[i] # id, latitude, longitude, {category} + class_name = x[self.class_name] + idxs = list(self.dict_classes[class_name]) # copy so the cached index list is not mutated + idxs.remove(i) + + if len(idxs) > 0: + idx = random.choice(idxs) + x = self.df.iloc[idx] + pos_img = self.transforms( + Image.open(self.dict_names[f"{int(x['id'])}.jpg"]) + ) + else: + pos_img = self.random_crop( + self.transforms( + Image.open(self.dict_names[f"{int(x['id'])}.jpg"]) + ) + ) + return pos_img + + def extract_classes_contrastive(self, tag=None): + """Extracts the categories from the dataset.""" + if tag is None: + self.has_labels = False + return [] + splits = ["train", "test"] if self.is_baseline else ["train"] + # splits = ["train", "test"] + print(f"Loading categories from {splits}")
 + + # concatenate all categories from relevant splits to find the unique ones.
+ categories = sorted( + pd.concat( + [ + pd.read_csv(join(self.path, f"{split}.csv"))[tag] + for split in splits + ] + ) + .fillna("NaN") + .unique() + .tolist() + ) + # create a mapping from category to index + self.contrastive_category_to_index = { + category: i for i, category in enumerate(categories) + } + + + def __getitem__(self, i): + output = super().__getitem__(i) + pos_img = self.sample_positive(i) + output["pos_img"] = pos_img + if self.add_label: + output["label_contrastive"] = torch.LongTensor( + [self.contrastive_category_to_index[self.df[self.class_name].iloc[i]]] + ).squeeze(-1) + return output + + +class TextContrastiveosv5m(osv5m): + def __init__( + self, + path, + transforms, + split="train", + class_name=None, + aux_data=[], + blur=False, + ): + super().__init__( + path, + transforms, + split=split, + class_name=class_name, + aux_data=aux_data, + blur=blur, + ) + self.df = self.df.reset_index(drop=True) + + def get_text(self, i): + """ + sample positive image from the same city, country if it is available + otherwise, apply different crop to the image + """ + x = self.df.iloc[i] # id, latitude, longitude, {category} + l = [ + name.split("_")[-1] + for name in [ + x["unique_city"], + x["unique_sub-region"], + x["unique_region"], + x["unique_country"], + ] + ] + + pre = False + sentence = "An image of " + if l[0] != "NaN": + sentence += "the city of " + sentence += l[0] + pre = True + + if l[1] != "NaN": + if pre: + sentence += ", in " + sentence += "the area of " + sentence += l[1] + pre = True + + if l[2] != "NaN": + if pre: + sentence += ", in " + sentence += "the region of " + sentence += l[2] + pre = True + + if l[3] != "NaN": + if pre: + sentence += ", in " + sentence += l[3] + + return sentence + + def __getitem__(self, i): + output = super().__getitem__(i) + output["text"] = self.get_text(i) + return output + + +import os +import json + + +class Baseline(Dataset): + def __init__( + self, + path, + which, + transforms, + ): + """Initializes the dataset. + Args: + path (str): path to the dataset + which (str): which baseline to use (im2gps, im2gps3k) + transforms (torchvision.transforms): transforms to apply to the images + """ + baselines = { + "im2gps": self.load_im2gps, + "im2gps3k": self.load_im2gps, + "yfcc4k": self.load_yfcc4k, + } + self.path = path + self.samples = baselines[which]() + self.transforms = transforms + self.collate_fn = collate_fn + self.class_name = which + + def load_im2gps( + self, + ): + json_path = join(self.path, "info.json") + with open(json_path) as f: + data = json.load(f) + + samples = [] + for f in os.listdir(join(self.path, "images")): + if len(data[f]): + lat = float(data[f][-4].replace("latitude: ", "")) + lon = float(data[f][-3].replace("longitude: ", "")) + samples.append((f, lat, lon)) + + return samples + + def load_yfcc4k( + self, + ): + samples = [] + with open(join(self.path, "info.txt")) as f: + lines = f.readlines() + for line in lines: + x = line.split("\t") + f, lon, lat = x[1], x[12], x[13] + samples.append((f + ".jpg", float(lat), float(lon))) + + return samples + + def __getitem__(self, i): + """Returns an item from the dataset. 
+ Args: + i (int): index of the item + Returns: + dict: dictionary with keys "img", "gps", "idx" and optionally "label" + """ + img_path, lat, lon = self.samples[i] + img = self.transforms( + Image.open(join(self.path, "images", img_path)).convert("RGB") + ) + lat, lon = normalize(lat, lon) + gps = torch.FloatTensor([np.radians(lat), np.radians(lon)]).squeeze(0) + + return { + "img": img, + "gps": gps, + "idx": i, + } + + def __len__(self): + return len(self.samples) \ No newline at end of file diff --git a/data/datamodule.py b/data/datamodule.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5734cff7948e53c6ad64588debe0ecad51f46d --- /dev/null +++ b/data/datamodule.py @@ -0,0 +1,85 @@ +import pytorch_lightning as L +from torch.utils.data import DataLoader, random_split +import torch +import time + + +class ImageDataModule(L.LightningDataModule): + def __init__( + self, + train_dataset, + val_dataset, + test_dataset, + global_batch_size, + num_workers, + num_nodes=1, + num_devices=1, + val_proportion=0.1, + ): + super().__init__() + self._builders = { + "train": train_dataset, + "val": val_dataset, + "test": test_dataset, + } + self.num_workers = num_workers + self.batch_size = global_batch_size // (num_nodes * num_devices) + print(f"Each GPU will receive {self.batch_size} images") + self.val_proportion = val_proportion + + @property + def num_classes(self): + if hasattr(self, "train_dataset"): + return self.train_dataset.num_classes + else: + return self._builders["train"]().num_classes + + def setup(self, stage=None): + """Setup the datamodule. + Args: + stage (str): stage of the datamodule + Is be one of "fit" or "test" or None + """ + print("Stage", stage) + start_time = time.time() + if stage == "fit" or stage is None: + self.train_dataset = self._builders["train"]() + self.val_dataset = self._builders["val"]() + print(f"Train dataset size: {len(self.train_dataset)}") + print(f"Val dataset size: {len(self.val_dataset)}") + else: + self.test_dataset = self._builders["test"]() + print(f"Test dataset size: {len(self.test_dataset)}") + end_time = time.time() + print(f"Setup took {(end_time - start_time):.2f} seconds") + + def train_dataloader(self): + return DataLoader( + self.train_dataset, + batch_size=self.batch_size, + shuffle=True, + pin_memory=False, + drop_last=True, + num_workers=self.num_workers, + collate_fn=self.train_dataset.collate_fn_density, + ) + + def val_dataloader(self): + return DataLoader( + self.val_dataset, + batch_size=self.batch_size, + shuffle=False, + pin_memory=False, + num_workers=self.num_workers, + collate_fn=self.val_dataset.collate_fn, + ) + + def test_dataloader(self): + return DataLoader( + self.test_dataset, + batch_size=self.batch_size, + shuffle=False, + pin_memory=False, + num_workers=self.num_workers, + collate_fn=self.test_dataset.collate_fn, + ) diff --git a/data/transforms.py b/data/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..60aa1e04cdc80775f386742b038a4167c652e9e1 --- /dev/null +++ b/data/transforms.py @@ -0,0 +1,44 @@ +from transformers import CLIPProcessor + + +class ClipTransform(object): + def __init__(self, split): + self.transform = CLIPProcessor.from_pretrained("geolocal/StreetCLIP") + + def __call__(self, x): + # return self.transform(images=x, return_tensors="pt")["pixel_values"].squeeze(0) + return self.transform(images=[x], return_tensors="pt") + + +if __name__ == "__main__": + # sanity check + import glob + import torchvision.transforms as transforms + from 
torchvision.utils import save_image + from omegaconf import DictConfig, OmegaConf + from hydra.utils import instantiate + import torch + from PIL import Image + + fast_clip_config = OmegaConf.load( + "./configs/dataset/train_transform/fast_clip.yaml" + ) + fast_clip_transform = instantiate(fast_clip_config) + clip_transform = ClipTransform(None) + + img_paths = glob.glob("./datasets/osv5m/test/images/*.jpg") + original_imgs, re_implemted_imgs, diff = [], [], [] + + for i in range(16): + img = Image.open(img_paths[i]) + clip_img = clip_transform(img) + fast_clip_img = fast_clip_transform(img) + original_imgs.append(clip_img) + re_implemted_imgs.append(fast_clip_img) + max_diff = (clip_img - fast_clip_img).abs() + diff.append(max_diff) + if max_diff.max() > 1e-5: + print(max_diff.max()) + original_imgs = torch.stack(original_imgs) + re_implemted_imgs = torch.stack(re_implemted_imgs) + diff = torch.stack(diff)
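The coordinate targets used throughout data/data.py are first passed through normalize() and then converted to radians: latitudes are folded back into ±90 (crossing a pole flips the longitude by 180 degrees) and longitudes are wrapped into ±180. A minimal self-contained sketch of that convention, using the same formula as the code above; the example values are illustrative only.

import numpy as np
import torch

def normalize(lat, lon):
    # same wrap-around rule as data/data.py
    lat = (lat + 90) % 360 - 90
    if lat > 90:
        lat = 180 - lat
        lon += 180
    lon = (lon + 180) % 360 - 180
    return lat, lon

lat, lon = normalize(91.0, 10.0)   # a point just past the north pole
assert (lat, lon) == (89.0, -170.0)
gps = torch.FloatTensor([np.radians(lat), np.radians(lon)])   # the "gps" tensor returned by __getitem__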
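load_split() attaches a weight to every row by binning coordinates into a 100x100 longitude/latitude histogram and taking the inverse bin count to the power 0.75; collate_fn_denstity then resamples each batch with those probabilities. The following is a hedged sketch of the same weighting on synthetic coordinates: here the lon_bin/lat_bin assignments are recomputed with np.digitize, whereas the loader indexes precomputed lon_bin/lat_bin columns from the CSV.

import numpy as np

rng = np.random.default_rng(0)
lon = rng.uniform(-180, 180, size=1000)
lat = rng.uniform(-90, 90, size=1000)

num_bins = 100
lon_bins = np.linspace(lon.min(), lon.max(), num_bins)
lat_bins = np.linspace(lat.min(), lat.max(), num_bins)
hist, _, _ = np.histogram2d(lon, lat, bins=[lon_bins, lat_bins])

# bin index of each sample (assumed to mirror the lon_bin/lat_bin columns the loader reads)
lon_bin = np.clip(np.digitize(lon, lon_bins) - 1, 0, num_bins - 2)
lat_bin = np.clip(np.digitize(lat, lat_bins) - 1, 0, num_bins - 2)

# inverse-density weights, softened by the 0.75 exponent, then normalised to probabilities
weights = 1.0 / np.power(hist[lon_bin, lat_bin], 0.75)
weights = weights / weights.sum()

# collate_fn_denstity resamples the batch (with replacement) using these probabilities
batch_indices = np.random.choice(len(lon), size=32, p=weights, replace=True)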
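For reference, TextContrastiveosv5m.get_text composes the administrative hierarchy into a single caption, skipping any level equal to "NaN". With every level present the output looks like (place names illustrative only) "An image of the city of Toulouse, in the area of Haute-Garonne, in the region of Occitanie, in France", while a row that only has a country reduces to "An image of France".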