Matteo Sirri committed
Commit 7a129e5
1 Parent(s): d32b68f

fix: remove unused files

app.py CHANGED
@@ -1,12 +1,11 @@
  import os.path as osp
- from tkinter.ttk import Style
  import gradio as gr
  import torch
  import logging
  import torchvision
  from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2
  from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
- from configs.path_cfg import MOTCHA_ROOT, OUTPUT_DIR
+ from configs.path_cfg import OUTPUT_DIR
  from src.detection.graph_utils import add_bbox
  from src.detection.vision import presets
  logging.getLogger('PIL').setLevel(logging.CRITICAL)
deps/win/conda_environment.yml DELETED
@@ -1,42 +0,0 @@
- name: cvcspw
- channels:
- - defaults
- dependencies:
- - ca-certificates=2022.07.19=haa95532_0
- - certifi=2022.6.15=py38haa95532_0
- - openssl=1.1.1q=h2bbff1b_0
- - pip=22.1.2=py38haa95532_0
- - python=3.8.13=h6244533_0
- - setuptools=63.4.1=py38haa95532_0
- - sqlite=3.39.2=h2bbff1b_0
- - vc=14.2=h21ff451_1
- - vs2015_runtime=14.27.29016=h5e58377_2
- - wheel=0.37.1=pyhd3eb1b0_0
- - wincertstore=0.2=py38haa95532_2
- - pip:
- - charset-normalizer==2.1.1
- - coloredlogs==15.0.1
- - cycler==0.11.0
- - fonttools==4.37.1
- - humanfriendly==10.0
- - idna==3.3
- - kiwisolver==1.4.4
- - matplotlib==3.5.3
- - numpy==1.23.2
- - packaging==21.3
- - pandas==1.4.4
- - pillow==9.2.0
- - pycocotools==2.0.4
- - pyparsing==3.0.9
- - pyreadline3==3.4.1
- - python-dateutil==2.8.2
- - pytz==2022.2.1
- - requests==2.28.1
- - seaborn==0.12.0
- - six==1.16.0
- - torch==1.12.1+cu116
- - torchaudio==0.12.1+cu116
- - torchvision==0.13.1+cu116
- - typing-extensions==4.3.0
- - urllib3==1.26.12
- prefix: C:\Users\matte\anaconda3\envs\cvcspw
 
deps/win/conda_requirements.txt DELETED
@@ -1,39 +0,0 @@
- # This file may be used to create an environment using:
- # $ conda create --name <env> --file <this file>
- # platform: win-64
- ca-certificates=2022.07.19=haa95532_0
- certifi=2022.6.15=py38haa95532_0
- charset-normalizer=2.1.1=pypi_0
- coloredlogs=15.0.1=pypi_0
- cycler=0.11.0=pypi_0
- fonttools=4.37.1=pypi_0
- humanfriendly=10.0=pypi_0
- idna=3.3=pypi_0
- kiwisolver=1.4.4=pypi_0
- matplotlib=3.5.3=pypi_0
- numpy=1.23.2=pypi_0
- openssl=1.1.1q=h2bbff1b_0
- packaging=21.3=pypi_0
- pandas=1.4.4=pypi_0
- pillow=9.2.0=pypi_0
- pip=22.1.2=py38haa95532_0
- pycocotools=2.0.4=pypi_0
- pyparsing=3.0.9=pypi_0
- pyreadline3=3.4.1=pypi_0
- python=3.8.13=h6244533_0
- python-dateutil=2.8.2=pypi_0
- pytz=2022.2.1=pypi_0
- requests=2.28.1=pypi_0
- seaborn=0.12.0=pypi_0
- setuptools=63.4.1=py38haa95532_0
- six=1.16.0=pypi_0
- sqlite=3.39.2=h2bbff1b_0
- torch=1.12.1+cu116=pypi_0
- torchaudio=0.12.1+cu116=pypi_0
- torchvision=0.13.1+cu116=pypi_0
- typing-extensions=4.3.0=pypi_0
- urllib3=1.26.12=pypi_0
- vc=14.2=h21ff451_1
- vs2015_runtime=14.27.29016=h5e58377_2
- wheel=0.37.1=pyhd3eb1b0_0
- wincertstore=0.2=py38haa95532_2
 
deps/win/pip_requirements.txt DELETED
@@ -1,30 +0,0 @@
- certifi==2022.6.15
- charset-normalizer==2.1.1
- coloredlogs==15.0.1
- cycler==0.11.0
- fonttools==4.37.1
- humanfriendly==10.0
- idna==3.3
- kiwisolver==1.4.4
- matplotlib==3.5.3
- numpy==1.23.2
- packaging==21.3
- pandas==1.4.4
- Pillow==9.2.0
- pip==22.1.2
- pycocotools==2.0.4
- pyparsing==3.0.9
- pyreadline3==3.4.1
- python-dateutil==2.8.2
- pytz==2022.2.1
- requests==2.28.1
- seaborn==0.12.0
- setuptools==63.4.1
- six==1.16.0
- torch==1.12.1
- torchaudio==0.12.1
- torchvision==0.13.1
- typing_extensions==4.3.0
- urllib3==1.26.12
- wheel==0.37.1
- wincertstore==0.2
 
notebook/colab/detector_show.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebook/colab/train_detector.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
scripts/evaluate_detector.sh DELETED
@@ -1,3 +0,0 @@
- #!/bin/sh
-
- python -m tools.train_detector --model-eval "d://cvcspw/storage/motsynth_output/detection_logs/fasterrcnn_training/checkpoint.pth" --test-only
 
scripts/inference_detector.sh DELETED
@@ -1,3 +0,0 @@
- #!/bin/sh
-
- python -m tools.inference_detector --model-path ./storage/motsynth_output/detection_logs/fasterrcnn_training_2/checkpoint.pth
 
scripts/train_detector.sh DELETED
@@ -1,3 +0,0 @@
- #!/bin/sh
-
- python -m tools.train_detector
 
src/detection/model_factory.py DELETED
@@ -1,54 +0,0 @@
- import logging
- import torch
- from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2, FasterRCNN, FastRCNNPredictor
- from torchvision.models.detection import FasterRCNN_ResNet50_FPN_V2_Weights
- from torchvision.models.resnet import ResNet50_Weights
-
- logger = logging.getLogger(__name__)
-
-
- def set_seeds(seed: int = 42):
-     """Sets random sets for torch operations.
-
-     Args:
-         seed (int, optional): Random seed to set. Defaults to 42.
-     """
-     # Set the seed for general torch operations
-     torch.manual_seed(seed)
-     # Set the seed for CUDA torch operations (ones that happen on the GPU)
-     torch.cuda.manual_seed(seed)
-
-
- class ModelFactory:
-     @staticmethod
-     def get_model(name, weights, backbone, backbone_weights, trainable_backbone_layers):
-         logger.debug(f"get_model -> model:{name}")
-
-         if name == "fasterrcnn_resnet50_fpn":
-             # backbone = backbone
-             model_weights = FasterRCNN_ResNet50_FPN_V2_Weights[weights]
-             model_backbone_weights = ResNet50_Weights[backbone_weights]
-             # trainable_backbone_layers = 1
-             model: FasterRCNN = fasterrcnn_resnet50_fpn_v2(
-                 weights=model_weights, backbone_name=backbone, weights_backbone=model_backbone_weights, trainable_backbone_layers=trainable_backbone_layers)
-
-             # for param in model.rpn.parameters():
-             # param.requires_grad = False
-             # for param in model.roi_heads.parameters():
-             # param.requires_grad = False
-             # for param in model.backbone.fpn.parameters():
-             # param.requires_grad = False
-
-             set_seeds()
-
-             num_classes = 2 # 1 class (person) + background
-             in_features = model.roi_heads.box_predictor.cls_score.in_features
-             model.roi_heads.box_predictor = FastRCNNPredictor(
-                 in_features, num_classes)
-
-         else:
-             logger.error(
-                 "Please, provide a valid model as argument. Select one of the following: fasterrcnn_resnet50_fpn.")
-             raise ValueError(name)
-
-         return model
 
src/detection/mot_dataset.py DELETED
@@ -1,48 +0,0 @@
-
- import numpy as np
- from src.detection.vision.coco_utils import ConvertCocoPolysToMask, CocoDetection, _coco_remove_images_without_annotations
- from src.detection.vision.transforms import Compose
-
-
- class UpdateIsCrowd(object):
-     def __init__(self, min_size, min_vis=0.2):
-         self.min_size = min_size
-         self.min_vis = min_vis
-
-     def __call__(self, image, target):
-         for i, ann in enumerate(target['annotations']):
-             bbox = ann['bbox']
-             bbox_too_small = max(bbox[-1], bbox[-2]) < self.min_size
-
-             if 'vis' in ann:
-                 vis = ann['vis']
-
-             elif 'keypoints' in ann:
-                 vis = (np.array(ann['keypoints'])[2::3] == 2).mean().round(2)
-
-             else:
-                 raise RuntimeError(
-                     "The given annotations have no visibility measure. Are you sure you want to proceed?")
-
-             not_vis = vis < self.min_vis
-             target['annotations'][i]['iscrowd'] = max(
-                 ann['iscrowd'], int(bbox_too_small), int(not_vis))
-
-         return image, target
-
-
- def get_mot_dataset(img_folder, ann_file, transforms, min_size=25, min_vis=0.2):
-     t = [UpdateIsCrowd(min_size=min_size, min_vis=min_vis),
-          ConvertCocoPolysToMask()]
-
-     if transforms is not None:
-         t.append(transforms)
-     transforms = Compose(t)
-
-     dataset = CocoDetection(img_folder=img_folder,
-                             ann_file=ann_file,
-                             transforms=transforms)
-
-     dataset = _coco_remove_images_without_annotations(dataset)
-
-     return dataset
 
src/detection/vision/coco_eval.py DELETED
@@ -1,194 +0,0 @@
1
- import copy
2
- import io
3
- from contextlib import redirect_stdout
4
-
5
- import numpy as np
6
- import pycocotools.mask as mask_util
7
- import torch
8
- from . import utils
9
- from pycocotools.coco import COCO
10
- from pycocotools.cocoeval import COCOeval
11
-
12
-
13
- class CocoEvaluator:
14
- def __init__(self, coco_gt, iou_types):
15
- assert isinstance(iou_types, (list, tuple))
16
- coco_gt = copy.deepcopy(coco_gt)
17
- self.coco_gt = coco_gt
18
-
19
- self.iou_types = iou_types
20
- self.coco_eval = {}
21
- for iou_type in iou_types:
22
- self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
23
-
24
- self.img_ids = []
25
- self.eval_imgs = {k: [] for k in iou_types}
26
-
27
- def update(self, predictions):
28
- img_ids = list(np.unique(list(predictions.keys())))
29
- self.img_ids.extend(img_ids)
30
-
31
- for iou_type in self.iou_types:
32
- results = self.prepare(predictions, iou_type)
33
- with redirect_stdout(io.StringIO()):
34
- coco_dt = COCO.loadRes(
35
- self.coco_gt, results) if results else COCO()
36
- coco_eval = self.coco_eval[iou_type]
37
-
38
- coco_eval.cocoDt = coco_dt
39
- coco_eval.params.imgIds = list(img_ids)
40
- img_ids, eval_imgs = evaluate(coco_eval)
41
-
42
- self.eval_imgs[iou_type].append(eval_imgs)
43
-
44
- def synchronize_between_processes(self):
45
- for iou_type in self.iou_types:
46
- self.eval_imgs[iou_type] = np.concatenate(
47
- self.eval_imgs[iou_type], 2)
48
- create_common_coco_eval(
49
- self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
50
-
51
- def accumulate(self):
52
- for coco_eval in self.coco_eval.values():
53
- coco_eval.accumulate()
54
-
55
- def summarize(self):
56
- for iou_type, coco_eval in self.coco_eval.items():
57
- print(f"IoU metric: {iou_type}")
58
- coco_eval.summarize()
59
-
60
- def prepare(self, predictions, iou_type):
61
- if iou_type == "bbox":
62
- return self.prepare_for_coco_detection(predictions)
63
- if iou_type == "segm":
64
- return self.prepare_for_coco_segmentation(predictions)
65
- if iou_type == "keypoints":
66
- return self.prepare_for_coco_keypoint(predictions)
67
- raise ValueError(f"Unknown iou type {iou_type}")
68
-
69
- def prepare_for_coco_detection(self, predictions):
70
- coco_results = []
71
- for original_id, prediction in predictions.items():
72
- if len(prediction) == 0:
73
- continue
74
-
75
- boxes = prediction["boxes"]
76
- boxes = convert_to_xywh(boxes).tolist()
77
- scores = prediction["scores"].tolist()
78
- labels = prediction["labels"].tolist()
79
-
80
- coco_results.extend(
81
- [
82
- {
83
- "image_id": original_id,
84
- "category_id": labels[k],
85
- "bbox": box,
86
- "score": scores[k],
87
- }
88
- for k, box in enumerate(boxes)
89
- ]
90
- )
91
- return coco_results
92
-
93
- def prepare_for_coco_segmentation(self, predictions):
94
- coco_results = []
95
- for original_id, prediction in predictions.items():
96
- if len(prediction) == 0:
97
- continue
98
-
99
- scores = prediction["scores"]
100
- labels = prediction["labels"]
101
- masks = prediction["masks"]
102
-
103
- masks = masks > 0.5
104
-
105
- scores = prediction["scores"].tolist()
106
- labels = prediction["labels"].tolist()
107
-
108
- rles = [
109
- mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] for mask in masks
110
- ]
111
- for rle in rles:
112
- rle["counts"] = rle["counts"].decode("utf-8")
113
-
114
- coco_results.extend(
115
- [
116
- {
117
- "image_id": original_id,
118
- "category_id": labels[k],
119
- "segmentation": rle,
120
- "score": scores[k],
121
- }
122
- for k, rle in enumerate(rles)
123
- ]
124
- )
125
- return coco_results
126
-
127
- def prepare_for_coco_keypoint(self, predictions):
128
- coco_results = []
129
- for original_id, prediction in predictions.items():
130
- if len(prediction) == 0:
131
- continue
132
-
133
- boxes = prediction["boxes"]
134
- boxes = convert_to_xywh(boxes).tolist()
135
- scores = prediction["scores"].tolist()
136
- labels = prediction["labels"].tolist()
137
- keypoints = prediction["keypoints"]
138
- keypoints = keypoints.flatten(start_dim=1).tolist()
139
-
140
- coco_results.extend(
141
- [
142
- {
143
- "image_id": original_id,
144
- "category_id": labels[k],
145
- "keypoints": keypoint,
146
- "score": scores[k],
147
- }
148
- for k, keypoint in enumerate(keypoints)
149
- ]
150
- )
151
- return coco_results
152
-
153
-
154
- def convert_to_xywh(boxes):
155
- xmin, ymin, xmax, ymax = boxes.unbind(1)
156
- return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
157
-
158
-
159
- def merge(img_ids, eval_imgs):
160
- all_img_ids = utils.all_gather(img_ids)
161
- all_eval_imgs = utils.all_gather(eval_imgs)
162
-
163
- merged_img_ids = []
164
- for p in all_img_ids:
165
- merged_img_ids.extend(p)
166
-
167
- merged_eval_imgs = []
168
- for p in all_eval_imgs:
169
- merged_eval_imgs.append(p)
170
-
171
- merged_img_ids = np.array(merged_img_ids)
172
- merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
173
-
174
- # keep only unique (and in sorted order) images
175
- merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
176
- merged_eval_imgs = merged_eval_imgs[..., idx]
177
-
178
- return merged_img_ids, merged_eval_imgs
179
-
180
-
181
- def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
182
- img_ids, eval_imgs = merge(img_ids, eval_imgs)
183
- img_ids = list(img_ids)
184
- eval_imgs = list(eval_imgs.flatten())
185
-
186
- coco_eval.evalImgs = eval_imgs
187
- coco_eval.params.imgIds = img_ids
188
- coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
189
-
190
-
191
- def evaluate(imgs):
192
- with redirect_stdout(io.StringIO()):
193
- imgs.evaluate()
194
- return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds))
 
src/detection/vision/coco_utils.py DELETED
@@ -1,263 +0,0 @@
1
- import copy
2
- import os
3
-
4
- import torch
5
- import torch.utils.data
6
- import torchvision
7
- from . import transforms as T
8
- from pycocotools import mask as coco_mask
9
- from pycocotools.coco import COCO
10
-
11
-
12
- class FilterAndRemapCocoCategories:
13
- def __init__(self, categories, remap=True):
14
- self.categories = categories
15
- self.remap = remap
16
-
17
- def __call__(self, image, target):
18
- anno = target["annotations"]
19
- anno = [obj for obj in anno if obj["category_id"] in self.categories]
20
- if not self.remap:
21
- target["annotations"] = anno
22
- return image, target
23
- anno = copy.deepcopy(anno)
24
- for obj in anno:
25
- obj["category_id"] = self.categories.index(obj["category_id"])
26
- target["annotations"] = anno
27
- return image, target
28
-
29
-
30
- def convert_coco_poly_to_mask(segmentations, height, width):
31
- masks = []
32
- for polygons in segmentations:
33
- if isinstance(polygons['counts'], list):
34
- rles = coco_mask.frPyObjects(polygons, height, width)
35
-
36
- else:
37
- rles = [polygons]
38
-
39
- mask = coco_mask.decode(rles)
40
- if len(mask.shape) < 3:
41
- mask = mask[..., None]
42
- mask = torch.as_tensor(mask, dtype=torch.uint8)
43
- mask = mask.any(dim=2)
44
- masks.append(mask)
45
- if masks:
46
- masks = torch.stack(masks, dim=0)
47
- else:
48
- masks = torch.zeros((0, height, width), dtype=torch.uint8)
49
- return masks
50
-
51
-
52
- class ConvertCocoPolysToMask:
53
- def __call__(self, image, target):
54
- w, h = image.size
55
-
56
- image_id = target["image_id"]
57
- image_id = torch.tensor([image_id])
58
-
59
- anno = target["annotations"]
60
- anno = [obj for obj in anno if obj["iscrowd"] == 0]
61
-
62
- boxes = [obj["bbox"] for obj in anno]
63
- # guard against no boxes via resizing
64
- boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
65
- boxes[:, 2:] += boxes[:, :2]
66
- boxes[:, 0::2].clamp_(min=0, max=w)
67
- boxes[:, 1::2].clamp_(min=0, max=h)
68
-
69
- classes = [obj["category_id"] for obj in anno]
70
- classes = torch.tensor(classes, dtype=torch.int64)
71
-
72
- # masks=None
73
- if anno and 'segmentation' in anno[0]:
74
- segmentations = [obj["segmentation"] for obj in anno]
75
-
76
- else:
77
- segmentations = []
78
-
79
- masks = convert_coco_poly_to_mask(segmentations, h, w)
80
-
81
- keypoints = None
82
- if anno and "keypoints" in anno[0]:
83
- keypoints = [obj["keypoints"] for obj in anno]
84
- keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
85
- num_keypoints = keypoints.shape[0]
86
- if num_keypoints:
87
- keypoints = keypoints.view(num_keypoints, -1, 3)
88
-
89
- keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
90
- boxes = boxes[keep]
91
- classes = classes[keep]
92
- if masks is not None and masks.shape[0] > 0:
93
- masks = masks[keep]
94
-
95
- if keypoints is not None:
96
- keypoints = keypoints[keep]
97
-
98
- target = {}
99
- target["boxes"] = boxes
100
- target["labels"] = classes
101
- target["masks"] = masks
102
-
103
- target["image_id"] = image_id
104
- if keypoints is not None:
105
- target["keypoints"] = keypoints
106
-
107
- # for conversion to coco api
108
- area = torch.tensor([obj["area"] for obj in anno])
109
- iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
110
- target["area"] = area
111
- target["iscrowd"] = iscrowd
112
- #target['vis'] = [obj['vis'] for obj in anno]
113
-
114
- return image, target
115
-
116
-
117
- def _coco_remove_images_without_annotations(dataset, cat_list=None):
118
- def _has_only_empty_bbox(anno):
119
- return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
120
-
121
- def _count_visible_keypoints(anno):
122
- return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
123
-
124
- min_keypoints_per_image = 10
125
-
126
- def _has_valid_annotation(anno):
127
- # if it's empty, there is no annotation
128
- if len(anno) == 0:
129
- return False
130
- # if all boxes have close to zero area, there is no annotation
131
- if _has_only_empty_bbox(anno):
132
- return False
133
- # keypoints task have a slight different critera for considering
134
- # if an annotation is valid
135
- if "keypoints" not in anno[0]:
136
- return True
137
- # for keypoint detection tasks, only consider valid images those
138
- # containing at least min_keypoints_per_image
139
- if _count_visible_keypoints(anno) >= min_keypoints_per_image:
140
- return True
141
- return False
142
-
143
- assert isinstance(dataset, torchvision.datasets.CocoDetection)
144
- ids = []
145
- for ds_idx, img_id in enumerate(dataset.ids):
146
- ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
147
- anno = dataset.coco.loadAnns(ann_ids)
148
- if cat_list:
149
- anno = [obj for obj in anno if obj["category_id"] in cat_list]
150
- if _has_valid_annotation(anno):
151
- ids.append(ds_idx)
152
-
153
- dataset = torch.utils.data.Subset(dataset, ids)
154
- return dataset
155
-
156
-
157
- def convert_to_coco_api(ds):
158
- coco_ds = COCO()
159
- # annotation IDs need to start at 1, not 0, see torchvision issue #1530
160
- ann_id = 1
161
- dataset = {"images": [], "categories": [], "annotations": []}
162
- categories = set()
163
- for img_idx in range(len(ds)):
164
- # find better way to get target
165
- # targets = ds.get_annotations(img_idx)
166
- img, targets = ds[img_idx]
167
- image_id = targets["image_id"].item()
168
- img_dict = {}
169
- img_dict["id"] = image_id
170
- img_dict["height"] = img.shape[-2]
171
- img_dict["width"] = img.shape[-1]
172
- dataset["images"].append(img_dict)
173
- bboxes = targets["boxes"].clone()
174
- bboxes[:, 2:] -= bboxes[:, :2]
175
- bboxes = bboxes.tolist()
176
- labels = targets["labels"].tolist()
177
- areas = targets["area"].tolist()
178
- iscrowd = targets["iscrowd"].tolist()
179
- if "masks" in targets:
180
- masks = targets["masks"]
181
- # make masks Fortran contiguous for coco_mask
182
- masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
183
- if "keypoints" in targets:
184
- keypoints = targets["keypoints"]
185
- keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
186
- num_objs = len(bboxes)
187
- for i in range(num_objs):
188
- ann = {}
189
- ann["image_id"] = image_id
190
- ann["bbox"] = bboxes[i]
191
- ann["category_id"] = labels[i]
192
- categories.add(labels[i])
193
- ann["area"] = areas[i]
194
- ann["iscrowd"] = iscrowd[i]
195
- ann["id"] = ann_id
196
- if "masks" in targets:
197
- ann["segmentation"] = coco_mask.encode(masks[i].numpy())
198
- if "keypoints" in targets:
199
- ann["keypoints"] = keypoints[i]
200
- ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3])
201
- dataset["annotations"].append(ann)
202
- ann_id += 1
203
- dataset["categories"] = [{"id": i} for i in sorted(categories)]
204
- coco_ds.dataset = dataset
205
- coco_ds.createIndex()
206
- return coco_ds
207
-
208
-
209
- def get_coco_api_from_dataset(dataset):
210
- for _ in range(10):
211
- if isinstance(dataset, torchvision.datasets.CocoDetection):
212
- break
213
- if isinstance(dataset, torch.utils.data.Subset):
214
- dataset = dataset.dataset
215
- if isinstance(dataset, torchvision.datasets.CocoDetection):
216
- return dataset.coco
217
- return convert_to_coco_api(dataset)
218
-
219
-
220
- class CocoDetection(torchvision.datasets.CocoDetection):
221
- def __init__(self, img_folder, ann_file, transforms):
222
- super().__init__(img_folder, ann_file)
223
- self._transforms = transforms
224
-
225
- def __getitem__(self, idx):
226
- img, target = super().__getitem__(idx)
227
- image_id = self.ids[idx]
228
- target = dict(image_id=image_id, annotations=target)
229
- if self._transforms is not None:
230
- img, target = self._transforms(img, target)
231
- return img, target
232
-
233
-
234
- def get_coco(root, image_set, transforms, mode="instances"):
235
- anno_file_template = "{}_{}2017.json"
236
- PATHS = {
237
- "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
238
- "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
239
- # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
240
- }
241
-
242
- t = [ConvertCocoPolysToMask()]
243
-
244
- if transforms is not None:
245
- t.append(transforms)
246
- transforms = T.Compose(t)
247
-
248
- img_folder, ann_file = PATHS[image_set]
249
- img_folder = os.path.join(root, img_folder)
250
- ann_file = os.path.join(root, ann_file)
251
-
252
- dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
253
-
254
- if image_set == "train":
255
- dataset = _coco_remove_images_without_annotations(dataset)
256
-
257
- # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
258
-
259
- return dataset
260
-
261
-
262
- def get_coco_kp(root, image_set, transforms):
263
- return get_coco(root, image_set, transforms, mode="person_keypoints")
 
src/detection/vision/engine.py DELETED
@@ -1,137 +0,0 @@
1
- import math
2
- import sys
3
- import time
4
-
5
- import torch
6
- import torchvision.models.detection.faster_rcnn
7
- from . import utils
8
- from . import coco_eval
9
- from . import coco_utils
10
-
11
-
12
- def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
13
- model.train()
14
- metric_logger = utils.MetricLogger(delimiter=" ")
15
- metric_logger.add_meter("lr", utils.SmoothedValue(
16
- window_size=1, fmt="{value:.6f}"))
17
- header = f"Epoch: [{epoch}]"
18
- lr_scheduler = None
19
- if epoch == 0:
20
- warmup_factor = 1.0 / 1000
21
- warmup_iters = min(1000, len(data_loader) - 1)
22
-
23
- lr_scheduler = torch.optim.lr_scheduler.LinearLR(
24
- optimizer, start_factor=warmup_factor, total_iters=warmup_iters
25
- )
26
-
27
- losses_dict = {
28
- "lr": [],
29
- "loss": [],
30
- # loss rpn
31
- "loss_objectness": [],
32
- "loss_rpn_box_reg": [],
33
- # roi heads
34
- "loss_classifier": [],
35
- "loss_box_reg": [],
36
- }
37
- for images, targets in metric_logger.log_every(data_loader, print_freq, header):
38
- try:
39
- images = list(image.to(device) for image in images)
40
- targets = [{k: v.to(device) for k, v in t.items()}
41
- for t in targets]
42
- with torch.cuda.amp.autocast(enabled=scaler is not None):
43
- loss_dict = model(images, targets)
44
- losses = sum(loss for loss in loss_dict.values())
45
-
46
- # reduce losses over all GPUs for logging purposes
47
- loss_dict_reduced = utils.reduce_dict(loss_dict)
48
- losses_reduced = sum(loss for loss in loss_dict_reduced.values())
49
-
50
- loss_value = losses_reduced.item()
51
-
52
- # if problem with loss see below
53
- if not math.isfinite(loss_value):
54
- print(f"Loss is {loss_value}, stopping training")
55
- print(loss_dict_reduced)
56
- sys.exit(1)
57
-
58
- except Exception as exp:
59
- print("ERROR", str(exp))
60
- torch.save({'img': images, 'targets': targets},
61
- 'error_causing_batch.pth')
62
- raise RuntimeError
63
-
64
- optimizer.zero_grad()
65
- if scaler is not None:
66
- scaler.scale(losses).backward()
67
- scaler.step(optimizer)
68
- scaler.update()
69
- else:
70
- losses.backward()
71
- optimizer.step()
72
-
73
- if lr_scheduler is not None:
74
- lr_scheduler.step()
75
-
76
- metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
77
- metric_logger.update(lr=optimizer.param_groups[0]["lr"])
78
- for name, meter in metric_logger.meters.items():
79
- losses_dict[name].append(meter.global_avg)
80
- return metric_logger, losses_dict
81
-
82
-
83
- def _get_iou_types(model):
84
- model_without_ddp = model
85
- if isinstance(model, torch.nn.parallel.DistributedDataParallel):
86
- model_without_ddp = model.module
87
- iou_types = ["bbox"]
88
- if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
89
- iou_types.append("segm")
90
- if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
91
- iou_types.append("keypoints")
92
- return iou_types
93
-
94
-
95
- @ torch.inference_mode()
96
- def evaluate(model, data_loader, device, iou_types=None):
97
- n_threads = torch.get_num_threads()
98
- # FIXME remove this and make paste_masks_in_image run on the GPU
99
- torch.set_num_threads(1)
100
- cpu_device = torch.device("cpu")
101
- model.eval()
102
- metric_logger = utils.MetricLogger(delimiter=" ")
103
- header = "Test:"
104
- coco = coco_utils.get_coco_api_from_dataset(data_loader.dataset)
105
- if iou_types is None:
106
- iou_types = _get_iou_types(model)
107
- coco_evaluator = coco_eval.CocoEvaluator(coco, iou_types)
108
- for images, targets in metric_logger.log_every(data_loader, 100, header):
109
- images = list(img.to(device) for img in images)
110
-
111
- if torch.cuda.is_available():
112
- torch.cuda.synchronize()
113
- model_time = time.time()
114
- outputs = model(images)
115
-
116
- outputs = [{k: v.to(cpu_device) for k, v in t.items()}
117
- for t in outputs]
118
- model_time = time.time() - model_time
119
-
120
- res = {target["image_id"].item(): output for target,
121
- output in zip(targets, outputs)}
122
- evaluator_time = time.time()
123
- coco_evaluator.update(res)
124
- evaluator_time = time.time() - evaluator_time
125
- metric_logger.update(model_time=model_time,
126
- evaluator_time=evaluator_time)
127
-
128
- # gather the stats from all processes
129
- metric_logger.synchronize_between_processes()
130
- print("Averaged stats:", metric_logger)
131
- coco_evaluator.synchronize_between_processes()
132
-
133
- # accumulate predictions from all images and print table with results
134
- coco_evaluator.accumulate()
135
- coco_evaluator.summarize()
136
- torch.set_num_threads(n_threads)
137
- return coco_evaluator
 
src/detection/vision/group_by_aspect_ratio.py DELETED
@@ -1,196 +0,0 @@
1
- import bisect
2
- import copy
3
- import math
4
- from collections import defaultdict
5
- from itertools import repeat, chain
6
-
7
- import numpy as np
8
- import torch
9
- import torch.utils.data
10
- import torchvision
11
- from PIL import Image
12
- from torch.utils.data.sampler import BatchSampler, Sampler
13
- from torch.utils.model_zoo import tqdm
14
-
15
-
16
- def _repeat_to_at_least(iterable, n):
17
- repeat_times = math.ceil(n / len(iterable))
18
- repeated = chain.from_iterable(repeat(iterable, repeat_times))
19
- return list(repeated)
20
-
21
-
22
- class GroupedBatchSampler(BatchSampler):
23
- """
24
- Wraps another sampler to yield a mini-batch of indices.
25
- It enforces that the batch only contain elements from the same group.
26
- It also tries to provide mini-batches which follows an ordering which is
27
- as close as possible to the ordering from the original sampler.
28
- Args:
29
- sampler (Sampler): Base sampler.
30
- group_ids (list[int]): If the sampler produces indices in range [0, N),
31
- `group_ids` must be a list of `N` ints which contains the group id of each sample.
32
- The group ids must be a continuous set of integers starting from
33
- 0, i.e. they must be in the range [0, num_groups).
34
- batch_size (int): Size of mini-batch.
35
- """
36
-
37
- def __init__(self, sampler, group_ids, batch_size):
38
- if not isinstance(sampler, Sampler):
39
- raise ValueError(f"sampler should be an instance of torch.utils.data.Sampler, but got sampler={sampler}")
40
- self.sampler = sampler
41
- self.group_ids = group_ids
42
- self.batch_size = batch_size
43
-
44
- def __iter__(self):
45
- buffer_per_group = defaultdict(list)
46
- samples_per_group = defaultdict(list)
47
-
48
- num_batches = 0
49
- for idx in self.sampler:
50
- group_id = self.group_ids[idx]
51
- buffer_per_group[group_id].append(idx)
52
- samples_per_group[group_id].append(idx)
53
- if len(buffer_per_group[group_id]) == self.batch_size:
54
- yield buffer_per_group[group_id]
55
- num_batches += 1
56
- del buffer_per_group[group_id]
57
- assert len(buffer_per_group[group_id]) < self.batch_size
58
-
59
- # now we have run out of elements that satisfy
60
- # the group criteria, let's return the remaining
61
- # elements so that the size of the sampler is
62
- # deterministic
63
- expected_num_batches = len(self)
64
- num_remaining = expected_num_batches - num_batches
65
- if num_remaining > 0:
66
- # for the remaining batches, take first the buffers with largest number
67
- # of elements
68
- for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True):
69
- remaining = self.batch_size - len(buffer_per_group[group_id])
70
- samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
71
- buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
72
- assert len(buffer_per_group[group_id]) == self.batch_size
73
- yield buffer_per_group[group_id]
74
- num_remaining -= 1
75
- if num_remaining == 0:
76
- break
77
- assert num_remaining == 0
78
-
79
- def __len__(self):
80
- return len(self.sampler) // self.batch_size
81
-
82
-
83
- def _compute_aspect_ratios_slow(dataset, indices=None):
84
- print(
85
- "Your dataset doesn't support the fast path for "
86
- "computing the aspect ratios, so will iterate over "
87
- "the full dataset and load every image instead. "
88
- "This might take some time..."
89
- )
90
- if indices is None:
91
- indices = range(len(dataset))
92
-
93
- class SubsetSampler(Sampler):
94
- def __init__(self, indices):
95
- self.indices = indices
96
-
97
- def __iter__(self):
98
- return iter(self.indices)
99
-
100
- def __len__(self):
101
- return len(self.indices)
102
-
103
- sampler = SubsetSampler(indices)
104
- data_loader = torch.utils.data.DataLoader(
105
- dataset,
106
- batch_size=1,
107
- sampler=sampler,
108
- num_workers=14, # you might want to increase it for faster processing
109
- collate_fn=lambda x: x[0],
110
- )
111
- aspect_ratios = []
112
- with tqdm(total=len(dataset)) as pbar:
113
- for _i, (img, _) in enumerate(data_loader):
114
- pbar.update(1)
115
- height, width = img.shape[-2:]
116
- aspect_ratio = float(width) / float(height)
117
- aspect_ratios.append(aspect_ratio)
118
- return aspect_ratios
119
-
120
-
121
- def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
122
- if indices is None:
123
- indices = range(len(dataset))
124
- aspect_ratios = []
125
- for i in indices:
126
- height, width = dataset.get_height_and_width(i)
127
- aspect_ratio = float(width) / float(height)
128
- aspect_ratios.append(aspect_ratio)
129
- return aspect_ratios
130
-
131
-
132
- def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
133
- if indices is None:
134
- indices = range(len(dataset))
135
- aspect_ratios = []
136
- for i in indices:
137
- img_info = dataset.coco.imgs[dataset.ids[i]]
138
- aspect_ratio = float(img_info["width"]) / float(img_info["height"])
139
- aspect_ratios.append(aspect_ratio)
140
- return aspect_ratios
141
-
142
-
143
- def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
144
- if indices is None:
145
- indices = range(len(dataset))
146
- aspect_ratios = []
147
- for i in indices:
148
- # this doesn't load the data into memory, because PIL loads it lazily
149
- width, height = Image.open(dataset.images[i]).size
150
- aspect_ratio = float(width) / float(height)
151
- aspect_ratios.append(aspect_ratio)
152
- return aspect_ratios
153
-
154
-
155
- def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
156
- if indices is None:
157
- indices = range(len(dataset))
158
-
159
- ds_indices = [dataset.indices[i] for i in indices]
160
- return compute_aspect_ratios(dataset.dataset, ds_indices)
161
-
162
-
163
- def compute_aspect_ratios(dataset, indices=None):
164
- if hasattr(dataset, "get_height_and_width"):
165
- return _compute_aspect_ratios_custom_dataset(dataset, indices)
166
-
167
- if isinstance(dataset, torchvision.datasets.CocoDetection):
168
- return _compute_aspect_ratios_coco_dataset(dataset, indices)
169
-
170
- if isinstance(dataset, torchvision.datasets.VOCDetection):
171
- return _compute_aspect_ratios_voc_dataset(dataset, indices)
172
-
173
- if isinstance(dataset, torch.utils.data.Subset):
174
- return _compute_aspect_ratios_subset_dataset(dataset, indices)
175
-
176
- # slow path
177
- return _compute_aspect_ratios_slow(dataset, indices)
178
-
179
-
180
- def _quantize(x, bins):
181
- bins = copy.deepcopy(bins)
182
- bins = sorted(bins)
183
- quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
184
- return quantized
185
-
186
-
187
- def create_aspect_ratio_groups(dataset, k=0):
188
- aspect_ratios = compute_aspect_ratios(dataset)
189
- bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]
190
- groups = _quantize(aspect_ratios, bins)
191
- # count number of elements per group
192
- counts = np.unique(groups, return_counts=True)[1]
193
- fbins = [0] + bins + [np.inf]
194
- print(f"Using {fbins} as bins for aspect ratio quantization")
195
- print(f"Count of instances per bin: {counts}")
196
- return groups
 
src/detection/vision/mot_data.py DELETED
@@ -1,370 +0,0 @@
1
- import configparser
2
- import csv
3
- import os
4
- import os.path as osp
5
- import pickle
6
-
7
- import numpy as np
8
- import pycocotools.mask as rletools
9
- import torch
10
- from PIL import Image
11
-
12
-
13
- class MOTObjDetect(torch.utils.data.Dataset):
14
- """ Data class for the Multiple Object Tracking Dataset
15
- """
16
-
17
- def __init__(self, root, transforms=None, vis_threshold=0.25,
18
- split_seqs=None, frame_range_start=0.0, frame_range_end=1.0):
19
- self.root = root
20
- self.transforms = transforms
21
- self._vis_threshold = vis_threshold
22
- self._classes = ('background', 'pedestrian')
23
- self._img_paths = []
24
- self._split_seqs = split_seqs
25
-
26
- self.mots_gts = {}
27
- for f in sorted(os.listdir(root)):
28
- path = os.path.join(root, f)
29
-
30
- if not os.path.isdir(path):
31
- continue
32
-
33
- if split_seqs is not None and f not in split_seqs:
34
- continue
35
-
36
- config_file = os.path.join(path, 'seqinfo.ini')
37
-
38
- assert os.path.exists(config_file), \
39
- 'Path does not exist: {}'.format(config_file)
40
-
41
- config = configparser.ConfigParser()
42
- config.read(config_file)
43
- seq_len = int(config['Sequence']['seqLength'])
44
- im_ext = config['Sequence']['imExt']
45
- im_dir = config['Sequence']['imDir']
46
-
47
- img_dir = os.path.join(path, im_dir)
48
-
49
- start_frame = int(frame_range_start * seq_len)
50
- end_frame = int(frame_range_end * seq_len)
51
-
52
- # for i in range(seq_len):
53
- for i in range(start_frame, end_frame):
54
- img_path = os.path.join(img_dir, f"{i + 1:06d}{im_ext}")
55
- assert os.path.exists(
56
- img_path), f'Path does not exist: {img_path}'
57
- self._img_paths.append(img_path)
58
-
59
- # print(len(self._img_paths))
60
-
61
- if self.has_masks:
62
- gt_file = os.path.join(
63
- os.path.dirname(img_dir), 'gt', 'gt.txt')
64
- self.mots_gts[gt_file] = load_mots_gt(gt_file)
65
-
66
- def __str__(self):
67
- if self._split_seqs is None:
68
- return self.root
69
- return f"{self.root}/{self._split_seqs}"
70
-
71
- @property
72
- def num_classes(self):
73
- return len(self._classes)
74
-
75
- def _get_annotation(self, idx):
76
- """
77
- """
78
-
79
- if 'test' in self.root:
80
- num_objs = 0
81
- boxes = torch.zeros((num_objs, 4), dtype=torch.float32)
82
-
83
- return {'boxes': boxes,
84
- 'labels': torch.ones((num_objs,), dtype=torch.int64),
85
- 'image_id': torch.tensor([idx]),
86
- 'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
87
- 'iscrowd': torch.zeros((num_objs,), dtype=torch.int64),
88
- 'visibilities': torch.zeros((num_objs), dtype=torch.float32)}
89
-
90
- img_path = self._img_paths[idx]
91
- file_index = int(os.path.basename(img_path).split('.')[0])
92
-
93
- gt_file = os.path.join(os.path.dirname(
94
- os.path.dirname(img_path)), 'gt', 'gt.txt')
95
-
96
- assert os.path.exists(gt_file), \
97
- 'GT file does not exist: {}'.format(gt_file)
98
-
99
- bounding_boxes = []
100
-
101
- if self.has_masks:
102
- mask_objects_per_frame = self.mots_gts[gt_file][file_index]
103
- masks = []
104
- for mask_object in mask_objects_per_frame:
105
- # class_id = 1 is car
106
- # class_id = 2 is pedestrian
107
- # class_id = 10 IGNORE
108
- if mask_object.class_id in [1, 10] or not rletools.area(mask_object.mask):
109
- continue
110
-
111
- bbox = rletools.toBbox(mask_object.mask)
112
- x1, y1, w, h = [int(c) for c in bbox]
113
-
114
- bb = {}
115
- bb['bb_left'] = x1
116
- bb['bb_top'] = y1
117
- bb['bb_width'] = w
118
- bb['bb_height'] = h
119
-
120
- # print(bb, rletools.area(mask_object.mask))
121
-
122
- bb['visibility'] = 1.0
123
- bb['track_id'] = mask_object.track_id
124
-
125
- masks.append(rletools.decode(mask_object.mask))
126
- bounding_boxes.append(bb)
127
- else:
128
- with open(gt_file, "r") as inf:
129
- reader = csv.reader(inf, delimiter=',')
130
- for row in reader:
131
- visibility = float(row[8])
132
-
133
- if int(row[0]) == file_index and int(row[6]) == 1 and int(row[7]) == 1 and visibility and visibility >= self._vis_threshold:
134
- bb = {}
135
- bb['bb_left'] = int(row[2])
136
- bb['bb_top'] = int(row[3])
137
- bb['bb_width'] = int(row[4])
138
- bb['bb_height'] = int(row[5])
139
- bb['visibility'] = float(row[8])
140
- bb['track_id'] = int(row[1])
141
-
142
- bounding_boxes.append(bb)
143
-
144
- num_objs = len(bounding_boxes)
145
-
146
- boxes = torch.zeros((num_objs, 4), dtype=torch.float32)
147
- visibilities = torch.zeros((num_objs), dtype=torch.float32)
148
- track_ids = torch.zeros((num_objs), dtype=torch.long)
149
-
150
- for i, bb in enumerate(bounding_boxes):
151
- # Make pixel indexes 0-based, should already be 0-based (or not)
152
- x1 = bb['bb_left'] # - 1
153
- y1 = bb['bb_top'] # - 1
154
- # This -1 accounts for the width (width of 1 x1=x2)
155
- x2 = x1 + bb['bb_width'] # - 1
156
- y2 = y1 + bb['bb_height'] # - 1
157
-
158
- boxes[i, 0] = x1
159
- boxes[i, 1] = y1
160
- boxes[i, 2] = x2
161
- boxes[i, 3] = y2
162
- visibilities[i] = bb['visibility']
163
- track_ids[i] = bb['track_id']
164
-
165
- annos = {'boxes': boxes,
166
- 'labels': torch.ones((num_objs,), dtype=torch.int64),
167
- 'image_id': torch.tensor([idx]),
168
- 'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
169
- 'iscrowd': torch.zeros((num_objs,), dtype=torch.int64),
170
- 'visibilities': visibilities,
171
- 'track_ids': track_ids, }
172
-
173
- if self.has_masks:
174
- # annos['masks'] = torch.tensor(masks, dtype=torch.uint8)
175
- annos['masks'] = torch.from_numpy(np.stack(masks))
176
- return annos
177
-
178
- @property
179
- def has_masks(self):
180
- return '/MOTS20/' in self.root
181
-
182
- def __getitem__(self, idx):
183
- # load images ad masks
184
- img_path = self._img_paths[idx]
185
- # mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
186
- img = Image.open(img_path).convert("RGB")
187
-
188
- target = self._get_annotation(idx)
189
-
190
- if self.transforms is not None:
191
- img, target = self.transforms(img, target)
192
-
193
- return img, target
194
-
195
- def __len__(self):
196
- return len(self._img_paths)
197
-
198
- def write_results_files(self, results, output_dir):
199
- """Write the detections in the format for MOT17Det sumbission
200
-
201
- all_boxes[image] = N x 5 array of detections in (x1, y1, x2, y2, score)
202
-
203
- Each file contains these lines:
204
- <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
205
-
206
- Files to sumbit:
207
- ./MOT17-01.txt
208
- ./MOT17-02.txt
209
- ./MOT17-03.txt
210
- ./MOT17-04.txt
211
- ./MOT17-05.txt
212
- ./MOT17-06.txt
213
- ./MOT17-07.txt
214
- ./MOT17-08.txt
215
- ./MOT17-09.txt
216
- ./MOT17-10.txt
217
- ./MOT17-11.txt
218
- ./MOT17-12.txt
219
- ./MOT17-13.txt
220
- ./MOT17-14.txt
221
- """
222
-
223
- #format_str = "{}, -1, {}, {}, {}, {}, {}, -1, -1, -1"
224
-
225
- files = {}
226
- for image_id, res in results.items():
227
- path = self._img_paths[image_id]
228
- img1, name = osp.split(path)
229
- # get image number out of name
230
- frame = int(name.split('.')[0])
231
- # smth like /train/MOT17-09-FRCNN or /train/MOT17-09
232
- tmp = osp.dirname(img1)
233
- # get the folder name of the sequence and split it
234
- tmp = osp.basename(tmp).split('-')
235
- # Now get the output name of the file
236
- out = tmp[0]+'-'+tmp[1]+'.txt'
237
- outfile = osp.join(output_dir, out)
238
-
239
- # check if out in keys and create empty list if not
240
- if outfile not in files.keys():
241
- files[outfile] = []
242
-
243
- if 'masks' in res:
244
- delimiter = ' '
245
- # print(torch.unique(res['masks'][0]))
246
- # > 0.5 #res['masks'].bool()
247
- masks = res['masks'].squeeze(dim=1)
248
-
249
- index_map = torch.arange(masks.size(0))[:, None, None]
250
- index_map = index_map.expand_as(masks)
251
-
252
- masks = torch.logical_and(
253
- # remove background
254
- masks > 0.5,
255
- # remove overlapp by largest probablity
256
- index_map == masks.argmax(dim=0)
257
- )
258
- for res_i in range(len(masks)):
259
- track_id = -1
260
- if 'track_ids' in res:
261
- track_id = res['track_ids'][res_i].item()
262
- mask = masks[res_i]
263
- mask = np.asfortranarray(mask)
264
-
265
- rle_mask = rletools.encode(mask)
266
-
267
- files[outfile].append(
268
- [frame,
269
- track_id,
270
- 2, # class pedestrian
271
- mask.shape[0],
272
- mask.shape[1],
273
- rle_mask['counts'].decode(encoding='UTF-8')])
274
- else:
275
- delimiter = ','
276
- for res_i in range(len(res['boxes'])):
277
- track_id = -1
278
- if 'track_ids' in res:
279
- track_id = res['track_ids'][res_i].item()
280
- box = res['boxes'][res_i]
281
- score = res['scores'][res_i]
282
-
283
- x1 = box[0].item()
284
- y1 = box[1].item()
285
- x2 = box[2].item()
286
- y2 = box[3].item()
287
-
288
- out = [frame, track_id, x1, y1, x2 - x1,
289
- y2 - y1, score.item(), -1, -1, -1]
290
-
291
- if 'keypoints' in res:
292
- out.extend(res['keypoints'][res_i]
293
- [:, :2].flatten().tolist())
294
- out.extend(res['keypoints_scores']
295
- [res_i].flatten().tolist())
296
-
297
- files[outfile].append(out)
298
-
299
- for k, v in files.items():
300
- with open(k, "w") as of:
301
- writer = csv.writer(of, delimiter=delimiter)
302
- for d in v:
303
- writer.writerow(d)
304
-
305
-
306
- class SegmentedObject:
307
- """
308
- Helper class for segmentation objects.
309
- """
310
-
311
- def __init__(self, mask: dict, class_id: int, track_id: int, full_bbox=None) -> None:
312
- self.mask = mask
313
- self.class_id = class_id
314
- self.track_id = track_id
315
- self.full_bbox = full_bbox
316
-
317
-
318
- def load_mots_gt(path: str) -> dict:
319
- """Load MOTS ground truth from path."""
320
- objects_per_frame = {}
321
- track_ids_per_frame = {} # Check that no frame contains two objects with same id
322
- combined_mask_per_frame = {} # Check that no frame contains overlapping masks
323
-
324
- with open(path, "r") as gt_file:
325
- for line in gt_file:
326
- line = line.strip()
327
- fields = line.split(" ")
328
-
329
- frame = int(fields[0])
330
- if frame not in objects_per_frame:
331
- objects_per_frame[frame] = []
332
- # if frame not in track_ids_per_frame:
333
- # track_ids_per_frame[frame] = set()
334
- # if int(fields[1]) in track_ids_per_frame[frame]:
335
- # assert False, f"Multiple objects with track id {fields[1]} in frame {fields[0]}"
336
- # else:
337
- # track_ids_per_frame[frame].add(int(fields[1]))
338
-
339
- class_id = int(fields[2])
340
- if not (class_id == 1 or class_id == 2 or class_id == 10):
341
- assert False, "Unknown object class " + fields[2]
342
-
343
- mask = {
344
- 'size': [int(fields[3]), int(fields[4])],
345
- 'counts': fields[5].encode(encoding='UTF-8')}
346
- if frame not in combined_mask_per_frame:
347
- combined_mask_per_frame[frame] = mask
348
- elif rletools.area(rletools.merge([
349
- combined_mask_per_frame[frame], mask],
350
- intersect=True)):
351
- assert False, "Objects with overlapping masks in frame " + \
352
- fields[0]
353
- else:
354
- combined_mask_per_frame[frame] = rletools.merge(
355
- [combined_mask_per_frame[frame], mask],
356
- intersect=False)
357
-
358
- full_bbox = None
359
- if len(fields) == 10:
360
- full_bbox = [int(fields[6]), int(fields[7]),
361
- int(fields[8]), int(fields[9])]
362
-
363
- objects_per_frame[frame].append(SegmentedObject(
364
- mask,
365
- class_id,
366
- int(fields[1]),
367
- full_bbox
368
- ))
369
-
370
- return objects_per_frame
 
src/detection/vision/utils.py DELETED
@@ -1,282 +0,0 @@
1
- import datetime
2
- import errno
3
- import os
4
- import time
5
- from collections import defaultdict, deque
6
-
7
- import torch
8
- import torch.distributed as dist
9
-
10
-
11
- class SmoothedValue:
12
- """Track a series of values and provide access to smoothed values over a
13
- window or the global series average.
14
- """
15
-
16
- def __init__(self, window_size=20, fmt=None):
17
- if fmt is None:
18
- fmt = "{median:.4f} ({global_avg:.4f})"
19
- self.deque = deque(maxlen=window_size)
20
- self.total = 0.0
21
- self.count = 0
22
- self.fmt = fmt
23
-
24
- def update(self, value, n=1):
25
- self.deque.append(value)
26
- self.count += n
27
- self.total += value * n
28
-
29
- def synchronize_between_processes(self):
30
- """
31
- Warning: does not synchronize the deque!
32
- """
33
- if not is_dist_avail_and_initialized():
34
- return
35
- t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
36
- dist.barrier()
37
- dist.all_reduce(t)
38
- t = t.tolist()
39
- self.count = int(t[0])
40
- self.total = t[1]
41
-
42
- @property
43
- def median(self):
44
- d = torch.tensor(list(self.deque))
45
- return d.median().item()
46
-
47
- @property
48
- def avg(self):
49
- d = torch.tensor(list(self.deque), dtype=torch.float32)
50
- return d.mean().item()
51
-
52
- @property
53
- def global_avg(self):
54
- return self.total / self.count
55
-
56
- @property
57
- def max(self):
58
- return max(self.deque)
59
-
60
- @property
61
- def value(self):
62
- return self.deque[-1]
63
-
64
- def __str__(self):
65
- return self.fmt.format(
66
- median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value
67
- )
68
-
69
-
70
- def all_gather(data):
71
- """
72
- Run all_gather on arbitrary picklable data (not necessarily tensors)
73
- Args:
74
- data: any picklable object
75
- Returns:
76
- list[data]: list of data gathered from each rank
77
- """
78
- world_size = get_world_size()
79
- if world_size == 1:
80
- return [data]
81
- data_list = [None] * world_size
82
- dist.all_gather_object(data_list, data)
83
- return data_list
84
-
85
-
86
- def reduce_dict(input_dict, average=True):
87
- """
88
- Args:
89
- input_dict (dict): all the values will be reduced
90
- average (bool): whether to do average or sum
91
- Reduce the values in the dictionary from all processes so that all processes
92
- have the averaged results. Returns a dict with the same fields as
93
- input_dict, after reduction.
94
- """
95
- world_size = get_world_size()
96
- if world_size < 2:
97
- return input_dict
98
- with torch.inference_mode():
99
- names = []
100
- values = []
101
- # sort the keys so that they are consistent across processes
102
- for k in sorted(input_dict.keys()):
103
- names.append(k)
104
- values.append(input_dict[k])
105
- values = torch.stack(values, dim=0)
106
- dist.all_reduce(values)
107
- if average:
108
- values /= world_size
109
- reduced_dict = {k: v for k, v in zip(names, values)}
110
- return reduced_dict
111
-
112
-
113
- class MetricLogger:
114
- def __init__(self, delimiter="\t"):
115
- self.meters = defaultdict(SmoothedValue)
116
- self.delimiter = delimiter
117
-
118
- def update(self, **kwargs):
119
- for k, v in kwargs.items():
120
- if isinstance(v, torch.Tensor):
121
- v = v.item()
122
- assert isinstance(v, (float, int))
123
- self.meters[k].update(v)
124
-
125
- def __getattr__(self, attr):
126
- if attr in self.meters:
127
- return self.meters[attr]
128
- if attr in self.__dict__:
129
- return self.__dict__[attr]
130
- raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
131
-
132
- def __str__(self):
133
- loss_str = []
134
- for name, meter in self.meters.items():
135
- loss_str.append(f"{name}: {str(meter)}")
136
- return self.delimiter.join(loss_str)
137
-
138
- def synchronize_between_processes(self):
139
- for meter in self.meters.values():
140
- meter.synchronize_between_processes()
141
-
142
- def add_meter(self, name, meter):
143
- self.meters[name] = meter
144
-
145
- def log_every(self, iterable, print_freq, header=None):
146
- i = 0
147
- if not header:
148
- header = ""
149
- start_time = time.time()
150
- end = time.time()
151
- iter_time = SmoothedValue(fmt="{avg:.4f}")
152
- data_time = SmoothedValue(fmt="{avg:.4f}")
153
- space_fmt = ":" + str(len(str(len(iterable)))) + "d"
154
- if torch.cuda.is_available():
155
- log_msg = self.delimiter.join(
156
- [
157
- header,
158
- "[{0" + space_fmt + "}/{1}]",
159
- "eta: {eta}",
160
- "{meters}",
161
- "time: {time}",
162
- "data: {data}",
163
- "max mem: {memory:.0f}",
164
- ]
165
- )
166
- else:
167
- log_msg = self.delimiter.join(
168
- [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"]
169
- )
170
- MB = 1024.0 * 1024.0
171
- for obj in iterable:
172
- data_time.update(time.time() - end)
173
- yield obj
174
- iter_time.update(time.time() - end)
175
- if i % print_freq == 0 or i == len(iterable) - 1:
176
- eta_seconds = iter_time.global_avg * (len(iterable) - i)
177
- eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
178
- if torch.cuda.is_available():
179
- print(
180
- log_msg.format(
181
- i,
182
- len(iterable),
183
- eta=eta_string,
184
- meters=str(self),
185
- time=str(iter_time),
186
- data=str(data_time),
187
- memory=torch.cuda.max_memory_allocated() / MB,
188
- )
189
- )
190
- else:
191
- print(
192
- log_msg.format(
193
- i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time)
194
- )
195
- )
196
- i += 1
197
- end = time.time()
198
- total_time = time.time() - start_time
199
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
200
- print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)")
201
-
202
-
203
- def collate_fn(batch):
204
- return tuple(zip(*batch))
205
-
206
-
207
- def mkdir(path):
208
- try:
209
- os.makedirs(path, exist_ok=True)
210
- except OSError as e:
211
- if e.errno != errno.EEXIST:
212
- raise
213
-
214
-
215
- def setup_for_distributed(is_master):
216
- """
217
- This function disables printing when not in master process
218
- """
219
- import builtins as __builtin__
220
-
221
- builtin_print = __builtin__.print
222
-
223
- def print(*args, **kwargs):
224
- force = kwargs.pop("force", False)
225
- if is_master or force:
226
- builtin_print(*args, **kwargs)
227
-
228
- __builtin__.print = print
229
-
230
-
231
- def is_dist_avail_and_initialized():
232
- if not dist.is_available():
233
- return False
234
- if not dist.is_initialized():
235
- return False
236
- return True
237
-
238
-
239
- def get_world_size():
240
- if not is_dist_avail_and_initialized():
241
- return 1
242
- return dist.get_world_size()
243
-
244
-
245
- def get_rank():
246
- if not is_dist_avail_and_initialized():
247
- return 0
248
- return dist.get_rank()
249
-
250
-
251
- def is_main_process():
252
- return get_rank() == 0
253
-
254
-
255
- def save_on_master(*args, **kwargs):
256
- if is_main_process():
257
- torch.save(*args, **kwargs)
258
-
259
-
260
- def init_distributed_mode(args):
261
- if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
262
- args.rank = int(os.environ["RANK"])
263
- args.world_size = int(os.environ["WORLD_SIZE"])
264
- args.gpu = int(os.environ["LOCAL_RANK"])
265
- elif "SLURM_PROCID" in os.environ:
266
- args.rank = int(os.environ["SLURM_PROCID"])
267
- args.gpu = args.rank % torch.cuda.device_count()
268
- else:
269
- print("Not using distributed mode")
270
- args.distributed = False
271
- return
272
-
273
- args.distributed = True
274
-
275
- torch.cuda.set_device(args.gpu)
276
- args.dist_backend = "nccl"
277
- print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True)
278
- torch.distributed.init_process_group(
279
- backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank
280
- )
281
- torch.distributed.barrier()
282
- setup_for_distributed(args.rank == 0)
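
For context, a minimal sketch of how the deleted MetricLogger / log_every helpers above are typically driven from a training loop. The import path and the `update` method (defined earlier in the same deleted file, not shown here) are assumptions; this is illustrative only, not part of the commit.

import torch
from src.detection.vision.utils import MetricLogger, SmoothedValue  # assumed import path

def train_one_epoch_sketch(model, optimizer, data_loader, device, epoch, print_freq=20):
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}"))
    # log_every yields the batches and prints ETA, meters, iteration/data time (and max GPU
    # memory when CUDA is available) every `print_freq` iterations, plus a final summary.
    for images, targets in metric_logger.log_every(data_loader, print_freq, f"Epoch: [{epoch}]"):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()}
                   for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        metric_logger.update(loss=losses.item(), lr=optimizer.param_groups[0]["lr"])
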
tools/__init__.py DELETED
File without changes
tools/anns/combine_anns.py DELETED
@@ -1,87 +0,0 @@
1
- import os
2
- import os.path as osp
3
- import json
4
- import argparse
5
- import tqdm
6
-
7
-
8
- def parse_args():
9
- parser = argparse.ArgumentParser()
10
- parser.add_argument(
11
- '--motsynth-path', help="Directory path containing the 'annotations' directory with .json files")
12
- parser.add_argument(
13
-         '--save-path', help='Root directory in which the new annotation files will be stored. If not provided, the motsynth-path will be used')
14
- parser.add_argument('--save-dir', default='comb_annotations',
15
- help="name of directory within 'save-path'in which MOTS annotation files will be stored")
16
- parser.add_argument('--subsample', default=20, type=int,
17
- help="Frame subsampling rate. If e.g. 10 is selected, then we will select 1 in 10 frames")
18
- parser.add_argument('--split', default='train',
19
- help="Name of split (i.e. set of sequences being merged) being used. A file named '{args.split}.txt needs to exist in the splits dir")
20
- parser.add_argument(
21
-         '--name', help="Name of the split file that will be generated. If not provided, the split name will be used")
22
-
23
- args = parser.parse_args()
24
-
25
- if args.save_path is None:
26
- args.save_path = args.motsynth_path
27
-
28
- if args.name is None:
29
- args.name = args.split
30
-
31
- assert args.subsample > 0, "Argument '--subsample' needs to be a positive integer. Set it to 1 to use every frame"
32
-
33
- return args
34
-
35
-
36
- def read_split_file(path):
37
- with open(path, 'r') as file:
38
- seq_list = file.read().splitlines()
39
-
40
- return seq_list
41
-
42
-
43
- def main(args):
44
- # Determine which sequences to use
45
- seqs = [seq.zfill(3) for seq in read_split_file(osp.join(
46
- osp.dirname(os.path.abspath(__file__)), 'splits', f'{args.split}.txt'))]
47
- comb_anns = {'images': [], 'annotations': [],
48
- 'categories': None, 'info': {}}
49
-
50
- for seq in tqdm.tqdm(seqs):
51
- ann_path = osp.join(args.motsynth_path, 'annotations', f'{seq}.json')
52
- with open(ann_path) as f:
53
- seq_ann = json.load(f)
54
-
55
- # Subsample images and annotations if needed
56
- if args.subsample > 1:
57
- seq_ann['images'] = [{**img, **seq_ann['info']} for img in seq_ann['images'] if (
58
- (img['frame_n'] - 1) % args.subsample) == 0] # -1 bc in the paper this was 0-based
59
- img_ids = [img['id'] for img in seq_ann['images']]
60
- seq_ann['annotations'] = [
61
- ann for ann in seq_ann['annotations'] if ann['image_id'] in img_ids]
62
-
63
- comb_anns['images'].extend(seq_ann['images'])
64
- comb_anns['annotations'].extend(seq_ann['annotations'])
65
- comb_anns['info'][seq] = seq_ann['info']
66
-
67
- if len(seqs) > 0:
68
- comb_anns['categories'] = seq_ann['categories']
69
-         comb_anns['licenses'] = seq_ann.get('licenses', [])
70
-
71
- # Sanity check:
72
- img_ids = [img['id'] for img in comb_anns['images']]
73
- ann_ids = [ann['id'] for ann in comb_anns['annotations']]
74
- assert len(img_ids) == len(set(img_ids))
75
- assert len(ann_ids) == len(set(ann_ids))
76
-
77
- # Save the new annotations file
78
- comb_anns_dir = osp.join(args.save_path, args.save_dir)
79
- os.makedirs(comb_anns_dir, exist_ok=True)
80
- comb_anns_path = osp.join(comb_anns_dir, f"{args.name}.json")
81
- with open(comb_anns_path, 'w') as json_file:
82
- json.dump(comb_anns, json_file)
83
-
84
-
85
- if __name__ == '__main__':
86
- args = parse_args()
87
- main(args)
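
A quick way to sanity-check the output of the script above; the MOTSynth root path and the split name are assumptions, everything else follows the structure the script writes:

import json
import os.path as osp

MOTSYNTH_PATH = "/data/MOTSynth"  # assumed dataset root
comb_path = osp.join(MOTSYNTH_PATH, "comb_annotations", "train.json")

with open(comb_path) as f:
    comb = json.load(f)

# The script merges the per-sequence COCO-style files, so the usual keys are present.
print(len(comb["images"]), "images,", len(comb["annotations"]), "annotations")
print("sequences merged:", sorted(comb["info"].keys()))
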
tools/anns/generate_mot_format_files.py DELETED
@@ -1,73 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
-
4
- import os.path as osp
5
- import os
6
- import json
7
-
8
- import tqdm
9
- import argparse
10
-
11
- from generate_mots_format_files import save_seqinfo
12
-
13
- def parse_args():
14
- parser = argparse.ArgumentParser()
15
- parser.add_argument('--motsynth-path', help="Directory path containing the 'annotations' directory with .json files")
16
-     parser.add_argument('--save-path', help='Root directory in which the new annotation files will be stored. If not provided, the motsynth-path will be used')
17
-     parser.add_argument('--save-dir', default='mot_annotations', help="Name of the directory within 'save-path' in which MOT annotation files will be stored")
18
- args = parser.parse_args()
19
-
20
- if args.save_path is None:
21
- args.save_path = args.motsynth_path
22
-
23
- return args
24
-
25
- def main(args):
26
- ann_dir = osp.join(args.motsynth_path, 'annotations')
27
- mot_ann_dir = osp.join(args.save_path, args.save_dir)
28
- seqs = [f'{seq_num:03}' for seq_num in range(768) if seq_num not in (629, 757, 524, 652)]
29
-
30
- for seq in tqdm.tqdm(seqs):
31
- ann_path = osp.join(ann_dir, f'{seq}.json')
32
- with open(ann_path) as f:
33
- seq_ann = json.load(f)
34
-
35
- rows = []
36
- img_id2frame = {im['id']: im['frame_n'] for im in seq_ann['images']}
37
-
38
- for ann in seq_ann['annotations']:
39
- # We compute the 3D location as the mid point between both feet keypoints in 3D
40
- kps = np.array(ann['keypoints_3d']).reshape(-1, 4)
41
- feet_pos_3d = kps[[-1, -4], :3].mean(axis = 0).round(4)
42
-
43
- row = {'frame': img_id2frame[ann['image_id']],# STARTS AT 0!!!
44
- 'id': ann['ped_id'],
45
- 'bb_left': ann['bbox'][0] + 1, # Make it 1-based??
46
- 'bb_top': ann['bbox'][1] + 1,
47
- 'bb_width': ann['bbox'][2],
48
- 'bb_height': ann['bbox'][3],
49
- 'conf': 1 - ann['iscrowd'],
50
- 'class': 1 if ann['iscrowd'] == 0 else 8, # Class 8 means distractor. It is the one used by Trackeval as 'iscrowd'
51
- # We compute visibility as the proportion of visible keypoints
52
- 'vis': (np.array(ann['keypoints'])[2::3] ==2).mean().round(2),
53
- 'x': feet_pos_3d[0],
54
- 'y': feet_pos_3d[1],
55
- 'z': feet_pos_3d[2]}
56
-
57
- rows.append(row)
58
-
59
- # Save gt.txt file
60
- # Format in https://github.com/dendorferpatrick/MOTChallengeEvalKit/tree/master/MOT
61
- mot_ann = pd.DataFrame(rows, columns = ['frame', 'id', 'bb_left', 'bb_top', 'bb_width', 'bb_height', 'conf','class', 'vis', 'x', 'y', 'z'])
62
- gt_dir = osp.join(mot_ann_dir, seq, 'gt')
63
- os.makedirs(gt_dir, exist_ok=True)
64
- mot_ann.to_csv(osp.join(gt_dir, 'gt.txt'), header=None, index=None, sep=',')
65
-
66
-
67
- # Save seqinfo.ini
68
- seqinfo_path = osp.join(mot_ann_dir, seq, 'seqinfo.ini')
69
- save_seqinfo(seqinfo_path, info = seq_ann['info'])
70
-
71
- if __name__ =='__main__':
72
- args = parse_args()
73
- main(args)
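
For reference, a sketch of reading back one of the generated MOTChallenge-style gt.txt files with the column layout used above (the path is an assumption):

import pandas as pd

cols = ["frame", "id", "bb_left", "bb_top", "bb_width", "bb_height",
        "conf", "class", "vis", "x", "y", "z"]
gt = pd.read_csv("/data/MOTSynth/mot_annotations/000/gt/gt.txt", header=None, names=cols)
print(gt.head())
# Class 8 marks distractor (iscrowd) boxes, which are treated as ignore regions during evaluation.
print("distractor boxes:", int((gt["class"] == 8).sum()))
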
tools/anns/generate_mots_format_files.py DELETED
@@ -1,102 +0,0 @@
1
- """
2
- TODOs:
3
- - argparse
4
- - List sequences by number
5
- - Get rid of asserts
6
- """
7
-
8
- import pandas as pd
9
-
10
- import os.path as osp
11
- import os
12
- import json
13
-
14
- import configparser
15
-
16
- import tqdm
17
- import argparse
18
-
19
- def parse_args():
20
- parser = argparse.ArgumentParser()
21
- parser.add_argument('--motsynth-path', help="Directory path containing the 'annotations' directory with .json files")
22
-     parser.add_argument('--save-path', help='Root directory in which the new annotation files will be stored. If not provided, the motsynth-path will be used')
23
-     parser.add_argument('--save-dir', default='mots_annotations', help="Name of the directory within 'save-path' in which MOTS annotation files will be stored")
24
- args = parser.parse_args()
25
-
26
- if args.save_path is None:
27
- args.save_path = args.motsynth_path
28
-
29
- return args
30
-
31
- def save_seqinfo(seqinfo_path, info):
32
- seqinfo = configparser.ConfigParser()
33
- seqinfo.optionxform = str # Otherwise capital letters are ignored in keys
34
-
35
- seqinfo['Sequence'] = dict(name=info['seq_name'],
36
- frameRate=info['fps'],
37
- seqLength=info['sequence_length'],
38
- imWidth= info['img_width'],
39
- imHeight= info['img_height'],
40
- weather=info['weather'],
41
- time=info['time'],
42
- isNight=info['is_night'],
43
- isMoving=info['is_moving'],
44
- FOV=info['cam_fov'],
45
- imExt='.jpg',
46
- fx=1158,
47
- fy=1158,
48
- cx=960,
49
- cy=540)
50
-
51
- with open(seqinfo_path, 'w') as configfile: # save
52
- seqinfo.write(configfile, space_around_delimiters=False)
53
-
54
-
55
- def main(args):
56
- ann_dir = osp.join(args.motsynth_path, 'annotations')
57
- mots_ann_dir = osp.join(args.save_path, args.save_dir)
58
-
59
- seqs = [f'{seq_num:03}' for seq_num in range(768) if seq_num not in (629, 757, 524, 652)]
60
-
61
- for seq in tqdm.tqdm(seqs):
62
- ann_path = osp.join(ann_dir, f'{seq}.json')
63
- with open(ann_path) as f:
64
- seq_ann = json.load(f)
65
-
66
- rows = []
67
- img_id2frame = {im['id']: im['frame_n'] for im in seq_ann['images']}
68
-
69
- for ann in seq_ann['annotations']:
70
- assert ann['category_id'] == 1
71
- if ann['area']: # Include only objects with non-empty masks
72
- if not ann['iscrowd']:
73
- mots_id = 2000 + ann['ped_id']
74
-
75
- else: # ID = 10000 means that the instance should be ignored during eval.
76
- mots_id = 10000
77
-
78
- row = {'time_frame': img_id2frame[ann['image_id']],# STARTS AT 0!!!
79
- 'id': mots_id,
80
- 'class_id': 2,
81
- 'img_height': ann['segmentation']['size'][0],
82
- 'img_width': ann['segmentation']['size'][1],
83
- 'rle': ann['segmentation']['counts']}
84
-
85
- rows.append(row)
86
-
87
- # Save gt.txt file
88
- # Format in https://www.vision.rwth-aachen.de/page/mots
89
- mots_ann = pd.DataFrame(rows, columns = ['time_frame', 'id', 'class_id', 'img_height', 'img_width', 'rle'])
90
- gt_dir = osp.join(mots_ann_dir, seq, 'gt')
91
- os.makedirs(gt_dir, exist_ok=True)
92
- mots_ann.to_csv(osp.join(gt_dir, 'gt.txt'), header=None, index=None, sep=' ')
93
-
94
-
95
- # Save seqinfo.ini
96
- seqinfo_path = osp.join(mots_ann_dir, seq, 'seqinfo.ini')
97
- save_seqinfo(seqinfo_path, info = seq_ann['info'])
98
-
99
-
100
- if __name__ =='__main__':
101
- args = parse_args()
102
- main(args)
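
A sketch of decoding one RLE row from a generated MOTS gt.txt with pycocotools (already a project dependency); the path is an assumption:

import pandas as pd
from pycocotools import mask as rletools

cols = ["time_frame", "id", "class_id", "img_height", "img_width", "rle"]
gt = pd.read_csv("/data/MOTSynth/mots_annotations/000/gt/gt.txt",
                 sep=" ", header=None, names=cols)
row = gt.iloc[0]
# Rebuild the COCO-style RLE dict and decode it into a binary mask.
m = rletools.decode({"size": [int(row.img_height), int(row.img_width)],
                     "counts": row.rle.encode("utf-8")})
print("mask pixels:", int(m.sum()), "- ids equal to 10000 are ignored during evaluation")
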
tools/anns/motcha_to_coco.py DELETED
@@ -1,145 +0,0 @@
1
- import os
2
- import os.path as osp
3
- import numpy as np
4
- import json
5
-
6
- import argparse
7
- import configparser
8
- import datetime
9
-
10
- import tqdm
11
-
12
-
13
- def parse_args():
14
- parser = argparse.ArgumentParser()
15
- parser.add_argument(
16
- '--data-root', help="Path containing the dataset in a folder")
17
- parser.add_argument('--dataset', default='MOT17',
18
- help='Name of the dataset to be used. Should be either MOT17 or MOT20')
19
- parser.add_argument(
20
-         '--save-dir', help='Root directory in which the new annotation files will be stored. If not provided, data-root will be used')
21
- parser.add_argument('--split', default='train',
22
- help="Split processed within the dataset. Should be either 'train' or 'test'")
23
- parser.add_argument('--save-combined', default=True, action='store_true',
24
- help="Determines whether a separate .json file containing all sequence annotations will be created")
25
- parser.add_argument('--subsample', default=20, type=int,
26
- help="Frame subsampling rate. If e.g. 10 is selected, then we will select 1 in 20 frames")
27
-
28
- args = parser.parse_args()
29
-
30
- if args.save_dir is None:
31
- args.save_dir = osp.join(args.data_root, 'motcha_coco_annotations')
32
-
33
- return args
34
-
35
-
36
- def get_img_id(dataset, seq, fname):
37
- # Dataset num, seq num, frame num
38
- return int(f"{dataset[3:5]}{seq.split('-')[1]}{int(fname.split('.')[0]):06}")
39
-
40
-
41
- def read_seqinfo(path):
42
- cp = configparser.ConfigParser()
43
- cp.read(path)
44
- return {'height': int(cp.get('Sequence', 'imHeight')),
45
- 'width': int(cp.get('Sequence', 'imWidth')),
46
- 'fps': int(cp.get('Sequence', 'frameRate'))}
47
-
48
-
49
- def main(args):
50
- data_path = osp.join(args.data_root, args.dataset, args.split)
51
- seqs = os.listdir(data_path)
52
-
53
- if args.save_combined:
54
- comb_data = {'info': {'dataset': args.dataset,
55
- 'split': args.split,
56
- 'creation_date': datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')},
57
- 'images': [],
58
- 'annotations': [],
59
- 'categories': [{'id': 1, 'name': 'person', 'supercategory': 'person'}]}
60
-
61
- for seq in tqdm.tqdm(seqs):
62
- if args.dataset.lower() == 'mot17':
63
- # Choose an arbitrary set of detections for MOT17, annotations are the same
64
- if not seq.endswith('FRCNN'):
65
- continue
66
-
67
- print(f"Processing sequence {seq} in dataset {args.dataset}")
68
-
69
- seq_path = osp.join(data_path, seq)
70
- seqinfo_path = osp.join(seq_path, 'seqinfo.ini')
71
- gt_path = osp.join(seq_path, 'gt/gt.txt')
72
- im_dir = osp.join(seq_path, 'img1')
73
-
74
- if args.dataset.lower() == 'mot17':
75
- seq_ = '-'.join(seq.split('-')[:-1]) # Get rid of detector string
76
-
77
- else:
78
-             seq_ = seq
79
-
80
- seqinfo = read_seqinfo(seqinfo_path)
81
- data = {'info': {'sequence': seq_,
82
- 'dataset': args.dataset,
83
- 'split': args.split,
84
- 'creation_date': datetime.datetime.today().strftime('%Y-%m-%d-%H-%M'),
85
- **seqinfo},
86
- 'images': [],
87
- 'annotations': [],
88
- 'categories': [{'id': 1, 'name': 'person', 'supercategory': 'person'}]}
89
-
90
- # Load Bounding Box annotations
91
- gt = np.loadtxt(gt_path, dtype=np.float32, delimiter=',')
92
- keep_classes = [1, 2, 7, 8, 12]
93
- mask = np.isin(gt[:, 7], keep_classes)
94
- gt = gt[mask]
95
- anns = [{'ped_id': row[1],
96
- 'frame_n': row[0],
97
- 'category_id': 1,
98
- 'id': f"{get_img_id(args.dataset, seq, f'{int(row[0]):06}.jpg')}{int(row_i):010}",
99
- 'image_id': get_img_id(args.dataset, seq, f'{int(row[0]):06}.jpg'),
100
- # 'bbox': row[2:6].tolist(),
101
- # MOTCha annotations are 1-based
102
- 'bbox': [row[2] - 1, row[3] - 1, row[4], row[5]],
103
- 'area': row[4]*row[5],
104
- 'vis': row[8],
105
- 'iscrowd': 1 - row[6]}
106
- for row_i, row in enumerate(gt.astype(float)) if row[0] % args.subsample == 0]
107
-
108
- # Load Image information
109
- all_img_ids = list(set([aa['image_id'] for aa in anns]))
110
- imgs = [{'file_name': osp.join(args.dataset, args.split, seq, 'img1', fname),
111
- 'height': seqinfo['height'],
112
- 'width': seqinfo['width'],
113
- 'frame_n': int(fname.split('.')[0]),
114
- 'id': get_img_id(args.dataset, seq, fname)}
115
- for fname in os.listdir(im_dir) if get_img_id(args.dataset, seq, fname) in all_img_ids]
116
- assert len(set([im['id'] for im in imgs])) == len(imgs)
117
- data['images'].extend(imgs)
118
- assert len(str(imgs[0]['id'])) == len(str(anns[0]['image_id']))
119
-
120
- data['annotations'].extend(anns)
121
-
122
- os.makedirs(args.save_dir, exist_ok=True)
123
- fname = f"{args.dataset}_{seq_}.json" if args.dataset not in seq_ else f"{seq_}.json"
124
- save_path = osp.join(args.save_dir, fname)
125
- with open(save_path, 'w') as f:
126
- json.dump(data, f)
127
-
128
- print(f"Saved result at {save_path}")
129
-
130
- if args.save_combined:
131
- comb_data['annotations'].extend(anns)
132
- comb_data['images'].extend(imgs)
133
-
134
- if args.save_combined:
135
- save_path = osp.join(
136
- args.save_dir, f"{args.dataset}_{args.split}.json")
137
- with open(save_path, 'w') as f:
138
- json.dump(comb_data, f)
139
-
140
- print(f"Saved combined result at {save_path}")
141
-
142
-
143
- if __name__ == '__main__':
144
- args = parse_args()
145
- main(args)
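
The image-id scheme used by get_img_id above packs the dataset number, sequence number and zero-padded frame number into a single integer; a small worked example (function copied from the deleted file for illustration):

def get_img_id(dataset, seq, fname):
    # Dataset num, seq num, frame num
    return int(f"{dataset[3:5]}{seq.split('-')[1]}{int(fname.split('.')[0]):06}")

# MOT17, sequence MOT17-02, frame 600 -> 1702000600
print(get_img_id("MOT17", "MOT17-02", "000600.jpg"))
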
tools/anns/splits/motsynth_split1.txt DELETED
@@ -1,16 +0,0 @@
1
- 000
2
- 008
3
- 016
4
- 024
5
- 032
6
- 040
7
- 048
8
- 056
9
- 064
10
- 072
11
- 080
12
- 088
13
- 096
14
- 104
15
- 112
16
- 120
tools/anns/splits/motsynth_split2.txt DELETED
@@ -1,31 +0,0 @@
1
- 000
2
- 004
3
- 008
4
- 012
5
- 016
6
- 020
7
- 024
8
- 028
9
- 032
10
- 036
11
- 040
12
- 044
13
- 048
14
- 052
15
- 056
16
- 060
17
- 064
18
- 068
19
- 072
20
- 076
21
- 080
22
- 084
23
- 088
24
- 092
25
- 096
26
- 100
27
- 104
28
- 108
29
- 112
30
- 116
31
- 120
tools/anns/splits/motsynth_split3.txt DELETED
@@ -1,62 +0,0 @@
1
- 000
2
- 002
3
- 004
4
- 006
5
- 008
6
- 010
7
- 012
8
- 014
9
- 016
10
- 018
11
- 020
12
- 022
13
- 024
14
- 026
15
- 028
16
- 030
17
- 032
18
- 034
19
- 036
20
- 038
21
- 040
22
- 042
23
- 044
24
- 046
25
- 048
26
- 050
27
- 052
28
- 054
29
- 056
30
- 058
31
- 060
32
- 062
33
- 064
34
- 066
35
- 068
36
- 070
37
- 072
38
- 074
39
- 076
40
- 078
41
- 080
42
- 082
43
- 084
44
- 086
45
- 088
46
- 090
47
- 092
48
- 094
49
- 096
50
- 098
51
- 100
52
- 102
53
- 104
54
- 106
55
- 108
56
- 110
57
- 112
58
- 114
59
- 116
60
- 118
61
- 120
62
- 122
tools/anns/splits/motsynth_split4.txt DELETED
@@ -1,123 +0,0 @@
1
- 000
2
- 001
3
- 002
4
- 003
5
- 004
6
- 005
7
- 006
8
- 007
9
- 008
10
- 009
11
- 010
12
- 011
13
- 012
14
- 013
15
- 014
16
- 015
17
- 016
18
- 017
19
- 018
20
- 019
21
- 020
22
- 021
23
- 022
24
- 023
25
- 024
26
- 025
27
- 026
28
- 027
29
- 028
30
- 029
31
- 030
32
- 031
33
- 032
34
- 033
35
- 034
36
- 035
37
- 036
38
- 037
39
- 038
40
- 039
41
- 040
42
- 041
43
- 042
44
- 043
45
- 044
46
- 045
47
- 046
48
- 047
49
- 048
50
- 049
51
- 050
52
- 051
53
- 052
54
- 053
55
- 054
56
- 055
57
- 056
58
- 057
59
- 058
60
- 059
61
- 060
62
- 061
63
- 062
64
- 063
65
- 064
66
- 065
67
- 066
68
- 067
69
- 068
70
- 069
71
- 070
72
- 071
73
- 072
74
- 073
75
- 074
76
- 075
77
- 076
78
- 077
79
- 078
80
- 079
81
- 080
82
- 081
83
- 082
84
- 083
85
- 084
86
- 085
87
- 086
88
- 087
89
- 088
90
- 089
91
- 090
92
- 091
93
- 092
94
- 093
95
- 094
96
- 095
97
- 096
98
- 097
99
- 098
100
- 099
101
- 100
102
- 101
103
- 102
104
- 103
105
- 104
106
- 105
107
- 106
108
- 107
109
- 108
110
- 109
111
- 110
112
- 111
113
- 112
114
- 113
115
- 114
116
- 115
117
- 116
118
- 117
119
- 118
120
- 119
121
- 120
122
- 121
123
- 122
tools/anns/store_reid_imgs.py DELETED
@@ -1,84 +0,0 @@
1
- import os
2
- import os.path as osp
3
- import numpy as np
4
- import json
5
-
6
- import argparse
7
-
8
- import tqdm
9
- from PIL import Image
10
- from collections import defaultdict
11
-
12
- def parse_args():
13
- parser = argparse.ArgumentParser()
14
- parser.add_argument('--ann-path', help=".JSON annotations file path in COCO format")
15
- parser.add_argument('--frames-path', help="Root directory containing images")
16
-     parser.add_argument('--save-dir', help='Directory in which the cropped reid images will be stored. If not provided, it will be derived from the annotations path')
17
- parser.add_argument('--start-iter', default=0, type=int)
18
-
19
- #args = parser.parse_args(['--ann-path', '/storage/user/brasoand/MOTSynth/comb_annotations/train_mini.json'])
20
- args = parser.parse_args()
21
-
22
- if args.frames_path is None:
23
- args.frames_path = osp.dirname(osp.dirname(args.ann_path))
24
-
25
- if args.save_dir is None:
26
- #args.save_dir = osp.join(osp.dirname(osp.dirname(args.ann_path)), 'reid_images')
27
- args.save_dir = osp.join(osp.dirname(osp.dirname(args.ann_path)), 'reid')
28
-
29
-
30
- return args
31
-
32
- def crop_box(im, bbox):
33
- x1, y1, w, h = bbox
34
- x2, y2 = x1+ w, y1+ h
35
- return im.crop((x1, y1, x2, y2))
36
-
37
-
38
- def main(args):
39
- os.makedirs(args.save_dir, exist_ok=True)
40
-
41
- # Read annotations
42
- with open(args.ann_path) as f:
43
- anns = json.load(f)
44
-
45
-
46
- # Annotation ids are used as file names to store boxes.
47
- # Therefore they need to be unique
48
- ann_ids = [ann['id'] for ann in anns['annotations']]
49
- assert len(ann_ids) == len(set(ann_ids))
50
- imgid2file = {img['id']: img['file_name'] for img in anns['images']}
51
-
52
- # TODO: This needs to go!!!!!!!
53
- anns['annotations'] = [ann for ann in anns['annotations'] if not osp.exists(osp.join(args.save_dir, f"{ann['id']}.png"))]
54
- len(anns['annotations'])
55
- im2anns = defaultdict(list)
56
- for ann in anns['annotations']:
57
- im2anns[imgid2file[ann['image_id']]].append(ann)
58
-
59
- for img_file, im_anns in tqdm.tqdm(im2anns.items()):
60
- #break
61
- # Read Image
62
- im_path = osp.join(args.frames_path, img_file)
63
- if not osp.exists(im_path):
64
- im_path = osp.join(args.frames_path, img_file.replace('rgb/', ''))
65
-
66
- assert osp.exists(im_path)
67
- im = Image.open(im_path)
68
-
69
- for ann in im_anns:
70
- box_path = osp.join(args.save_dir, f"{ann['id']}.png")
71
-
72
- if osp.exists(box_path):
73
- continue
74
-
75
- #if ann['bbox'][-2] > 2000 or ann['bbox'][-1] > 2000:
76
- # continue
77
-
78
- box_im = crop_box(im, ann['bbox'])
79
- box_im.save(box_path)
80
-
81
-
82
- if __name__ == '__main__':
83
- args = parse_args()
84
- main(args)
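
A minimal sketch of the crop step performed above for a single annotation; the image path and box values are assumptions, and crop_box is adapted from the deleted file:

from PIL import Image

def crop_box(im, bbox):
    # COCO-style [x, y, w, h] box -> PIL crop coordinates (x1, y1, x2, y2)
    x1, y1, w, h = bbox
    return im.crop((x1, y1, x1 + w, y1 + h))

im = Image.open("/data/MOTSynth/frames/000/rgb/0000.jpg")  # assumed frame location
patch = crop_box(im, [100.0, 200.0, 50.0, 120.0])
patch.save("/tmp/example_reid_crop.png")
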
tools/anns/to_frames.py DELETED
@@ -1,56 +0,0 @@
1
- import argparse
2
- import os
3
- import os.path as osp
4
- import cv2
5
- import glob
6
-
7
- import sys
8
-
9
- import tqdm
10
-
11
-
12
- def main():
13
- parser = argparse.ArgumentParser(description='Get frames from a video')
14
- parser.add_argument(
15
-         '--motsynth-root', help='Directory hosting MOTSynth part directories')
16
- args = parser.parse_args()
17
-
18
- video_paths = glob.glob(
19
- osp.join(args.motsynth_root, 'MOTSynth_[0-9]/[0-9][0-9][0-9].mp4'))
20
-
21
- frames_dir = os.path.join(args.motsynth_root, "frames")
22
- os.makedirs(frames_dir, exist_ok=True)
23
-
24
- print("Start extracting frames...")
25
-
26
- for video_path in tqdm.tqdm(video_paths):
27
- vidcap = cv2.VideoCapture(video_path)
28
-
29
- seq_name = osp.basename(video_path).split(".")[0].zfill(3)
30
- out_dir = os.path.join(frames_dir, seq_name, 'rgb')
31
- os.makedirs(out_dir, exist_ok=True)
32
-
33
- count = 1
34
- success = True
35
-
36
- #print("Unpacking video...")
37
-
38
- while success:
39
- success, image = vidcap.read()
40
- if count < 3:
41
- count += 1
42
- continue
43
- if not success or count == 1803:
44
- break
45
- if count % 200 == 0:
46
- print("Extract frames until: " +
47
- str(count - 3).zfill(4) + ".jpg")
48
- filename = os.path.join(out_dir, str(count - 3).zfill(4) + ".jpg")
49
- cv2.imwrite(filename, image) # save frame as JPEG file
50
- count += 1
51
-
52
- print("Done!")
53
-
54
-
55
- if __name__ == '__main__':
56
- main()
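
A sketch for sanity-checking one extracted sequence; the root path is an assumption. With the counter logic above, the first two decoded frames are skipped and the files run from 0000.jpg to 1799.jpg (1800 frames per sequence):

import glob
import os.path as osp

MOTSYNTH_ROOT = "/data/MOTSynth"  # assumed location
frames = sorted(glob.glob(osp.join(MOTSYNTH_ROOT, "frames", "000", "rgb", "*.jpg")))
print(len(frames), "frames extracted")
if frames:
    print("first:", osp.basename(frames[0]), "last:", osp.basename(frames[-1]))
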
tools/inference_detector.py DELETED
@@ -1,46 +0,0 @@
1
- import torch
2
- from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2, FastRCNNPredictor
3
- from configs.path_cfg import OUTPUT_DIR
4
- from src.detection.vision.engine import evaluate
5
- from tools.train_detector import create_dataset, create_data_loader, get_transform
6
- from src.detection.graph_utils import add_bbox, show_img
7
- import os.path as osp
8
- import argparse
9
-
10
-
11
- def parse_args(add_help=True):
12
- parser = argparse.ArgumentParser(
13
- description="Detector inference", add_help=add_help)
14
-
15
- # path to model used for inference
16
- parser.add_argument("--model-path", type=str,
17
- help="Path with model checkpoint used for inference")
18
-
19
- args = parser.parse_args()
20
-
21
- if args.model_path is None:
22
- args.model_path = osp.join(
23
- OUTPUT_DIR, "detection_logs", "fasterrcnn_training", "checkpoint.pth")
24
- return args
25
-
26
-
27
- def main(args):
28
- ds_val = create_dataset(
29
- "motsynth_val", get_transform(False, "hflip"), "test")
30
- data_loader_val = create_data_loader(ds_val, "test", 1, 0)
31
-
32
- device = torch.device("cuda")
33
- model = fasterrcnn_resnet50_fpn_v2()
34
- in_features = model.roi_heads.box_predictor.cls_score.in_features
35
- model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
36
- checkpoint = torch.load(
37
- args.model_path, map_location="cpu")
38
- model.load_state_dict(checkpoint["model"])
39
- model.eval()
40
- model.to(device)
41
- show_img(data_loader_val, model, device, 0.8)
42
-
43
-
44
- if __name__ == "__main__":
45
- args = parse_args()
46
- main(args)
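
A standalone sketch of the same load-and-predict path for a single image, without the project's dataset and loader code; the checkpoint and image paths are assumptions:

import torch
import torchvision.transforms.functional as F
from PIL import Image
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2, FastRCNNPredictor

model = fasterrcnn_resnet50_fpn_v2()
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)  # background + pedestrian
ckpt = torch.load("output/detection_logs/fasterrcnn_training/checkpoint.pth", map_location="cpu")
model.load_state_dict(ckpt["model"])
model.eval()

img = F.to_tensor(Image.open("/data/MOT17/train/MOT17-02-FRCNN/img1/000001.jpg"))
with torch.no_grad():
    pred = model([img])[0]
keep = pred["scores"] > 0.8
print(int(keep.sum()), "detections above 0.8 confidence")
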
tools/train_detector.py DELETED
@@ -1,408 +0,0 @@
1
- from typing import List
2
- from configs.path_cfg import MOTSYNTH_ROOT, MOTCHA_ROOT, OUTPUT_DIR
3
- import datetime
4
- import os.path as osp
5
- import os
6
- import time
7
- import coloredlogs
8
- import logging
9
- from torchinfo import summary
10
- import torch
11
- import torch.utils.data
12
- from src.detection.vision.mot_data import MOTObjDetect
13
- from src.detection.model_factory import ModelFactory
14
- from src.detection.graph_utils import save_train_loss_plot
15
- import src.detection.vision.presets as presets
16
- import src.detection.vision.utils as utils
17
- from src.detection.vision.engine import train_one_epoch, evaluate
18
- from src.detection.vision.group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
19
- from src.detection.mot_dataset import get_mot_dataset
20
- import torchvision
21
-
22
- from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2
23
- from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
24
- coloredlogs.install(level='DEBUG')
25
- logger = logging.getLogger(__name__)
26
-
27
-
28
- def get_args_parser(add_help=True):
29
- import argparse
30
-
31
- parser = argparse.ArgumentParser(
32
- description="PyTorch Detection Training", add_help=add_help)
33
-
34
- # Output directory used to save model, plots and summary
35
- parser.add_argument("--output-dir", default='fasterrcnn_training',
36
- type=str, help="Path to save outputs (default: fasterrcnn_training)")
37
-
38
- # Dataset params
39
- parser.add_argument("--train-dataset", default="motsynth_split1",
40
- type=str, help="Dataset name. Please select one of the following: motsynth_split1, motsynth_split2, motsynth_split3, motsynth_split4, MOT17 (default: motsynth_split1)")
41
- parser.add_argument("--val-dataset", default="MOT17",
42
- type=str, help="Dataset name. Please select one of the following: MOT17 (default: MOT17)")
43
-
44
- # Transforms params
45
- parser.add_argument(
46
- "--data-augmentation", default="hflip", type=str, help="Data augmentation policy (default: hflip)"
47
- )
48
-
49
- # Data Loaders params
50
- parser.add_argument(
51
- "-b", "--batch-size", default=3, type=int, help="Images per gpu (default: 3)"
52
- )
53
- parser.add_argument(
54
- "-j", "--workers", default=0, type=int, metavar="N", help="Number of data loading workers (default: 0)"
55
- )
56
- parser.add_argument("--aspect-ratio-group-factor", default=3,
57
- type=int, help="Aspect ration group factor (default:3)")
58
-
59
- # Model param
60
- parser.add_argument(
61
- "--model", default="fasterrcnn_resnet50_fpn", type=str, help="Model name (default: fasterrcnn_resnet50_fpn)")
62
- parser.add_argument(
63
- "--weights", default="DEFAULT", type=str, help="Model weights (default: DEFAULT)"
64
- )
65
- parser.add_argument(
66
- "--backbone", default='resnet50', type=str, help="Type of backbone (default: resnet50)"
67
- )
68
- parser.add_argument(
69
- "--trainable-backbone-layers", default=3, type=int, help="Number of trainable layers of backbone (default: 3)"
70
- )
71
- parser.add_argument(
72
- "--backbone-weights", default="DEFAULT", type=str, help="Backbone weights (default: DEFAULT)"
73
- )
74
-
75
- # Device param
76
- parser.add_argument("--device", default="cuda", type=str,
77
- help="device (default: cuda)")
78
-
79
- # Test mode param
80
- parser.add_argument(
81
- "--test-only",
82
- dest="test_only",
83
- help="Only test the model",
84
- action="store_true",
85
- )
86
- parser.add_argument(
87
- "--model-eval", type=str, help="model path for test only mode"
88
- )
89
-
90
- # Optimizer params
91
- parser.add_argument(
92
- "--lr",
93
- default=0.0025,
94
- type=float,
95
- help="Learning rate (default: 0.0025)",
96
- )
97
- parser.add_argument("--momentum", default=0.9,
98
- type=float, metavar="M", help="Momentum (default: 0.9")
99
- parser.add_argument(
100
- "--wd",
101
- "--weight-decay",
102
- default=1e-4,
103
- type=float,
104
- metavar="W",
105
- help="Weight decay (default: 1e-4)",
106
- dest="weight_decay",
107
- )
108
-
109
- # Lr Scheduler params
110
- parser.add_argument(
111
- "--lr-scheduler", default="multisteplr", type=str, help="Name of lr scheduler (default: multisteplr)"
112
- )
113
- parser.add_argument(
114
- "--lr-steps",
115
- default=[16, 22],
116
- nargs="+",
117
- type=int,
118
- help="Decrease lr every step-size epochs (multisteplr scheduler only)",
119
- )
120
- parser.add_argument(
121
- "--lr-gamma", default=0.1, type=float, help="Decrease lr by a factor of lr-gamma (multisteplr scheduler only)"
122
- )
123
-
124
- # Mixed precision training params
125
- parser.add_argument("--amp", action="store_true",
126
- help="Use torch.cuda.amp for mixed precision training")
127
-
128
- # Resume training params
129
- parser.add_argument("--resume", default="", type=str,
130
- help="path of checkpoint")
131
-
132
- # training param
133
- parser.add_argument("--start_epoch", default=0,
134
- type=int, help="start epoch")
135
- parser.add_argument("--epochs", default=30, type=int,
136
- metavar="N", help="number of total epochs to run")
137
- parser.add_argument("--print-freq", default=20,
138
- type=int, help="print frequency")
139
-
140
- return parser
141
-
142
-
143
- def get_transform(train, data_augmentation):
144
- if train:
145
- return presets.DetectionPresetTrain(data_augmentation)
146
- else:
147
- return presets.DetectionPresetEval()
148
-
149
-
150
- def get_motsynth_dataset(ds_name: str, transforms):
151
- data_path = osp.join(MOTSYNTH_ROOT, 'comb_annotations', f"{ds_name}.json")
152
- dataset = get_mot_dataset(MOTSYNTH_ROOT, data_path, transforms=transforms)
153
- return dataset
154
-
155
-
156
- def get_MOT17_dataset(split: str, split_seqs: List, transforms):
157
- data_path = osp.join(MOTCHA_ROOT, "MOT17", "train")
158
- dataset = MOTObjDetect(
159
- data_path, transforms=transforms, split_seqs=split_seqs)
160
- return dataset
161
-
162
-
163
- def create_dataset(ds_name: str, transforms, split=None):
164
- if (ds_name.startswith("motsynth")):
165
- return get_motsynth_dataset(ds_name, transforms)
166
-
167
- elif (ds_name.startswith("MOT17")):
168
- if split == "train":
169
- split_seqs = ['MOT17-02-FRCNN', 'MOT17-04-FRCNN',
170
- 'MOT17-11-FRCNN', 'MOT17-13-FRCNN']
171
- elif split == "test":
172
- split_seqs = ['MOT17-09-FRCNN', 'MOT17-10-FRCNN', 'MOT17-05-FRCNN']
173
- return get_MOT17_dataset(split, split_seqs, transforms)
174
-
175
- else:
176
- logger.error(
177
- "Please, provide a valid dataset as argument. Select one of the following: motsynth_split1, motsynth_split2, motsynth_split3, motsynth_split4, MOT17.")
178
- raise ValueError(ds_name)
179
-
180
-
181
- def create_data_loader(dataset, split: str, batch_size, workers, aspect_ratio_group_factor=-1):
182
- data_loader = None
183
- if split == "train":
184
- # random sampling on training dataset
185
- train_sampler = torch.utils.data.RandomSampler(dataset)
186
- if aspect_ratio_group_factor >= 0:
187
- group_ids = create_aspect_ratio_groups(
188
- dataset, k=aspect_ratio_group_factor)
189
- train_batch_sampler = GroupedBatchSampler(
190
- train_sampler, group_ids, batch_size)
191
- else:
192
- train_batch_sampler = torch.utils.data.BatchSampler(
193
- train_sampler, batch_size, drop_last=True)
194
- data_loader = torch.utils.data.DataLoader(
195
- dataset, batch_sampler=train_batch_sampler, num_workers=workers, collate_fn=utils.collate_fn
196
- )
197
- elif split == "test":
198
- # sequential sampling on eval dataset
199
- test_sampler = torch.utils.data.SequentialSampler(dataset)
200
- data_loader = torch.utils.data.DataLoader(
201
- dataset, batch_size=1, sampler=test_sampler, num_workers=workers, collate_fn=utils.collate_fn
202
- )
203
- return data_loader
204
-
205
-
206
- def create_optimizer(model, lr, momentum, weight_decay):
207
- params = [p for p in model.parameters() if p.requires_grad]
208
- optimizer = torch.optim.SGD(
209
- params, lr=lr, momentum=momentum, weight_decay=weight_decay)
210
- return optimizer
211
-
212
-
213
- def create_lr_scheduler(optimizer, lr_scheduler_type, lr_steps, lr_gamma, epochs):
214
- if lr_scheduler_type == "multisteplr":
215
- lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
216
- optimizer, milestones=lr_steps, gamma=lr_gamma)
217
- logger.debug(
218
- f"lr_scheduler: {lr_scheduler_type}, milestones: {lr_steps}, gamma: {lr_gamma}")
219
-
220
- elif lr_scheduler_type == "cosineannealinglr":
221
- lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
222
- optimizer, T_max=epochs)
223
- logger.debug(
224
- f"lr_scheduler: {lr_scheduler_type}, T_max: {epochs}")
225
- else:
226
- raise RuntimeError(
227
- f"Invalid lr scheduler '{lr_scheduler_type}'. Only MultiStepLR and CosineAnnealingLR are supported."
228
- )
229
- return lr_scheduler
230
-
231
-
232
- def resume_training(model, optimizer, lr_scheduler, scaler, args):
233
- checkpoint = torch.load(args.resume, map_location="cpu")
234
-
235
- model.load_state_dict(checkpoint["model"])
236
- optimizer.load_state_dict(checkpoint["optimizer"])
237
- lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
238
- args.start_epoch = checkpoint["epoch"] + 1
239
- if args.amp:
240
- scaler.load_state_dict(checkpoint["scaler"])
241
-
242
-
243
- def save_model_checkpoint(model, optimizer, lr_scheduler, epoch, scaler, output_dir, args):
244
- if output_dir:
245
- checkpoint = {
246
- "model": model.state_dict(),
247
- "optimizer": optimizer.state_dict(),
248
- "lr_scheduler": lr_scheduler.state_dict(),
249
- "args": args,
250
- "epoch": epoch,
251
- }
252
- if args.amp:
253
- checkpoint["scaler"] = scaler.state_dict()
254
- utils.save_on_master(checkpoint, os.path.join(
255
- output_dir, f"model_{epoch}.pth"))
256
- utils.save_on_master(checkpoint, os.path.join(
257
- output_dir, "checkpoint.pth"))
258
-
259
-
260
- def save_plots(losses_dict, batch_loss_dict, output_dir):
261
- if not losses_dict:
262
- for name, metric in batch_loss_dict.items():
263
- losses_dict[name] = []
264
-
265
- for name, metric in batch_loss_dict.items():
266
- losses_dict[name].extend(metric)
267
- save_train_loss_plot(losses_dict, output_dir)
268
-
269
-
270
- def save_model_summary(model, output_dir, batch_size):
271
- with open(osp.join(output_dir, "summary.txt"), 'w', encoding="utf-8") as f:
272
- print(summary(model,
273
- # (batch_size, color_channels, height, width)
274
- input_size=(batch_size, 3, 1080, 1920),
275
- verbose=0,
276
- col_names=["input_size", "output_size",
277
- "num_params", "kernel_size", "trainable"],
278
- col_width=20,
279
- row_settings=["var_names"]), file=f)
280
-
281
-
282
- def save_args(output_dir, args):
283
- with open(osp.join(output_dir, "args.txt"), 'w', encoding="utf-8") as f:
284
- print(args, file=f)
285
-
286
-
287
- def save_evaluate_summary(stats, output_dir):
288
- metrics = ["AP", "AP50", "AP75", "APs", "APm", "APl"]
289
- # the standard metrics
290
- results = {
291
- metric: float(stats[idx] *
292
- 100 if stats[idx] >= 0 else "nan")
293
- for idx, metric in enumerate(metrics)
294
- }
295
- with open(osp.join(output_dir, "evaluate.txt"), 'w', encoding="utf-8") as f:
296
- print(results, file=f)
297
-
298
-
299
- def main(args):
300
-
301
- output_dir = None
302
- if args.output_dir:
303
- output_dir = osp.join(
304
- OUTPUT_DIR, 'detection_logs', args.output_dir)
305
- utils.mkdir(output_dir)
306
- output_plots_dir = osp.join(output_dir, "plots")
307
- utils.mkdir(output_plots_dir)
308
-
309
- logger.debug("COMMAND LINE ARGUMENTS")
310
- logger.debug(args)
311
- save_args(output_dir, args)
312
-
313
- device = torch.device(args.device)
314
- logger.debug(f"DEVICE: {device}")
315
-
316
- logger.debug("CREATE DATASETS")
317
- ds_train_name = args.train_dataset
318
- ds_val_name = args.val_dataset
319
- data_augmentation = args.data_augmentation
320
-
321
- dataset_train = create_dataset(
322
- ds_train_name, get_transform(True, data_augmentation), "train")
323
- dataset_test = create_dataset(
324
- ds_val_name, get_transform(False, data_augmentation), "test")
325
-
326
- logger.debug("CREATE DATA LOADERS")
327
- batch_size = args.batch_size
328
- workers = args.workers
329
- aspect_ratio_group_factor = args.aspect_ratio_group_factor
330
- data_loader_train = create_data_loader(
331
- dataset_train, "train", batch_size, workers, aspect_ratio_group_factor)
332
- data_loader_test = create_data_loader(
333
- dataset_test, "test", batch_size, workers)
334
-
335
- if args.test_only:
336
- logger.debug("TEST ONLY")
337
- model = fasterrcnn_resnet50_fpn_v2()
338
- in_features = model.roi_heads.box_predictor.cls_score.in_features
339
- model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
340
- checkpoint = torch.load(args.model_eval, map_location="cuda")
341
- model.load_state_dict(checkpoint["model"])
342
- model.to(device)
343
- coco_evaluator = evaluate(model, data_loader_test,
344
- device=device, iou_types=['bbox'])
345
- save_evaluate_summary(
346
- coco_evaluator.coco_eval['bbox'].stats, output_dir)
347
- return
348
-
349
- logger.debug("CREATE MODEL")
350
- model_name = args.model
351
- weights = args.weights
352
- backbone = args.backbone
353
- backbone_weights = args.backbone_weights
354
- trainable_backbone_layers = args.trainable_backbone_layers
355
- model = ModelFactory.get_model(
356
- model_name, weights, backbone, backbone_weights, trainable_backbone_layers)
357
- save_model_summary(model, output_dir, batch_size)
358
-
359
- logger.debug("CREATE OPTIMIZER")
360
- lr = args.lr
361
- momentum = args.momentum
362
- weight_decay = args.weight_decay
363
- optimizer = create_optimizer(
364
- model, lr, momentum, weight_decay)
365
-
366
- logger.debug("CREATE LR SCHEDULER")
367
- epochs = args.epochs
368
- lr_scheduler_type = args.lr_scheduler.lower()
369
- lr_steps = args.lr_steps
370
- lr_gamma = args.lr_gamma
371
- lr_scheduler = create_lr_scheduler(
372
- optimizer, lr_scheduler_type, lr_steps, lr_gamma, epochs)
373
-
374
- logger.debug("CONFIGURE SCALER FOR amp")
375
- scaler = torch.cuda.amp.GradScaler() if args.amp else None
376
-
377
- if args.resume:
378
- logger.debug("RESUME TRAINING")
379
- resume_training(model, optimizer, lr_scheduler,
380
- scaler, args)
381
-
382
- logger.debug("START TRAINING")
383
- print_freq = args.print_freq
384
- start_epoch = args.start_epoch
385
- losses_dict = {}
386
- start_time = time.time()
387
- for epoch in range(start_epoch, epochs):
388
- _, batch_loss_dict = train_one_epoch(model, optimizer, data_loader_train, device,
389
- epoch, print_freq, scaler)
390
- lr_scheduler.step()
391
- save_plots(losses_dict, batch_loss_dict,
392
- output_dir=output_plots_dir)
393
-
394
- coco_evaluator = evaluate(model, data_loader_test,
395
- device=device, iou_types=['bbox'])
396
- save_evaluate_summary(
397
- coco_evaluator.coco_eval['bbox'].stats, output_dir)
398
-
399
- save_model_checkpoint(
400
- model, optimizer, lr_scheduler, epoch, scaler, output_dir, args)
401
- total_time = time.time() - start_time
402
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
403
- logger.debug(f"TRAINING TIME: {total_time_str}")
404
-
405
-
406
- if __name__ == "__main__":
407
- args = get_args_parser().parse_args()
408
- main(args)
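
For reference, the optimizer/scheduler combination that create_optimizer and create_lr_scheduler build with the script's default hyper-parameters, shown standalone; this is only a sketch, not part of the commit, and the model construction mirrors the test-only branch above:

import torch
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn_v2

model = fasterrcnn_resnet50_fpn_v2()  # weights default to None, so nothing is downloaded
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0025, momentum=0.9, weight_decay=1e-4)
# Default "multisteplr" schedule: multiply the lr by 0.1 at milestones 16 and 22.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[16, 22], gamma=0.1)

for epoch in range(30):
    # train_one_epoch(...) and evaluate(...) run here in the real script
    lr_scheduler.step()
print("final lr after 30 epochs:", optimizer.param_groups[0]["lr"])  # 0.0025 * 0.1 * 0.1
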