Spaces:

yisol
/

IDM-VTON

Running on Zero

IDM-VTON / detectron2 /modeling /roi_heads /cascade_rcnn.py

IDM-VTON

update IDM-VTON Demo

938e515 7 months ago

13 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	from typing import List
	import torch
	from torch import nn
	from torch.autograd.function import Function

	from detectron2.config import configurable
	from detectron2.layers import ShapeSpec
	from detectron2.structures import Boxes, Instances, pairwise_iou
	from detectron2.utils.events import get_event_storage

	from ..box_regression import Box2BoxTransform
	from ..matcher import Matcher
	from ..poolers import ROIPooler
	from .box_head import build_box_head
	from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference
	from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads


	class _ScaleGradient(Function):
	@staticmethod
	def forward(ctx, input, scale):
	ctx.scale = scale
	return input

	@staticmethod
	def backward(ctx, grad_output):
	return grad_output * ctx.scale, None


	@ROI_HEADS_REGISTRY.register()
	class CascadeROIHeads(StandardROIHeads):
	"""
	The ROI heads that implement :paper:`Cascade R-CNN`.
	"""

	@configurable
	def __init__(
	self,
	*,
	box_in_features: List[str],
	box_pooler: ROIPooler,
	box_heads: List[nn.Module],
	box_predictors: List[nn.Module],
	proposal_matchers: List[Matcher],
	**kwargs,
	):
	"""
	NOTE: this interface is experimental.

	Args:
	box_pooler (ROIPooler): pooler that extracts region features from given boxes
	box_heads (list[nn.Module]): box head for each cascade stage
	box_predictors (list[nn.Module]): box predictor for each cascade stage
	proposal_matchers (list[Matcher]): matcher with different IoU thresholds to
	match boxes with ground truth for each stage. The first matcher matches
	RPN proposals with ground truth, the other matchers use boxes predicted
	by the previous stage as proposals and match them with ground truth.
	"""
	assert "proposal_matcher" not in kwargs, (
	"CascadeROIHeads takes 'proposal_matchers=' for each stage instead "
	"of one 'proposal_matcher='."
	)
	# The first matcher matches RPN proposals with ground truth, done in the base class
	kwargs["proposal_matcher"] = proposal_matchers[0]
	num_stages = self.num_cascade_stages = len(box_heads)
	box_heads = nn.ModuleList(box_heads)
	box_predictors = nn.ModuleList(box_predictors)
	assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!"
	assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!"
	super().__init__(
	box_in_features=box_in_features,
	box_pooler=box_pooler,
	box_head=box_heads,
	box_predictor=box_predictors,
	**kwargs,
	)
	self.proposal_matchers = proposal_matchers

	@classmethod
	def from_config(cls, cfg, input_shape):
	ret = super().from_config(cfg, input_shape)
	ret.pop("proposal_matcher")
	return ret

	@classmethod
	def _init_box_head(cls, cfg, input_shape):
	# fmt: off
	in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
	pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
	pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
	sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
	pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
	cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS
	cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS
	assert len(cascade_bbox_reg_weights) == len(cascade_ious)
	assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \
	"CascadeROIHeads only support class-agnostic regression now!"
	assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0]
	# fmt: on

	in_channels = [input_shape[f].channels for f in in_features]
	# Check all channel counts are equal
	assert len(set(in_channels)) == 1, in_channels
	in_channels = in_channels[0]

	box_pooler = ROIPooler(
	output_size=pooler_resolution,
	scales=pooler_scales,
	sampling_ratio=sampling_ratio,
	pooler_type=pooler_type,
	)
	pooled_shape = ShapeSpec(
	channels=in_channels, width=pooler_resolution, height=pooler_resolution
	)

	box_heads, box_predictors, proposal_matchers = [], [], []
	for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights):
	box_head = build_box_head(cfg, pooled_shape)
	box_heads.append(box_head)
	box_predictors.append(
	FastRCNNOutputLayers(
	cfg,
	box_head.output_shape,
	box2box_transform=Box2BoxTransform(weights=bbox_reg_weights),
	)
	)
	proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False))
	return {
	"box_in_features": in_features,
	"box_pooler": box_pooler,
	"box_heads": box_heads,
	"box_predictors": box_predictors,
	"proposal_matchers": proposal_matchers,
	}

	def forward(self, images, features, proposals, targets=None):
	del images
	if self.training:
	proposals = self.label_and_sample_proposals(proposals, targets)

	if self.training:
	# Need targets to box head
	losses = self._forward_box(features, proposals, targets)
	losses.update(self._forward_mask(features, proposals))
	losses.update(self._forward_keypoint(features, proposals))
	return proposals, losses
	else:
	pred_instances = self._forward_box(features, proposals)
	pred_instances = self.forward_with_given_boxes(features, pred_instances)
	return pred_instances, {}

	def _forward_box(self, features, proposals, targets=None):
	"""
	Args:
	features, targets: the same as in
	Same as in :meth:`ROIHeads.forward`.
	proposals (list[Instances]): the per-image object proposals with
	their matching ground truth.
	Each has fields "proposal_boxes", and "objectness_logits",
	"gt_classes", "gt_boxes".
	"""
	features = [features[f] for f in self.box_in_features]
	head_outputs = [] # (predictor, predictions, proposals)
	prev_pred_boxes = None
	image_sizes = [x.image_size for x in proposals]
	for k in range(self.num_cascade_stages):
	if k > 0:
	# The output boxes of the previous stage are used to create the input
	# proposals of the next stage.
	proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes)
	if self.training:
	proposals = self._match_and_label_boxes(proposals, k, targets)
	predictions = self._run_stage(features, proposals, k)
	prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals)
	head_outputs.append((self.box_predictor[k], predictions, proposals))

	if self.training:
	losses = {}
	storage = get_event_storage()
	for stage, (predictor, predictions, proposals) in enumerate(head_outputs):
	with storage.name_scope("stage{}".format(stage)):
	stage_losses = predictor.losses(predictions, proposals)
	losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()})
	return losses
	else:
	# Each is a list[Tensor] of length #image. Each tensor is Ri x (K+1)
	scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs]

	# Average the scores across heads
	scores = [
	sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages)
	for scores_per_image in zip(*scores_per_stage)
	]
	# Use the boxes of the last head
	predictor, predictions, proposals = head_outputs[-1]
	boxes = predictor.predict_boxes(predictions, proposals)
	pred_instances, _ = fast_rcnn_inference(
	boxes,
	scores,
	image_sizes,
	predictor.test_score_thresh,
	predictor.test_nms_thresh,
	predictor.test_topk_per_image,
	)
	return pred_instances

	@torch.no_grad()
	def _match_and_label_boxes(self, proposals, stage, targets):
	"""
	Match proposals with groundtruth using the matcher at the given stage.
	Label the proposals as foreground or background based on the match.

	Args:
	proposals (list[Instances]): One Instances for each image, with
	the field "proposal_boxes".
	stage (int): the current stage
	targets (list[Instances]): the ground truth instances

	Returns:
	list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
	"""
	num_fg_samples, num_bg_samples = [], []
	for proposals_per_image, targets_per_image in zip(proposals, targets):
	match_quality_matrix = pairwise_iou(
	targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
	)
	# proposal_labels are 0 or 1
	matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
	if len(targets_per_image) > 0:
	gt_classes = targets_per_image.gt_classes[matched_idxs]
	# Label unmatched proposals (0 label from matcher) as background (label=num_classes)
	gt_classes[proposal_labels == 0] = self.num_classes
	gt_boxes = targets_per_image.gt_boxes[matched_idxs]
	else:
	gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
	gt_boxes = Boxes(
	targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4))
	)
	proposals_per_image.gt_classes = gt_classes
	proposals_per_image.gt_boxes = gt_boxes

	num_fg_samples.append((proposal_labels == 1).sum().item())
	num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

	# Log the number of fg/bg samples in each stage
	storage = get_event_storage()
	storage.put_scalar(
	"stage{}/roi_head/num_fg_samples".format(stage),
	sum(num_fg_samples) / len(num_fg_samples),
	)
	storage.put_scalar(
	"stage{}/roi_head/num_bg_samples".format(stage),
	sum(num_bg_samples) / len(num_bg_samples),
	)
	return proposals

	def _run_stage(self, features, proposals, stage):
	"""
	Args:
	features (list[Tensor]): #lvl input features to ROIHeads
	proposals (list[Instances]): #image Instances, with the field "proposal_boxes"
	stage (int): the current stage

	Returns:
	Same output as `FastRCNNOutputLayers.forward()`.
	"""
	box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
	# The original implementation averages the losses among heads,
	# but scale up the parameter gradients of the heads.
	# This is equivalent to adding the losses among heads,
	# but scale down the gradients on features.
	if self.training:
	box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages)
	box_features = self.box_head[stage](box_features)
	return self.box_predictor[stage](box_features)

	def _create_proposals_from_boxes(self, boxes, image_sizes):
	"""
	Args:
	boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4
	image_sizes (list[tuple]): list of image shapes in (h, w)

	Returns:
	list[Instances]: per-image proposals with the given boxes.
	"""
	# Just like RPN, the proposals should not have gradients
	boxes = [Boxes(b.detach()) for b in boxes]
	proposals = []
	for boxes_per_image, image_size in zip(boxes, image_sizes):
	boxes_per_image.clip(image_size)
	if self.training:
	# do not filter empty boxes at inference time,
	# because the scores from each stage need to be aligned and added later
	boxes_per_image = boxes_per_image[boxes_per_image.nonempty()]
	prop = Instances(image_size)
	prop.proposal_boxes = boxes_per_image
	proposals.append(prop)
	return proposals