Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,688 Bytes
28c256d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from typing import Optional
import torch.nn as nn
from mmengine.hooks import Hook
from mmengine.model import is_model_wrapper
from mmengine.runner import Runner
from mmdet.registry import HOOKS
@HOOKS.register_module()
class MeanTeacherHook(Hook):
    """Mean Teacher Hook.

    Mean Teacher is an efficient semi-supervised learning method in
    `Mean Teacher <https://arxiv.org/abs/1703.01780>`_.
    This method requires two models with exactly the same structure,
    as the student model and the teacher model, respectively.
    The student model updates the parameters through gradient descent,
    and the teacher model updates the parameters through
    exponential moving average of the student model.
    Compared with the student model, the teacher model
    is smoother and accumulates more knowledge.

    Args:
        momentum (float): The momentum used for updating teacher's parameter.
            Teacher's parameter are updated with the formula:
            `teacher = (1-momentum) * teacher + momentum * student`.
            Must lie strictly between 0 and 1. Defaults to 0.001.
        interval (int): Update teacher's parameter every interval iteration.
            Must be positive. Defaults to 1.
        skip_buffer (bool): Whether to skip the model buffers, such as
            batchnorm running stats (running_mean, running_var). When True,
            only the entries of ``named_parameters()`` are averaged; when
            False, every floating-point entry of ``state_dict()`` is
            averaged. The value is stored as ``self.skip_buffers``.
            Defaults to True.
    """

    def __init__(self,
                 momentum: float = 0.001,
                 interval: int = 1,
                 skip_buffer: bool = True) -> None:
        assert 0 < momentum < 1
        # ``after_train_iter`` computes ``(iter + 1) % interval``; reject a
        # non-positive interval here instead of failing with
        # ZeroDivisionError after the first training iteration.
        assert interval > 0, 'interval must be a positive integer'
        self.momentum = momentum
        self.interval = interval
        self.skip_buffers = skip_buffer

    def before_train(self, runner: Runner) -> None:
        """To check that teacher model and student model exist.

        When training starts from iteration 0, the teacher is additionally
        overwritten with the student's weights (an update with momentum 1).

        Args:
            runner (Runner): The runner whose ``model`` holds the
                ``teacher`` and ``student`` sub-models.

        Raises:
            AssertionError: If the (unwrapped) model has no ``teacher`` or
                no ``student`` attribute.
        """
        model = runner.model
        if is_model_wrapper(model):
            # Look through the wrapper to reach the underlying model.
            model = model.module
        assert hasattr(model, 'teacher')
        assert hasattr(model, 'student')
        # only do it at initial stage
        if runner.iter == 0:
            # momentum == 1 gives teacher = 0 * teacher + 1 * student.
            self.momentum_update(model, 1)

    def after_train_iter(self,
                         runner: Runner,
                         batch_idx: int,
                         data_batch: Optional[dict] = None,
                         outputs: Optional[dict] = None) -> None:
        """Update teacher's parameter every self.interval iterations.

        Args:
            runner (Runner): The runner providing ``iter`` and ``model``.
            batch_idx (int): Unused by this hook.
            data_batch (dict, optional): Unused by this hook.
            outputs (dict, optional): Unused by this hook.
        """
        if (runner.iter + 1) % self.interval != 0:
            return
        model = runner.model
        if is_model_wrapper(model):
            model = model.module
        self.momentum_update(model, self.momentum)

    def momentum_update(self, model: nn.Module, momentum: float) -> None:
        """Compute the moving average of the parameters using exponential
        moving average.

        Each teacher tensor is updated in place as
        ``teacher = (1 - momentum) * teacher + momentum * student``.
        Student and teacher tensors are paired by iteration order.

        Args:
            model (nn.Module): Module exposing ``student`` and ``teacher``
                sub-modules.
            momentum (float): Weight given to the student's value.
        """
        if self.skip_buffers:
            # Parameters only; buffers are left untouched.
            for src_parm, dst_parm in zip(model.student.parameters(),
                                          model.teacher.parameters()):
                dst_parm.data.mul_(1 - momentum).add_(
                    src_parm.data, alpha=momentum)
        else:
            # Parameters and buffers, as listed by ``state_dict()``.
            for src_parm, dst_parm in zip(
                    model.student.state_dict().values(),
                    model.teacher.state_dict().values()):
                # exclude num_tracking
                if dst_parm.dtype.is_floating_point:
                    dst_parm.data.mul_(1 - momentum).add_(
                        src_parm.data, alpha=momentum)
|