Spaces:
Running
on
T4
Running
on
T4
File size: 9,574 Bytes
186701e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 |
# Copyright (c) OpenMMLab. All rights reserved.
"""Hyper-parameter Scheduler Visualization.
This tool aims to help the user to check
the hyper-parameter scheduler of the optimizer(without training),
which support the "learning rate", "momentum", and "weight_decay".
Example:
```shell
python tools/analysis_tools/vis_scheduler.py \
configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \
--dataset-size 118287 \
--ngpus 8 \
--out-dir ./output
```
Modified from: https://github.com/open-mmlab/mmclassification/blob/1.x/tools/visualizations/vis_scheduler.py # noqa
"""
import argparse
import json
import os.path as osp
import re
from pathlib import Path
from unittest.mock import MagicMock
import matplotlib.pyplot as plt
import rich
import torch.nn as nn
from mmengine.config import Config, DictAction
from mmengine.hooks import Hook
from mmengine.model import BaseModel
from mmengine.registry import init_default_scope
from mmengine.runner import Runner
from mmengine.utils.path import mkdir_or_exist
from mmengine.visualization import Visualizer
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn
def parse_args():
parser = argparse.ArgumentParser(
description='Visualize a hyper-parameter scheduler')
parser.add_argument('config', help='config file path')
parser.add_argument(
'-p',
'--parameter',
type=str,
default='lr',
choices=['lr', 'momentum', 'wd'],
help='The parameter to visualize its change curve, choose from'
'"lr", "wd" and "momentum". Defaults to "lr".')
parser.add_argument(
'-d',
'--dataset-size',
type=int,
help='The size of the dataset. If specify, `DATASETS.build` will '
'be skipped and use this size as the dataset size.')
parser.add_argument(
'-n',
'--ngpus',
type=int,
default=1,
help='The number of GPUs used in training.')
parser.add_argument(
'-o', '--out-dir', type=Path, help='Path to output file')
parser.add_argument(
'--log-level',
default='WARNING',
help='The log level of the handler and logger. Defaults to '
'WARNING.')
parser.add_argument('--title', type=str, help='title of figure')
parser.add_argument(
'--style', type=str, default='whitegrid', help='style of plt')
parser.add_argument('--not-show', default=False, action='store_true')
parser.add_argument(
'--window-size',
default='12*7',
help='Size of the window to display images, in format of "$W*$H".')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
args = parser.parse_args()
if args.window_size != '':
assert re.match(r'\d+\*\d+', args.window_size), \
"'window-size' must be in format 'W*H'."
return args
class SimpleModel(BaseModel):
"""simple model that do nothing in train_step."""
def __init__(self):
super().__init__()
self.data_preprocessor = nn.Identity()
self.conv = nn.Conv2d(1, 1, 1)
def forward(self, inputs, data_samples, mode='tensor'):
pass
def train_step(self, data, optim_wrapper):
pass
class ParamRecordHook(Hook):
def __init__(self, by_epoch):
super().__init__()
self.by_epoch = by_epoch
self.lr_list = []
self.momentum_list = []
self.wd_list = []
self.task_id = 0
self.progress = Progress(BarColumn(), MofNCompleteColumn(),
TextColumn('{task.description}'))
def before_train(self, runner):
if self.by_epoch:
total = runner.train_loop.max_epochs
self.task_id = self.progress.add_task(
'epochs', start=True, total=total)
else:
total = runner.train_loop.max_iters
self.task_id = self.progress.add_task(
'iters', start=True, total=total)
self.progress.start()
def after_train_epoch(self, runner):
if self.by_epoch:
self.progress.update(self.task_id, advance=1)
# TODO: Support multiple schedulers
def after_train_iter(self, runner, batch_idx, data_batch, outputs):
if not self.by_epoch:
self.progress.update(self.task_id, advance=1)
self.lr_list.append(runner.optim_wrapper.get_lr()['lr'][0])
self.momentum_list.append(
runner.optim_wrapper.get_momentum()['momentum'][0])
self.wd_list.append(
runner.optim_wrapper.param_groups[0]['weight_decay'])
def after_train(self, runner):
self.progress.stop()
def plot_curve(lr_list, args, param_name, iters_per_epoch, by_epoch=True):
"""Plot learning rate vs iter graph."""
try:
import seaborn as sns
sns.set_style(args.style)
except ImportError:
pass
wind_w, wind_h = args.window_size.split('*')
wind_w, wind_h = int(wind_w), int(wind_h)
plt.figure(figsize=(wind_w, wind_h))
ax: plt.Axes = plt.subplot()
ax.plot(lr_list, linewidth=1)
if by_epoch:
ax.xaxis.tick_top()
ax.set_xlabel('Iters')
ax.xaxis.set_label_position('top')
sec_ax = ax.secondary_xaxis(
'bottom',
functions=(lambda x: x / iters_per_epoch,
lambda y: y * iters_per_epoch))
sec_ax.set_xlabel('Epochs')
else:
plt.xlabel('Iters')
plt.ylabel(param_name)
if args.title is None:
plt.title(f'{osp.basename(args.config)} {param_name} curve')
else:
plt.title(args.title)
def simulate_train(data_loader, cfg, by_epoch):
model = SimpleModel()
param_record_hook = ParamRecordHook(by_epoch=by_epoch)
default_hooks = dict(
param_scheduler=cfg.default_hooks['param_scheduler'],
runtime_info=None,
timer=None,
logger=None,
checkpoint=None,
sampler_seed=None,
param_record=param_record_hook)
runner = Runner(
model=model,
work_dir=cfg.work_dir,
train_dataloader=data_loader,
train_cfg=cfg.train_cfg,
log_level=cfg.log_level,
optim_wrapper=cfg.optim_wrapper,
param_scheduler=cfg.param_scheduler,
default_scope=cfg.default_scope,
default_hooks=default_hooks,
visualizer=MagicMock(spec=Visualizer),
custom_hooks=cfg.get('custom_hooks', None))
runner.train()
param_dict = dict(
lr=param_record_hook.lr_list,
momentum=param_record_hook.momentum_list,
wd=param_record_hook.wd_list)
return param_dict
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
if cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
cfg.log_level = args.log_level
init_default_scope(cfg.get('default_scope', 'mmyolo'))
# init logger
print('Param_scheduler :')
rich.print_json(json.dumps(cfg.param_scheduler))
# prepare data loader
batch_size = cfg.train_dataloader.batch_size * args.ngpus
if 'by_epoch' in cfg.train_cfg:
by_epoch = cfg.train_cfg.get('by_epoch')
elif 'type' in cfg.train_cfg:
by_epoch = cfg.train_cfg.get('type') == 'EpochBasedTrainLoop'
else:
raise ValueError('please set `train_cfg`.')
if args.dataset_size is None and by_epoch:
from mmyolo.registry import DATASETS
dataset_size = len(DATASETS.build(cfg.train_dataloader.dataset))
else:
dataset_size = args.dataset_size or batch_size
class FakeDataloader(list):
dataset = MagicMock(metainfo=None)
data_loader = FakeDataloader(range(dataset_size // batch_size))
dataset_info = (
f'\nDataset infos:'
f'\n - Dataset size: {dataset_size}'
f'\n - Batch size per GPU: {cfg.train_dataloader.batch_size}'
f'\n - Number of GPUs: {args.ngpus}'
f'\n - Total batch size: {batch_size}')
if by_epoch:
dataset_info += f'\n - Iterations per epoch: {len(data_loader)}'
rich.print(dataset_info + '\n')
# simulation training process
param_dict = simulate_train(data_loader, cfg, by_epoch)
param_list = param_dict[args.parameter]
if args.parameter == 'lr':
param_name = 'Learning Rate'
elif args.parameter == 'momentum':
param_name = 'Momentum'
else:
param_name = 'Weight Decay'
plot_curve(param_list, args, param_name, len(data_loader), by_epoch)
if args.out_dir:
# make dir for output
mkdir_or_exist(args.out_dir)
# save the graph
out_file = osp.join(
args.out_dir, f'{osp.basename(args.config)}-{args.parameter}.jpg')
plt.savefig(out_file)
print(f'\nThe {param_name} graph is saved at {out_file}')
if not args.not_show:
plt.show()
if __name__ == '__main__':
main()
|