import math


class WarmupLR:
    """
    Linear warmup learning rate scheduler. After warmup, the learning rate is
    constant.

    Args:
        optimizer (torch.optim.Optimizer): optimizer
        warmup_steps (int): number of warmup steps
    """

    def __init__(self, optimizer, warmup_steps):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.base_lr = None

    def get_lr(self, lr, step):
        # Scale the base learning rate linearly from 0 to lr over warmup_steps,
        # then hold it constant.
        return lr * min(step / max(self.warmup_steps, 1), 1.0)

    def step(self, step):
        if self.base_lr is None:
            # Capture the base learning rate of each parameter group on the
            # first call so warmup always scales the original values.
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        # Exclude the optimizer itself so the state dict stays serializable.
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        self.__dict__.update(state_dict)

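
# A minimal usage sketch for WarmupLR (assumes a torch optimizer named
# `optimizer` and a training loop driven by an explicit step counter; the
# names and warmup length are illustrative, not part of this module):
#
#     scheduler = WarmupLR(optimizer, warmup_steps=1000)
#     for step in range(num_steps):
#         scheduler.step(step)   # set the learning rate for this step
#         ...                    # forward/backward pass
#         optimizer.step()

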
class WarmupCosineDecayLR:
    """
    Linear warmup learning rate scheduler. After warmup, the learning rate
    follows a cosine decay.

    Args:
        optimizer (torch.optim.Optimizer): optimizer
        warmup_steps (int): number of warmup steps
        total_steps (int): total number of steps
        rate (float): cosine decay rate; with the default of 1.0 the learning
            rate decays to zero at total_steps
    """

    def __init__(self, optimizer, warmup_steps, total_steps, rate=1.0):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.base_lr = None
        self.total_steps = total_steps
        self.rate = rate

    def get_lr(self, lr, step):
        if step < self.warmup_steps:
            # Linear warmup from 0 to the base learning rate.
            return lr * min(step / max(self.warmup_steps, 1), 1.0)
        else:
            # Cosine decay over the remaining steps; assumes
            # total_steps > warmup_steps.
            return (
                0.5
                * lr
                * (
                    1
                    + math.cos(
                        self.rate
                        * math.pi
                        * (step - self.warmup_steps)
                        / (self.total_steps - self.warmup_steps)
                    )
                )
            )

    def step(self, step):
        if self.base_lr is None:
            self.base_lr = [
                param_group["lr"] for param_group in self.optimizer.param_groups
            ]
        for param_group, base_lr_group in zip(
            self.optimizer.param_groups, self.base_lr
        ):
            param_group["lr"] = self.get_lr(base_lr_group, step)

    def state_dict(self):
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict):
        self.__dict__.update(state_dict)
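

if __name__ == "__main__":
    # Minimal smoke test / usage sketch for WarmupCosineDecayLR. Assumes
    # PyTorch is installed; the model, optimizer, and step counts below are
    # illustrative only, not part of this module's API.
    import torch

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = WarmupCosineDecayLR(optimizer, warmup_steps=10, total_steps=100)

    for step in range(100):
        scheduler.step(step)  # set the learning rate for this step
        # ... forward/backward pass and optimizer.step() would go here ...
        if step % 20 == 0:
            print(f"step {step:3d}  lr {optimizer.param_groups[0]['lr']:.4f}")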