LRScheduler¶

danling.optim.lr_scheduler ¶

LRScheduler ¶

Bases: _LRScheduler

General learning rate scheduler.

PyTorch LRScheduler is hard to extend. This class is a wrapper of PyTorch LRScheduler, which provides a more general interface. You only needs to add a new scaling which calculates a learning rate ratio (range from 0 to 1) with total progress (range from 0 to 1), and everything else will be done automatically.

Moreover, this class has warmup and cooldown built-in. By default, the first 5% and last 20% of training steps will be warmup and cooldown respectively. You can alternate by passing warmup_steps and cooldown_steps, or disable them by setting them to 0.

Parameters:

Name	Type	Description	Default
`optimizer` ¶	`Optimizer`	Wrapped optimizer.	required
`total_steps` ¶	`int`	Total number of trainable steps.	required
`final_lr_ratio` ¶	`Optional[float]`	Final learning rate ratio to initial learning rate. Defaults to 1e-3.	`None`
`final_lr` ¶	`Optional[float]`	Final learning rate.	`None`
`min_lr` ¶	`float`	Minimal learning rate. Defaults to 1e-9.	`1e-09`
`method` ¶	`str`	Scaling method. Defaults to “cosine”.	`'cosine'`
`warmup_steps` ¶	`Optional[int]`	Number of warmup steps. Defaults to `steps // 20`.	`None`
`cooldown_steps` ¶	`Optional[int]`	Number of cooldown steps. Defaults to `steps // 5`.	`None`
`last_epoch` ¶	`int`	The index of last epoch. Defaults to -1.	`-1`
`scaling` ¶	`Optional[str]`	Method to calculate learning rate given ratio, should be one of “percentile” or “numerical”. Defaults to “percentile” if `final_lr_ratio` is set, otherwise “numerical”.	`None`

Examples:

Python Console Session
>>> from danling.optim import LRScheduler
>>> import torch
>>> from torch import optim
>>> optimizer = optim.SGD([{'params': torch.tensor([0])}], lr=1, momentum=0.9)
>>> scheduler = LRScheduler(optimizer, total_steps=5, final_lr_ratio=1e-5, method='linear')
>>> lrs = []
>>> for epoch in range(5):
...     lrs.append(scheduler.get_lr()[0])
...     scheduler.step()
>>> [round(lr, 10) for lr in lrs]
[0.1, 0.01, 0.001, 0.0001, 1e-09]
>>> scheduler = LRScheduler(optimizer, total_steps=5, final_lr_ratio=1e-5, method='cosine')
>>> lrs = []
>>> for epoch in range(5):
...     lrs.append(scheduler.get_lr()[0])
...     scheduler.step()
>>> [round(lr, 10) for lr in lrs]
[0.3330753446, 0.0187302031, 0.000533897, 3.00232e-05, 1e-09]
>>> scheduler = LRScheduler(optimizer, total_steps=5, final_lr_ratio=1e-5, method='linear', scaling='numerical')
>>> lrs = []
>>> for epoch in range(5):
...     lrs.append(scheduler.get_lr()[0])
...     scheduler.step()
>>> [round(lr, 2) for lr in lrs]
[0.8, 0.6, 0.4, 0.2, 0.0]

Source code in danling/optim/lr_scheduler/lr_scheduler.py

Python
class LRScheduler(lr_scheduler._LRScheduler):  # pylint: disable=protected-access
    r"""
    General learning rate scheduler.

    PyTorch LRScheduler is hard to extend.
    This class is a wrapper of PyTorch LRScheduler, which provides a more general interface.
    You only needs to add a new scaling which calculates a learning rate ratio (range from 0 to 1)
    with total progress (range from 0 to 1), and everything else will be done automatically.

    Moreover, this class has warmup and cooldown built-in.
    By default, the first 5% and last 20% of training steps will be warmup and cooldown respectively.
    You can alternate by passing `warmup_steps` and `cooldown_steps`, or disable them by setting them to 0.

    Args:
        optimizer: Wrapped optimizer.
        total_steps: Total number of trainable steps.
        final_lr_ratio: Final learning rate ratio to initial learning rate.
            Defaults to 1e-3.
        final_lr: Final learning rate.
        min_lr: Minimal learning rate.
            Defaults to 1e-9.
        method: Scaling method.
            Defaults to "cosine".
        warmup_steps: Number of warmup steps.
            Defaults to `steps // 20`.
        cooldown_steps: Number of cooldown steps.
            Defaults to `steps // 5`.
        last_epoch: The index of last epoch.
            Defaults to -1.
        scaling: Method to calculate learning rate given ratio, should be one of "percentile" or "numerical".
            Defaults to "percentile" if `final_lr_ratio` is set, otherwise "numerical".

    Examples:
        >>> from danling.optim import LRScheduler
        >>> import torch
        >>> from torch import optim
        >>> optimizer = optim.SGD([{'params': torch.tensor([0])}], lr=1, momentum=0.9)
        >>> scheduler = LRScheduler(optimizer, total_steps=5, final_lr_ratio=1e-5, method='linear')
        >>> lrs = []
        >>> for epoch in range(5):
        ...     lrs.append(scheduler.get_lr()[0])
        ...     scheduler.step()
        >>> [round(lr, 10) for lr in lrs]
        [0.1, 0.01, 0.001, 0.0001, 1e-09]
        >>> scheduler = LRScheduler(optimizer, total_steps=5, final_lr_ratio=1e-5, method='cosine')
        >>> lrs = []
        >>> for epoch in range(5):
        ...     lrs.append(scheduler.get_lr()[0])
        ...     scheduler.step()
        >>> [round(lr, 10) for lr in lrs]
        [0.3330753446, 0.0187302031, 0.000533897, 3.00232e-05, 1e-09]
        >>> scheduler = LRScheduler(optimizer, total_steps=5, final_lr_ratio=1e-5, method='linear', scaling='numerical')
        >>> lrs = []
        >>> for epoch in range(5):
        ...     lrs.append(scheduler.get_lr()[0])
        ...     scheduler.step()
        >>> [round(lr, 2) for lr in lrs]
        [0.8, 0.6, 0.4, 0.2, 0.0]
    """  # noqa: E501

    def __init__(
        self,
        optimizer: Optimizer,
        total_steps: int,
        final_lr_ratio: Optional[float] = None,
        final_lr: Optional[float] = None,
        min_lr: float = 1e-9,
        method: str = "cosine",
        warmup_steps: Optional[int] = None,
        cooldown_steps: Optional[int] = None,
        last_epoch: int = -1,
        scaling: Optional[str] = None,
        step_with_optimizer: bool = True,
    ):
        if total_steps <= 0:
            raise ValueError(f"Total steps must be positive, but got {total_steps}")
        if warmup_steps is None:
            warmup_steps = total_steps // 20
        elif warmup_steps > total_steps:
            raise ValueError(f"Warmup steps must be less than total steps, but got {warmup_steps} > {total_steps}")
        elif warmup_steps < 0:
            raise ValueError(f"Warmup steps must be positive, but got {warmup_steps}")
        if cooldown_steps is None:
            cooldown_steps = total_steps // 5
        elif cooldown_steps > total_steps:
            raise ValueError(f"Cooldown steps must be less than total steps, but got {cooldown_steps} > {total_steps}")
        elif cooldown_steps < 0:
            raise ValueError(f"Cooldown steps must be positive, but got {cooldown_steps}")
        if warmup_steps + cooldown_steps > total_steps:
            raise ValueError(
                "Warmup steps + cooldown steps must be less than total steps, "
                f"but got {warmup_steps} + {cooldown_steps} > {total_steps}"
            )
        if final_lr_ratio is not None:
            if final_lr is not None:
                raise ValueError("Only one of `final_lr_ratio` and `final_lr` should be set, but not both")
            if final_lr_ratio < 0:
                raise ValueError(f"`final_lr_ratio` must be positive, but got {final_lr_ratio}")
            if scaling is None:
                scaling = "percentile"
        if final_lr is not None and final_lr < 0:
            raise ValueError(f"`final_lr` must be positive, but got {final_lr}")
        if min_lr < 0:
            raise ValueError(f"`min_lr` must be positive, but got {min_lr}")
        self.strategies = {
            k: v for k, v in self.__class__.__dict__.items() if callable(v) and (not k.startswith("_") or k in "get_lr")
        }
        if method not in self.strategies:
            raise ValueError(f"Scaling method must be one of {self.strategies.keys()}, but got {method}")

        if final_lr_ratio is None and final_lr is None:
            final_lr_ratio = 1e-3
            if scaling is None:
                scaling = "percentile"
        if final_lr is not None and min_lr > final_lr:
            min_lr = final_lr
        if scaling is None:
            scaling = "numerical"

        self.final_lr_ratio = final_lr_ratio
        self.final_lr = final_lr
        self.total_steps = total_steps
        self.min_lr = min_lr
        self.method = method
        self.scaling = scaling
        self.warmup_steps = warmup_steps
        self.cooldown_steps = cooldown_steps
        self.cooldown_steps_begin = self.total_steps - self.cooldown_steps
        super().__init__(optimizer, last_epoch)
        if step_with_optimizer:
            self.optimizer.register_step_post_hook(self.step_post_hook)

    def step_post_hook(self, optimizer: Optimizer, *args, **kwargs):
        self.step()

    def get_lr(self) -> List[float]:
        step_count = self._step_count
        if step_count > self.total_steps + 1 or step_count < 1:
            warn(
                f"Step count {step_count} is out of range [1, {self.total_steps + 1}]",
                category=RuntimeWarning,
                stacklevel=2,
            )
        return [self._get_lr(lr, step_count) for lr in self.base_lrs]

    def _get_lr(
        self,
        lr: float,
        step_count: Optional[int] = None,
        progress: Optional[float] = None,
        warmup_ratio: Optional[float] = None,
        cooldown_ratio: Optional[float] = None,
        scaling: Optional[str] = None,
    ) -> float:
        scaling = scaling or self.scaling
        step_count = step_count or self._step_count
        progress = progress or min(max(step_count / self.total_steps, 0.0), 1.0)
        final_lr = self.final_lr if self.final_lr is not None else lr * self.final_lr_ratio  # type: ignore[operator]
        ratio = getattr(self, self.method)(progress)
        if scaling == "percentile":
            lr *= pow(final_lr / lr, ratio)
        elif scaling == "numerical":
            lr = (1 - ratio) * (lr - final_lr) + final_lr
        else:
            raise ValueError(f"Method must be one of ['percentile', 'numerical'], but got {scaling}")
        if self.warmup_steps > step_count > 0:
            warmup_ratio = warmup_ratio or step_count / self.warmup_steps
            lr = warmup_ratio * (lr - self.min_lr) + self.min_lr
        elif self.cooldown_steps > 0 and step_count > self.cooldown_steps_begin:
            cooldown_ratio = cooldown_ratio or 1 - (step_count - self.cooldown_steps_begin) / self.cooldown_steps
            lr = cooldown_ratio * (lr - self.min_lr) + self.min_lr
        return max(self.min_lr, lr)

    def linear(self, progress: float) -> float:
        return progress

    def cosine(self, progress: float) -> float:
        return 1 - ((1 + cos(pi * progress)) / 2)

    def constant(self, progress: float) -> float:  # pylint: disable=unused-argument
        return 0.0

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}({self.method}, scaling={self.scaling}, "
            f"final_lr_ratio={self.final_lr_ratio}, total_steps={self.total_steps}, "
            f"warmup_steps={self.warmup_steps}, cooldown_steps={self.cooldown_steps})"
        )

LRScheduler¶

danling.optim.lr_scheduler ¶

LRScheduler ¶

`optimizer` ¶

`total_steps` ¶

`final_lr_ratio` ¶

`final_lr` ¶

`min_lr` ¶

`method` ¶

`warmup_steps` ¶

`cooldown_steps` ¶

`last_epoch` ¶

`scaling` ¶

LRScheduler¶

danling.optim.lr_scheduler ¶

LRScheduler ¶

optimizer ¶

total_steps ¶

final_lr_ratio ¶

final_lr ¶

min_lr ¶

method ¶

warmup_steps ¶

cooldown_steps ¶

last_epoch ¶

scaling ¶

`optimizer` ¶

`total_steps` ¶

`final_lr_ratio` ¶

`final_lr` ¶

`min_lr` ¶

`method` ¶

`warmup_steps` ¶

`cooldown_steps` ¶

`last_epoch` ¶

`scaling` ¶