# Source code for mygrad.nnet.initializers.dirac

from typing import Optional

import numpy as np

from mygrad import Tensor


def dirac(*shape: int, dtype=np.float32, constant: Optional[bool] = None) -> Tensor:
    """Initialize a :class:`mygrad.Tensor` according to the Dirac initialization procedure
    described by Zagoruyko and Komodakis.

    Parameters
    ----------
    *shape : int or Sequence[int]
        The shape of the output Tensor, given either as separate integers
        (``dirac(2, 3, 4)``) or as a single sequence (``dirac((2, 3, 4))``).
        Note that ``shape`` must be at least two-dimensional.

    dtype : data-type, optional (default=float32)
        The data type of the output tensor.

    constant : Optional[bool]
        If ``True``, this tensor is treated as a constant, and thus does not
        facilitate back propagation (i.e. ``constant.grad`` will always return
        ``None``).

        Defaults to ``False`` for float-type data.
        Defaults to ``True`` for integer-type data.

        Integer-type tensors must be constant.

    Returns
    -------
    mygrad.Tensor, shape=``shape``
        A Tensor, with values initialized according to the Dirac initialization.

    Raises
    ------
    ValueError
        If ``shape`` describes fewer than two dimensions. This includes a lone
        integer such as ``dirac(5)``.

    Extended Description
    --------------------
    Zagoruyko and Komodakis put forward the Dirac initialization in the paper
        "DiracNets: Training Very Deep Neural Networks without Skip Connections"
    https://arxiv.org/abs/1706.00388

    A Tensor I initialized via this should satisfy:
        I ⋆ x = x

    for compatible tensors ``x``, where ``⋆`` indicates convolution. Note that this does not
    guarantee that the convolution will produce ``x``, but it will preserve as many channels of
    the input as possible.
    """
    # Accept both ``dirac(2, 3, 4)`` and ``dirac((2, 3, 4))``. A lone scalar such as
    # ``dirac(5)`` is deliberately left as the one-dimensional shape ``(5,)`` so that the
    # dimensionality check below rejects it with a ValueError, instead of ``len`` being
    # called on a bare integer.
    if len(shape) == 1 and np.ndim(shape[0]) > 0:
        shape = tuple(shape[0])

    if len(shape) < 2:
        raise ValueError("Dirac initialization requires at least two dimensions")

    tensor = np.zeros(shape, dtype=dtype)

    # One entry per channel that can be carried through: min(out dim, in dim).
    diagonal = np.arange(min(shape[0], shape[1]))

    # Center position along every trailing (spatial) axis; these scalar indices
    # broadcast against ``diagonal`` in the assignment below.
    centers = tuple(extent // 2 for extent in tensor.shape[2:])

    # tensor[i, i, k1//2, k2//2, ..., kn//2] = 1 for each i in range(min(shape[0], shape[1])),
    # where the k values are the sizes of the spatial dimensions of `tensor`
    tensor[(diagonal, diagonal, *centers)] = 1
    return Tensor(tensor, constant=constant)