U
    <ºc¥K  ã                   @   s²   d dl mZ ddlmZmZ ddlmZ dddd	d
dgZG dd„ deƒZ	G dd„ de	ƒZ
G dd	„ d	ee	ƒZG dd„ de	ƒZG dd
„ d
ee	ƒZG dd„ de	ƒZG dd„ dee	ƒZdS )é    )ÚTensoré   )Ú_LazyNormBaseÚ	_NormBaseé   )Ú
functionalÚInstanceNorm1dÚInstanceNorm2dÚInstanceNorm3dÚLazyInstanceNorm1dÚLazyInstanceNorm2dÚLazyInstanceNorm3dc                       sj   e Zd Zdeeeeeddœ‡ fdd„Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
‡ fdd„Zeedœdd„Z‡  ZS )Ú_InstanceNormçñhãˆµøä>çš™™™™™¹?FN)Únum_featuresÚepsÚmomentumÚaffineÚtrack_running_statsÚreturnc           	         s*   ||dœ}t t| ƒj|||||f|Ž d S )N)ÚdeviceÚdtype)Úsuperr   Ú__init__)	Úselfr   r   r   r   r   r   r   Zfactory_kwargs©Ú	__class__© úA/tmp/pip-unpacked-wheel-gikjz4vx/torch/nn/modules/instancenorm.pyr   
   s    


    ÿÿz_InstanceNorm.__init__c                 C   s   t ‚d S ©N©ÚNotImplementedError©r   Úinputr   r   r   Ú_check_input_dim   s    z_InstanceNorm._check_input_dimc                 C   s   t ‚d S r    r!   ©r   r   r   r   Ú_get_no_batch_dim   s    z_InstanceNorm._get_no_batch_dimc                 C   s   |   | d¡¡ d¡S )Nr   )Ú_apply_instance_normZ	unsqueezeZsqueezer#   r   r   r   Ú_handle_no_batch_input   s    z$_InstanceNorm._handle_no_batch_inputc              
   C   s.   t  || j| j| j| j| jp"| j | j| j	¡S r    )
ÚFZinstance_normÚrunning_meanÚrunning_varZweightZbiasZtrainingr   r   r   r#   r   r   r   r(   !   s          þz"_InstanceNorm._apply_instance_normc              	      sª   |  dd ¡}|d krŠ| jsŠg }	dD ]}
||
 }||kr"|	 |¡ q"t|	ƒdkrŠ| djd dd„ |	D ƒ¡| jjd¡ |	D ]}| |¡ qzt	t
| ƒ |||||||¡ d S )	NÚversion)r+   r,   r   a¤  Unexpected running stats buffer(s) {names} for {klass} with track_running_stats=False. If state_dict is a checkpoint saved before 0.4.0, this may be expected because {klass} does not track running stats by default since 0.4.0. Please remove these keys from state_dict. If the running stats are actually needed, instead set track_running_stats=True in {klass} to enable them. See the documentation of {klass} for details.z and c                 s   s   | ]}d   |¡V  qdS )z"{}"N)Úformat)Ú.0Úkr   r   r   Ú	<genexpr>;   s     z6_InstanceNorm._load_from_state_dict.<locals>.<genexpr>)ÚnamesÚklass)Úgetr   ÚappendÚlenr.   Újoinr   Ú__name__Úpopr   r   Ú_load_from_state_dict)r   Z
state_dictÚprefixZlocal_metadataÚstrictZmissing_keysZunexpected_keysZ
error_msgsr-   Zrunning_stats_keysÚnameÚkeyr   r   r   r:   &   s2    ÷ÿ
     þz#_InstanceNorm._load_from_state_dict)r$   r   c                 C   s.   |   |¡ | ¡ |  ¡ kr$|  |¡S |  |¡S r    )r%   Údimr'   r)   r(   r#   r   r   r   ÚforwardD   s    

z_InstanceNorm.forward)r   r   FFNN)r8   Ú
__module__Ú__qualname__ÚintÚfloatÚboolr   r%   r'   r)   r(   r:   r   r@   Ú__classcell__r   r   r   r   r   	   s(         ø÷r   c                   @   s    e Zd ZdZdd„ Zdd„ ZdS )r   aÄ  Applies Instance Normalization over a 2D (unbatched) or 3D (batched) input
    as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size `C` (where `C` is the number of features or channels of the input) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm1d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm1d` is applied
        on each channel of channeled data like multidimensional time series, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm1d` usually don't apply affine
        transform.

    Args:
        num_features: number of features or channels :math:`C` of the input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm1d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm1d(100, affine=True)
        >>> input = torch.randn(20, 100, 40)
        >>> output = m(input)
    c                 C   s   dS ©Nr   r   r&   r   r   r   r'      s    z InstanceNorm1d._get_no_batch_dimc                 C   s"   |  ¡ dkrtd |  ¡ ¡ƒ‚d S ©N)r   é   z'expected 2D or 3D input (got {}D input)©r?   Ú
ValueErrorr.   r#   r   r   r   r%   “   s    ÿzInstanceNorm1d._check_input_dimN©r8   rA   rB   Ú__doc__r'   r%   r   r   r   r   r   M   s   Bc                   @   s$   e Zd ZdZeZdd„ Zdd„ ZdS )r   aq  A :class:`torch.nn.InstanceNorm1d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm1d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, L)` or :math:`(C, L)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input)
    c                 C   s   dS rG   r   r&   r   r   r   r'   ·   s    z$LazyInstanceNorm1d._get_no_batch_dimc                 C   s"   |  ¡ dkrtd |  ¡ ¡ƒ‚d S rH   rJ   r#   r   r   r   r%   º   s    ÿz#LazyInstanceNorm1d._check_input_dimN)r8   rA   rB   rM   r   Úcls_to_becomer'   r%   r   r   r   r   r   ™   s   c                   @   s    e Zd ZdZdd„ Zdd„ ZdS )r	   aì  Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs
    with additional channel dimension) as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size `C` (where `C` is the input size) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm2d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm2d` is applied
        on each channel of channeled data like RGB images, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm2d` usually don't apply affine
        transform.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, H, W)` or :math:`(C, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm2d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm2d(100, affine=True)
        >>> input = torch.randn(20, 100, 35, 45)
        >>> output = m(input)
    c                 C   s   dS ©NrI   r   r&   r   r   r   r'     s    z InstanceNorm2d._get_no_batch_dimc                 C   s"   |  ¡ dkrtd |  ¡ ¡ƒ‚d S ©N)rI   é   z'expected 3D or 4D input (got {}D input)rJ   r#   r   r   r   r%     s    ÿzInstanceNorm2d._check_input_dimNrL   r   r   r   r   r	   À   s   Cc                   @   s$   e Zd ZdZeZdd„ Zdd„ ZdS )r   aƒ  A :class:`torch.nn.InstanceNorm2d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm2d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, H, W)` or :math:`(C, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input)
    c                 C   s   dS rO   r   r&   r   r   r   r'   +  s    z$LazyInstanceNorm2d._get_no_batch_dimc                 C   s"   |  ¡ dkrtd |  ¡ ¡ƒ‚d S rP   rJ   r#   r   r   r   r%   .  s    ÿz#LazyInstanceNorm2d._check_input_dimN)r8   rA   rB   rM   r	   rN   r'   r%   r   r   r   r   r     s   c                   @   s    e Zd ZdZdd„ Zdd„ ZdS )r
   a  Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs
    with additional channel dimension) as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size C (where C is the input size) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm3d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm3d` is applied
        on each channel of channeled data like 3D models with RGB color, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm3d` usually don't apply affine
        transform.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm3d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm3d(100, affine=True)
        >>> input = torch.randn(20, 100, 35, 45, 10)
        >>> output = m(input)
    c                 C   s   dS ©NrQ   r   r&   r   r   r   r'   x  s    z InstanceNorm3d._get_no_batch_dimc                 C   s"   |  ¡ dkrtd |  ¡ ¡ƒ‚d S ©N)rQ   é   z'expected 4D or 5D input (got {}D input)rJ   r#   r   r   r   r%   {  s    ÿzInstanceNorm3d._check_input_dimNrL   r   r   r   r   r
   4  s   Cc                   @   s$   e Zd ZdZeZdd„ Zdd„ ZdS )r   a•  A :class:`torch.nn.InstanceNorm3d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm3d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input)
    c                 C   s   dS rR   r   r&   r   r   r   r'   Ÿ  s    z$LazyInstanceNorm3d._get_no_batch_dimc                 C   s"   |  ¡ dkrtd |  ¡ ¡ƒ‚d S rS   rJ   r#   r   r   r   r%   ¢  s    ÿz#LazyInstanceNorm3d._check_input_dimN)r8   rA   rB   rM   r
   rN   r'   r%   r   r   r   r   r     s   N)Ztorchr   Z	batchnormr   r   Ú r   r*   Ú__all__r   r   r   r	   r   r
   r   r   r   r   r   Ú<module>   s    ÿDL'M'M