U
    <cK                     @   s   d dl mZ ddlmZmZ ddlmZ dddd	d
dgZG dd deZ	G dd de	Z
G dd	 d	ee	ZG dd de	ZG dd
 d
ee	ZG dd de	ZG dd dee	ZdS )    )Tensor   )_LazyNormBase	_NormBase   )
functionalInstanceNorm1dInstanceNorm2dInstanceNorm3dLazyInstanceNorm1dLazyInstanceNorm2dLazyInstanceNorm3dc                       sj   e Zd Zdeeeeedd fddZdd	 Zd
d Zdd Z	dd Z
 fddZeedddZ  ZS )_InstanceNormh㈵>皙?FN)num_featuresepsmomentumaffinetrack_running_statsreturnc           	         s*   ||d}t t| j|||||f| d S )N)devicedtype)superr   __init__)	selfr   r   r   r   r   r   r   Zfactory_kwargs	__class__ A/tmp/pip-unpacked-wheel-gikjz4vx/torch/nn/modules/instancenorm.pyr   
   s    


    z_InstanceNorm.__init__c                 C   s   t d S NNotImplementedErrorr   inputr   r   r   _check_input_dim   s    z_InstanceNorm._check_input_dimc                 C   s   t d S r    r!   r   r   r   r   _get_no_batch_dim   s    z_InstanceNorm._get_no_batch_dimc                 C   s   |  |ddS )Nr   )_apply_instance_normZ	unsqueezeZsqueezer#   r   r   r   _handle_no_batch_input   s    z$_InstanceNorm._handle_no_batch_inputc              
   C   s.   t || j| j| j| j| jp"| j | j| j	S r    )
FZinstance_normrunning_meanrunning_varZweightZbiasZtrainingr   r   r   r#   r   r   r   r(   !   s          z"_InstanceNorm._apply_instance_normc              	      s   | dd }|d kr| jsg }	dD ]}
||
 }||kr"|	| q"t|	dkr|djddd |	D | jjd |	D ]}|| qzt	t
| ||||||| d S )	Nversion)r+   r,   r   a  Unexpected running stats buffer(s) {names} for {klass} with track_running_stats=False. If state_dict is a checkpoint saved before 0.4.0, this may be expected because {klass} does not track running stats by default since 0.4.0. Please remove these keys from state_dict. If the running stats are actually needed, instead set track_running_stats=True in {klass} to enable them. See the documentation of {klass} for details.z and c                 s   s   | ]}d  |V  qdS )z"{}"N)format).0kr   r   r   	<genexpr>;   s     z6_InstanceNorm._load_from_state_dict.<locals>.<genexpr>)namesklass)getr   appendlenr.   joinr   __name__popr   r   _load_from_state_dict)r   Z
state_dictprefixZlocal_metadatastrictZmissing_keysZunexpected_keysZ
error_msgsr-   Zrunning_stats_keysnamekeyr   r   r   r:   &   s2    
     z#_InstanceNorm._load_from_state_dict)r$   r   c                 C   s.   |  | | |  kr$| |S | |S r    )r%   dimr'   r)   r(   r#   r   r   r   forwardD   s    

z_InstanceNorm.forward)r   r   FFNN)r8   
__module____qualname__intfloatboolr   r%   r'   r)   r(   r:   r   r@   __classcell__r   r   r   r   r   	   s(         r   c                   @   s    e Zd ZdZdd Zdd ZdS )r   a  Applies Instance Normalization over a 2D (unbatched) or 3D (batched) input
    as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size `C` (where `C` is the number of features or channels of the input) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm1d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm1d` is applied
        on each channel of channeled data like multidimensional time series, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm1d` usually don't apply affine
        transform.

    Args:
        num_features: number of features or channels :math:`C` of the input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm1d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm1d(100, affine=True)
        >>> input = torch.randn(20, 100, 40)
        >>> output = m(input)
    c                 C   s   dS Nr   r   r&   r   r   r   r'      s    z InstanceNorm1d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S N)r      z'expected 2D or 3D input (got {}D input)r?   
ValueErrorr.   r#   r   r   r   r%      s    zInstanceNorm1d._check_input_dimNr8   rA   rB   __doc__r'   r%   r   r   r   r   r   M   s   Bc                   @   s$   e Zd ZdZeZdd Zdd ZdS )r   aq  A :class:`torch.nn.InstanceNorm1d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm1d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, L)` or :math:`(C, L)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input)
    c                 C   s   dS rG   r   r&   r   r   r   r'      s    z$LazyInstanceNorm1d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S rH   rJ   r#   r   r   r   r%      s    z#LazyInstanceNorm1d._check_input_dimN)r8   rA   rB   rM   r   cls_to_becomer'   r%   r   r   r   r   r      s   c                   @   s    e Zd ZdZdd Zdd ZdS )r	   a  Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs
    with additional channel dimension) as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size `C` (where `C` is the input size) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm2d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm2d` is applied
        on each channel of channeled data like RGB images, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm2d` usually don't apply affine
        transform.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, H, W)` or :math:`(C, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm2d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm2d(100, affine=True)
        >>> input = torch.randn(20, 100, 35, 45)
        >>> output = m(input)
    c                 C   s   dS NrI   r   r&   r   r   r   r'     s    z InstanceNorm2d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S N)rI      z'expected 3D or 4D input (got {}D input)rJ   r#   r   r   r   r%     s    zInstanceNorm2d._check_input_dimNrL   r   r   r   r   r	      s   Cc                   @   s$   e Zd ZdZeZdd Zdd ZdS )r   a  A :class:`torch.nn.InstanceNorm2d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm2d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, H, W)` or :math:`(C, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        - Output: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input)
    c                 C   s   dS rO   r   r&   r   r   r   r'   +  s    z$LazyInstanceNorm2d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S rP   rJ   r#   r   r   r   r%   .  s    z#LazyInstanceNorm2d._check_input_dimN)r8   rA   rB   rM   r	   rN   r'   r%   r   r   r   r   r     s   c                   @   s    e Zd ZdZdd Zdd ZdS )r
   a  Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs
    with additional channel dimension) as described in the paper
    `Instance Normalization: The Missing Ingredient for Fast Stylization
    <https://arxiv.org/abs/1607.08022>`__.

    .. math::

        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated per-dimension separately
    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
    of size C (where C is the input size) if :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    By default, this layer uses instance statistics computed from input data in
    both training and evaluation modes.

    If :attr:`track_running_stats` is set to ``True``, during training this
    layer keeps running estimates of its computed mean and variance, which are
    then used for normalization during evaluation. The running estimates are
    kept with a default :attr:`momentum` of 0.1.

    .. note::
        This :attr:`momentum` argument is different from one used in optimizer
        classes and the conventional notion of momentum. Mathematically, the
        update rule for running statistics here is
        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
        new observed value.

    .. note::
        :class:`InstanceNorm3d` and :class:`LayerNorm` are very similar, but
        have some subtle differences. :class:`InstanceNorm3d` is applied
        on each channel of channeled data like 3D models with RGB color, but
        :class:`LayerNorm` is usually applied on entire sample and often in NLP
        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
        transform, while :class:`InstanceNorm3d` usually don't apply affine
        transform.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input)

    Examples::

        >>> # Without Learnable Parameters
        >>> m = nn.InstanceNorm3d(100)
        >>> # With Learnable Parameters
        >>> m = nn.InstanceNorm3d(100, affine=True)
        >>> input = torch.randn(20, 100, 35, 45, 10)
        >>> output = m(input)
    c                 C   s   dS NrQ   r   r&   r   r   r   r'   x  s    z InstanceNorm3d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S N)rQ      z'expected 4D or 5D input (got {}D input)rJ   r#   r   r   r   r%   {  s    zInstanceNorm3d._check_input_dimNrL   r   r   r   r   r
   4  s   Cc                   @   s$   e Zd ZdZeZdd Zdd ZdS )r   a  A :class:`torch.nn.InstanceNorm3d` module with lazy initialization of
    the ``num_features`` argument of the :class:`InstanceNorm3d` that is inferred
    from the ``input.size(1)``.
    The attributes that will be lazily initialized are `weight`, `bias`,
    `running_mean` and `running_var`.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        momentum: the value used for the running_mean and running_var computation. Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters, initialized the same way as done for batch normalization.
            Default: ``False``.
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input)
    c                 C   s   dS rR   r   r&   r   r   r   r'     s    z$LazyInstanceNorm3d._get_no_batch_dimc                 C   s"   |  dkrtd|  d S rS   rJ   r#   r   r   r   r%     s    z#LazyInstanceNorm3d._check_input_dimN)r8   rA   rB   rM   r
   rN   r'   r%   r   r   r   r   r     s   N)Ztorchr   Z	batchnormr   r    r   r*   __all__r   r   r   r	   r   r
   r   r   r   r   r   <module>   s    DL'M'M