from enum import auto, Enum
from functools import partial
from typing import Any, Dict, Iterator, Tuple

import torch
import torch.nn as nn
from torch.autograd.graph import save_on_cpu
from torch.distributed.utils import _pack_kwargs, _replace_by_prefix, _unpack_kwargs
from torch.utils.checkpoint import checkpoint

# Name under which the wrapped module is registered; stripped from state_dict
# keys so checkpoints stay compatible with non-wrapped modules.
_CHECKPOINT_PREFIX = "_checkpoint_wrapped_module"


class CheckpointImpl(Enum):
    REENTRANT = auto()
    NO_REENTRANT = auto()


class CheckpointWrapper(torch.nn.Module):
    """
    An nn.Module that wraps another nn.Module with checkpointing. Note that this
    module is not meant to be used directly, but instead it is to be used
    through the ``checkpoint_wrapper`` function.
    FN)modcheckpoint_imploffload_to_cpuc                    s|   t    || _|| _|| _| jr*d | _n2|d krJtt| jtj	kd| _nt|f||| _| 
| j | j| jdd d S )N)Zuse_reentrantT)Zwith_module)super__init__r   r   r   checkpoint_fnr   r   r   r   Z_register_state_dict_hook_post_state_dict_hookZ"_register_load_state_dict_pre_hook_pre_load_state_dict_hook)selfr   r   r   r   checkpoint_fn_argscheckpoint_fn_kwargs	__class__r   r   r      s.    	


 zCheckpointWrapper.__init__)namereturnc                    s4   zt  |W S  tk
r.   t| j| Y S X dS )z-Forward missing attributes to wrapped module.N)r   __getattr__AttributeErrorgetattrr   )r   r$   r"   r   r   r&   ?   s    zCheckpointWrapper.__getattr__)keyr%   c                 C   s   | j |S )z=Forward indexing calls in case the module is a nn.Sequential.)r   __getitem__)r   r)   r   r   r   r*   F   s    zCheckpointWrapper.__getitem__c              
      s   j r2tdd j||W  5 Q R  S Q R X nTjtjkrr|i krrt||\}  fdd}j|f| S jjf||S d S )NT)Z
pin_memoryc                     s   t |  \}}j||S )N)r   r   )inputsZunpacked_argsZunpacked_kwargsZ
kwarg_keysr   r   r   my_functionY   s      z.CheckpointWrapper.forward.<locals>.my_function)r   r	   r   r   r   r   r
   r   )r   argskwargsZ	flat_argsr-   r   r,   r   forwardJ   s$     zCheckpointWrapper.forward)r%   c                 /   s4   t  j||D ] \}}|t dd|fV  qdS )z
        Overrides :meth:`named_parameters()` to intercept parameter names and
        remove all occurrences of _CHECKPOINT_PREFIX.
        """
        for param_name, param in super().named_parameters(*args, **kwargs):
            yield param_name.replace(f"{_CHECKPOINT_PREFIX}.", ""), param

    @staticmethod
    def _post_state_dict_hook(
        module: nn.Module,
        state_dict: Dict[str, Any],
        prefix: str,
        *args: Any,
    ) -> Dict[str, Any]:
        """
        _post_state_dict_hook() is called after the state_dict() of this
        FSDP module is executed. For ``checkpoint_wrapper``, it will strip
        checkpoint-wrapped module prefix so that this module can be loaded into
        non-checkpointed modules. It would still be able to be loaded into
        checkpoint-wrapped modules as this class adds the prefix back before
        loading the state_dict.
        """
        _replace_by_prefix(state_dict, f"{prefix}{_CHECKPOINT_PREFIX}.", prefix)
        return state_dict

    @staticmethod
    def _pre_load_state_dict_hook(
        module: nn.Module,
        state_dict: Dict[str, Any],
        prefix: str,
        *args: Any,
    ) -> None:
        """
        ``_pre_load_state_dict_hook`` is called before ``self._load_from_state_dict()``
        is called. For ``checkpoint_wrapper``, it will add back the module
        prefix so that non-checkpointed modules can be loaded into
        checkpoint_wrapper modules properly.
        """
        _replace_by_prefix(state_dict, prefix, f"{prefix}{_CHECKPOINT_PREFIX}.")


def checkpoint_wrapper(
    module: torch.nn.Module,
    checkpoint_impl: CheckpointImpl = CheckpointImpl.REENTRANT,
    offload_to_cpu: bool = False,
    checkpoint_fn=None,
    *checkpoint_fn_args,
    **checkpoint_fn_kwargs,
) -> torch.nn.Module:
    """
    A convenience wrapper for activation checkpointing. If the module is wrapped
    with this function, all subsequent calls to the module will automatically
    perform checkpointing without the user having to explicitly call ``checkpoint``
    function.
    Usage::
        checkpointed_module = checkpoint_wrapper(module)
        outputs = checkpointed_module(inputs)
    Args:
        module (nn.Module):
            The module to be wrapped
        checkpoint_impl (Optional[CheckpointImpl]):
            The checkpointing implementation to use. Note that this will only
            be passed into the ``torch.utils.checkpoint.checkpoint``
            implementation, and is ignored if a custom ``checkpoint_fn`` is
            specified. Note that for implementations using reentrant checkpoint
            from ``torch.utils.checkpoint``, keyword arguments will only be
            supported if ``checkpoint_impl`` is passed as ``CheckpointImpl.REENTRANT``.
        offload_to_cpu (Optional[bool]):
            Whether to offload activations of this wrapped module to CPU. Note
            that if this is specified, ``checkpoint_impl`` and ``checkpoint_fn``
            arguments will be ignored in favor of the activations being
            offloaded to CPU. Default is ``False``. Wrappers with activation
            offload can be composed with ones that do recomputation-based
            checkpoint to trade off increased compute versus increased CPU
            memory usage and additional H2D transfers.
        checkpoint_fn (Optional[Callable]):
            Functional checkpoint implementation to use. If this is specified,
            it will be used over the default ``torch.utils.checkpoint.checkpoint``
            implementation and the `checkpoint_impl` argument will be ignored.
        *checkpoint_fn_args: (Sequence[Any]): Arguments to pass into `checkpoint_fn`.
        **checkpoint_fn_kwargs: (Dict[str, Any]): Keyword arguments to pass into `checkpoint_fn`.

    Returns:
        (nn.Module):
            Wrapped module
    """
    return CheckpointWrapper(
        module,
        checkpoint_impl,
        offload_to_cpu,
        checkpoint_fn,
        *checkpoint_fn_args,
        **checkpoint_fn_kwargs,
    )


def apply_activation_checkpointing(
    model,
    checkpoint_wrapper_fn=checkpoint_wrapper,
    check_fn=lambda _: True,
):
    """
    Applies :func:`checkpoint_wrapper` to modules within `model` based on a user-defined
    configuration. For each module within `model`, the `check_fn` is used to decide
    whether `module` should be wrapped with :func:`checkpoint_wrapper` or not.

    Note::
        This function modifies `model` in place and replaces appropriate layers with
        their checkpoint-wrapped modules.
    Note::
        This function will not wrap the overall root module. If this is needed, please directly use
        :class:`CheckpointWrapper`.
    Usage::
        model = nn.Sequential(
            nn.Linear(10, 10), nn.Linear(10, 10), nn.Linear(10, 10)
        )
        check_fn = lambda l: isinstance(l, nn.Linear)
        apply_activation_checkpointing(model, checkpoint_wrapper_fn=checkpoint_wrapper, check_fn=check_fn)
    Args:
        model (nn.Module):
            The model whose submodules should be wrapped with activation checkpointing.
        checkpoint_wrapper_fn (Optional[Callable[nn.Module]]):
            A ``Callable`` which will wrap modules
        check_fn (Optional[Callable[nn.Module, nn.Module]]):
            A lambda function which will be passed each child submodule of ``model`` and returns
            ``True`` or ``False`` depending on whether the submodule should be wrapped.
    Returns: None (`model` is modified in place)
    """
    # Import inside the function to avoid a circular import between FSDP and
    # checkpoint_wrapper.
    from torch.distributed.fsdp.wrap import _recursive_wrap, lambda_auto_wrap_policy

    _recursive_wrap(
        module=model,
        auto_wrap_policy=partial(lambda_auto_wrap_policy, lambda_fn=check_fn),
        wrapper_cls=checkpoint_wrapper_fn,
        ignored_modules=set(),
        ignored_params=set(),
        only_wrap_children=True,
    )