U
    JcV                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 ddlmZmZ dZdZdgZejeeef eeeeef d	d
dZeeef eeddddZG dd dejZdS )    N)AnyDict	GeneratorList)_replace_by_prefix   )FlatParamHandleHandleConfig
flat_param_fpw_moduleFlattenParamsWrapper)module
state_dictprefixargsreturnc                 G   s   t ||t d | |S )z
    _post_state_dict_hook() is called after the state_dict() is executed
    and before returning the state_dict to the users.
    This API post-processes the keys of the state_dict to remove the
    FlattenParamsWrapper internal prefix.
    .)r   
FPW_MODULE)r   r   r   r    r   Q/tmp/pip-unpacked-wheel-gikjz4vx/torch/distributed/fsdp/flatten_params_wrapper.py_post_state_dict_hook   s    
r   )r   r   r   r   c                 G   s~   t | ||t d  |t dt  }t|  D ]D}||r4|dd }|tshtd| t | |||  q4dS )z
    _pre_load_state_dict_hook() is called before the _load_from_state_dict() is
    executed. This API pre-processes the keys of the state_dict to add the
    FlattenParamsWrapper internal prefix.
    r   z4Expected key to contain flat_param, but key name is N)r   r   
FLAT_PARAMlistkeys
startswithsplitAssertionError)r   r   r   Zflat_param_keykZ	last_partr   r   r   _pre_load_state_dict_hook&   s    
r   c                       s   e Zd ZdZejeej ej	e
dd fddZeedddZeedd	d
ZeedddZejedddZeed fddZeedddZeeedddZ  ZS )r   an  
    This is a wrapper for flattening parameters in a ``nn.Module`` 's subtree
    into a single flattened parameter and is based on [1]. This is used for
    :class:`FullyShardedDataParallel` 's recursive wrapping.
    [1] https://github.com/SsnL/PyTorch-Reparam-Module

    Args:
        module (nn.Module): Module to wrap.
        params (List[nn.Parameter]): Parameters in ``module`` 's subtree to
            flatten into a single flattened parameter.
        device (torch.device): The compute and communication device for this
            wrapper's handle.
        config (HandleConfig): A config customizing this wrapper's handle based
            on FSDP's available features.

    Attributes:
        flat_param (Optional[FlatParameter]): The flattened parameter.
            ``flat_param`` is ``None`` either when (1) this wrapper manages no
            parameters or (2) the wrapped module's parameters are unflattened.
        _fpw_module (nn.Module): The wrapped module.
        _flat_param_handle (FlatParamHandle): A handle for the flattened
            parameter; only present if this wrapper manages parameters.
    N)r   paramsdeviceconfigr   c                    s   t    || _d | _| t | t t|dkr:d S t	||||| _
| j
j| _t| t| jkshtt| t| jks|td S )Nr   )super__init__r   r
   Z_register_state_dict_hookr   Z"_register_load_state_dict_pre_hookr   lenr   _flat_param_handlegetattrr   r   r   )selfr   r    r!   r"   	__class__r   r   r$   V   s    



zFlattenParamsWrapper.__init__)r   c                 C   s
   t | dS )z4Returns whether this wrapper manages any parameters.r&   )hasattrr(   r   r   r   
has_paramsn   s    zFlattenParamsWrapper.has_paramsc                 C   s   t | dstd| jS )Nr&   zTAccessing the handle of a `FlattenParamsWrapper` that does not manage any parameters)r+   r   r&   r,   r   r   r   handles   s    zFlattenParamsWrapper.handlec                 C   s   | j S )z&Returns the wrapped module (like DDP).)r   r,   r   r   r   r   {   s    zFlattenParamsWrapper.modulec              	   c   sN   t | dddkrdV  n2| `z | j  dV  W 5 Q R X W 5 | jj| _X dS )a  
        Assumes that the flattened parameter is unsharded. When in the context,
        unflattens the original parameters as ``nn.Parameter`` views into the
        flattened parameter and de-registers the flattened parameter. After the
        context, restores the original parameters as ``Tensor`` views into the
        flattened parameter and re-registers the flattened parameter.
        r
   N)r'   r
   r&   unflatten_as_paramsr,   r   r   r   r/      s    	z(FlattenParamsWrapper.unflatten_as_params)namer   c                    s4   zt  |W S  tk
r.   t| j| Y S X dS )zAForward missing attributes of this wrapper to the wrapped module.N)r#   __getattr__AttributeErrorr'   r   )r(   r0   r)   r   r   r1      s    z FlattenParamsWrapper.__getattr__)keyr   c                 C   s   | j |S )zhForward indexing calls to the wrapped module in case the wrapped
        module is an ``nn.Sequential``.)r   __getitem__)r(   r3   r   r   r   r4      s    z FlattenParamsWrapper.__getitem__)inputskwinputsr   c                 O   s$   | j d k	r| jjdd | j||S )NF)Z	as_params)r
   r&   Z
_unflattenr   )r(   r5   r6   r   r   r   forward   s    
zFlattenParamsWrapper.forward)__name__
__module____qualname____doc__nnModuler   	Parametertorchr!   r	   r$   propertyboolr-   r   r.   r   r   
contextlibcontextmanagerr   r/   strr1   intr4   r7   __classcell__r   r   r)   r   r   =   s$   )rB   typingr   r   r   r   r?   Ztorch.nnr<   Ztorch.distributed.utilsr   r
   r   r	   r   r   __all__r=   rD   r   r   r   r   r   r   r   <module>	   s(    
  

