U
    KcU                     @   s  d dl Z d dlmZ d dl mZ d dlmZmZ ddlmZm	Z	 d dl
Z
ejee ddddZejee edd	d
dZejee ddddZdd Zdd ZejdddZejdddZejdddZd?ejee eedf ddddZejee ddd Zd@ejee eedf dd!d"d#ZdAejee ee d%d&d'Zejd(d)d*Zejd(d+d,ZG d-d. d.ejZG d/d0 d0ejZdBejed1d2d3ZdCejed1d4d5Zd6d7 Z d8d9 Z!dDd;d<Z"dEd=d>Z#dS )F    N)Tensor)ListTuple   )_named_parameters_named_buffers)objnamesreturnc                 C   s<   t |dkrt| |d  ntt| |d |dd  dS )z
    Deletes the attribute specified by the given list of names.
    For example, to delete the attribute obj.conv.weight,
    use _del_nested_attr(obj, ['conv', 'weight'])
    r   r   N)lendelattr_del_nested_attrgetattrr   r	    r   B/tmp/pip-unpacked-wheel-gikjz4vx/functorch/_src/make_functional.pyr      s    r   )r   r	   valuer
   c                 C   s@   t |dkrt| |d | ntt| |d |dd | dS )z
    Set the attribute specified by the given list of names to value.
    For example, to set the attribute obj.conv.weight,
    use _del_nested_attr(obj, ['conv', 'weight'], value)
    r   r   N)r   setattr_set_nested_attrr   )r   r	   r   r   r   r   r      s    r   c                 C   s:   t |dkrt| |d S tt| |d |dd   d S )Nr   r   )r   r   _get_nested_attrr   r   r   r   r   +   s    r   c                   C   s   t dd S )Na$  make_functional(module): we don't yet support models that do parameter tying (also sometimes known as weight sharing). Please try to rewrite your model by replacing all instances of the tied parameter with another and/or comment your support in https://github.com/pytorch/functorch/issues/446)RuntimeErrorr   r   r   r   raise_parameter_tying_error2   s    r   c                 C   s   dd | D } dd |D }t |  }t | }||sBti }|  D ]\}}|g f||< qN| D ],\}}||kst|| d |d qldd | D }|S )a[  
    named_params is a dictionary of tensors: {'A': A, 'B': B}
    tied_named_params is another dictionary of tensors {'A': A, 'B': B, 'B_tied': B}
    with potentially tied (or 'duplicated') tensors

    This function creates a mapping from the names in named_params to the
    names in tied_named_params: {'A': ['A'], 'B': ['B', 'B_tied']}.
    c                 S   s   i | ]\}}||qS r   r   .0kvr   r   r   
<dictcomp>D   s      z$create_names_map.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r   r   r   r   r   r   r   E   s      r   .c                 S   s   i | ]\}}||qS r   r   )r   keyr   r   r   r   r   Q   s      )setkeysissubsetAssertionErroritemsappendsplitvalues)Znamed_paramsZtied_named_paramsZtensors_dict_keysZtied_tensors_dict_keysZtensor_to_mappingr   Ztensorresultr   r   r   create_names_map;   s    	r(   modc                 C   s   t || dd}t | }t||}i }|D ]@\}}||krR|tj|dd||< || }	t| |d|	 q,t|dkrd\}
}nt| \}
}||
|fS )NF)Zremove_duplicatemeta)devicer   r   )r   r   )tupler(   torchZ
empty_liker   r%   r   zip)r*   Z_named_membersZnamed_memberssubclassZall_named_members	names_mapmemonamepreplacementr	   paramsr   r   r   _extract_membersU   s    


r7   c                 C   s   t | t| jtjS )aZ  
    This function removes all the Parameters from the model and
    return them as a tuple as well as their original attribute names.
    The weights must be re-loaded with `load_weights` before the model
    can be used again.
    Note that this function modifies the model in place and after this
    call, mod.parameters() will be empty.
    )r7   r   Znamed_parametersnn	Parameterr)   r   r   r   extract_weightsi   s    	r:   c                 C   s   t | t| jdd S )Nc                 S   s   | S Nr   )xr   r   r   <lambda>v       z!extract_buffers.<locals>.<lambda>)r7   r   Znamed_buffersr)   r   r   r   extract_buffersu   s    r?   F.)r*   r	   r6   r
   c                 C   sH   t ||D ]8\}}|r t|}t| |d t| |d| q
dS )a	  
    Reload a set of weights so that `mod` can be used again to perform a forward pass.
    Note that the `params` are regular Tensors (that can have history) and so are left
    as Tensors. This means that mod.parameters() will still be empty after this call.
    r   N)r/   r8   r9   r   r%   r   )r*   r	   r6   	as_paramsr3   r4   r   r   r   load_weightsy   s
    
rA   )r*   r1   c           	      C   sd   g }t | |D ]L\\}}}t|D ]6\}}|dkrF|t| | t| | t| || q&q|S )Nr   )r/   r#   	enumerater$   r   r   r   )	r*   r1   elemsr'   _Z
attr_nameselemi	attr_namer   r   r   _swap_state   s    
rH   )r*   r	   buffersr
   c                 C   s*   t ||D ]\}}t| |d| q
d S )Nr   )r/   r   r%   )r*   r	   rI   r@   r3   r4   r   r   r   load_buffers   s    rJ   r   )modelweightsweight_namesc                 C   sP   t |t |kstt| || t |dkrLt |t |ks@tt| || | S )zload_state(model, weights, weight_names, buffers=(), buffer_names=()) -> model

    load_state takes `weights` and `buffers` and assigns them to the model.
    This is the inverse operation of `make_functional_deprecated_v1`.
    r   )r   r"   rA   rJ   )rK   rL   rM   rI   buffer_namesr   r   r   
load_state   s    	rO   )rK   c                    sF   t  }t|dkr tdt\} } fdd}|| fS )a  make_functional_deprecated_v1(model) -> weights, func, weight_names

    Given an nn.Module, make_functional_deprecated_v1 extracts the state (weights)
    and returns a functional version of the model, `func`. This makes
    it so that it is possible use transforms over the parameters of
    `model`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, func, _ = make_functional_deprecated_v1(model)
    func(weights, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, _, func = make_functional_deprecated_v1(model)
    grad_weights = grad(func)(weights, (x,))
    ```

    To put the state back into a model, use `load_state`.
    r   zmake_functional_deprecated_v1(model): `model` has buffers. Please use make_functional_with_buffers_deprecated_v1(model) instead.c                    s   t }t| |  || S r;   )copydeepcopyrA   )rL   datamutable_modelZdescriptorsrK   r   r   fun   s    
z*make_functional_deprecated_v1.<locals>.fun)listrI   r   r   r:   )rK   rI   rL   rD   rU   r   rT   r   make_functional_deprecated_v1   s    rW   c                    s:   t \}}t\} } fdd}||| fS )a`  make_functional_with_buffers_deprecated_v1(model) -> weights, buffers, func, weight_names, buffer_names

    Given an nn.Module, make_functional_with_buffers_deprecated_v1 extracts the state (weights and buffers)
    and returns a functional version of the model, `func`.

    `func` can be invoked as follows:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    ```

    And here is an example of applying the grad transform:
    ```
    x = torch.randn(4, 3)
    model = nn.Linear(3, 3)
    weights, buffers, func, _, _ = make_functional_with_buffers_deprecated_v1(model)
    func(weights, buffers, (x,))
    grad_weights = grad(func)(weights, buffers, (x,))
    ```

    To put the state back into a model, use `load_state`.
    c                    s*   t }t||  t| | || S r;   )rP   rQ   rA   rJ   )rL   rI   rR   rS   Zbuf_descriptorsrK   Zweight_descriptorsr   r   rU      s    
z7make_functional_with_buffers_deprecated_v1.<locals>.fun)r:   r?   )rK   rL   rD   rI   rU   r   rX   r   *make_functional_with_buffers_deprecated_v1   s    rY   c                       s6   e Zd ZdZ fddZed	ddZdd Z  ZS )
FunctionalModuleWithBufferszW
    This is the callable object returned by :func:`make_functional_with_buffers`.
    c                    s:   t t|   || _|| _|| _t|| _| j| d S r;   )	superrZ   __init__stateless_modelparam_namesrN   dictall_names_mapupdate)selfr]   r^   rN   param_names_mapbuffer_names_map	__class__r   r   r\      s    
z$FunctionalModuleWithBuffers.__init__Fc           
      C   sT   t | }t|\}}}t|\}}}|r>|D ]}	|	d q.t|||||||fS NF)rP   rQ   r:   r?   requires_grad_rZ   )
rK   disable_autograd_tracking
model_copyr6   r^   rc   rI   rN   rd   paramr   r   r   _create_from  s    
 z(FunctionalModuleWithBuffers._create_fromc              
   O   sD   t | j| jt|t| }z| j||W S t | j| j| X d S r;   )rH   r]   r`   rV   )rb   r6   rI   argskwargs	old_stater   r   r   forward  s    z#FunctionalModuleWithBuffers.forward)F	__name__
__module____qualname____doc__r\   staticmethodrl   rp   __classcell__r   r   re   r   rZ      s
   
rZ   c                       s6   e Zd ZdZ fddZed	ddZdd Z  ZS )
FunctionalModulezJ
    This is the callable object returned by :func:`make_functional`.
    c                    s$   t t|   || _|| _|| _d S r;   )r[   rx   r\   r]   r^   r1   )rb   r]   r^   r1   re   r   r   r\   %  s    zFunctionalModule.__init__Fc                 C   s@   t | }t|\}}}|r0|D ]}|d q t||||fS rg   )rP   rQ   r:   rh   rx   )rK   ri   rj   r6   r^   r1   rk   r   r   r   rl   +  s    
zFunctionalModule._create_fromc              
   O   s8   t | j| j|}z| j||W S t | j| j| X d S r;   )rH   r]   r1   )rb   r6   rm   rn   ro   r   r   r   rp   5  s    zFunctionalModule.forward)Frq   r   r   re   r   rx      s
   	rx   rK   ri   c                 C   s.   t |  }t|dkr tdtj| |dS )a  make_functional(model, disable_autograd_tracking=False) -> func, params

    Given a ``torch.nn.Module``, :func:`make_functional` extracts the state
    (params) and returns a functional version of the model, ``func``. This
    makes it so that it is possible use transforms over the parameters of
    ``model``.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)
        func(params, x)

    And here is an example of applying the grad transform over the parameters
    of a model.

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params = make_functional(model)

        def compute_loss(params, x, t):
            y = func(params, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, x, t)

    If the model has any buffers, please use :func:`make_functional_with_buffers` instead.

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradients tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default, False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``, then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    r   zdmake_functional(model): `model` has buffers. Please use make_functional_with_buffers(model) instead.ri   )rV   rI   r   r   rx   rl   )rK   ri   rI   r   r   r   make_functional?  s    9r{   c                 C   s   t j| |dS )a  make_functional_with_buffers(model, disable_autograd_tracking=False) -> func, params, buffers

    Given a ``torch.nn.Module``, make_functional_with_buffers extracts the
    state (params and buffers) and returns a functional version of the model
    ``func`` that can be invoked like a function.

    ``func`` can be invoked as follows:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers

        x = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)
        func(params, buffers, x)

    And here is an example of applying the grad transform over the parameters
    of a model:

    .. code-block:: python

        import torch
        import torch.nn as nn
        from functorch import make_functional_with_buffers, grad

        x = torch.randn(4, 3)
        t = torch.randn(4, 3)
        model = nn.Linear(3, 3)
        func, params, buffers = make_functional_with_buffers(model)

        def compute_loss(params, buffers, x, t):
            y = func(params, buffers, x)
            return nn.functional.mse_loss(y, t)

        grad_weights = grad(compute_loss)(params, buffers, x, t)

    Args:
        model (torch.nn.Module): Input model.
        disable_autograd_tracking (bool): Flag to disable gradients tracking for output parameters.
            The returned params are unrelated to the set of params from the original model. If False (default),
            the params will have ``requires_grad=True`` on them (aka they will be trackable with regular
            PyTorch autograd), matching the requires_grad-ness of the params from the original model.
            Otherwise, the returned params will have ``requires_grad=False``. Default, False.
            If you plan on using regular PyTorch autograd (e.g., if you want to call ``.backward()`` or
            ``torch.autograd.grad()``, then set ``disable_autograd_tracking=False``.
            Otherwise, if you're only planning on using functorch's gradient transforms,
            then please set ``disable_autograd_tracking=True`` to avoid unnecessarily tracking
            history with PyTorch autograd.

    rz   )rZ   rl   ry   r   r   r   make_functional_with_buffers  s    6r|   c                 C   s"   t t|  } t dd | D }|S )Nc                 s   s   | ]}t | V  qd S r;   r.   stackdetachr   Zshardsr   r   r   	<genexpr>  s     z"transpose_stack.<locals>.<genexpr>)r-   r/   )Ztuple_of_tuple_of_tensorsresultsr   r   r   transpose_stack  s    r   c                    s   t | dkrtdtdd | D s@tdd | D s@tdt| d  t fdd| D sjtdtd	d
 | D  \}}}t|}t|}|d ||fS )a(  combine_state_for_ensemble(models) -> func, params, buffers

    Prepares a list of torch.nn.Modules for ensembling with :func:`vmap`.

    Given a list of ``M`` ``nn.Modules`` of the same class, stacks all of their
    parameters and buffers together to make ``params`` and ``buffers``.
    Each parameter and buffer in the result will have an additional dimension
    of size ``M``.

    :func:`combine_state_for_ensemble` also returns ``func``, a functional
    version of one of the models in :attr:`models`. One cannot directly run
    ``func(params, buffers, *args, **kwargs)`` directly, you probably want to
    use ``vmap(func, ...)(params, buffers, *args, **kwargs)``

    Here's an example of how to ensemble over a very simple model:

    .. code-block:: python

        num_models = 5
        batch_size = 64
        in_features, out_features = 3, 3
        models = [torch.nn.Linear(in_features, out_features) for i in range(num_models)]
        data = torch.randn(batch_size, 3)

        fmodel, params, buffers = combine_state_for_ensemble(models)
        output = vmap(fmodel, (0, 0, None))(params, buffers, data)

        assert output.shape == (num_models, batch_size, out_features)

    .. warning::
        All of the modules being stacked together must be the same (except for
        the values of their parameters/buffers). For example, they should be in the
        same mode (training vs eval).

        This API is subject to change -- we're investigating better ways to
        create ensembles and would love your feedback how to improve this.
    r   z?combine_state_for_ensemble: Expected at least one model, got 0.c                 s   s   | ]}|j V  qd S r;   Ztrainingr   mr   r   r   r     s     z-combine_state_for_ensemble.<locals>.<genexpr>c                 s   s   | ]}|j  V  qd S r;   r   r   r   r   r   r     s     zTcombine_state_for_ensemble: Expected all models to have the same training/eval mode.c                 3   s   | ]}t | kV  qd S r;   )typer   Z
model0_typr   r   r     s     zHcombine_state_for_ensemble: Expected all models to be of the same class.c                 S   s   g | ]}t |qS r   )r|   r   rK   r   r   r   
<listcomp>  s   z.combine_state_for_ensemble.<locals>.<listcomp>)r   r   allr   r/   r   )modelsfuncsr6   rI   r   r   r   combine_state_for_ensemble  s    &$r   cpuc                    s    fdd}|S )Nc            	         s   t dkrtdt dkr8 }t|S d }|dkrXtd| dt fddt|D }t \}}}tdd |D }tt| }td	d |D }|||fS )
N   ,NYI: ensemble_shape with more than 1 elementr   num_models  should be > 0c                 3   s   | ]}  V  qd S r;   tor   rD   rm   r,   rn   model_classr   r   r     s   z3functional_init.<locals>.wrapped.<locals>.<genexpr>c                 s   s   | ]}t |d  V  qdS )r   N)rW   r   r   r   r   r     s     c                 s   s   | ]}t | V  qd S r;   r}   r   r   r   r   r     s     )r   
ValueErrorr   rW   r-   ranger/   )	rm   rn   rK   
num_modelsr   rD   fnr	   rL   r,   ensemble_shaper   rm   rn   r   wrapped  s     z functional_init.<locals>.wrappedr   r   r   r,   r   r   r   r   functional_init  s    r   c                    s    fdd}|S )Nc                     s   t dkrtdt dkr8 }t|S d }|dkrXtd| dt fddt|D }t \}}}}}ttdd |D  \}	}
tt|	 }	td	d |	D }	tt|
 }
td
d |
D }
|	|
|||fS )Nr   r   r   r   r   c                 3   s   | ]}  V  qd S r;   r   r   r   r   r   r     s   z@functional_init_with_buffers.<locals>.wrapped.<locals>.<genexpr>c                 s   s   | ]}t |d d V  qd S )Nr   )rY   r   r   r   r   r     s   c                 s   s   | ]}t | V  qd S r;   r}   r   r   r   r   r     s     c                 s   s   | ]}t | V  qd S r;   r}   r   r   r   r   r     s     )r   r   r   rW   r-   r   rY   r/   )rm   rn   rK   r   r   rD   r   rM   rN   rL   rI   r   r   r   r   
  s*    
z-functional_init_with_buffers.<locals>.wrappedr   r   r   r   r   functional_init_with_buffers	  s    r   )F)F)r   r   )F)F)r   r   )r   r   )$r.   Ztorch.nnr8   r   typingr   r   Znamed_members_polyfillr   r   rP   Modulestrr   r   r   r   r(   r7   r:   r?   rA   rH   rJ   rO   rW   rY   rZ   rx   boolr{   r|   r   r   r   r   r   r   r   r   <module>   sB   	$$    (%,@96
