import collections
import dataclasses
import warnings
from contextlib import contextmanager, nullcontext
from functools import wraps
from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
import torch.fx.traceback as fx_traceback
import torch.nn as nn
import torch.utils._pytree as pytree
import torch.utils.dlpack
from torch import Tensor
from torch._dispatch.python import enable_python_dispatcher
from torch._subclasses import CrossRefFakeMode, FakeTensorMode
from torch.fx import Interpreter, immutable_collections
from torch.fx.experimental.symbolic_shapes import ShapeEnv
from torch.nn.utils import stateless

from functorch import make_fx
from functorch.experimental import functionalize

from . import config
from .named_members_polyfill import _named_buffers, _named_parameters
from .partitioners import default_partition

try:
    from torchdynamo import disable as disable_torchdynamo
except ImportError:

    def disable_torchdynamo(x):
        return x


try:
    from torchdynamo.utils import dynamo_timed
except ImportError:

    def dynamo_timed(x):
        return x


# Teach pytree how to flatten/unflatten the immutable collections used by fx
# graphs, so traced outputs can round-trip through pytree.
pytree._register_pytree_node(
    immutable_collections.immutable_list,
    lambda x: (list(x), None),
    lambda x, c: immutable_collections.immutable_list(x),
)
pytree._register_pytree_node(
    immutable_collections.immutable_dict,
    lambda x: (list(x.values()), list(x.keys())),
    lambda x, c: immutable_collections.immutable_dict(
        {key: value for key, value in zip(c, x)}
    ),
)

aten = torch.ops.aten
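# Illustrative sketch (not part of the original module) of what the
# registrations above buy us; `il`, `leaves`, and `spec` are hypothetical names:
#
#   il = immutable_collections.immutable_list([1, 2])
#   leaves, spec = pytree.tree_flatten(il)          # leaves == [1, 2]
#   assert pytree.tree_unflatten(leaves, spec) == il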
d V  W 5 t j|  t j rXt j| X d S r   )torchclonerandomZget_rng_stateZcudaZis_availableZset_rng_state)Z	rng_stateZcuda_rng_stater   r   r    preserve_rng_state:   s    


r4   F)rootsc                    sr   dd }dd   fdd}dd }|| D ]@}|j d	g }||| | }|d
 ||| q,d S )Nc                 s   s   | sd S t  }t }| D ] }|d k	r|| || q|r| }|jD ].\}}||ksN|d krhqN|| || qN|V  q<d S r   )setcollectionsdequeaddappendpopleftZnext_functions)r5   seenqnodefnZ_idxr   r   r    
iter_graphL   s     

z7setup_stacktrace_preservation_hooks.<locals>.iter_graphc                    s    fdd}|S )Nc                      s   t   dad S )NF)fx_tracebackset_stack_tracecallback_setr   saved_stack_r   r    callbacka   s    
zKsetup_stacktrace_preservation_hooks.<locals>.get_callback.<locals>.callbackr   )rE   rF   r   rD   r    get_callback`   s    z9setup_stacktrace_preservation_hooks.<locals>.get_callbackc                    s    fdd}|S )Nc                    s0   t s"tjjjj t  da t	 d S )NT)
rC   r1   autogradvariableVariableZ_execution_engineZqueue_callbackrA   format_stackrB   )grad_output)rG   stack_r   r    prehooki   s    
zIsetup_stacktrace_preservation_hooks.<locals>.get_prehook.<locals>.prehookr   )rM   rN   rG   )rM   r    get_prehookh   s    z8setup_stacktrace_preservation_hooks.<locals>.get_prehookc                    s    fdd}|S )Nc                    s   t   d S r   )rA   rB   )Z
grad_inputrL   special_stack_r   r    posthookw   s    zKsetup_stacktrace_preservation_hooks.<locals>.get_posthook.<locals>.posthookr   )rR   rS   r   rQ   r    get_posthookv   s    z9setup_stacktrace_preservation_hooks.<locals>.get_posthookZ
traceback_z<Gradient addition node due to multiple use of tensor around:)metadatagetZregister_prehookcopyr:   register_hook)r5   r@   rP   rT   r>   Zforward_node_stackZspecial_stackr   rO   r    #setup_stacktrace_preservation_hooksK   s    rY   c                    s2   t t t t tt t t t f d fdd}|S )N)primalstangentsreturnc              	      s   |  }g }g }| D ],}t |to&|j}|| |r|| qt|t|ksVtg }g }t||D ],\}	}
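# Net effect of the hooks above: while autograd runs under fx tracing, each
# backward node first installs the stack trace recorded for its forward op
# (prehook), gradient-accumulation nodes get a marker trace (posthook), and a
# queued engine callback restores the ambient trace once backward finishes, so
# nodes in the traced backward graph point back at forward source lines.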
t |	trh|	jrh||	 ||
 qhtdd |D  g }|rt	  t
jj|||dd}W 5 Q R X t| | fdd|D fS )Nc                 S   s   g | ]
}|j qS r   )Zgrad_fn)r+   outr   r   r    
<listcomp>   s     zQcreate_joint_forward_backward.<locals>.joint_forward_backward.<locals>.<listcomp>T)Zgrad_outputsZallow_unusedc                    s   g | ]}|rt  nd qS r   )nextr+   iZbackward_out_iterr   r    r^      s    )
isinstancer   requires_gradr:   lenAssertionErrorr0   rY   rA   override_stack_tracer1   rH   Zgraditer)rZ   r[   ZoutsZgrad_primalsZinputs_needs_gradspZis_grad_tensorZneeded_outsZneeded_tangentsr]   ZtangentZbackward_outr?   rb   r    joint_forward_backward   s:    


z=create_joint_forward_backward.<locals>.joint_forward_backward)r	   r   r   )r?   rk   r   rj   r    create_joint_forward_backward   s
     (rl   c                 C   s&   t | trt| S t | tr | S | gS r   )rc   tupler#   r   r   r   r    normalize_as_list   s
    

rn   graph_being_compiled	nth_graphmodel
model_namec                 C   s   | a d S r   )rr   )namer   r   r    set_model_name   s    rt   )r\   c                   C   s   t tttfS r   )r#   ro   rr   rp   r   r   r   r    get_aot_compilation_context   s    ru   c                   C   s   t  ddt dt S )z7
    Returns the name of the graph being compiled.
    _)rr   joinro   rp   r   r   r   r    get_aot_graph_name   s    rx   c                 c   s    | ga d V  |rtd7 ag a d S )Nr   )ro   rp   )Z
graph_nameZincrement_indexr   r   r    track_graph_compiling   s
    ry   c                    s    fdd}d|_ |S )Nc                    s    |  S r   r   argsfr   r    g   s    zmake_boxed_func.<locals>.gT)_boxed_call)r}   r~   r   r|   r    make_boxed_func   s    r   c                    s   t   fdd}|S )Nc                    s    | |}t |} | S r   )r   )fx_gZinpsZout_fcompilerr   r    r}      s    
zmake_boxed_compiler.<locals>.fr   )r   r}   r   r   r    make_boxed_compiler   s    r   c                 C   sL   |st |}t|t stt| dr2t| |}ntd t| | }|S )Nr   a  Your compiler for AOTAutograd is returning a a function that doesn't take boxed arguments. Please wrap it with functorch.compile.make_boxed_func or handle the boxed arguments yourself. See https://github.com/pytorch/pytorch/pull/83137#issuecomment-1211320670 for rationale.)r#   rc   rf   hasattrrn   warningswarn)r}   r{   
steal_argsr]   r   r   r    call_func_with_args   s    
r   c                   @   s:   e Zd ZU dZeed< eed< eed< eeef ed< dS )	AOTConfigz)
@dataclasses.dataclass
class AOTConfig:
    """
    Configuration for AOTDispatcher
    """

    fw_compiler: Callable
    bw_compiler: Callable
    partition_fn: Callable
    decompositions: Dict[Callable, Callable]


def aot_dispatch_base(flat_fn, flat_args: List[Tensor], aot_config: AOTConfig):
    fw_module = make_fx(flat_fn, aot_config.decompositions)(*flat_args)
    if config.debug_graphs:
        print("====== Forward (only) graph ======")
        fw_module.print_readable()

    with track_graph_compiling("inference"):
        compiled_fw = aot_config.fw_compiler(fw_module, flat_args)

    def new_fn(args):
        fw_outs = call_func_with_args(compiled_fw, args)
        return fw_outs

    return new_fn
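# Illustrative use of the inference-only path, with a hypothetical
# pass-through compiler (`cfg` and `fn` are made-up names, not library API):
#
#   cfg = AOTConfig(fw_compiler=lambda g, _: g, bw_compiler=lambda g, _: g,
#                   partition_fn=default_partition, decompositions={})
#   fn = aot_dispatch_base(lambda x: x.sin(), [torch.randn(3)], cfg)
#   fn([torch.randn(3)])                        # -> [tensor of sin values]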
      s  i }g di t |d}t|D ]N\}}||krPd d|| |< q$d |||< ||< |d7 }q$fddfdd|}t	fd	d
}	| }	tdd
 |	}	t|	ttfrt |	nd||	f}
t	j
r0t||
 

fdd}tt|j|
 

j  
  nt|j|
 
t	jrXtd 
  t r td 
|
\}W 5 Q R X t	jrtd |  td   td ||W 5 Q R X W 5 Q R X G  fdddtjj t j fdd}|S )NFr   Tr   c                    s    s| S dd t | D S )Nc                 S   s   g | ]\}}|r|qS r   r   )r+   tZkeepr   r   r    r^   R  s      zCaot_dispatch_autograd.<locals>.remove_dupe_args.<locals>.<listcomp>)r0   rz   )dropped_argskeep_arg_maskr   r    remove_dupe_argsO  s    z/aot_dispatch_autograd.<locals>.remove_dupe_argsc                    s    s S  fddt D S )Nc                    s   g | ]} |  qS r   r   r`   )add_dupe_mapr{   r   r    r^   W  s     z@aot_dispatch_autograd.<locals>.add_dupe_args.<locals>.<listcomp>)rangerz   )r   r   duped_arg_lenrz   r    add_dupe_argsT  s    z,aot_dispatch_autograd.<locals>.add_dupe_argsc                     s    |  S r   r   rz   )r   r   r   r    r$   [  r%   z'aot_dispatch_autograd.<locals>.<lambda>c                 S   s   t | tr|   S | S r   )rc   r   detach
contiguousr   r   r   r    r$   _  r%   c              
      s8   t jj " t j | |W  5 Q R  S Q R X d S r   )r1   fx	tracebackrg   r   run)rZ   r[   )r   r   r    fake_fnp  s    z&aot_dispatch_autograd.<locals>.fake_fnz====== Joint graph ======Zjointz====== Forward graph ======z====== Backward graph ======forwardc                       sD   e Zd ZZdZZee fddZee fddZ	dS )z/aot_dispatch_autograd.<locals>.CompiledFunctionNc                    s4   t  j|} j}| j||d    t|d| S Nr   )r   r   num_outsZsave_for_backwardrm   )ctxZdeduped_flat_tensor_argsr   r   )CompiledFunctionr   r    r     s     z7aot_dispatch_autograd.<locals>.CompiledFunction.forwardc              	      sn   dd |D }t | jt | } jd krNtdd | _W 5 Q R X |   t j|dd}t|S )Nc                 S   s   g | ]}|  qS r   )r   )r+   r   r   r   r    r^     s     zLaot_dispatch_autograd.<locals>.CompiledFunction.backward.<locals>.<listcomp>backwardT)r   )r#   Zsaved_tensorscompiled_bwry   r   Zmaybe_clear_saved_tensorsr   rm   )r   r   Zcontiguous_argsZall_argsr]   )r   r   	bw_moduler   r    r     s    
   z8aot_dispatch_autograd.<locals>.CompiledFunction.backward)
r   r   r   r   r   r   staticmethodr!   r   r   r   )r   	_num_outsr   r   compiled_fw_funcr   r    r     s   r   c                     s    j |  S r   )applyrz   )r   r   r   r    compiled_function  s    z0aot_dispatch_autograd.<locals>.compiled_function)re   	enumerater:   rl   pytreeZtree_maprc   r#   rm   r   Zuse_functionalizer   r   r   graphZeliminate_dead_codeZ	recompileZdebug_jointr   r   r1   Zno_gradry   r   r   r   rH   ZFunctionr   r   )r   r   r   Z	seen_argsjra   r   Zdeduped_flat_argsrk   r]   Zjoint_inputsr   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r    aot_dispatch_autograd$  sj    



	




   r   c                    sd  |j dkri |_ t|j |_ tjr(dt_tjr4t nt  tjrFt nt }tjrXt	 nt }tjrjt
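# Illustrative use of the autograd path with hypothetical pass-through
# compilers (`cfg` and `fn` are made-up names); the returned callable takes
# positional flat args, and the backward graph is only compiled once a
# gradient actually flows:
#
#   cfg = AOTConfig(fw_compiler=lambda g, _: g, bw_compiler=lambda g, _: g,
#                   partition_fn=default_partition, decompositions={})
#   fn = aot_dispatch_autograd(
#       lambda x: (x.sin(),), [torch.randn(3, requires_grad=True)], cfg)
#   (out,) = fn(torch.randn(3, requires_grad=True))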
 ndt  |   |  fdd}||}tdd |D ot }|rtt| ||W  5 Q R  W  5 Q R  W  5 Q R  W  5 Q R  S t| ||W  5 Q R  W  5 Q R  W  5 Q R  W  5 Q R  S W 5 Q R X W 5 Q R X W 5 Q R X W 5 Q R X dS )a8  
@dynamo_timed
def create_aot_dispatcher_function(
    flat_fn, flat_args: List[Tensor], aot_config: AOTConfig
):
    """
    Traces the forward and backward graphs of :attr:`flat_fn` to generate a
    joint graph. The joint graph is an Fx graph with Aten ops. Please refer to
    the tracing mechanism to understand the graph capturing details.

    The joint graph is then passed through :attr:`partition_fn` to isolate the
    forward and backward portions, which are then respectively compiled via the
    provided :attr:`fw_compiler` and :attr:`bw_compiler`.

    The resulting compiled forward and backward graphs are then wrapped up in a
    ``torch.autograd.Function`` object.
    """
    if aot_config.decompositions is None:
        aot_config.decompositions = {}

    aot_config.decompositions = {
        **aot_autograd_decompositions,
        **aot_config.decompositions,
    }

    # TorchDynamo toggles `use_fake_tensor` directly, so the cross-ref debug
    # mode has to force it off here rather than in the config alone.
    if config.debug_fake_cross_ref:
        config.use_fake_tensor = False

    fake_mode = FakeTensorMode() if config.use_fake_tensor else nullcontext()
    cross_ref = CrossRefFakeMode() if config.debug_fake_cross_ref else nullcontext()
    python_dispatcher_mode = (
        enable_python_dispatcher() if config.use_dynamic_shapes else nullcontext()
    )
    shape_env = ShapeEnv() if config.use_dynamic_shapes else None

    with preserve_rng_state(), cross_ref, fake_mode, python_dispatcher_mode:

        def process_inputs(flat_args):
            if config.use_fake_tensor:

                def convert(x):
                    return fake_mode.from_tensor(x, shape_env=shape_env)

                return pytree.tree_map_only(Tensor, convert, flat_args)
            else:
                return flat_args

        fake_flat_tensor_args = process_inputs(flat_args)

        needs_autograd = (
            any(
                [
                    x.requires_grad
                    for x in fake_flat_tensor_args
                    if isinstance(x, Tensor)
                ]
            )
            and torch.is_grad_enabled()
        )
        if needs_autograd:
            return make_boxed_func(
                aot_dispatch_autograd(flat_fn, fake_flat_tensor_args, aot_config)
            )
        else:
            return aot_dispatch_base(flat_fn, fake_flat_tensor_args, aot_config)


class PytreeThunk:
    spec = None
    # These are some kinda dumb microoptimizations that save about 3-4 us of
    # overhead.
    is_simple = None  # if the output spec is a tuple/list, we won't bother unflattening it.
    is_really_simple = None  # if the output spec is a LeafSpec

    def set(self, spec):
        assert self.spec is None or self.spec == spec
        self.spec = spec
        if type(self.spec) in [tuple, list] and all(
            isinstance(i, pytree.LeafSpec) for i in spec.children_specs
        ):
            self.is_simple = True
        if isinstance(self.spec, pytree.LeafSpec):
            self.is_really_simple = True

    def unflatten(self, x):
        if self.is_really_simple:
            return x[0]
        if self.is_simple:
            return x
        return pytree.tree_unflatten(x, self.spec)


KNOWN_TYPES = [torch.Tensor, int, str, float, bool]


def aot_function(
    fn: Callable,
    fw_compiler: Callable,
    bw_compiler: Optional[Callable] = None,
    partition_fn: Callable = default_partition,
    decompositions: Optional[Dict] = None,
    hasher_type=None,  # deprecated
    static_argnums: Optional[Tuple[int]] = None,  # deprecated
) -> Callable:
    """
    Traces the forward and backward graph of :attr:`fn` using torch dispatch
    mechanism, and then compiles the generated forward and backward graphs
    through :attr:`fw_compiler` and :attr:`bw_compiler`.

    :func:`aot_function` traces the forward and backward graph ahead of time,
    and generates a joint forward and backward graph.  :attr:`partition_fn` is
    then used to separate out forward and backward graphs. The partitioner
    function can be used to perform optimizations such as recomputation. One can
    set `decompositions` dictionary to decompose the operators into a sequence
    of core or simpler operators supported by the backend compilers.

    :func:`aot_function` uses a compilation cache, based on input tensor
    properties, to detect when recompilation is needed.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fn (Callable): A Python function that takes one or more arguments. Must
            return one or more Tensors.
        fw_compiler (Callable): A Python function that accepts an Fx graph with
            Aten ops and input args, and returns a Callable that semantically is
            equivalent to the input Fx graph.
        bw_compiler (Optional[Callable]): A Python function that accepts an
            Fx graph with Aten ops and input args, and returns a Callable that
            semantically is equivalent to the input Fx graph.  Default: None
            (when None, it defaults to the :attr:`fw_compiler`)
        partition_fn (Callable): A Python function that takes a joint forward
            and backward graph, and partitions it into separate forward and
            backward graphs.
        decompositions (Dict): A dictionary to define the decomposition of
            larger Aten ops into simpler or core Aten ops.

    Returns:
        Returns a ``Callable`` that retains the eager behavior of the original
        :attr:`fn`, but with forward and backward graph compiled via
        :attr:`fw_compiler` and :attr:`bw_compiler`.

    A simple example usage of :func:`aot_function` is as follows. This example
    will print the forward and backward graphs of the function ``fn``

        >>> fn = lambda x : x.sin().cos()
        >>> def print_compile_fn(fx_module, args):
        >>>     print(fx_module)
        >>>     return fx_module
        >>> aot_fn = aot_function(fn, print_compile_fn)
        >>> x = torch.randn(4, 5, requires_grad=True)
        >>> aot_fn(x)
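        >>> # The backward graph is compiled lazily, on the first backward call:
        >>> aot_fn(x).sum().backward()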
    """
    if static_argnums is not None:
        raise RuntimeError(
            "static_argnums has been deprecated - manually wrap your "
            "function or use torchdynamo."
        )

    if bw_compiler is None:
        bw_compiler = fw_compiler
    aot_config = AOTConfig(
        fw_compiler=fw_compiler,
        bw_compiler=bw_compiler,
        partition_fn=partition_fn,
        decompositions=decompositions,
    )
    cached_res = None

    @wraps(fn)
    def returned_function(*args, **kwargs):
        nonlocal cached_res
        flat_args, _ = pytree.tree_flatten((args, kwargs))

        # Compile the function and save it in the cache.
        if cached_res is None:
            # Save the args spec so flat args can be unflattened while tracing.
            _, tensor_args_spec = pytree.tree_flatten((args, kwargs))
            out_spec = PytreeThunk()

            def flat_fn(*flat_args):
                # The input is a flat list of args; rearrange it into the
                # (args, kwargs) shape the original function expects.
                nonlocal out_spec
                args, kwargs = pytree.tree_unflatten(flat_args, tensor_args_spec)
                tree_out = fn(*args, **kwargs)
                flat_out, spec = pytree.tree_flatten(tree_out)
                for i in flat_out:
                    is_known_type = False
                    for j in KNOWN_TYPES:
                        if isinstance(i, j):
                            is_known_type = True
                            break
                    if not is_known_type:
                        raise RuntimeError(
                            f"Found {type(i)} in output, which is not a known "
                            "type. If this type holds tensors, you need to "
                            "register a pytree for it. See "
                            "https://github.com/pytorch/functorch/issues/475 "
                            "for a brief explanation why. If you don't need "
                            "to register a pytree, please leave a comment "
                            "explaining your use case and we'll make this "
                            "more ergonomic to deal with"
                        )
                out_spec.set(spec)
                return flat_out

            compiled_fn = create_aot_dispatcher_function(
                flat_fn,
                flat_args,
                aot_config,
            )
            cached_res = (compiled_fn, out_spec)

        cached_fn, out_spec = cached_res
        out = cached_fn(flat_args)
        return out_spec.unflatten(out)

    return returned_function


def aot_module(mod: nn.Module, *args, **kwargs) -> nn.Module:
    """
    Traces the forward and backward graph of :attr:`mod` using torch dispatch
    tracing mechanism. It is a wrapper function that, underneath, uses
    :func:`aot_function` to perform tracing and compilation.

    :func:`aot_module` lifts the parameters and buffers of ``nn.Module`` as inputs
    to a new callable which is then compiled through :func:`aot_function`.

    .. warning::
        This API is experimental and likely to change.

    Args:
        mod (Callable): A ``nn.Module``.
        args : args to be passed to :func:`aot_function`
        kwargs : kwargs to be passed to :func:`aot_function`

    Returns:
        Returns a ``nn.Module`` that retains the eager behavior of the original
        :attr:`mod`, but with forward and backward graph compiled.

    """

    def functional_call(named_params, named_buffers, *args, **kwargs):
        params_and_buffers = {**named_params, **named_buffers}
        return stateless.functional_call(mod, params_and_buffers, args, kwargs)

    compiled_f = aot_function(functional_call, *args, **kwargs)

    class AOTModule(nn.Module):
        def __init__(self):
            super().__init__()
            self.orig_module = mod

        def forward(self, *args, **kwargs):
            # Parameters and buffers are passed in as ordinary inputs, so the
            # compiled graph sees them as leaves that may require grad.
            return compiled_f(
                dict(_named_parameters(mod, remove_duplicate=False)),
                dict(_named_buffers(mod, remove_duplicate=False)),
                *args,
                **kwargs,
            )

    return AOTModule()
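# Illustrative usage, reusing the print_compile_fn compiler from the
# aot_function docstring above (`aot_mod` is a hypothetical name):
#
#   mod = nn.Linear(4, 4)
#   aot_mod = aot_module(mod, print_compile_fn)
#   aot_mod(torch.randn(2, 4)).sum().backward()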
 tddd}||f|| |r fd	d
}n fdd
}j|_j|_|S )a  
    This is the simplified or low overhead version of aot_module. For frontends
    like TorchDynamo, the input functions/modules to AOT are static and have
    unpacked inputs/outputs. This gives us an opportunity to remove the
        (1) pytree overhead to parse inputs/outputs,
        (2) AOT Autograd cache,
        (3) Reading of params/buffers in every forward call

    :func:`aot_module_simplified` removes these overheads.
    """
    #########################################################

    params = {
        **dict(_named_parameters(mod, remove_duplicate=False)),
        **dict(_named_buffers(mod, remove_duplicate=False)),
    }
    params_flat, params_spec = pytree.tree_flatten(params)
    params_flat = tuple(params_flat)
    params_len = len(params_flat)

    def functional_call(*args, **kwargs):
        with stateless._reparametrize_module(
            mod, pytree.tree_unflatten(args[:params_len], params_spec)
        ):
            if isinstance(mod, torch.fx.GraphModule):
                with fx_traceback.override_stack_trace(), warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore", "Anomaly Detection has been enabled."
                    )
                    with torch.autograd.detect_anomaly(check_nan=False):
                        out = Interpreter(mod).run(*args[params_len:], **kwargs)
            else:
                out = mod(*args[params_len:], **kwargs)

        if not isinstance(out, (tuple, list)):
            raise RuntimeError(
                "Graph output must be a tuple(). This is so that we can avoid "
                "pytree processing of the outputs. Please change the module "
                "to have tuple outputs or use aot_module instead."
            )
        return out

    def aot_function_simplified(
        fn: Callable,
        fw_compiler: Callable,
        bw_compiler: Optional[Callable] = None,
        partition_fn: Callable = default_partition,
        decompositions: Optional[Dict] = None,
        hasher_type=None,
        static_argnums=None,
    ) -> Callable:
        assert static_argnums is None
        if bw_compiler is None:
            bw_compiler = fw_compiler
        aot_config = AOTConfig(
            fw_compiler=fw_compiler,
            bw_compiler=bw_compiler,
            partition_fn=partition_fn,
            decompositions=decompositions,
        )

        compiled_fn = None

        @wraps(fn)
        def new_func(*args):
            nonlocal compiled_fn
            if compiled_fn is None:
                compiled_fn = create_aot_dispatcher_function(
                    fn,
                    args,
                    aot_config,
                )
            return compiled_fn(args)

        return new_func

    compiled_f = aot_function_simplified(functional_call, *top_args, **top_kwargs)

    if top_kwargs:

        def forward(*args, **kwargs):
            return compiled_f(
                *params_flat,
                *args,
                **kwargs,
            )

    else:

        def forward(*args):
            return compiled_f(*params_flat, *args)

    forward.zero_grad = mod.zero_grad
    forward.named_parameters = mod.named_parameters
    return forward


compiled_function = aot_function
compiled_module = aot_module
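# Illustrative usage of the simplified wrapper; note the module must return a
# tuple, and params/buffers are flattened once at wrap time (`TupleMod` and
# `fwd` are hypothetical names):
#
#   class TupleMod(nn.Module):
#       def forward(self, x):
#           return (x.sin(),)
#
#   fwd = aot_module_simplified(TupleMod(), lambda g, _: g)
#   (out,) = fwd(torch.randn(3, requires_grad=True))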