U
    Kc>                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlZd dlmZ d d	lmZ d
gZdd ZG dd deZdd Zdd Zdd Zdd ZG dd deZejZdS )    N)GraphModule)Module)partition_cudagraphs)StorageWeakRef)tree_map)AOTAutogradStrategy)defaultdict)Setaot_autograd_cudagraphsc                 C   s   t | tjr|  S | S d S N)
isinstancetorchZTensorclone)t r   =/tmp/pip-unpacked-wheel-gikjz4vx/torch/cuda/_dynamo_graphs.pycloner   s    r   c                       sJ   e Zd ZU eed< ee ed<  fddZdZdZ	dZ
dZdd Z  ZS )	CudaGraphModulegmmutated_inputsc                    s   t    || _|| _d S r   )super__init__r   r   )selfr   r   	__class__r   r   r      s    
zCudaGraphModule.__init__FNc              	   G   sP  | j d k	rvt|t| jks tt| j|D ]\}}|| q,| j   | jD ]}|| | j|  qPtt	| j
S | jrdd |D | _tj | _ tj | j  | j| j | _
W 5 Q R X | j   | jD ]}|| | j|  qtt	| j
S tj }|tj  tj| | j| }W 5 Q R X tj | d| _|S d S )Nc                 S   s   g | ]}|  qS r   )r   ).0xr   r   r   
<listcomp>;   s     z,CudaGraphModule.__call__.<locals>.<listcomp>T)graphlenstatic_inputsAssertionErrorzipZcopy_Zreplayr   r   r   static_outputs	warmed_upr   ZcudaZ	CUDAGraphr   ZStreamZwait_streamZcurrent_streamstream)r   argsdstsrcir%   rr   r   r   __call__+   s0    





zCudaGraphModule.__call__)__name__
__module____qualname__r   __annotations__r	   intr   r$   r   r    r#   r+   __classcell__r   r   r   r   r      s   
r   c                 C   s   d}t t}d}t }| jD ]}|jdkrP|t|j|   | |d7 }q|jdkr|jt	j
krhq|jj}t|jD ]p\}}|t|jk r|j| }	n|j|jkrqz|j|j }	d}
|jr|jjrd}
|
rz||t|	j|   O }qzq|S )NZfake_resultr   placeholder   call_functionFT)r   setnodesopr   metaZstorageaddtargetoperatorgetitemZ_schema	enumerate	argumentsr   r&   namekwargsZ
alias_infoZis_write)gZFKinputsZ	input_idxr   nZschemar)   argargumentZmut_argr   r   r   find_input_mutationsU   s2    



rF   c                 C   s\   | j jD ]N}|jdkr|jr t| |j}| |j t|j }| 	|jt
|| qd S )NZcall_module)r   r6   r7   r@   r!   Zget_submoduler:   Zdelete_submodulerF   Zadd_submoduler   )r   rC   Zsubmodr   r   r   r   apply_cuda_graphsv   s    


rG   c                 C   s   t | |} t|  | S r   )r   rG   )modelrB   r   r   r   
cudagraphs   s    
rI   c                    sH   t t d} fdd}|dp&|d  ||d< ddlm} || f|S )N)fw_compilerbw_compilerc                     s   t  | |S r   )torchdynamodisable)r&   r@   rK   r   r   _wrapped_bw_compiler   s    z9raw_aot_autograd_cudagraphs.<locals>._wrapped_bw_compilerrK   rJ   r   )aot_module_simplified)rI   getZfunctorch.compilerP   )rH   rB   r@   rO   rP   r   rN   r   raw_aot_autograd_cudagraphs   s    rR   c                   @   s   e Zd Zdd ZdS )AOTAutogradCudaGraphsc                 C   s   t | j| jS r   )rR   r   Zexample_inputs)r   r   r   r   	candidate   s    zAOTAutogradCudaGraphs.candidateN)r,   r-   r.   rT   r   r   r   r   rS      s   rS   )r   Ztorch.fxr   Ztorch.nnr   Z#torch.fx.passes.backends.cudagraphsr   Z torch.multiprocessing.reductionsr   Ztorch.utils._pytreer   rL   Z"torchdynamo.optimizations.trainingr   r;   collectionsr   typingr	   __all__r   r   rF   rG   rI   rR   rS   Z
compile_fnr
   r   r   r   r   <module>   s&   <!