U
    JcI4                     @   sx  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dl Zd dlZeeZeej ejdddZ i Z!i Z"e# D ]$\Z$Z%de%j&kre%e"e$< qe%e!e$< qdddhZ'e# D ]6\Z$Z%de%j&kr,e%e"e$< ne(e$e'kre%e!e$< qe e"ej)j*j+j< G dd deZ,G dd dZ-dS )    )DictN)Module)
OpOverload)GraphModule)Node_get_qualified_name)OperatorSupport)CALLABLE_NODE_OPS)CapabilityBasedPartitioner)execute)DecompositionInterpreterdecomposition_table)dtypec                 K   s&   t |dks|stdtj| |S )Nr   zHNo support for other to.dtype() formats other than to.dtype(self, dtype))lenRuntimeErrortorchZ_primsZconvert_element_type)selfr   kwargs r   D/tmp/pip-unpacked-wheel-gikjz4vx/torch/fx/passes/backends/nvfuser.pyaten_to_dtype   s    r   ztorch._refsz'aten.native_layer_norm_backward.defaultz%aten.embedding_dense_backward.defaultzaten.addmm.defaultc                       s@   e Zd ZdZ fddZejeef e	e
d fddZ  ZS )NvFuserOperatorSupporta  
    Operator support for nvFuser backend.

    Currently, partitioning is based on FX ATen graph. The fused subgraph will latter be decomposed into prims.
    To determine if an ATen ops is supported by nvFuser, we shall check the prim ops used in its ref decomposition.
    Only if all the prim ops in the ref has a nvfuser_impl, we say this Aten op is suppported by nvFuser.

    Note: When adding a rule, please add it to the corresponding section and follow the
    alphabetical order.
    c              a      s   d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d`}t  | d S )N)`ztorch.ops.aten.addztorch.ops.aten.subztorch.ops.aten.divztorch.ops.aten.atan2ztorch.ops.aten.mulztorch.ops.aten.maxztorch.ops.aten.minztorch.ops.aten.powztorch.ops.aten.remainderztorch.ops.aten.fmodztorch.ops.aten.bitwise_andztorch.ops.aten.__and__ztorch.ops.aten.bitwise_orztorch.ops.aten.__or__ztorch.ops.aten.bitwise_xorztorch.ops.aten.__xor__z!torch.ops.aten.bitwise_left_shiftztorch.ops.aten.__lshift__z"torch.ops.aten.bitwise_right_shiftztorch.ops.aten.__rshift__ztorch.ops.aten.eqztorch.ops.aten.neztorch.ops.aten.geztorch.ops.aten.gtztorch.ops.aten.leztorch.ops.aten.ltztorch.ops.aten.absztorch.ops.aten.bitwise_notztorch.ops.aten.ceilztorch.ops.aten.floorztorch.ops.aten.fracztorch.ops.aten.negztorch.ops.aten.reluztorch.ops.aten.roundztorch.ops.aten.siluztorch.ops.aten.truncztorch.ops.aten.logztorch.ops.aten.log10ztorch.ops.aten.log1pztorch.ops.aten.log2ztorch.ops.aten.lgammaztorch.ops.aten.expztorch.ops.aten.expm1ztorch.ops.aten.erfztorch.ops.aten.erfcztorch.ops.aten.cosztorch.ops.aten.acosztorch.ops.aten.coshztorch.ops.aten.sinztorch.ops.aten.asinztorch.ops.aten.sinhztorch.ops.aten.tanztorch.ops.aten.atanztorch.ops.aten.tanhztorch.ops.aten.atanhztorch.ops.aten.sqrtztorch.ops.aten.rsqrtztorch.ops.aten.reciprocalztorch.ops.aten.sigmoidztorch.ops.aten.isfiniteztorch.ops.aten.isinfztorch.ops.aten.isnanztorch.ops.aten.isneginfztorch.ops.aten.isposinfztorch.ops.aten.isrealztorch.ops.aten.softplusztorch.ops.aten.thresholdztorch.ops.aten.clampztorch.ops.aten.where.selfztorch.ops.aten.lerpztorch.ops.aten.addcmulztorch.ops.aten.dropoutztorch.ops.aten.instance_normz%torch.ops.aten._batch_norm_impl_indexztorch.ops.aten.batch_normztorch.ops.aten.cudnn_batch_normz.torch.ops.aten._batch_norm_impl_index_backwardz torch.ops.aten.native_layer_normztorch.ops.aten.layer_normztorch.ops.aten.softmax.intztorch.ops.aten.log_softmax.intz)torch.ops.aten._log_softmax_backward_dataztorch.ops.aten.std.dimztorch.ops.aten.sumz torch.ops.aten._grad_sum_to_sizeztorch.ops.aten.sum_to_sizez-torch.ops.aten._autocast_to_reduced_precisionz*torch.ops.aten._autocast_to_full_precisionztorch.ops.aten.linearztorch.ops.aten.geluztorch.ops.aten.leaky_reluztorch.ops.aten.squareztorch.ops.aten.tanh_backwardz!torch.ops.aten.flatten.using_intsgetattrz_operator.getitem)super__init__)r   Zsupport_dict	__class__r   r   r   D   s      zNvFuserOperatorSupport.__init__)
submodulesnodereturnc                    sB   |j tkrdS t|jtr4t|jj}|| jkr4dS t 	||S )NFT)
opr	   
isinstancetargetr   r   ZoverloadpacketZ_support_dictr   is_node_supported)r   r   r   r#   r   r   r   r$      s    

z(NvFuserOperatorSupport.is_node_supported)__name__
__module____qualname____doc__r   tMappingstrr   r   boolr$   __classcell__r   r   r   r   r   8   s     r   c                   @   sB   e Zd Zdd ZedddZeedddZeedd	d
ZdS )NvFuserBackendc                 C   s   t  | _i | _i | _d S N)r   supported_opspartitioner_cacheprim_decomp_cache)r   r   r   r   r      s    zNvFuserBackend.__init__)graph_modulec                 O   sz   || j kr td | j | }nFtj }t||tdj|| tj	||}|| j |< td|j
 t|f|ddiS )Nzprim_decomp_cache hit!r   zLower to prims graph: executorZnvfuser)r2   loggerdebugr   ZfxZGraphr   aten2prim_decomprunr   coder   )r   r3   argsr   Zprim_moduleZ
prim_graphr   r   r   lower_to_prims_and_execute   s    



z)NvFuserBackend.lower_to_prims_and_execute)r3   r    c                 C   s   t d|j || jkr.t d | j| }n"t|| jdd}| }|| j|< |jjD ],}|j	dkrXd|j
krXt||j
}| j|_qX|S )NzCompiling graph_module: zpartitioner_cache hit!F)Zallows_single_node_partitionZcall_moduleZfused_)r5   r6   r9   r1   r
   r0   Zpartition_and_fusegraphZnodesr!   namer   r;   Z_wrapped_call)r   r3   Zfused_graph_moduleZpartitionerr   Zfused_moduler   r   r   compile  s     

  

zNvFuserBackend.compilec                 C   s
   |  |S r/   )r>   )r   r3   _r   r   r   __call__  s    zNvFuserBackend.__call__N)r%   r&   r'   r   r   r;   r>   r@   r   r   r   r   r.      s   	r.   ).typingr   r   Ztorch.nnr   Z
torch._opsr   Ztorch.fxr   Ztorch.fx.noder   r   Z torch.fx.passes.operator_supportr   Ztorch.fx.passes.tools_commonr	   Z!torch.fx.passes.infra.partitionerr
   Ztorch._prims.executorr   Z"torch.fx.experimental.proxy_tensorr   Ztorch._decompr   r)   logging	getLoggerr%   r5   setLevelWARNINGr   r   Zaten2aten_decompr7   itemsr!   Z	decomp_fnr&   Zaten2aten_decomp_skipsr+   opsZatentor   r.   r   r   r   r   <module>   sF   




 1