U
    Kc:                  <   @   s  U d dl mZmZmZ d dlZd dlmZmZmZm	Z	m
Z
mZ d dlmZmZ dZejedZejeddZejedd	Zejedd
ZejeddZddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGg<Zi Zeeef edH< ddddddddddddddddddddd d"d#d!d$d%d&d'd(d)d*d+d-h!ZedIdJdKZeD ].ZedLe dMe dNe dOe dPe dQ qd.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>hZeD ].ZedLe dMe dRe dSe dPe dQ qdBhZ e D ].ZedLe dMe dTe dUe dPe dQ qdVdW Z!eee
e
dXdYdZZ"eeej#d[d\d]Z$d^d_ Z%d`da Z&dbdc Z'eeedddedfZ(eeee)dgdhdiZ*deeeee+ e+e)dkdldmZ,eedndodpZ-eeedddqdrZ.eeedddsdtZ/e!edu< e"edA< e$edC< e%ed,< e&ed?< e'ed@< e-edv< e(edD< e*edE< e,edw< e.edF< e/edG< dxdy Z0dzd{ Z1d|d} Z2d~d Z3dS )    )AnyDictOptionalN)DimsSequenceTypeELEMENTWISE_TYPE_PROMOTION_KINDgetnvFuserDtypemake_contiguous_strides_for	ShapeTypeTensorLikeType)backwards_not_supported"elementwise_type_promotion_wrappernvprimsZDEFZIMPLZCompositeExplicitAutogradZCompositeImplicitAutogradZAutogradZMetaabsacosasinatanatanhcoscoshZbitwise_notceilerferfcexpexpm1floorimagisfinitelgammaloglog1plog2log10realZ
reciprocalnegroundZrsqrtsignsinsinhsqrttantanhZ	transposetruncaddatan2Zbitwise_andZ
bitwise_orZbitwise_xordiveqfmodgegtleltmulnepow	remaindersubsqueezeZview_ofbroadcast_in_dimwhereZconvert_element_typesumvarZamaxZamin_nvfuser_impls)fnamec                 C   s:   z ddl m} t|j| stW n tk
r4   Y nX d S )Nr   )FusionDefinition)Ztorch._C._nvfuserrA   getattrZ	OperatorsAssertionErrorImportError)r@   fd rF   >/tmp/pip-unpacked-wheel-gikjz4vx/torch/_prims/nvfuser_prims.py_assert_nvfuser_op_exists   s
    rH   zL
# Ensure that the nvfuser implementation exists
_assert_nvfuser_op_exists("z	")

def _z#_nvfuser(fd, a):
    return fd.ops.z3(a)  # type: ignore[attr-defined]

_nvfuser_impls["z"] = _z	_nvfuser
z&_nvfuser(fd, a, b):
    return fd.ops.z6(a, b)  # type: ignore[attr-defined]

_nvfuser_impls["z)_nvfuser(fd, a, b, c):
    return fd.ops.z9(a, b, c)  # type: ignore[attr-defined]

_nvfuser_impls["c	           	   
   C   sZ   |d kr|   }|d kr |   }|d kr0|   }|d kr@|   }| j||||||||S N)Zdefine_null_tensoropsZ
batch_norm)	rE   inputweightbiasrunning_meanrunning_vartrainingmomentumepsrF   rF   rG   _native_batch_norm_nvfuser   s$    rS   rE   ashapeZbroadcast_dimensionsc                 C   s   | j |||S rI   )rJ   r;   rT   rF   rF   rG   _broadcast_in_dim_nvfuser   s    rW   )rE   rU   dtypec                 C   s   t |}| j||S rI   )r   rJ   cast)rE   rU   rX   Znvfuser_dtyperF   rF   rG   _convert_element_type_nvfuser   s    rZ   c                 C   s   | j ||S rI   )rJ   Zpermute)rE   rU   ZpermutationrF   rF   rG   _transpose_nvfuser   s    r[   c                 C   sB   t t|D ]0}| j|||}|d | ||d d   }q|S )N   )reversedsortedrJ   r:   )rE   rU   Za_shapeZ
dimensionsidxrF   rF   rG   _squeeze_nvfuser   s    r`   c                 C   s   | j |S rI   )rJ   setrE   rU   rF   rF   rG   _view_of_nvfuser  s    rc   )rE   rU   dimsc                 C   s"   d}t jjjj}| j||||S NF)torchZ_CZ_nvfuserZDataTypeZNullrJ   r=   )rE   rU   rd   	keep_dimsoutput_dtyperF   rF   rG   _sum_nvfuser	  s    ri   )rE   rU   rd   
correctionc                C   s   d}| j ||||S re   )rJ   r>   )rE   rU   rd   rj   rg   rF   rF   rG   _var_nvfuser  s    rk   FrE   rU   rd   unbiasedkeepdimrj   c                C   s"   |d kst d}| j||||S re   )rC   rJ   var_meanrl   rF   rF   rG   _var_mean_nvfuser  s    
rp   rb   c                 C   s   | j |S rI   )rJ   	rand_likerb   rF   rF   rG   _rand_like_nvfuser/  s    rr   c                 C   s   d}| j |||S re   )rJ   maxrE   rU   rd   rg   rF   rF   rG   _amax_nvfuser3  s    ru   c                 C   s   d}| j |||S re   )rJ   minrt   rF   rF   rG   _amin_nvfuser<  s    rw   native_batch_normrq   ro   c                  C   s   d} t |  dd d  dd }t| | t| ttjjj	j
 tjjj	}|j
}||fD ] }d|_td |_tjjj|_q`dS )	z]This function is used to register the native_batch_norm function in torch.ops.nvprims module.rx   zX(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, z)bool training, float momentum, float eps)z -> (Tensor, Tensor, Tensor)c              
   S   s   t | |||||||S rI   )rf   rx   )rK   rL   rM   rN   rO   rP   rQ   rR   rF   rF   rG   
_prim_impl]  s           z.register_native_batch_norm.<locals>._prim_implzComputes batch normalization.N)nvprimdefinenvprim_implimplnvprim_autograd_implr   rf   rJ   r   rx   default__doc__r?   impl_nvfuser_prims_commonRETURN_TYPENEWreturn_type)namery   prim_packetprimprF   rF   rG   register_native_batch_normS  s(     

r   c                  C   s   d} t d d d d d d ddd}d d d d d ddd}t| | t| | ttjj| }|j	}t
| t| ||fD ] }d|_td |_tjjj|_q~d S )	Nrq   zrand_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> TensorrX   layoutdevice
pin_memorymemory_formatc                S   s"   t | j}tjj| | j|||dS )N)rV   stridesrX   r   )r   rV   rf   _primsZ
TensorMeta)selfrX   r   r   r   r   r   rF   rF   rG   _meta_rand_likey  s    	
z+register_rand_like.<locals>._meta_rand_likec                S   s   t j| |||||dS )Nr   )rf   rq   )r   rX   r   r   r   r   rF   rF   rG   ry     s    	z&register_rand_like.<locals>._prim_implzComputes rand_like)rz   r{   r|   r}   nvprim_meta_implrB   rf   rJ   r   r   r~   r   r   r?   r   r   r   r   r   )r   r   ry   r   r   r   rF   rF   rG   register_rand_likeq  s2    
r   c                     s   d} t d t |  dd  ddddd	}dddd
d}t| | t| | tjjj}|j	fdd}t
d| tdtjddddfdd ddd fdd}t| | |fD ] }d|_td |_tjjj|_qdS )zTThis function is used to register the var_mean function in torch.ops.nvprims module.zvar_mean.mainz7var_mean(Tensor inp, bool unbiased) -> (Tensor, Tensor)z`(Tensor inp, int[1]? dim=None, bool? unbiased=None, bool keepdim=False, *, int? correction=None)z -> (Tensor, Tensor)NFrj   c          
         s   t jjrt jj}nj}t jj |d}t jj jd}|r fddtjD } fddtjD }	t j	j
|||	}t j	j
|||	}||fS )N)rh   c                    s"   g | ]}| krj | nd qS r\   rV   .0idiminprF   rG   
<listcomp>  s    z=register_var_mean.<locals>._meta_var_mean.<locals>.<listcomp>c                    s   g | ]}| kr|qS rF   rF   r   r   rF   rG   r     s      )rf   r   is_complex_dtyperX   Zcorresponding_real_dtyper   Z_reduction_metarangendimrJ   r   r;   )
r   r   rm   rn   rj   rh   r>   meanoutput_shapebroadcast_dimsrF   r   rG   _meta_var_mean  s"      z)register_var_mean.<locals>._meta_var_meanc                S   s    t j||}t j| |||dS )N)rj   rn   )rf   r   set_correctionro   )r   r   rm   rn   rj   rF   rF   rG   ry     s    z%register_var_mean.<locals>._prim_implc                    s    | d |dS )N)r   rm   rF   )r   rm   r   rF   rG   _unbiased_overload_impl  s    z2register_var_mean.<locals>._unbiased_overload_implro   )rU   )Ztype_promoting_argsZtype_promotion_kindc          
         s   t j||}dksg kr"d t j jt j jrHtd |d}|r fddt j	D }fddt j	D }|\}}	t j
j|||}t j
j|	||}	||	f}|S )NrF   z!Complex tensors are not supportedr   c                    s"   g | ]}|kr j | nd qS r   r   r   rU   r   rF   rG   r     s     z<register_var_mean.<locals>._var_mean_ref.<locals>.<listcomp>c                    s   g | ]}| kr|qS rF   rF   r   r   rF   rG   r     s      )rf   r   r   Zreduction_dimsrV   r   rX   NotImplementedErrorr   r   rJ   r   r;   )
rU   r   rm   rn   rj   ro   r   r   r>   r   r   r   rG   _var_mean_ref  s&      z(register_var_mean.<locals>._var_mean_refc             
      s@   ddl m} | $ t | ||||dW  5 Q R  S Q R X d S )Nr   )NvfuserPrimsModer   )Ztorch._prims.contextr   r   )rU   r   rm   rn   rj   r   )r   rF   rG   _var_mean_autograd  s        z-register_var_mean.<locals>._var_mean_autogradz]Computes the variance and mean of x over the list of dimensions specified in the dim argument)NNF)NNF)NNF)NNF)rz   r{   r|   r}   r   rf   rJ   r   ro   mainnvprim_implicit_implr   r   ZCOMPLEX_TO_FLOATr~   r   r?   r   r   r   r   r   )r   r   ry   r   r   r   r   rF   )r   r   rG   register_var_mean  s>    

     
r   c                  C   s   t   t  t  tD ]} ttjj| }t	|j
 t| |j t| |j ttjj| }|j}t| t| ||fD ]}|j|_t|  |_|j|_q|qdS )zARegisters all nvFuser primitives in the torch.ops.nvprims module.N)r   r   r   nvprim_namesrB   rf   rJ   Zprimsrz   r{   Zschemar|   r}   Z	prim_implr   Zprim_meta_implr   r   r~   r   r   r?   r   r   )r   Z	main_primr   r   r   rF   rF   rG   register_nvprims  s    
r   )NF)4typingr   r   r   rf   Ztorch._prims_commonr   r   r   r   r	   r
   Ztorch._prims_common.wrappersr   r   Znvprim_namespaceZlibraryLibraryrz   r|   r   r~   r   r   r?   str__annotations__Z_nvfuser_unary_opsrH   r@   execZ_nvfuser_binary_opsZ_nvfuser_ternary_opsrS   rW   rX   rZ   r[   r`   rc   ri   intrk   boolrp   rr   ru   rw   r   r   r   r   rF   rF   rF   rG   <module>   s    	    ?%
	  
	:b