U
    Kc)                     @   s6  U d dl Z d dlmZmZmZmZmZ d dlZd dlZd dlm	Z	 ej
jZi Zeejjef ed< ej
jZejjZdd Zi Zdd Zdd
dZeejje	e	dddZeejje	ee	e	f dddZe	e	ee edddZeeje	e	ee e	e	ee	 ee	 ee eee	 ee	 ee	 f d	ddZee dddZeeje	e	ee	 ee	 ee	 ee	 ee	 eeee ee	ee	 ee	 f dddZeejjjjdd eejjj j eejjj!j eejjj"j eejjj#j eejjjj eejjjj eejjjj eejjj$j dS )     N)CallableDictListOptionalTuple)Tensordecomposition_table_for_jvpc                    s    fdd}|S )Nc                    s,   zt  | W S  tk
r&   |  Y S X d S N)register_decomposition	Exception)fop H/tmp/pip-unpacked-wheel-gikjz4vx/torch/_decomp/decompositions_for_jvp.py	decorator&   s    z/maybe_register_decomposition.<locals>.decoratorr   )r   r   r   r   r   maybe_register_decomposition%   s    r   c                 C   s   t | tdS )N)registry)r
   r   )fnr   r   r   register_decomposition_for_jvp5   s    r   Fc                 C   s   | t krt }n| tkrt}ntd|  ||  }|rntj|}t|}dd }||}tj|j	j
}ntj|j
}tj| | d S )Nz!could not find decomposition for c                 S   sF   dd | j  D }dd | j  D }dd| dd| dS )Nc                 S   s   g | ]
}| qS r   r   .0Z	param_strr   r   r   
<listcomp>K   s     zQ_register_jit_decomposition_for_jvp.<locals>.get_function_def.<locals>.<listcomp>c                 S   s   g | ]
}| qS r   r   r   r   r   r   r   L   s     zdef wrapped_decomp(z, z):
  return decomp_fn(z)
)
parametersvalueskeysjoin)sigZ	param_defZ	param_user   r   r   get_function_defJ   s    z=_register_jit_decomposition_for_jvp.<locals>.get_function_def)r   decomposition_tableRuntimeErrortorchZjitignoreinspect	signatureZCompilationUnitZwrapped_decompgraphscriptZ_register_decomposition)decomp
use_pythonZdecomposition_table_usedZ	decomp_fnr   r   Zf_strr%   r   r   r   #_register_jit_decomposition_for_jvp9   s    
r)   )selfreturnc                 C   s   t t | S r	   )r!   sumZdiag)r*   r   r   r   traceZ   s    r-   c                 C   sL   t | d| }t t |  }| jr6| d}n|}|t | |fS )Nr   )r   )r!   Zminimum	new_zerosexpabsZis_cudalog1p)r*   minzbufferr   r   r   log_sigmoid_forward_   s    r5   )inputrstdinner_dim_indiceskeepdimc                 C   sX   t j| ||d}t j| |d|d}t d| d| }| }dt ||  }||fS )N)dimr9   F)r:   Zunbiasedr9         )r!   meanvarpowdetachsqrt)r6   r7   r8   r9   r=   r>   epsr   r   r   recompute_mean_varj   s    rC   )	grad_outr6   normalized_shaper=   r7   weightbiasoutput_maskr+   c                 C   s  |j }| }	|	t| }
||
d  }|d |
 }tt|
|	}ttd|
}d}|D ]}||9 }qVd}|D ]}||9 }ql|dks|dkr|||||
d  |||
d  fS t|||dd\}}|| | }|d k	r| | }n| }|| }t||d}t	||}t||d}t	||}|| | }|d rL|| | }n
t
|}|d r|d k	rt|dkrt| | |d}n| | }n |d k	rt
|}n
td}|d r|d k	rt|dkrt| |d}n|  }n |d k	rt
|}n
td}|||fS )Nr   r;   Tr9   Fr   r<   )shaper:   lenlistranger.   rC   r!   r,   mul
zeros_likezerosclone)rD   r6   rE   r=   r7   rF   rG   rH   input_shapeZ
input_ndimaxisZ
inner_dimsZ
outer_dimsr8   Zouter_dim_indicesNiMZmean_Zrstd_Zx_hatZ
grad_x_hatabZc1c2c3innerZd_inputZd_weightZd_biasr   r   r   native_layer_norm_backwardx   sf    




  





r\   )xc                 C   s   d}| D ]}||9 }q|S )Nr;   r   )r]   rrU   r   r   r   prod   s    
r_   )rD   r6   rF   running_meanrunning_var	save_meansave_invstdtrainrB   rH   r+   c
                 C   s  |j }
| }|dkstdd}t|
|
|  }|}|}|r|d k	rN|d k	sVtddgttd|  }|d k	sztt|||dd\}}n&|d k	r|d k	st|}t|| }|d k	r|d k	stdg| }|
| ||< g }t|D ]}||kr|	| qt
||}d| }t| |}t| ||  |}t
|| |}t
t|| || |}|d krt
||d }nt
|| |}|r|| | }| | | | }n| | }|	d r|| }n |d k	rt|}n
td	}|	d r |}n
t|}|||fS )
Nr<   z$rank of the input must be at least 2r;   z7when train=True, save_mean and save_invstd are requiredr   FrI   g      ?r   )rJ   r:   AssertionErrorr_   rL   rM   rC   r!   ZrsqrtappendZreshaper,   rN   rO   rP   )rD   r6   rF   r`   ra   rb   rc   rd   rB   rH   rR   Z
input_rankrS   Znum_featuresr=   ZinvstdZreduciton_dimsZbroadcast_maskZreduction_axesrU   ZnormZgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojZ
grad_inputZgrad_weightZ	grad_biasr   r   r   native_batch_norm_backward   sj    






rg   T)r(   )F)%r#   typingr   r   r   r   r   r!   Ztorch._decompr   _decompr   r   Z_opsZ
OpOverload__annotations__r
   opsZatenr   r   r)   r-   defaultr5   intboolrC   r\   r_   rg   floatZnll_loss_backwardZnll_loss2d_backwardZ_log_softmax_backward_dataZ_softmax_backward_dataZcudnn_batch_norm_backwardr   r   r   r   <module>   sp    
!

   LQ