U
    Kc$D                    @   s  U d dl Z d dlZd dlZd dlmZ d dl mZmZ d dlmZ d dl	m
Z
mZmZmZmZmZ d dlZd dlmZ d dlm  mZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
l m!Z!m"Z" ej#j$Z$g Z%ee& e'd< ej(j)Z)G dd deZ*d2e
ej+e,dddZ-ee-ej+j.ddZ/ee-ej+j.dZ0ee-ej+j1dZ2ee-ej+j3dZ4ee5dddZ6ee)j7e0eedddZ7ee)j8e0eedddZ8ee)j9e0eee:e:dddZ9ee)j;e0d3ee:e:e:ed!d"d#Z;ee)j<e0ee:e:e:e,ed$d%d&Z<ee)j=e0eed'd(d)Z=ee)j>e0eed*d+d,Z>ee)j?e0eee:e:d-d.d/Z?ee)j@e0eee:d0d1d2Z@ee)jAe0eed'd3d4ZAee)jBe0eeed5d6d7ZBee)jCe0eee:d8d9d:ZCee)jDe0eee:e,d;d<d=ZDee)jEe0d4eee&d?d@dAZEee)jFe0eedBdCdDZFee)jGe0eed'dEdFZGee)jHe0eeed5dGdHZHee)jIeee:edIdJdKZIee)jJe0eeeeeef dLdMdNZJee)jKe0eeee:e:e,e,edOdPdQZKee)jLe0eeeedRdSdTZLee5dUdVdWZMejNdXdYdZZOee)jPe0e*jQjRfeee5ed[d\d]ZPee)jSe0eeee5d^d_d`ZSee)jTe0eeee5e:dadbdcZTeeeee e5e5eedddedfZUee)jVe0eee5edgdhdiZVee)jWeeeee e5e5eedddjdkZWee)jXeeeee e5e5eedddldmZXee)jYe0de*jQjRfeeee e5edndodpZYee)jZe0de*jQjRfeeeee e5edqdrdsZZee)j[e e0e*jQjRfeee5edtdudvZ[ee)j\e0e*jQjRfeeee5edwdxdyZ\ee)j]eeedzd{d|Z]ee)j^eee5 e5e5e5e5d}d~dZ^ee)j_jd5ee5ee5 ee5 e5dddZ`ee)jaeee5 e5e5dddZaee)jbeee5 e5e5e5dddZbeeejNdddZcee)jde/eee5ejNdddZdee)jee/eee5ejNdddZedd Zfee)jge e0eee5 ee5 ee5 ee5 edddZgee)jhe e0eee5 ee5 ee5 ee5 ee5 edddZhee)jie0eee:dddZiee)jjjke0d6eeee: edddZjee)jlee:ee, dddZlee)jmee5e,dddZmee)jnee5e,dddZnee)joe0d7eeee:dddZoee)jpjd8eee:edddZqee)jpjrd9ee:e:edddZsee)jtd:eee5e,e,edddZtee)jueee5e5e,dddZuee5 dddZvee)jwddd;eee5 e5ee dddZwee)jxjddd<ee5e5ee dddZxee)jye0d=eeee5e5dddZyddĄ Zzee)j{jkddeee ee e5e5e5e5e:eeeef dŜ	ddǄZ{ee)j|e0eeeeee e5e5e5e5ee, eee ee ee f dȜddʄZ|ee ee d˜dd̈́Z}ee)j~eeee5 eeee ee ee, eee ee ee f dΜ	ddЄZ~ee)jeee ee ee ee e,e:e:eeeef dќ	ddӄZee)je0d>ddՄZee)jddddddd֜eeejN eej e,e,eej dלddلZee)jje4eeedڜdd܄Zee)jje2d?eeee5  ee5 e,dݜdd߄Zee)jje2d@eeee5  ee5 e,dݜddZee)je)je)jgdddd Zee)jeeee ee ee e,e:e:dddZdd Zee)jeeee ee ee ee ee e,e:ee, eeee ee f dddZee)jeeeee ee ee ee e:ed	ddZee)jdde0eee5e5f dddZee)jd dee5eeedddZeee5 edddZee)jjke4dAeee5 e,edddZee)jedde0eeeef d'ddZee)je e2dBeee: ee5 e,eejN d ddZeej(j)jje0eeee5  e,eee:  edddZee)jjkeee,dddZee)je)jgddd	d
 Zee)jeeee e5e5eeef dddZee:edddZee:edddZeedddZeeedddZee edddZee)je0dCeee5e5e,edddZee)je0dd  Zee)jdde0d!d" Zee)jdde*jQjRfd#d$Zeje5e,d%d&d'Zej(j)jjke$jd(d) Zee)jjke e0dDeee5e5f e,ee: ee: ed*d+d,Zee)jje e0dEeeee5e5f  e,eee:e:f  ed-d.d/Zd0d1 Zee)je)j ee)je)j ee)je)j ee)je)j ee)je)j ee)je)jA ee)je)j ee)je)jG dS (F      N)Enum)partialreduce)product)CallablecastIterableListOptionalTuple)Tensorregister_decomposition)
NumberType
TensorLikeTensorSequenceType)out_wrapper)tree_flattentree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r   r   @/tmp/pip-unpacked-wheel-gikjz4vx/torch/_decomp/decompositions.pyr      s   r   F)ftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sr   dd t | |fd D }tj|di\  fdd}fdd}t|| t||}rd|S t||S d S )	Nc                 S   s   g | ]}t |tr|qS r   )
isinstancer   .0xr   r   r    
<listcomp>+   s    
 z-type_casts.<locals>.inner.<locals>.<listcomp>r   type_promotion_kindc                    s   t | tr|  S | S d S Nr$   r   tor'   computation_dtyper   r    increase_prec3   s    

z0type_casts.<locals>.inner.<locals>.increase_precc                    s   t | tr|  S | S d S r*   r+   r-   )result_dtyper   r    decrease_prec9   s    

z0type_casts.<locals>.inner.<locals>.decrease_prec)r   utilselementwise_dtypesr   )argskwargsZ	flat_argsr0   r2   rr#   r!   r"   )r/   r1   r    inner)   s    
ztype_casts.<locals>.inner)	functoolswraps)r!   r"   r#   r9   r   r8   r    
type_casts$   s    r<   T)r"   r#   )r"   )r'   dimc                 C   s$   t ||   D ]}| d} q| S )N)ranger=   	unsqueeze)r'   r=   _r   r   r    _unsqueeze_to_dimX   s    rB   out_gradyc                 C   s   | d||     S Nr   Zconj_physicalrC   r   r   r    tanh_backward^   s    rH   c                 C   s   | |d|     S rF   rG   rC   r   r   r    sigmoid_backwardd   s    rI   )rD   r'   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rD   r'   rJ   rK   zr   r   r    softplus_backwardj   s    rR   r   )selfalphascaleinput_scalereturnc                 C   s8   || }|}|}t | dk| | t | | d | S Nr   r   rO   rP   rN   )rS   rT   rU   rV   negcoefposcoef
negiptcoefr   r   r    eluq   s      r]   )grad_outputrT   rU   rV   	is_resultself_or_resultc           	      C   sf   || }|}|}|r6t |dk| | ||  || S t |dk| | | t ||  | | S d S Nr   rY   )	r^   rT   rU   rV   r_   r`   rZ   r[   r\   r   r   r    elu_backward~   s    
rb   )rS   rW   c                 C   s    t jt j| d ddddd S N   r   min   maxrO   clamprS   r   r   r    hardsigmoid   s    rm   r^   rS   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        rO   rP   rn   r   r   r    hardsigmoid_backward   s
    rq   r^   rS   Zmin_valmax_valc                 C   s   t ||k||kB d| S Nro   rp   rr   r   r   r    hardtanh_backward   s    ru   grad_outrS   lambdc                 C   s   t || k||k@ d| S rt   rp   rv   r   r   r    hardshrink_backward   s    ry   c                 C   s$   | t jt j| d dddd d S rc   rj   rl   r   r   r    	hardswish   s    rz   )r^   rS   rW   c              
   C   s,   t |dk dt |dk| |d d  | S )Nro   rd         ?rp   rn   r   r   r    hardswish_backward   s
    r}   r^   rS   rK   c                 C   s   t ||kd| S rt   rp   r~   r   r   r    threshold_backward   s    r   r^   rS   negative_slopeself_is_resultc                 C   s   t |dk| | | S ra   rp   r   r   r   r    leaky_relu_backward   s    r   none)gradrS   approximatec                 C   s   d}d}d}|dkr|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S d S )
Ng;f?g;f?gmBP?tanhr|   gHm?r   rd   g      )rO   r   erfrN   )r   rS   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuber9   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr   r   r    gelu_backward   s,    
r   )r^   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rF   )rO   r   FZsoftplussigmoid)r^   r   Zinput_tanh_softplusZinput_sigmoidoutr   r   r    mish_backward   s    
r   c                 C   s   | t |  S r*   )rO   r   rl   r   r   r    silu   s    r   c                 C   s,   ddt |   }| | d|d|    S rF   )rO   rN   )r^   rS   r   r   r   r    silu_backward  s    r   )r^   rS   rx   rW   c                 C   s   t || k||k@ d| S rt   rp   )r^   rS   rx   r   r   r    softshrink_backward  s    r   )r^   rS   weightrW   c                 C   s~   |}t d|  D ]}|d}qt|dk| ||  }t|dkd||  }||j}| | krv|d}qZ||fS )Nr   r>   r   ro   )r?   r=   r@   rO   rP   Zsum_to_sizeshapesqueeze)r^   rS   r   Z
cur_weightrA   Z
input_gradZweight_grad_collectorr   r   r   r    prelu_backward  s    	r   )r^   rS   noiseloweruppertrainingr   rW   c                 C   s:   |r|| dkr|  |S || d }t| |||S d S )Ngư>r   )mulatenr   )r^   rS   r   r   r   r   r   r   r   r   r    rrelu_with_noise_backward$  s    
   r   )r^   rS   bufferrW   c                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r   r>   )rO   rP   rN   abs)r^   rS   r   Zin_negativeZ	max_derivsignrQ   r   r   r    log_sigmoid_backward8  s
    r   loss	reductionc                 C   s4   |t jjkrt| S |t jjkr,t| S | S d S r*   )r   r   valuerO   meanr   sumr   r   r   r    apply_loss_reductionD  s
    

r   dtypec                 C   s4   | t jkrt jS | t jkr t jS | t jkr0t jS d S r*   )rO   Z	complex32Zfloat16Z	complex64Zfloat32Z
complex128Zfloat64r   r   r   r    to_real_dtypeM  s    


r   )rS   targetr   rW   c                 C   s   | | d }t ||S )Nr   )r   )rS   r   r   r   r   r   r    mse_loss\  s    r   )r^   r   r   r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r   r   r   numel)r^   r   r   r   normr   r   r    mse_loss_backwarde  s    r   )r^   rS   r   r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S rL   )r   r   r   r   rO   rP   )r^   rS   r   r   r   r   r'   r   r   r    huber_loss_backwardn  s     r   )r^   rS   r   r   r   ignore_indextotal_weightrW   c                 C   s   |  dk rdnd}|tjjkr(| | } ||}t|}t|||d}|  |     krhdkrvn n
| |} |d k	rdd t|  D }	|j	d |	|< |
|	}| | } |dk}
|
rt||k| d} ||  S )Nr   r   r   g      c                 S   s   g | ]}d qS )r   r   )r&   rA   r   r   r    r(     s     z&_nll_loss_backward.<locals>.<listcomp>)r=   r   r   r   r@   rO   
zeros_likeZscatterr?   r   reshaperP   )r^   rS   r   r   r   r   r   channel_dim
grad_inputZ	new_shapeZhas_ignore_indexr   r   r    _nll_loss_backward|  s"    	

 

r   )r^   rS   r=   rW   c           
      C   s   |  dkstdt|  |}||}|d dksNtd| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr   z.Halving dimension must be even, but dimension z	 is size rM   r=   )	r=   AssertionErrorr3   canonicalize_dimsizenarrowrO   r   cat)
r^   rS   r=   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr   r   r    glu_backward  s    


r   c                 C   sx  d|    krdks"n td|  dks6td|  dkoL|  dk}|s|jd |jd kstd|j d|j d| dkstd	|j d
|  df|d ks| |jd kstd|tjjkr8|  dkr8|   dkr| jd |jd ksdtd|jd  d|    d| jd  n,|   dkrT|  dksdtd| j t| ||||||S )Nr   r   input tensor should be 1D or 2Dr   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)r>   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )r=   r   r   r   r   r   r   r   )r^   rS   r   r   r   r   r   no_batch_dimr   r   r    nll_loss_backward  sP    
"
&$


      r   c                 C   s   |  dkstd|   |  dks<td|   |jd |jd krx|jd |jd krx|jd |jd kstd|j d	|j | dkstd
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: rd   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r   r   r   r   r   z ( z, elements))r=   r   r   r   r   )r^   rS   r   r   r   r   r   r   r   r    nll_loss2d_backward  s8    

      r   )rS   r   r   r   rW   c              	   C   s^   |d t t d|  | dd |t t | | dd  }|d k	rT|| }t||S )Nr   r   i)rO   maximumlognew_fullr   )rS   r   r   r   r   r   r   r    binary_cross_entropy  s    
 
r   )r^   rS   r   r   r   rW   c                 C   sR   d}| ||  t j|d|  |d }|d k	r6|| }|tjjkrN||  }|S )Ng-q=r   re   )rO   rk   r   r   r   r   )r^   rS   r   r   r   ZEPSILONresultr   r   r    binary_cross_entropy_backward  s    	"r   )r   r   r   rW   c                 C   s    t t |  | }t||S r*   )rO   log1prN   r   )r   r   r   r   r   r   r    soft_margin_loss%  s    r   )r^   rS   r   r   rW   c                 C   s6   ||  t || d  }|tjjkr2||  }|S rF   )rO   r   r   r   r   r   )r^   rS   r   r   r   r   r   r    soft_margin_loss_backward1  s    r   )x1x2rW   c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr   r>   Tmemory_formatr   )powr   rO   	ones_likeZcontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r   r   r    _euclidean_dist?  s    r   )r^   input_sizesr=   startendstepc                 C   s   |  |}t|| ||||S r*   )	new_zerosrO   Zslice_scatter)r^   r   r=   r   r   r   r   r   r   r    slice_backwardK  s    	
r   )rS   r=   r   r   r   c                 C   sB  |   }|dkrtdt|   |}t|  }t|  }|dkrPtd|d k	r\|nd}|d k	rl|ntj}	|dk r||| 7 }|	dk r|	|| 7 }	|dk rd}n||| kr|| }|	|k r|}	n|	|| kr|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
r0tdn| |||
S d S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver   z<Slice decomposition for quantized tensors aren't implemented)r=   RuntimeErrorr3   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rS   r=   r   r   r   ndimZsizesstridesZ	start_valZend_valr   lenr   r   r    slice_forwardX  s>    
r   )r^   r   r=   indexc                 C   s   |  |}t|| ||S r*   )r   rO   Zselect_scatter)r^   r   r=   r   r   r   r   r    select_backward  s    
r   )r^   r   offsetdim1dim2c                 C   s   |  |}t|| |||S r*   )r   rO   Zdiagonal_scatter)r^   r   r   r   r   r   r   r   r    diagonal_backward  s    
r   r^   r   input_dtypec                 C   s   | j |kr||}|S r*   )r   r,   r   r   r   r    _cast_grad_to_input_dtype  s    

r   )r^   outputr=   r   c                 C   s,   | | }||t j||dd  }t| ||S NTr=   keepdim)rO   r   r   )r^   r   r=   r   Znew_grad_outputr   r   r   r    _softmax_backward_data  s      
r   c                 C   s*   | t |t j| |dd  }t| ||S r   )rO   rN   r   r   )r^   r   r=   r   r   r   r   r    _log_softmax_backward_data  s      
r  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr   r   r   devicer   r>   )r   rO   arangeint64r@   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr  Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr   r   r     _im2col_col2im_indices_along_dim  s
    r  )r   kernel_sizedilationpaddingr   rW   c              	      s(  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dkotdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s@| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s$|d}|S ) Nr   c                   S   s   dS )Nz"im2col(): only 2D kernel supportedr   r   r   r   r    <lambda>      zim2col.<locals>.<lambda>c                   S   s   dS )Nz$im2col(): only 2D dilation supportedr   r   r   r   r    r
    r  c                   S   s   dS )Nz#im2col(): only 2D padding supportedr   r   r   r   r    r
    r  c                   S   s   dS )Nz"im2col(): only 2D stride supportedr   r   r   r   r    r
    r  Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r   Nr   r&   pr   r   r    	<genexpr>  s     z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS r  r   r  r   r   r    r    s     c                   S   s   dS )Nz<{param_name} should be greater {'than' zero, but got {param}r   r   r   r   r    r
    r  z0im2col.<locals>.check_positive.<locals>.<lambda>allr3   checkparam
param_namestrictcondr   r   r    check_positive  s
    ( zim2col.<locals>.check_positiver  r  r	  Fr  r   rd   r   c                 s   s   | ]}|d kV  qdS r  r   r&   dr   r   r    r    s     zim2col.<locals>.<genexpr>r{   c                      s   dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler   r   r   r    r
    r  c                 s   s<   | ]4\}}}}}d |d|  ||d    d  |  V  qdS )r   r   Nr   r&   r   padZdilZkerstr   r   r    r    s   r   c                 s   s   | ]}|d kV  qdS r  r   )r&   cr   r   r    r    s     c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  r   r  r  output_sizer	  r   r   r   r    r
    r  r   r   r>   r   rd      )T)r3   r  r   r   r  r  zipr@   r  r  r   r!  permuter   r   r   )r   r  r  r	  r   r  r   batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr   Znum_blocks_rowZnum_blocks_colr   r(  r    im2col  s    




 

    
          

 
 
r6  )r   r)  r  r  r	  r   rW   c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dkotdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tjjj||| dd#}t|| | | | f}|
s|d}|S )%Nr   c                   S   s   dS )Nzonly 2D output_size supportedr   r   r   r   r    r
  (  r  zcol2im.<locals>.<lambda>c                   S   s   dS )Nzonly 2D kernel supportedr   r   r   r   r    r
  )  r  c                   S   s   dS )Nzonly 2D dilation supportedr   r   r   r   r    r
  *  r  c                   S   s   dS )Nzonly 2D padding supportedr   r   r   r   r    r
  +  r  c                   S   s   dS )Nzonly 2D stride supportedr   r   r   r   r    r
  ,  r  Tc                 S   s<   |rt dd | D nt dd | D }t|dd  d S )Nc                 s   s   | ]}|d kV  qdS r  r   r  r   r   r    r  /  s     z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   s   | ]}|d kV  qdS r  r   r  r   r   r    r  /  s     c                   S   s   dS )Nz9{param_name} should be greater than zero, but got {param}r   r   r   r   r    r
  1  r  z0col2im.<locals>.check_positive.<locals>.<lambda>r  r  r   r   r    r  .  s
    ( zcol2im.<locals>.check_positiver  r  r	  Fr  r   r)  )r   rd   c                 s   s   | ]}|d kV  qdS r  r   r  r   r   r    r  =  s     zcol2im.<locals>.<genexpr>r   c                      s   dt   S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r   r  r   r    r
  >  r  r   r   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r   r   )r  r   r   r    r
  D  r  c                 S   s:   g | ]2\}}}}}d |d|  ||d    d  |  qS )r   r   r   r   r   r   r    r(   H  s   zcol2im.<locals>.<listcomp>r>   c                      s4   d d d d d d  dd  d	S 
NzGiven output_size=r$  r%  r&  r'  z , expected input.size(-1) to be z	 but got r>   .r   r   Lr  r  r)  r	  r   r   r   r    r
  Q  r  c                      s4   d d d d d d  dd  d	S r7  r   r   r9  r   r    r
  W  r  rd   r   r*  c                 S   s   g | ]\}}|d |  qS )r   r   )r&   or  r   r   r    r(   s  s     
accumulate)T)r3   r  r   r   r  r+  r@   r   r,  r  r  rB   r   prodrO   opsr   	index_putr   r!  r   )r   r)  r  r  r	  r   r  r   Zprod_kernel_sizecolr-  out_hout_wr.  r/  r0  r1  r2  r3  r4  r5  Zindices_rowZindices_colZoutput_padded_sizer   idxr   r9  r    col2im  s    




 
    

"     
     
rE  r^   maskrU   c                 C   s   | | | |  S r*   )type_asrF  r   r   r    native_dropout_backward  s    rI  )r^   rS   epsrW   c              	   C   sx   |d k	r>|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS d S )NrM   ro   r   nan)rO   rP   logical_andr   float)r^   rS   rJ  lohir   r   r    logit_backward  s    rP  )r   r  trainc                 C   sJ   |r2t | |k}||  tdd|   }||fS | t j| t jdfS d S )NrM   r   )rO   	rand_likerM  r   bool)r   r  rQ  Z	bool_maskresr   r   r    native_dropout  s
    rU  )r'   r=   half_to_floatc                 C   s   |   } |r| jtjksttj| tjjd\}}| 	|} tj
| |dd}t| | }|tj||dd }|s||	|}|S N)r)   Tr   )
contiguousr   rO   halfr   r3   r4   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr,   amaxrN   r   )r'   r=   rV  r/   r1   x_maxZunnormalizedr   r   r   r    _softmax  s     


r_  c           	      C   s   |   } |r| jtjksttj| tjjd\}}| 	|} tj
| |dd}| | }ttjt||dd}|| }|s|	|}|S rW  )rY  r   rO   rZ  r   r3   r4   r[  r\  r,   r]  r   r   rN   )	r'   r=   rV  r/   r1   r^  ZshiftedZshifted_logsumexpr   r   r   r    _log_softmax  s     


r`  rS   tensor1tensor2r   c                 C   s8   |   s|  r | || |  S | t|| |  S d S r*   )is_floating_point
is_complexintra  r   r   r    addcmul  s    rg  )rS   otherrT   rW   c                 C   s   t j|| |dS NrT   rO   subrS   rh  rT   r   r   r    rsub_Tensor  s    rn  c                 C   s   t j|| |dS ri  rk  rm  r   r   r    rsub_Scalar  s    ro  r>   )r   indicespadding_idxscale_grad_by_freqsparserW   c                 C   sl   |   dkstd|  dkr,| d|S t|j}| jdd  D ]}|| qD| d|d|S )Nr   z'weight' must be 2-Dr   r   r>   )r=   r   Zindex_selectr   r   appendr   view)r   rp  rq  rr  rs  r   r  r   r   r    	embedding  s    
rv  )r^   rp  num_weightsrq  rr  c                 C   s   |  }| || d}| || jd f}||}|r|||f}	||f}
|	j|g|
dd}	|	| }||d }||kd}||}t	
|d}|j|gt	|||ddS )Nr>   Tr<  r   r   )r   r   r   r   r   new_onesr@  r@   Z	expand_asrO   Z	full_likerP   )r^   rp  rw  rq  rr  r   r   grad_weightZindices_rank1countsonesZgrad_weights_scaleZskip_paddingZ	zero_gradr   r   r    embedding_dense_backward  s$    

  r|  r-   c                 C   s   d}| D ]}||9 }q|S rF   r   )r'   r7   ir   r   r    r>    s    
r>  )Zdisable_meta)rS   split_sizesr=   rW   c                 C   sF   t |}g }d}t|D ](}|| }|| ||| ||7 }q|S ra   )r   r?   rt  r   )rS   r~  r=   Z
num_splitssplitsZ	start_idxr}  lengthr   r   r    split_with_sizes  s    
r  )rS   
split_sizer=   rW   c                    st   | j }|| } dkr(|dks"t| gS |  d   } fddt|D }  | |  ||d < t| ||S )Nr   r   c                    s   g | ]} qS r   r   )r&   r}  r  r   r    r(   /  s     zsplit.<locals>.<listcomp>)r   r   r?   rO   split)rS   r  r=   r   Zdim_sizechunksr~  r   r  r    r  '  s    r  )rS   mat1mat2rJ   rT   c                 C   sH   |   s |  s t|}t|}|t|| }|dkr<|S ||  | S ra   )rd  re  rf  rO   mm)rS   r  r  rJ   rT   r   r   r   r    addmm5  s    r  c           	      C   s^   t | j}| j|d}tj||ddd}tj||dd}t|| }| | | }|||fS )Nr   FT)r=   unbiasedr   r   )r3   get_computation_dtyper   r,   rO   varr   rsqrt)	r   Z	norm_dimsrJ  r/   Z	input_accZ
biased_varr   rstdr   r   r   r    	normalizeG  s    r  )	r   r   biasNCHxWgrouprJ  rW   c                 C   s   | j }| |||| |} ddg}	t| |	|\}
}}t||	}t||	}|
|}
|d k	rtt||
 d }|
| }
|d k	rt||
 d }|
| }
|
j| jd}
|j| jd}|j| jd}|
||fS )Nr   rd   r   r   )r   ru  r  _squeeze_multiplerB   r=   r,   r   )r   r   r  r  r  r  r  rJ  Z
orig_shapereduction_dimsr   r   r  r   r   r    native_group_normR  s"    


r  )r^   r   r   r  gammar  r  r  r  output_maskrW   c
              	      s  t j| ||dd t j|| dd t j|dd t |    k fdd t jfkfdd t d kp  k fdd t \}
}t |dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d rvd|
  }d k	rt|d|

d	}t|d|

d	}t|dd|
}nL||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r|	|
|	|
d  |d j
dgd
 }|	d	 r|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr   r   )r  r  r  r   r    r
    r  z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r  r   )r  r  r   r   r    r
    r  c                      s    d  dd k	r  nd S )NzExpect gamma to have z elements but got r>   r   r   )r  r  r   r    r
    r  r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r   r   )r  r  r   r    r
    r  r   r   rM   r>   r   r  r   )r3   Zcheck_same_deviceZcheck_same_shaper  r   r   divmodrO   r   ru  r   r@   r   r{  r  rB   r,   r   )r^   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biassZds_valZdb_valc1c2c3r   )r  r  r  r  r  r   r    native_group_norm_backwardq  s         

""



$
r  )r'   rW   c                 C   s   | d k	r|  |S | S r*   r,   )r'   r   r   r   r    _maybe_cast  s    
r  )	rw   r   normalized_shaper   r  r   r  r  rW   c           !         sF  |j }| }	t|j  fdd| |||fD \}
}}}|
d k	sHt|	t| }||d  }|d | }g }g }t|	D ]"}||kr|| q||| q|t	|}t	|}|dks|dkr|d r|
|nd |d r|r|
||d  nd |d r|r|
||d  nd fS || | }|d k	r>|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d }d } |d r|| | }|d r|d k	rt|dkrt|
| |d}n|
| }|d r$|d k	r$t|dkrt|
|d} n|
 } t||jt||jt| |jfS )Nc                    s&   g | ]}|d k	r|   n|qS r*   )r,   rY  r%   r.   r   r    r(     s   z.native_layer_norm_backward.<locals>.<listcomp>r   r   r   TF)r   r=   r3   r  r   r   r   r?   rt  r>  r   rO   r   r   cloner  )!rw   r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr}  r  MZx_hatZ
grad_x_hatabr  r  r  r9   r  Zd_weightr  r   r.   r    native_layer_norm_backward  st    








r  )	r   r   r  running_meanrunning_varr   momentumrJ  rW   c                 C   s  dgt td|   }t| j}	|rt| ||\}
}}t||}t||}|d k	rp||| d| |   |d k	r| 	 | j
d  }tj| |dd||d   }||| d| |   n|d k	r|d k	st|j|	dd}|j|	dd}|}dt||  }| jjdkr |}|}n| d	}| d	}t||  d }t||  d }| | | }
|d krx| d
}|d kr| d
}t||  d }t||  d }|
| | }
| jjdkr|j| jd}|j| jd}|
j| jd||fS )Nr   r   r   F)r  T)r   copycpur   r   r   )r   r?   r=   r3   r  r   r  r  copy_r   r   rO   r  r   r,   r   r  typer   rB   rx  )r   r   r  r  r  r   r  rJ  r  r/   r   r   r  	save_meanZ	save_rstdnZunbiased_varinvstdr   r   r    native_batch_norm!  sL    








r  c                 C   s6   t | |k jt jd}|| |  d|  }||fS )Nr   rM   )rO   rR  r,   uint8rH  )r   r  	generatorrG  rT  r   r   r    _fused_dropout_decomposition^  s    r  )r   layoutr  
pin_memorynon_blockingr   )r'   r   r  r  r  r   c          	      C   s   |r|t jkstd|r"td|d k	s>|d k	s>|d k	s>td}|d k	r||  kr|d k	rz|jdkrzt j| |} d}t j| |} |d k	r|st j| |} |d k	rt j| |d}|	|  |S | S )NZTODOFr  Tr   )
rO   Zstridedr   Z
get_devicer  Z_primsZconvert_element_typeZ
device_putZ
empty_liker  )	r'   r   r  r  r  r  r   Zdtype_convertedr   r   r   r    _to_copyf  s     
r  )rS   rh  rW   c                 C   s<   t t | | t | t | dkt | d| t | S )Nr   )r   rP   isnanr   r   )rS   rh  r   r   r    xlogy  s    
r  r'   r=   
correctionr   c                 C   s   |d krg n|}|   rP| j}tj||||d}| j}tj||||d}|| S |d kr\d}t|dkrtt| j}	nd}	|D ]}
|	| j|
 9 }	q|t| |d}| | }|| }t	|||}|r|	| }	||	 S )Nr  r   r   r   T)
re  realrO   r  imagr   r>  r   r   r   )r'   r=   r  r   dimsZreal_inZvar_realZimag_inZvar_imagr  r  r   rl  sqr   r   r   r    var_correction  s*    r  c                 C   s   t t j| |||dS )Nr  )rO   r   r  r  r   r   r    std_decomposition  s    r  c                 C   s
   t | S r*   )r   aliasr-   r   r   r    nop_decomposition  s    r  )r   r   r  r  r  r   exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r:||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )r   r  r   rO   r  )r   r   r  r  r  r   r  r  r  r  r#  r   r   r    cudnn_batch_norm  s"    
r  c                 C   sD   t |D ]6\}}|dkr|| jk r4| j| || ks| |} q| S rF   )	enumerater   r   r@   )r'   broadcast_maskr  rG  r   r   r    _broadcast_batch_norm_backward  s    $r  )rw   r   r   r  r  r  save_invstdrQ  rJ  r  rW   c
           %         s  |j }
t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks^tdd}tt|||  }|}|}|r|d k	r|d k	stn&|d k	r|d k	st|}t	|| }dg| }|| ||< g }t
|D ]}||kr|| qt||}d| }t||}t|||  |}t|| |}tt|| || |}|d krlt||d } nt|| |} |r|| | }!||! | |  }"n||  }"|	d r|| }#nd }#|	d r|}$nd }$|"|
t|#|
t|$|
fS )Nc                    s"   g | ]}|d k	r|  n|qS r*   r  r%   r.   r   r    r(     s   z.native_batch_norm_backward.<locals>.<listcomp>r   z$rank of the input must be at least 2r   rM   )r   r3   r  r   r=   r   r>  r   rO   r  r?   rt  r  r   r   r,   r  )%rw   r   r   r  r  r  r  rQ  rJ  r  r   r  r  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r  r  Zreduction_axesr}  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojr   ry  Z	grad_biasr   r.   r    native_batch_norm_backward  s    	



 


r  	r   r^   r   r  r  r  Zsave_varr  ZreserveSpacec	           	      C   s"   t || |||||d|dddg
S )NT)r   r  r  r   r   r    cudnn_batch_norm_backwardT  s    r  )r   r)  c                    s  | j  | jttdkfdd | jdd  D ]}t|dkfdd q:d |d  dkrԈd |d  dkrtdd	 tdd  |D }td
d	 tdd  ||D }tjj	
| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt|d|
f }|	sV|sVtj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]B\}}|d kr|d|d d |f }n||d|d d |f  }q|||  S )Nr  c                      s
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r   r   r   r   r    r
  w  r  z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r8  r  r   r  r   r    r
  |  r  r>   c                 s   s   | ]\}}|| V  qd S r*   r   )r&   r}  r;  r   r   r    r    s     z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s$   | ]\}}}||d  |  V  qdS )r   Nr   )r&   r}  r;  r  r   r   r    r    s    c                 S   s   t j| | |ddS )NtruncZrounding_moderO   divr  r  r#  r   r   r    start_index  s    z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr   r  r  r  r  r   r   r    	end_index  s    z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkpD|| dk }|rV|d7 }n|dkrf|d8 }t j| t jd}|d| }|rt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )N)r  r   r   r   r>   r  )rO   r  r  r@   Zscalar_tensorr   r  minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxrD  maxvali1r  )r  r  r  r   r    compute_idx  s,    
  
z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r{   r>   r   c                 S   sd   t |tr| |fS |dk st||dk}|dkr>t|d}t| |d} t|| }| |fS d S )Nr   r>   r   r   ro   )r$   rf  r   r@   rB   rO   masked_fill)valsr  r  r  r=   rG  r   r   r    
maybe_mask  s    

z'adaptive_avg_pool2d.<locals>.maybe_mask)r  r=   r{   )r  r   r   r3   r  r  r+  rO   nn
functionalZ
avg_pool2drB   r   r   r?   )r   r)  r  r   kernelr  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wr  r  retr}  jr   )r  r  r   r   r  r    adaptive_avg_pool2dn  s^    

(      
    
&
r  rj  )r'   r=   r   tensorrT   c                   s   t | j|}t jdkfdd  dkrft | jt t t  fdd |  }td f| f }t	j
jj| ||dd | S )Nr   c                      s   d j  dS )Nz(Index should have dimension 1 or 0 (got r   r  r   )r   r   r    r
    r  zindex_add_.<locals>.<lambda>c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r   )rT   python_typer   r    r
    r  Tr<  )r3   canonicalize_dimsr   r  Zdtype_to_typer   Zis_weakly_lesser_typer  slicerO   r?  r   Z
index_put_)r'   r=   r   r  rT   rD  r   )rT   r   r  r    
index_add_  s    	
r  )rS   r  rW   c                 C   sN   |   }t||}t|ts"tt|d ddD ]}||kr2| |} q2| S )Nr   r>   )r=   r3   r  r$   r  r   r?   r   )rS   r  r   Zwrapped_dimsrD  r   r   r    r    s    r  )rS   r=   r   rW   c                 C   s   |   dkr$tt| || S tj| |dd}|r<|nt||}t|| t	dkd}tt| | ||}| 
|S )Nr   TrX  inf)r   rO   r   rN   r   r]  r  r  r   rM  add)rS   r=   r   ZmaxesZmaxes_squeezedr   r   r   r    	logsumexp  s      r  r   r   c                 C   sL   t | d| }t t |  }| jr6| d}n|}|t | |fS )Nr   r  )rO   r  r   rN   r   Zis_cudar   )rS   rf   rQ   r   r   r   r    log_sigmoid_forward	  s    r  rS   r  r=   r   r   c                 C   s"   |d krd}t jj| ||||dS )Nr   r   )rO   ZlinalgZvector_normr   r   r   r    r     s    
r   )r   r)  align_cornersscale_factorsrW   c           "      C   sp  | j \}}}}|d k	r0t|d }t|d }	n |d k	rP||d  }||d  }	|dkr||rr|d t|d  }
q|| }
nd}
|	dkr|r|d t|	d  }q||	 }nd}tjt|| j| jd}tjt|	| j| jd}|r|
| }|| }n0|
|d  d jdd}||d  d jdd}t|	tj
}t|j|d d	tj
}t|	tj
}t|j|d d	tj
}|d}|d}|d}| d d d d ||f }| d d d d ||f }| d d d d ||f }| d d d d ||f }|| }d| }|| }d| }t||t|| }t||t|| } t||t| | }!|!S )	Nr   r   ro   r  r|   re   rh   rM   )r   rM  rf  rO   r  r   r  rk   floorr,   r  ceilr@   r   )"r   r)  r  r  Zn_batchZ
n_channelsZin_hZin_wrB  rC  Zh_scale_factorZw_scale_factorr}  r  r'   rE   Zx_floorZx_ceilZy_floorZy_ceilZx_viewZx_floor_viewZx_ceil_viewZv1Zv2Zv3Zv4Zxscale2Zxscale1Zyscale2Zyscale1Zq1Zq2r   r   r   r    upsample_bilinear2d_vec%  sV    	





r  )r  r  rW   c                 C   s   | j |j kS r*   r  )r  r  r   r   r    is_same_sizem  s    r  c                 G   s   t | |S r*   )r   ru  )r'   r   r5   r   r   r    _reshape_aliasr  s    r  )rS   r   r   r   r   rW   c                 C   s^  |   dkr|   dks td|  dks4td|   dkoJ|  dk}|s~| jd |jd ks~td| j d|j d| jd	 }|d ks|  dkr| |kstd
| d|j |   }d}|dk rd}|d k	r|dkr|dn|}	| |	 } ||}
t| ||
| }|dkr8t||k|d}|t	j
jkrd|dkrd| dd}||fS |d k	r|dkr|d| jn|}	t|	||
|}|dkrt||k|d}| }n2|dkr||k | }n| dd|  }|t	jjkr| }nD|t	jjkrV|d krJ|dkr@| | n| }n| | }||fS )Nr   r   r   r   r   r   r   r   r>   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: r   ro   rM   )r=   r   r   r   r@   rO   Zgatherr   rP   r   r   r   r   expandr   r,   r   r   r   )rS   r   r   r   r   r   Z	n_classesZn_dimsr   wZtarget_r   r   Zwsumr   r   r    nll_loss_forwardw  sb     






 




 r
  )r'   ArW   c                 C   s    |d |  |d  |  |  d S )Nr   rd   r   r   r'   r  r   r   r    _upsample_cubic_convolution1  s    r  c                 C   s(   ||  d|  |  d|  |  d|  S )Nr*     r   r   r  r   r   r    _upsample_cubic_convolution2  s    r  )trW   c                 C   s4   d}t | d |t| |td|  |t d|  |fS )Ng      rM   r   )r  r  )r  r  r   r   r     _upsample_get_cubic_coefficients  s    r  )coeffstsrW   c                 C   s    t |}tdd t| |D S )Nc                 s   s   | ]\}}|| V  qd S r*   r   )r&   r  r  r   r   r    r    s     z+_upsample_cubic_interp1d.<locals>.<genexpr>)r  _sum_tensorsr+  )r  r  Zcoeffs2r   r   r    _upsample_cubic_interp1d  s    r  )r  rW   c                 C   s   t tj| S r*   )r   rO   r  )r  r   r   r    r    s    r  )r  gridinterpolation_modepadding_moder  rW   c                    s  t dkfdd t dkfdd tttdfddttttdd	d
tttdfddtttdfdd}j\}
|j\}}tttd
fddtjjddddtj|jdd|dd tttt	dfddtttd fdd|d }|d }	dkr||}
||	
}|

 |
  d  }}d  }}|| }}||
 ||  }|
| ||  }||
 ||  }|
 |  }tfdd|f|||f|||f|||ffD S dkrH||}
||	
}|
 }| }||dS |}
|	
}|

 |
 |
 | }tttd
fdd 	ttd!	fd"d#tfd$dtd%D }t||dS d S )&N)r   r   r   c                      s
   d  S )NzInvalid interpolation mode r   r   )r  r   r    r
    r  z!grid_sampler_2d.<locals>.<lambda>c                      s
   d  S )NzInvalid padding mode r   r   )r  r   r    r
    r  )coordsr   rW   c                    s0    r|d d n|d }|d d }| | | S Nr|   r   )r  r   r   ofs)r  r   r    unnormalize  s    z$grid_sampler_2d.<locals>.unnormalize)r  	twice_low
twice_highrW   c                 S   sv   ||krt | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr   r   r   r   )rO   r   r   fmodr  r,   Zint8rP   )r  r  r  Z
coords_minZcoords_spanZcoords2extraZflipsr   r   r    reflect_coordinates  s    

  
z,grid_sampler_2d.<locals>.reflect_coordinatesc                    sj   dkr| S dkr&t | d|d S  r@| dd|d  }n| dd| d }t |d|d S d S )Nr   r   r   r>   rj   )r  r   Zcoords_reflected)r  r  r!  r   r    compute_coordinates  s    z,grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r*   r   )r  r   Z	coords_un)r"  r  r   r    compute_source_index  s    
z-grid_sampler_2d.<locals>.compute_source_index)xsysrW   c                    s,   t d| kt | k t d|k| k S ra   )rO   rL  )r$  r%  )iHiWr   r    in_bounds_cond  s     z'grid_sampler_2d.<locals>.in_bounds_condr  r   )r$  r%  wsrW   c                    s@   | | t  fdd| jtjd|jtjd|fD S )Nc                 3   s(   | ] }t |d  dV  qdS )r   r   N)rO   rP   ru  )r&   r  )r  r  oHoWr   r    r  $  s   z0grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r  r,   rO   r  )r$  r%  r)  )r  r(  r*  r+  )r  r    clip  s    
zgrid_sampler_2d.<locals>.clip)ixiyrW   c                    s&   | ||\}}} ||f | S r*   r   )r-  r.  r	  Zidx_xZidx_yZw_)C_idxN_idxr  r,  r   r    get_summand)  s    z$grid_sampler_2d.<locals>.get_summand).r   ).r   r   c                 3   s    | ]\}}} |||V  qd S r*   r   )r&   r-  r.  r	  )r1  r   r    r  ?  s   z"grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rF   r   )r-  r.  r'   rE   )r"  r1  r&  r'  r   r    get_value_boundedZ  s    

z*grid_sampler_2d.<locals>.get_value_bounded)r  rW   c                    sL   | d  } d | | d | d |f}t |dS )Nr   r   )r  r@   )r  Ziy_ofscs)r2  ix_nwiy_nwtxr   r    	get_coeff_  s    z"grid_sampler_2d.<locals>.get_coeffc                 3   s   | ]} |V  qd S r*   r   )r&   r  )r7  r   r    r  i  s     r   )r3   r  r   rf  r   rO   r  r  ru  r   r  r  roundr  r?   r  r@   )r  r  r  r  r  r#  r  rA   r'   rE   r-  r.  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyr  r   )r/  r  r0  r  r  r,  r"  r7  r1  r2  r&  r'  r(  r  r4  r5  r*  r+  r  r!  r6  r  r    grid_sampler_2d  sn    	
 




	





r:  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr   r   c                      s   d    d   S )Nzmatrix @ vector expected, got r  r   r   rS   vecr   r    r
  r  r  zmv.<locals>.<lambda>r   c                      s(   d  d d  d d d S )Nzsize mismatch, got r   r'   r   ,)r   r   r;  r   r    r
  v  r  r   )r3   r  r=   r   r   r;  r   r;  r    mvm  s    r>  c                    s     rZ rB  r0t    S t  S n  rZt  S t dkot  dk fdd tj	 j	k fdd  fdd}t
  
 k|    S )Nr   c                      s   d   d    dS )Nz1D tensors expected, but got zD and z	D tensorsr   r   rh  rS   r   r    r
    r  zdot.<locals>.<lambda>c                      s   dj  d j  S )Nz:dot : expected both vectors to have same dtype, but found  and r   r   r?  r   r    r
    r  c                	      s.   d   d    d   d    d	S )Nz+inconsistent tensor size, expected tensor [z] and src [z.] to have thesame number of elements, but got r@  z elements respectivelyr  r   r?  r   r    numel_error  s    ,zdot.<locals>.numel_error)re  Zis_conjrO   dotZconjZvdotr3   r  r=   r   r   r   )rS   rh  rA  r   r?  r    rB  {  s$    
rB  c                 C   s   |   d}|d k	rV|d | d }d| |  ||  |  |    |   }n,d| |  | |  |  |     }|d k	r|| }t||S rX   )r   rN   r   r   )rS   r   r   Z
pos_weightr   rs   Z
log_weightr   r   r   r     binary_cross_entropy_with_logits  s    
rC  )rb  dim_tensor2rW   c                 C   sr   | j }|dkrj|dks|dkrj| j}|  }|dkrd|dkrd|d dkrd|d |d |d  krddS dS ndS d S )Nrd   r   r   r>   r   FT)r   r   r   )rb  rD  dim_tensor1Zt1_sizes_ptrZ
t1_stridesr   r   r    should_fold  s    
rF  c                 C   s  |   }|  }|dkr |dks$t|dkr@|dkr@t| |S |dkr\|dkr\t| |S |dkr|dkrttt| d|dS |dkr|dkrt| |S t| |st||r||k}|r|j	n| }|s|n|dkr| 
 n| }|j}t|d d }ttj|}	|  dk}
|
r0||jd  | }||	|d }|
rr|||}|rn|j	 S |S |||S nN|dkr|dkr|dkr| dnd}| d}| jd d }|dkr|dn|d}|dkr|dnd}g }t|d D ]}||| qtt||}|||g }|||g }t|}| | |||}|| |||}|}|dkr|| |dkr|| |||S tddd  d S )	Nr   r   r   r>   r   Fc                   S   s   dS )Nz/both arguments to matmul need to be at least 1Dr   r   r   r   r    r
  /	  r  zmatmul.<locals>.<lambda>)r=   r   rO   rB  r>  r   r  r@   rF  r   r  r   r   r   operatorr   rt  rY  ru  r   r?   Zbroadcast_shapesr>  r  Zbmmr3   r  )rb  rc  rE  rD  Z	transposet1t2Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr   r  m1Zbatch_tensor1m2r  Zbatch_tensor2r}  Zexpand_batch_portionZtensor1_expand_sizeZtensor2_expand_sizeZexpand_batch_productZtensor1_expandedZtensor2_expandedr   r   r    r     s|    		

    



r   )r  r)  r  scale_hscale_wrW   c                    s  j \}}|\}}ddd}	dd }
|	|||}|	|||}tj|jd|dddtj|jdd|dd tj|jddd|df}tj|jdddd|f}|
|||}| }|| |jtjd}|
|||}| }|| }|jtjd}|d ||d |d f}|d ||d |d f fd	d
fddtfdd|D }t	||S )Nc                 S   sD   |r |dkr| d |d  S dS |d k	r8|dkr8d| S | | S d S )Nr   r   r   )r  r  r  rU   r   r   r    compute_scale?	  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   s    |r| | S | |d  d S d S r  r   )rU   Z	dst_indexr  r   r   r    r#  E	  s    z8upsample_bicubic2d_default.<locals>.compute_source_indexr  r   r   r   c                    s4   t | dd }t |dd } ||f S rX   rj   )r%  r$  Zy_idxZx_idx)r/  r0  r  r&  r'  r   r    load_bounded`	  s    z0upsample_bicubic2d_default.<locals>.load_boundedc                    s"   t  fddD }t|S )Nc                 3   s   | ]} |V  qd S r*   r   )r&   Zx_ofs)rO  rE   r   r    r  f	  s     zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>)r  r  )rE   Zcoeffs_x)ixs_ofsrO  t_x)rE   r    get_x_interpe	  s    z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   s   | ]} |V  qd S r*   r   )r&   Zy_ofs)rR  r   r    r  i	  s     z-upsample_bicubic2d_default.<locals>.<genexpr>)N)
r   rO   r  r  ru  r  r,   r  r  r  )r  r)  r  rL  rM  r  r  r*  r+  rN  r#  Zheight_scaleZwidth_scaleZout_yZout_xZreal_xZin_xr-  Zreal_yZin_yZt_yr.  Ziys_ofsZcoeffs_yr   )	r/  r0  r  rR  r&  r'  rP  rO  rQ  r    upsample_bicubic2d_default2	  s0    

rS  )r  r)  r  r  rW   c                 C   s   t t|t| dkdd  |d krd|d k	s4tttttf tdd t| j	dd  |D }|rl|nd\}}t
| ||||S )Nr   c                   S   s   dS )Nz:Must specify exactly one of output_size and scale_factors.r   r   r   r   r    r
  x	  r  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s   | ]\}}t || V  qd S r*   )rf  )r&   r	  rU   r   r   r    r  ~	  s     z)upsample_bicubic2d_vec.<locals>.<genexpr>r   )NN)r3   r  rS  r   r   r   rf  r  r+  r   rS  )r  r)  r  r  rL  rM  r   r   r    upsample_bicubic2d_vecm	  s    	
 rT  c                    s   t |  fdd}|S )Nc                     s    | |}| d  |S ra   )r  )r5   r6   r   outplace_opr   r    
inplace_op	  s    
z$register_inplace.<locals>.inplace_opr   )Zaten_oprV  rW  r   rU  r    register_inplace	  s    rX  )F)r   r   r   )r   )r   NNr   )N)r   )r   )r   )r>   FF)r   )r   )r   r   )N)NF)NF)F)NNFN)r   r   F)NN)N)r:   rG  r   enumr   r   r   	itertoolsr   typingr   r   r   r	   r
   r   rO   Ztorch._prims_commonZ_prims_commonr3   Ztorch.nn.functionalr  r  r   r   Ztorch._decompr   r   r   r   Ztorch._prims_common.wrappersr   Ztorch.utils._pytreer   r   Z_CZDispatchKeyr   str__annotations__r?  r   r   r[  rS  r<   r\  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathZCOMPLEX_TO_FLOATZreduction_complex_to_realZINT_TO_FLOATZpw_cast_for_int_to_realrf  rB   rH   rI   rR   rM  r]   rb   rm   rq   ru   ry   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r  r  r6  rE  rI  rP  defaultrU  r_  r`  rg  Zrsubrn  ZScalarro  rv  r|  r>  r  r  r  r  r  r  r  r  r  Z_fused_dropoutr  r  r  r   r  r  r  r  Zstdr  detachZliftZ
lift_freshr  r  r  r  r  Z_adaptive_avg_pool2dr  r  r  r  r  r   Zupsample_bilinear2dr<  r  r  r  Z_unsafe_viewr
  r  r  r  r  r  r:  r>  rB  rC  rF  r   Zpy_implZCompositeImplicitAutogradZupsample_bicubic2drS  rT  rX  Zadd_r  Zsub_rl  Zmul_r   Zrelu_ZreluZ	hardtanh_ZhardtanhZ
hardswish_Zleaky_relu_Z
leaky_reluZsilu_r   r   r   r    <module>   s    $                  

	         !*!	
   3         
   	N`
  	

    VP<

 
&
 

_f

 
   

F

G
     d
 
8
	