# torch/ao/quantization/fx/_lower_to_native_backend.py
# Readable source reconstructed from a compiled-bytecode artifact: the imports, names,
# docstrings, and lowering tables below are recovered from the artifact's string
# constants; function bodies that could not be recovered are left as annotated `...` stubs.
import torch
from torch.fx import map_arg, Node
from torch.fx.graph import Graph
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.intrinsic as nni
import torch.nn.intrinsic.quantized as nniq
import torch.nn.intrinsic.quantized.dynamic as nniqd
import torch.ao.nn.quantized as nnq
import torch.ao.nn.quantized.dynamic as nnqd
import torch.ao.nn.quantized.reference as nnqr
from torch.ao.nn.quantized.modules.utils import WeightedQuantizedModule
from .graph_module import QuantizedGraphModule
from .utils import (
    collect_producer_nodes,
    get_linear_prepack_op_for_dtype,
    get_new_attr_name_with_prefix,
    get_qconv_prepack_op,
    graph_module_from_producer_nodes,
)
from ..utils import _parent_name
from ..qconfig import QConfigAny
from ..quantization_mappings import get_quantized_operator
from .utils import create_node_from_old_node_preserve_meta
from typing import Dict, Tuple, Type, List, Callable, Any, Union, Set, Optional
import operator

# Arguments of the reference functional call that must be dropped when switching to the
# corresponding quantized op.
QOP_TO_ARG_NAMES_TO_SKIP = {
    torch._ops.ops.quantized.hardswish: ['inplace'],
    torch._ops.ops.quantized.elu: ['inplace'],
    torch._ops.ops.quantized.dropout: ['inplace'],
    torch._ops.ops.quantized.instance_norm:
    ['running_mean', 'running_var', 'use_input_stats', 'momentum'],
}

def _is_node_in_list(node, modules, func_list, method_list, module_type_list):
    is_call_function = node.op == "call_function" and node.target in func_list
    is_call_method = node.op == "call_method" and node.target in method_list
    is_call_module = node.op == "call_module" and type(modules[str(node.target)]) in module_type_list
    return is_call_function, is_call_method, is_call_module

def is_fixed_qparams_node(node, modules):
    func_list = [
        torch.nn.functional.hardsigmoid,
        torch.nn.functional.sigmoid,
        torch.sigmoid,
        torch.tanh,
    ]
    method_list = ["hardsigmoid", "hardsigmoid_", "sigmoid", "sigmoid_", "tanh", "tanh_"]
    module_type_list = [torch.nn.Hardsigmoid, torch.nn.Sigmoid, torch.nn.Tanh, torch.nn.Softmax]
    return _is_node_in_list(node, modules, func_list, method_list, module_type_list)

def is_default_node(node, modules):
    func_list = [
        torch.nn.functional.elu,
        torch.nn.functional.hardswish,
        torch.nn.functional.instance_norm,
        torch.nn.functional.layer_norm,
        torch.nn.functional.leaky_relu,
        torch.nn.functional.dropout,
    ]
    method_list: List[Any] = []
    module_type_list = [
        nnqr.ConvTranspose1d, nnqr.ConvTranspose2d,
        torch.nn.ELU, torch.nn.LeakyReLU, torch.nn.Hardswish,
        torch.nn.InstanceNorm1d, torch.nn.InstanceNorm2d, torch.nn.InstanceNorm3d,
        torch.nn.LayerNorm, torch.nn.Dropout, torch.nn.PReLU,
        torch.nn.BatchNorm2d, torch.nn.BatchNorm3d,
        nni.BNReLU2d, nni.BNReLU3d,
    ]
    return _is_node_in_list(node, modules, func_list, method_list, module_type_list)

def is_copy_node(node, modules):
    func_list = [
        torch.adaptive_avg_pool1d,
        torch.nn.functional.adaptive_avg_pool2d,
        torch.nn.functional.adaptive_avg_pool3d,
        torch.nn.functional.hardtanh,
        torch.nn.functional.hardtanh_,
        torch.nn.functional.interpolate,
        torch.nn.functional.max_pool1d,
        torch.nn.functional.max_pool2d,
        torch.nn.functional.max_pool3d,
        torch.nn.functional.relu,
        torch.nn.functional.relu6,
        torch.avg_pool1d,
        torch._C._nn.avg_pool2d,
        torch._C._nn.avg_pool3d,
        torch.clamp,
        torch.flatten,
        torch.mean,
        operator.floordiv,
    ]
    method_list = ["clamp", "mean", "relu", "relu_"]
    module_type_list = [
        torch.nn.AdaptiveAvgPool1d, torch.nn.AdaptiveAvgPool2d, torch.nn.AdaptiveAvgPool3d,
        torch.nn.AvgPool1d, torch.nn.AvgPool2d, torch.nn.AvgPool3d,
        torch.nn.Hardtanh, torch.nn.MaxPool1d, torch.nn.MaxPool2d, torch.nn.MaxPool3d,
        torch.nn.ReLU, torch.nn.ReLU6,
    ]
    return _is_node_in_list(node, modules, func_list, method_list, module_type_list)

def is_general_tensor_shape_node(node, modules):
    func_list = [torch.transpose, torch.repeat_interleave, torch.squeeze, torch.stack, torch.unsqueeze]
    method_list = [
        "contiguous", "detach", "detach_", "permute", "repeat", "repeat_interleave",
        "reshape", "resize_", "shape", "size", "squeeze", "squeeze_", "transpose",
        "unsqueeze", "unsqueeze_", "view",
    ]
    module_type_list = [torch.nn.Identity]
    return _is_node_in_list(node, modules, func_list, method_list, module_type_list)

def is_other_node(node, modules):
    func_list = [torch.cat]
    method_list: List[Any] = []
    module_type_list: List[Any] = []
    return _is_node_in_list(node, modules, func_list, method_list, module_type_list)

def is_special_pattern_node(node, modules):
    res_function, res_method, res_module = False, False, False
    for checker in [is_fixed_qparams_node, is_default_node, is_copy_node,
                    is_general_tensor_shape_node, is_other_node]:
        is_call_function, is_call_method, is_call_module = checker(node, modules)
        res_function = res_function or is_call_function
        res_method = res_method or is_call_method
        res_module = res_module or is_call_module
    return res_function, res_method, res_module

def is_dequantize_node(node):
    return isinstance(node, Node) and node.op == "call_method" and node.target == "dequantize"

def is_getattr_tensor_metadata_node(node):
    return node.op == "call_function" and node.target == getattr and node.args[1] in ["shape"]

def should_skip_lowering(op: torch.fx.node.Node, qconfig_map: Dict[str, QConfigAny]):
    """
    Return True if the op is configured with a None qconfig, False otherwise.
    Note: maybe need to generalize this to also check for the dtype, and we
    only lower when dtype matches, but right now fbgemm/qnnpack only support
    a single dtype, so it is OK for now.
    """
    return op.name in qconfig_map and qconfig_map[op.name] is None

# Mapping from reference module class to the statically quantized module class to lower to
STATIC_LOWER_MODULE_MAP: Dict[Type[nn.Module], Type[WeightedQuantizedModule]] = {
    nnqr.Linear: nnq.Linear,
    nnqr.Conv1d: nnq.Conv1d,
    nnqr.Conv2d: nnq.Conv2d,
    nnqr.Conv3d: nnq.Conv3d,
}

# Mapping from reference module class to the dynamically quantized module class to lower to
DYNAMIC_LOWER_MODULE_MAP: Dict[Type[nn.Module], Type[nn.Module]] = {
    nnqr.Linear: nnqd.Linear,
    nnqr.GRUCell: nnqd.GRUCell,
    nnqr.LSTMCell: nnqd.LSTMCell,
    nnqr.RNNCell: nnqd.RNNCell,
    nnqr.LSTM: nnqd.LSTM,
}

# Mapping from reference module class to the weight-only quantized module class to lower to
WEIGHT_ONLY_LOWER_MODULE_MAP: Dict[Type[nn.Module], Type[nn.Module]] = {
    nnqr.Embedding: nnq.Embedding,
    nnqr.EmbeddingBag: nnq.EmbeddingBag,
}

SPECIAL_PATTERN_LOWER_MODULE_MAP = {
    nn.BatchNorm2d: nnq.BatchNorm2d,
    nn.BatchNorm3d: nnq.BatchNorm3d,
    nnqr.ConvTranspose1d: nnq.ConvTranspose1d,
    nnqr.ConvTranspose2d: nnq.ConvTranspose2d,
    nn.ELU: nnq.ELU,
    nn.LeakyReLU: nnq.LeakyReLU,
    nn.Hardswish: nnq.Hardswish,
    nn.InstanceNorm1d: nnq.InstanceNorm1d,
    nn.InstanceNorm2d: nnq.InstanceNorm2d,
    nn.InstanceNorm3d: nnq.InstanceNorm3d,
    nn.LayerNorm: nnq.LayerNorm,
    nn.Dropout: nnq.Dropout,
    nn.Softmax: nnq.Softmax,
    nn.PReLU: nnq.PReLU,
    nni.BNReLU2d: nniq.BNReLU2d,
    nni.BNReLU3d: nniq.BNReLU3d,
}

# Mapping from fused module class to a (reference module class, fused quantized module class) pair
STATIC_LOWER_FUSED_MODULE_MAP: Dict[Type[nn.Module], Tuple[Type[nn.Module], Type[WeightedQuantizedModule]]] = {
    nni.LinearReLU: (nnqr.Linear, nniq.LinearReLU),
    nni.ConvReLU1d: (nnqr.Conv1d, nniq.ConvReLU1d),
    nni.ConvReLU2d: (nnqr.Conv2d, nniq.ConvReLU2d),
    nni.ConvReLU3d: (nnqr.Conv3d, nniq.ConvReLU3d),
}

DYNAMIC_LOWER_FUSED_MODULE_MAP: Dict[Type[nn.Module], Tuple[Type[nn.Module], Type[nn.Module]]] = {
    nni.LinearReLU: (nnqr.Linear, nniqd.LinearReLU),
}

# Mapping from a functional to a (quantized op, quantized op fused with relu) pair
STATIC_LOWER_FUNCTIONAL_MAP: Dict[Callable, Tuple[Callable, Callable]] = {
    F.linear: (torch.ops.quantized.linear, torch.ops.quantized.linear_relu),
    F.conv1d: (torch.ops.quantized.conv1d, torch.ops.quantized.conv1d_relu),
    F.conv2d: (torch.ops.quantized.conv2d, torch.ops.quantized.conv2d_relu),
    F.conv3d: (torch.ops.quantized.conv3d, torch.ops.quantized.conv3d_relu),
}

WEIGHT_PREPACK_OPS: Set[Callable] = {
    torch._ops.ops.quantized.linear_prepack,
    torch._ops.ops.quantized.linear_prepack_fp16,
    torch._ops.ops.quantized.conv1d_prepack,
    torch._ops.ops.quantized.conv2d_prepack,
    torch._ops.ops.quantized.conv3d_prepack,
}

# Mapping from a functional to a dict keyed by (activation dtype, weight dtype) holding the
# (dynamically quantized op, dynamically quantized op fused with relu) pair to lower to
DYNAMIC_LOWER_FUNCTIONAL_MAP: Dict[Callable, Dict[Tuple[torch.dtype, torch.dtype], Tuple[Callable, Optional[Callable]]]] = {
    F.linear: {
        (torch.quint8, torch.qint8): (torch.ops.quantized.linear_dynamic,
                                      torch.ops.quantized.linear_relu_dynamic),
        (torch.float16, torch.float16): (torch.ops.quantized.linear_dynamic_fp16,
                                         torch.ops.quantized.linear_relu_dynamic_fp16),
    },
    # dynamic conv + relu fusion is not available
    F.conv1d: {(torch.quint8, torch.qint8): (torch.ops.quantized.conv1d_dynamic, None)},
    F.conv2d: {(torch.quint8, torch.qint8): (torch.ops.quantized.conv2d_dynamic, None)},
    F.conv3d: {(torch.quint8, torch.qint8): (torch.ops.quantized.conv3d_dynamic, None)},
}

CONV_FUNCTIONAL_OPS: Set[Callable] = {F.conv1d, F.conv2d, F.conv3d}

QBIN_OP_MAPPING: Dict[Union[Callable, str], Callable] = {
    operator.add: torch.ops.quantized.add,
    torch.add: torch.ops.quantized.add,
    operator.mul: torch.ops.quantized.mul,
    torch.mul: torch.ops.quantized.mul,
    torch.matmul: torch.ops.quantized.matmul,
}

QBIN_RELU_OP_MAPPING: Dict[Union[Callable, str], Callable] = {
    operator.add: torch.ops.quantized.add_relu,
    torch.add: torch.ops.quantized.add_relu,
    operator.mul: torch.ops.quantized.mul_relu,
    torch.mul: torch.ops.quantized.mul_relu,
}

def fold_weight(
    quantized: QuantizedGraphModule,
    node_name_to_scope: Dict[str, Tuple[str, type]]
) -> QuantizedGraphModule:
    """
    Trace back from the weight node until we hit getattr, reconstruct the
    graph module with the traced nodes and run the graph module to pack the
    weight, then replace the original chain of ops with the packed weight.
    """
    # Body not recovered from the artifact. The pass collects the producer nodes of every
    # prepack op in WEIGHT_PREPACK_OPS (collect_producer_nodes /
    # graph_module_from_producer_nodes), runs them once to obtain the packed weight,
    # stores it on the module under a '_packed_weight_*' attribute, and rebuilds the
    # graph with the prepack chain replaced by a get_attr node.
    ...

def _get_module(node: Node, modules: Dict[str, nn.Module]) -> Optional[nn.Module]:
    """
    Return the `torch.nn.Module` that corresponds to the specified node's target.
    If no such node exists, return None.
    """
    if node.op == "call_module" and str(node.target) in modules:
        return modules[str(node.target)]
    else:
        return None

def _match_static_pattern(
    node: Node,
    modules: Dict[str, nn.Module],
    qconfig_map: Dict[str, QConfigAny],
    matching_modules_or_ops: List[Callable],
    dequantize_node_arg_indices: List[int]
) -> Union[Tuple[Node, Node, Node], Tuple[None, None, None]]:
    """
    Match the pattern (dequantize - ref node - quantize) against the node provided.

    If there is a match, return a 3-tuple of:
      1) q_node: the quantize node,
      2) relu_node: a relu node wrapping the ref_node, and
      3) ref_node: a reference module or functional node to replace with its quantized counterpart
    Otherwise, if there is no match, return a 3-tuple of (None, None, None).

    Parameters:
      node: The `torch.fx.Node` to match against.
      modules: A mapping from node names to modules in the model graph, used for module lookup.
      qconfig_map: A mapping from node names to the qconfigs associated with the nodes.
          If the corresponding qconfig for the reference node is None, then return no match.
      matching_modules_or_ops: Either a list of functions or a list of `torch.nn.Module`s.
          If the reference node is not in this list, then return no match.
      dequantize_node_arg_indices: A list of indices in the reference node args where dequantize
          nodes may be present. An empty list means skipping the check for dequantize nodes.
    """
    # Body not recovered from the artifact. The matcher requires `node` to be a
    # torch.quantize_per_tensor (or quantize_per_channel) call, unwraps an optional relu
    # wrapper, checks the reference node against `matching_modules_or_ops` and
    # should_skip_lowering, and verifies that the args at `dequantize_node_arg_indices`
    # are dequantize nodes before returning (q_node, relu_node, ref_node).
    ...

def _lower_static_weighted_ref_module(model: QuantizedGraphModule, qconfig_map: Dict[str, QConfigAny]):
    """
    Traverse the graph and find dequantize - ref module - quantize patterns
    and replace them with the quantized version of the ref module.
    """
    # Body not recovered from the artifact. For each match, the quantized class is looked up
    # in STATIC_LOWER_MODULE_MAP / STATIC_LOWER_FUSED_MODULE_MAP, constructed with
    # from_reference(ref_module, output_scale, output_zero_point), swapped into the parent
    # module, and the surrounding quantize/dequantize nodes are removed from the graph.
    ...

def _lower_dynamic_weighted_ref_module(model: QuantizedGraphModule):
    """
    Traverse the graph and find quantize_per_tensor_dynamic - dequantize - ref_module patterns
    and replace them with the dynamically quantized version of the ref module.
    """
    # Body not recovered from the artifact. Matches are checked for an int8 (quint8/qint8) or
    # fp16 activation compute dtype, then the class from DYNAMIC_LOWER_MODULE_MAP /
    # DYNAMIC_LOWER_FUSED_MODULE_MAP is built with from_reference and swapped in.
    ...

def _lower_weight_only_weighted_ref_module(model: QuantizedGraphModule):
    """
    Traverse the graph and find ref_module patterns
    and replace them with the weight only quantized version of the ref module.
    """
    named_modules = dict(model.named_modules(remove_duplicate=False))
    for n in model.graph.nodes:
        if n.op != "call_module" or \
           type(named_modules[str(n.target)]) not in \
           set(WEIGHT_ONLY_LOWER_MODULE_MAP.keys()):
            continue
        ref_node = n
        ref_module = named_modules[str(ref_node.target)]
        ref_class = type(ref_module)
        q_class = WEIGHT_ONLY_LOWER_MODULE_MAP.get(ref_class)
        q_module = q_class.from_reference(ref_module)

        # replace the reference module with the weight-only quantized module
        parent_name, module_name = _parent_name(ref_node.target)
        setattr(named_modules[parent_name], module_name, q_module)

def _lower_static_weighted_ref_functional(model: QuantizedGraphModule, qconfig_map: Dict[str, QConfigAny]):
    """
    Traverse the graph and replace functional reference patterns with their quantized versions.
    """
    # Body not recovered from the artifact. For matched F.linear / F.convNd calls, the
    # quantized weight is packed with get_linear_prepack_op_for_dtype / get_qconv_prepack_op
    # and the call is replaced by the op from STATIC_LOWER_FUNCTIONAL_MAP, taking the packed
    # weight, output scale, and output zero point as arguments.
    ...

def _lower_dynamic_weighted_ref_functional(model: QuantizedGraphModule, qconfig_map: Dict[str, QConfigAny]):
    """
    Traverse the graph and replace functional reference patterns with their dynamically
    quantized versions.
    Examples:
    quantize_per_tensor_dynamic - dequantize - functional linear --> linear_dynamic
    to(torch.float16) - dequantize - functional linear --> linear_dynamic_fp16
    """
    # Body not recovered from the artifact. The replacement op is selected from
    # DYNAMIC_LOWER_FUNCTIONAL_MAP keyed by (activation dtype, weight dtype), with the
    # weight packed via the ops in WEIGHT_PREPACK_OPS.
    ...

def _lower_quantized_binary_op(model: QuantizedGraphModule, qconfig_map: Dict[str, QConfigAny]):
    # Body not recovered from the artifact. Lowers dequantize - (add / mul / matmul)[ - relu] -
    # quantize patterns to the ops in QBIN_OP_MAPPING / QBIN_RELU_OP_MAPPING, passing the
    # output scale and zero point taken from the quantize node.
    ...

def special_pattern_replacement(model: QuantizedGraphModule):
    # Body not recovered from the artifact. Handles the remaining fixed-qparams / default /
    # copy / general-tensor-shape patterns recognized by is_special_pattern_node: reference
    # modules are replaced via SPECIAL_PATTERN_LOWER_MODULE_MAP, functionals via
    # get_quantized_operator, and arguments listed in QOP_TO_ARG_NAMES_TO_SKIP are dropped.
    ...

def _lower_getattr_tensor_metadta_op(model: QuantizedGraphModule):
    """ Modify the graph of the model in place to skip the extra dequantize op before
    the general tensor shape ops when possible
    """
    for n in model.graph.nodes:
        if is_getattr_tensor_metadata_node(n):
            maybe_dq = n.args[0]
            if maybe_dq.op != "call_method" or maybe_dq.target != "dequantize":
                continue
            # read the metadata directly from the dequantize node's input
            args = list(n.args)
            args[0] = n.args[0].args[0]
            n.args = tuple(args)

def _lower_to_native_backend(
    model: QuantizedGraphModule,
    qconfig_map: Dict[str, QConfigAny],
    node_name_to_scope: Dict[str, Tuple[str, type]]
) -> QuantizedGraphModule:
    """ Lower a quantized reference model (with reference quantized operator patterns)
    to the native backend in PyTorch (fbgemm/qnnpack); both backends share the same
    operator signature so they can be lowered with the same function
    """
    _lower_static_weighted_ref_module(model, qconfig_map)
    _lower_dynamic_weighted_ref_module(model)
    _lower_weight_only_weighted_ref_module(model)
    _lower_static_weighted_ref_functional(model, qconfig_map)
    _lower_dynamic_weighted_ref_functional(model, qconfig_map)
    _lower_quantized_binary_op(model, qconfig_map)
    _lower_getattr_tensor_metadta_op(model)
    special_pattern_replacement(model)
    model.graph.eliminate_dead_code()
    model = fold_weight(model, node_name_to_scope)
    model.graph.eliminate_dead_code()
    model.recompile()
    model.graph.lint()
    return model
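# Usage sketch (illustrative; not part of the original module, and the toy model below is
# hypothetical). In practice this lowering pass is not invoked directly:
# torch.ao.quantization.quantize_fx.convert_fx calls _lower_to_native_backend internally after
# a model has been prepared and calibrated with the FX graph mode quantization APIs.
if __name__ == "__main__":
    import torch
    from torch.ao.quantization import get_default_qconfig_mapping
    from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx

    class _Example(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 4)

        def forward(self, x):
            return torch.nn.functional.relu(self.linear(x))

    example_inputs = (torch.randn(1, 4),)
    qconfig_mapping = get_default_qconfig_mapping("fbgemm")
    prepared = prepare_fx(_Example().eval(), qconfig_mapping, example_inputs)
    prepared(*example_inputs)  # calibrate with representative data
    # convert_fx produces the reference model and then lowers it to fbgemm/qnnpack ops/modules
    quantized = convert_fx(prepared)
    print(quantized.graph)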