U
    Kc                    @   sR	  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
 ddlmZ ddlmZ ddlmZmZmZmZmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZ d dlm Z m!Z! d dl"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0 ddl/m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z< d dl=m>Z>m?Z? ddl/m@Z@mAZAmBZBmCZC ddlDmEZEmFZFmGZG ddlHmIZImJZJmKZK ddlLmMZM ddlNmOZOmPZP d dlQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZY d dlZm[Z[ ddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8gZ\e]e^ej_dgZ`eeSeaejbjcf e_d9d:d'Zde
eeSeaeSeaeUeWeYejeeff e_f  f f eeJeIe_d;d<d(ZgeeSeaeSeaeUeWeYejeeff e_f  f f eeJe_d=d>d*Zhd?d) ZieUe  eUeTe  eSeaeSeaeUeWeYejeeff e_f  f f eeIe_d@dAdBZjeeSeaejbjcf eOeeUeI eWeeWeRdCf eOeUeI f dDdEd$ZkejbjceSe eXejbjc f ddFdGd4Zle!eVea dHdIdZmdJdK ZneeejbjceSeaejbjcf eedLdMd%Zoeee]e]eTe] eTe] eUe eSeaejbjcf eSee_f eSeaeUeWeYejeeff e_f  f dN
dOd#ZpeeSeaejbjcf eSeaeSeaeUeWeYejeeff e_f  f f eUeYejeeff  dPdQd"ZqeeeSeaejbjcf eSeaeSeaeUeWeYejeeff e_f  f f eIeUeYejeeff  dRdSd!ZreeeSeaejbjcf eSeaeSeaeWeYejeefdf e_f f f eIe_dRdTd ZseYeeRf e
eejbjceSeaejbjcf eeSeaeSeaeUeWeYejeeff e_f  f f eUe eOeIe
dUdVd,ZteeejbjceSeaejbjcf eeSeaeSeaeUeWeYejeeff e_f  f f eUe eOeIddW
dXd-ZueeRejbjceSeaejbjcf eeSeaeSeaeUeWeYejeeff e_f  f f e_eIddY	dZd+ZveejbjceSeaejbjcf eeSeae-f eSeaeSeaeUeWeYejeeff e_f  f f eReUe e_eUe d[
d\d/ZweeTe] eSeaeSeaeUeWeYejeeff e_f  f f eSeaef ejbjceSeaejbjcf edd]d^d.ZxeeYejeeff eSeaeSeaeUeWeYejeeff e_f  f f eSeae-f dd_d`d1ZyeeSeaeSeaeUeWeYejeeff e_f  f f eSeae-f ddadbd3ZzeejbjceSeaejbjcf e_dcddd0Z{eejbjceSeaejbjcf dedfd5Z|eeeSeaejbjcf eOdgdhd8Z}eeSeaejbjcf eSeae-f eSeaef eeOeSeaeRf eTe] eTe] eIeVea e_eUe didjd&Z~eeSeaef dkdldmZejbjce_eSeaejbjcf eReOeIddndod6ZeeSeaef eSeaeWeaeff f eOeSeaeRf ee_eVea ddp	dqd7ZdueeYeeSeaeRf f e_eSeaeWeaeff f eWeRdCf eYeOeSeaeRf df eYeeSeaeRf df eYeIeSeaeRf df e_e(ds
dtd2ZdS )v    N)GraphModule)GraphNode)Argument   )propagate_qconfig_)ObserverBase)obs_or_fq_ctr_equalsfloat16_dynamic_qconfigfloat16_static_qconfigis_reuse_input_qconfig
QConfigAny)_FIXED_QPARAMS_OP_TO_OBSERVERQConfigMapping)get_flattened_qconfig_dictupdate_qconfig_for_qat   )generate_qconfig_mapupdate_qconfig_for_fusion)QuantizeHandler)PatternNodePattern)FixedQParamsFakeQuantize)is_equalization_observernode_supports_equalization)ObservedGraphModuleObservedStandaloneGraphModule)sorted_patterns_dict)_MatchResultWithQConfigfind_matches)_parent_name)3_insert_dequant_stubs_for_custom_module_lstm_output_is_custom_module_lstm+_maybe_get_custom_module_lstm_from_node_arg+_qconfig_satisfies_dtype_config_constraintsget_custom_module_class_keysall_node_args_have_no_tensorsassert_and_get_unique_device(get_non_observable_arg_indexes_and_typesget_new_attr_name_with_prefixnode_arg_is_weightnode_arg_is_biasNON_QUANTIZABLE_WEIGHT_OPS)is_activation_post_processconvert)get_qconfig_dtypesget_swapped_custom_module_class"activation_is_statically_quantizedactivation_is_int8_quantized)get_pattern_to_dtype_configsget_module_to_qat_module&get_fusion_pattern_to_root_node_getter)BackendConfigDTypeConfigget_native_backend_config) get_pattern_to_quantize_handlers)PrepareCustomConfigStandaloneModuleConfigEntry)AnyDictListOptionalSetTupleTypeUnion)defaultdictDO_NOT_OBS_DTYPE_LISTadd_matched_node_name_to_set*get_arg_target_is_dynamic_as_input_to_node%get_arg_target_dtype_as_input_to_nodeget_arg_target_dtype_as_output$get_target_activation_dtype_for_nodeget_standalone_module_configsinsert_observerinsert_observers_for_modelis_activation_post_process_node'is_input_arg_dtype_supported_by_backendis_observer_in_same_graph$is_output_dtype_supported_by_backend2maybe_insert_input_equalization_observers_for_node,maybe_insert_input_observer_for_arg_or_kwarg%maybe_insert_input_observers_for_node*maybe_insert_observers_before_graph_output%maybe_insert_output_observer_for_node'maybe_make_input_output_share_observersmaybe_propagate_dtype_for_nodeprepare propagate_dtypes_for_known_nodesqat_swap_modulesremove_output_observer$run_prepare_fx_on_standalone_modules
save_stateswap_custom_module_to_observed)nodemodulesreturnc                 C   s*   t | tjjo(| jdko(t|t| j S )Ncall_module)
isinstancetorchfxr   opr-   strtarget)r`   ra    rj   D/tmp/pip-unpacked-wheel-gikjz4vx/torch/ao/quantization/fx/prepare.pyrN      s    )argr`   node_name_to_target_dtypeqconfigdtype_configbackend_configrb   c                    s.  t | ttfr,t fdd| D S t | ts:dS t|  }t|  }| o\| }|rj d}	|	dk	r|	\}
}nd\}
}j	dkpj	|
kot
jt
|kotjS |rj}j d d |k}tjd	d
}|dkp|o|S j}|dkp(j d d |kS dS )z] Check if the configured qconfig for the argument
    is supported by the backend or not
    c                 3   s    | ]}t | V  qd S N)rO   ).0arp   ro   r`   rm   rn   rj   rk   	<genexpr>   s       z:is_input_arg_dtype_supported_by_backend.<locals>.<genexpr>Tinput_activation_dtypeN)NNweight_dtyper   F)is_activation
bias_dtype)rd   listtupleallr   r*   r+   namegetZinput_dtypebool
is_dynamicr$   Zinput_dtype_with_constraintsrw   Zweight_dtype_with_constraintsry   )rl   r`   rm   rn   ro   rp   	is_weightis_biasrx   Zqconfig_infoZqconfig_dtypeZqconfig_is_dynamicrw   dtype_matchesqconfig_satisfies_constraintsry   rj   rt   rk   rO      sF    





  
)r`   rm   rn   ro   rb   c                 C   s8   |j }|| j d d |k}t||j}|dkp6|o6|S )z[ Check if the configured qconfig for the output
    is supported by the backend or not
    output_activation_dtyper   N)output_dtyper}   r$   Zoutput_dtype_with_constraints)r`   rm   rn   ro   r   r   r   rj   rj   rk   rQ      s    	 c                 C   sL   t | ||}t| jdkrHt| jd trH|tjkrH| jd jdkrHdS dS )z Check if observer in same graph
    when the node output is not fp32 and input is 'placeholder'
    the input is assumed to be quantized, so it is observed
    in a different place rather than not observed.
    r   placeholderFT)rI   lenargsrd   r   re   quint8rg   )r`   ra   rm   Znode_output_dtyperj   rj   rk   rP      s
    )patternmatched_node_patternrm   rn   rp   rb   c              	   C   s   |dks| dkrdS |dk	r(t |dks,tt|}|| g }t|}|}|d }	|D ]X}
d}t|jt|j  D ]}|ot	|||||
|}qx|ot
|	|||
}|rX dS qXdS )z Check if the dtype configuration of a pattern is supported by
    the backend or not, and whether the qconfig satisfies constraints
    specified in the corresponding dtype config.
    NTr   r   F)r   AssertionErrorr3   r~   _default_root_node_getterrz   r   kwargsvaluesrO   rQ   )r   r   rm   rn   rp   Zpattern_to_dtype_configsZdtype_configs	root_nodeZ
input_nodeZoutput_nodero   	supportedrl   rj   rj   rk   9_is_pattern_dtype_config_and_qconfig_supported_by_backend   s8            r   .)r`   ra   prepare_custom_configparent_qconfigparent_backend_configrb   c                 C   sz   t | j}t|| }tdddd}|j||}|j||}|jpPt 	|}|j
}	|jpbt }|jpl|}
||	||
fS )z
    Returns the standalone module QConfigMapping and PrepareCustomConfig
    for `node`, assuming that the module pointed to by `node` is
    a standalone modules.
    Nrj   )rh   ri   typer;   standalone_module_classesr~   standalone_module_namesqconfig_mappingr   Z
set_globalexample_inputsr   r:   rp   )r`   ra   r   r   r   module_namemodule_typeZconfig_entryr   r   rp   rj   rj   rk   rK     s    

)rootmodule_to_qat_modulerb   c                 C   s   t | |ddd d S )NTF)mappingZinplaceZremove_qconfig)r.   )r   r   rj   rj   rk   r[   &  s    )r   sc                 C   s>   t | tr|| j n"t | ttfr:| D ]}t|| q*d S rq   )rd   r   addr}   rz   r{   rF   )r   r   
maybe_noderj   rj   rk   rF   +  s
    
c                 C   s   t | ts| d } q | S )N)rd   r   )Znode_patternrj   rj   rk   r   3  s    

r   )r`   observermodelra   graphrb   c           
   	   C   s~   t |}|r|| t|r*| jd }nd}t|}||}t||| |||< ||  |d|| fi }	W 5 Q R X |	S )zp
    Attaches `observer` to `model`, and creates a node which calls
    `observer` on the output of `node`.
    Z_equalization_process_Zactivation_post_process_rc   )r'   tor   r}   r)   setattrZinserting_afterZcreate_node)
r`   r   r   ra   r   Zmodel_deviceprefixZget_new_observer_nameZobserver_nameZnew_obsrj   rj   rk   rL   8  s$    
   )
r`   rn   inputs_seen_counteroutputs_seen_counterinput_quantized_idxsoutput_quantized_idxsqhandlerra   cache_for_no_tensor_checkrb   c	                 C   s  | j dkrB||kr(tjdftjdfdS tjdftjdfdS nv| j dkr@t| ||}	|	rhdddS | j dko|| jtjk}
|
rtjdftjdfdS |dk	r*|dk	r*| r*t	|\}}}|dk	}|dk	r|ntj}|tj
kr|tj
kr|dkrtj
ntj}||f|df|df|dfdS tjdftjdfdS | j d	krbtjdftjdfdS | j d
kr||krtjdftjdfdS tjdftjdfdS ntd|   dS )aR  
    For each op attribute in the op's input activation, output activation,
    weight, bias - returns the settings of dtype and is_dynamic we expect
    for the `quantize` call in the reference model representation, or None
    if there is no `quantize` call needed.

    For example, if we have a node corresponding to `op0` in

      x0 -> op0 -> x1

    And we want a reference quantized representation to be

      x0 -> quant_static -> dequant -> op0 -> quant_dynamic -> dequant -> x1

    Then this function will return

      {
        'input_activation': {'dtype': torch.quint8, is_dynamic: False},
        'output_activation': {'dtype': torch.quint8, is_dynamic: True},
      }

    Note: this is for activations only, weight dtypes are not handled here.

    TODO(future PR, if needed): explicitly spell out the non-Tensor
    dtypes.
    r   F)rv   r   )rc   call_methodcall_functionNr   T)rv   rw   ry   r   Zget_attroutputzneed to handle )rg   re   r   floatr&   ri   operatorgetitemZinput_output_observedr/   Zfloat16r   format_node)r`   rn   r   r   r   r   r   ra   r   Zargs_have_no_tensors
is_getitemZ	act_dtyperw   Zact_compute_dtypeZinput_act_is_dynamicZoutput_act_dtypery   rj   rj   rk   rJ   T  s    %

  





)rl   ra   rm   rb   c                 C   s   t | tstt| |}|dk	r2||j d d S t| |rj| jd }t |tsXtd||j d d S || j d }|dk	r|d S dS dS )a   Get the target output activation dtype for
    the argument in the original graph, skipping inserted observers
    We are assuming that the observers are inserted correctly, and the dtype for
    argument in quantized graph will match what is specified by the qconfig
    Nr   r   z(Currently we only support observing Node)rd   r   r   r#   r}   rN   r   )rl   ra   rm   Zcustom_module_lstm_nodeZobserved_argZtarget_dtype_inforj   rj   rk   rI     s    



)rl   r`   ra   rm   rp   rb   c                 C   s   t | tstt|| |}t|| |}| o0| }|rH||j d d S |rn|jtkrZdS ||j d d S n||j d d S dS )W Get the target argument dtype for the argument `arg`, as input
    to node `node`
    rv   r   Nrw   ry   )rd   r   r   r*   r+   r}   ri   r,   rl   r`   ra   rm   rp   r   r   rx   rj   rj   rk   rH     s    

c                 C   s^   t | tstt|| |}t|| |}| o0| }|rVd||j krV||j d d S dS dS )r   rv   r   FN)rd   r   r   r*   r+   r}   r   rj   rj   rk   rG   
  s    
)r`   rl   rn   r   ra   r   rm   r   r   rp   rb   c
           !      C   sH  t |ttfrLg }
|D ](}t| |||||||||	
}|
| qt||
S t |tsZ|S t |tsht|}|dk	oz| }|dk	st|s$t	| ||	}t
|}|r|jn|j}t|||}t|| |||	}t|| |||	}| r||kr|tjkr|tkr| p |o || jd k}nt| ||||	\}}}}|j}d}t| jD ]\}}||krP|} qpqP|dkrd}n6t|||}||krtjntj}||ko|tjk}|j}|rD| }d}|j D ]H\}}|jdkr||j }t|t|kr|j|kr|} q q|dkr@t|||||} | }n|}|S )zk
    Given a `node` and an `arg`, inserts an input observer between
    `node` and `arg` if necessary.
    Nr   Frc   )rd   rz   r{   rS   appendr   r   r   is_standalone_moduler*   r   weight
activationrI   rH   rG   re   r   rE   r   rK   input_quantized_indexes	enumerater   usersitemsrg   ri   dtyperL   )!r`   rl   rn   r   ra   r   rm   r   r   rp   Znew_arg_to_returnZ	inner_argZnew_inner_argnew_argr   r   is_reuse_input_qconfig_act_post_process_ctrZarg_as_output_target_dtypeZarg_as_input_target_dtypeZarg_as_input_target_is_dynamicZ	needs_obs_sm_prepare_custom_configZsm_input_quantized_idxsZcur_input_idxZarg_idxZarg_to_checkZnew_obs_modZexisting_obs_nodeZmaybe_obs_nodeZmaybe_obs_modZnew_obs_noderj   rj   rk   rS     s         

    	    








    )
r`   rn   r   ra   r   rm   r   r   rp   rb   c	                 C   s   |dkrdS |dk	st g }	| jD ](}
t| |
||||||||
}|	| q"i }| j D ]*\}}t| |||||||||
}|||< qZt|	| _|| _dS )a
  
    If needed, inserts observers to the input args and kwargs of `node`.
    Note: modifies `node` inplace.

    For example, if cur_node needs an observer after prev_node, we change from

      prev_node -> cur_node

    To

      prev_node -> obs -> cur_node
    N)r   r   rS   r   r   r   r{   )r`   rn   r   ra   r   rm   r   r   rp   new_argsrl   r   Z
new_kwargskkwargZ	new_kwargrj   rj   rk   rT     sF    
          

)	r`   equalization_qconfigr   ra   r   rm   	is_branchrp   rb   c                 C   s   |dkst | |sdS |r0td|  d dS g }| jD ]b}	t|	trTt| |	|r`||	 q:t| |	|}
|
rv|j	n|j
}| }t|	||||}|| q:t|| _dS )z
    If `node` needs to be equalized, find the input/weight observers it needs in
    `equalization_qconfig`, creates them, and inserts it into `graph`.

    If `node` does not need an equalization observer, returns None.
    NzCannot equalize z  because it is part of a branch.)r   warningswarnr   rd   r   r+   r   r*   r   Zinput_activationrL   r{   )r`   r   r   ra   r   rm   r   rp   r   rl   r   Zact_eq_process_ctrZnew_eq_obs_modZnew_eq_obs_noderj   rj   rk   rR     s4    



    )
r`   r   ra   r   matchesrm   matched_patternr   is_qatrb   c	                 C   s   | | jd\}	}
}}}|dkr$dS |dk	s0t| jdksBtd|dk	oP| }|| j d \}}|ttjg k}|o~t|}|o| }|r|j	}t
|r||||}| }t| ||||S dS dS )z
    If `node` needs an output observer, creates it, inserts it into `graph`
    and returns it.

    If `node` does not need an output observer, returns None.
    NNNNNNr   z3observer insertion for outputs is handled elsewherer   )r~   r}   r   rg   r   rE   re   r   r1   r   r2   Zget_activation_ctrrL   )r`   r   ra   r   r   rm   r   r   r   r   r   r   rn   r   r   r   Zshould_insert_observerr   r   rj   rj   rk   rV     s8     )graph_output_noder   rm   qconfig_mapr   ra   r   rb   c           
         s   |dgks|g kst d|g kr&dS tj}ttjttttttt	tjt
f tf  f f tttf tjjtttjjf ttd fdd g }| jD ]}	| |	|||||| qt|| _dS )z
    If the output needs to be quantized and there are any nodes
    in the output which are not already observed, inserts observers
    for those nodes.
    r   z,unrecognized format of output_quantized_idxsN)r   target_dtyperm   r   r   ra   r   rb   c                    s   t | tr\t| ||}||krV|| j}|dk	s:td| }	t| |	|||}
|
S | S nt | tt	frg }| D ]}|
 ||||||| qrt | tr|S t	|S n@t | tri }|  D ] \}} |||||||||< q|S |S dS )a`  
        Navigate an arbitrary data structure of lists, tuples, dicts.
        For each container type, recurse on all inputs. Once any Node
        is found, insert an observer if needed and do not recurse further.

        For example, given a structure of

          {'foo1': [[bar1]], 'foo2': {'foo3': [[[bar3]]]}}

        we recurse down to bar1 and bar3, observe them if necessary,
        and if we inserted an observer then replace the original node
        with its observer.

        Returns the data structure with all nodes needing observation being
        replaced by their observers.
        Nz=Quantizing the output node without a qconfig is not supported)rd   r   rI   r~   r}   r   r   rL   rz   r{   r   dictr   )r   r   rm   r   r   ra   r   Zthis_node_dtypern   Zobserver_modZobserver_noderesultsZ
inner_nodeZresults_dictr   Zinner_v&_recursive_maybe_replace_node_with_obsrj   rk   r   a  sd    
  
         


     
zZmaybe_insert_observers_before_graph_output.<locals>._recursive_maybe_replace_node_with_obs)r   re   r   r   r   r=   rh   r?   rA   rC   r   r   r   nnModuler   r   r   r{   )
r   r   rm   r   r   ra   r   Zoutput_target_dtyper   Zold_argrj   r   rk   rU   D  s:    (
>
     )r`   r   rm   r   rb   c           
      C   sr   |df|| j  d< |df|| j  d< || j d\}}}}}|dk	rn| rn| jd }	t|	trnt|	||| dS )a9  
    Assigns `target_dtype` to `node`, setting `is_dynamic` to False. If `node`
    is a general tensor shape op
    (see GeneralTensorShapeOpQuantizeHandler in quantization_patterns.py for more details)
    also call this function recursively on
    the first argument, to propagate the dtype to the caller.
    Frv   r   r   Nr   )r}   r~   is_general_tensor_value_opr   rd   r   rX   )
r`   r   rm   r   r   r   r   r   rn   Z	prev_noderj   rj   rk   rX     s     

   )r   rm   r   rb   c              	   C   s   | j D ]}t|}|D ]p}|| |}|D ]Z}|j| }t|tsLt|trVt|}	n|g}	|	D ]"}
t|
tjjj	r`t
|
||| q`q*qqdS )a  
    Currently we assume that inputs to the graph are either `torch.float` or
    `torch.quint8`, which is not always correct. For ops such as
    `x.masked_fill(mask, value)`, we know that the dtype of  `mask` is a
    `BoolTensor`. Propagate this information throughout the graph.

    Note: not all dtypes in the graph will be correct after this pass, but a
    higher percentage of them will be correct. Hopefully in the future we can
    replace this with a better way to reason about dtypes of tensors.
    N)nodesr(   r   rd   r{   rz   re   rf   r`   r   rX   )r   rm   r   r`   Znon_observable_arg_dictZarg_typeZnon_observable_indicesindexrl   arg_listZcur_argrj   rj   rk   rZ     s"    


   )r`   r   ra   rb   c                 C   s  d}t t| jD ](}t| j| tttfr| j| } q<q|dkrHdS t|ttfr`|d }nt|trp|}ndS d}t||st|tsdS |jdkrdS d}t t|jD ]}|j| }t|tr qq|dkrdS |}|d7 }|dkrxt	dqxt|tst	|j
}t|tst	|| }	t|ttfrt|D ]\}
}|
dkrTq>d}t||st|jdk rz dS |jd }|d7 }|dkrXt	dqXt|j
\}}t|| ||	 q>| j D ]8\}}t||st	t|j
\}}t|| ||	 qdS )	a  
    Ensures that we share an observer
    for all input arguments as well as the output argument. In detail, given
    a graph of

      x0 -> obs0 -> op -> x2
                  /
      x1 -> obs1 /

    where node obs0 points to observer instance observer0,
    obs1 points to observer1 and obs2 points to observer2, we make nodes obs1
    and ob2 point to observer0.
    Returns: whether the operation succeeded or not
    NFr   r   r   i'  z(Unable to find observer of previous nodeT)ranger   r   rd   r   rz   r{   rN   rg   r   ri   rh   r   r    r   r   r   )r`   r   ra   Z	first_argiZfirst_arg_argZiteration_guardZtrace_back_nodeZtarget_to_useZobs_mod_to_useZ	input_idxZ	input_argparent_namer}   output_obs_noder   rj   rj   rk   rW     sh    











)r`   r   ra   c                 C   sD   t | j }|D ],\}}t||s(t||  |j| qd S rq   )rz   r   r   rN   r   Zreplace_all_uses_withr   Z
erase_node)r`   r   ra   r   r   r   rj   rj   rk   r\   C  s
    
)r`   rn   ra   r   c           
      C   sH   || j  }|j}t|||}||}t| j \}}	t|| |	| d S rq   )ri   float_to_observed_mappingr0   
from_floatr    r   )
r`   rn   ra   r   Zcustom_moduleZcustom_module_class_mappingZobserved_custom_module_classZobserved_custom_moduler   r}   rj   rj   rk   r_   M  s    
  )r   ra   r   r   r   r   equalization_config_mapr   r   rp   observed_node_namesr   rb   c           +      C   s  t t}i }d}d}t| jdd}| jjD ]^}||jd\}}}}}t|||||||||	||j< |jdkrx|d7 }|jdkr,|d7 }q,t	| j|| t
| jj}d}d}d}|D ]}|jdkrʐn|jd	kr||jd\}}}}}||jd}||j }|dk}|jd
ko$|jtjk}|dks<|s<|oF|jdk }t|||||	}|s|rt| jdd}|jdkr||dk	stt||
 d} t|jdkr0t|jd tr0t|jd jdkr0|jd jD ]N}!||!jddk	p|!jdkot|t|!j t}"|!|kr|"rd} qt|}||k}#|#rtt||| ||||||		 t||| |||| |	 ||k}$|dk	o| }%t|}&|$rt||||rt|| || t|||| nt || |||||||	}'|'dk	rt
|j! }(|(D ] })|)|'krq|)"||' qt#|||}*|%r8|*s>|&rXt$|| |sXt%|| | |dk	r|& rt|||| nt'||||| || |jdkr|d7 }q|jdkr|d7 }|}q|S )a$  
    Inserts observers, using the following high level algorithm:

    For each node in the graph:
      1. determine the target dtype of this node in the quantized graph, and save
           it for future steps
      2. determine the target dtype or all args and kwargs of this node
      3. if any arg or kwarg's target dtype does not match the current node's
           dtype, insert an observer
      4. if the current node needs an output observer, insert it

    For example:

    - starting graph:
        x0 -> linear -> x1

    - observed graph after processing x0:
        x0(fp32)

    - observed graph after processing linear:
        x0(fp32) -> x0_obs0(int8) -> linear(int8) -> linear_obs0(int8)

    - observed graph after processing x1:
        x0(fp32) -> x0_obs0(int8) -> linear(int8) -> linear_obs0(int8) -> x1

    After a node is processed, the naive observer placement is guaranteed to be
    complete for that node and all of its predecessors. There can be future
    passes which optimize the graph by deduplicating observers, etc.
    r   FZremove_duplicater   r   r   r   N)rc   r   r   r   r   rc   T)(rD   r   named_modulesr   r   r~   r}   rJ   rg   rZ   rz   ri   r   r   r   r   rF   r   r   rd   r   r   rh   r   r   rT   rR   r   r   r"   r!   r_   rV   keysZreplace_input_withrP   rW   r\   Zis_custom_modulerU   )+r   ra   r   r   r   r   r   r   r   rp   r   r   rm   r   r   r   r`   r   r   r   r   rn   Znodes_before_observationZresults_nodeZ	last_noder   r   Zthis_node_dtype_infoZoutput_not_a_tensorr   Zskip_inserting_observersZis_supported_by_backendZis_quantized_branchuserZis_user_quantizedZis_input_node_of_the_patternZis_last_node_of_patternr   r   Zmaybe_output_obs_nodeZ
orig_usersZ	user_nodeZis_observer_in_same_graph_rj   rj   rk   rM   \  s2   D       



  


 



    
                 

    

)r   r   c           	      C   s   t jtjg}t| jdd}| jjD ]}|jdkr:|j}n|jdkrTt	||j }nd}|t
kr$d}||jd}|dkrd}n8|t
| g D ](}t|jtj|dst|j|rd}q|r$td| |f q$dS )	zk
    Validate whether the correct observers are configured for fixed qparams ops in the model, if any.
    Fr   r   rc   NT)r   a]  QConfigMapping must specify fixed qparams observer for fixed qparams op '%s' type: '%s'. Please use torch.ao.quantization.get_default_qconfig_mapping or torch.ao.quantization.get_default_qat_qconfig_mapping instead. Example: 
    qconfig_mapping = get_default_qconfig_mapping("fbgemm") 
    model = prepare_fx(model, qconfig_mapping, example_inputs))r
   r   r   r   r   r   r   rg   ri   r   r   r~   r}   r	   r   Z	with_args
ValueErrorr   )	r   r   Zallowed_observer_ctrsr   r`   Z!module_type_or_function_or_methodZbad_observerrn   Zobserver_ctrrj   rj   rk    _validate_fixed_qparams_qconfigso  s6    




r   )r   r   ra   r   r   rp   rb   c              	   C   s   |  D ]\}\}}}	}
}|
dkr&qn
|
 s0qt|||||\}}}}||j }tjjjj}|||||||d}t	|j
}t||j|}t|j\}}t|| || |||j< qdS )z
    Runs prepare_fx on each standalone module. Note: this does
    not modify the graph, it just replaces the unobserved modules with
    their observed versions.
    N)r   r   rp   )r   r   rK   ri   re   ZaoZquantizationZquantize_fxZ_prepare_standalone_module_fxsetpreserved_attributesr   r   r    r   )r   r   ra   r   r   rp   	node_namer   r   r   r   rn   Zsm_qconfig_mappingZsm_example_inputsr   Zsm_backend_configZstandalone_modulerY   Zobserved_standalone_moduler   r   r}   rj   rj   rk   r]     sP        


 
)	observedr   node_name_to_scoper   equalization_qconfig_mapr   r   r   rb   c                 C   s.   || _ || _|| _|| _|| _|| _|| _d S rq   )Z_qconfig_mapZ_prepare_custom_configZ_node_name_to_scopeZ_equalization_qconfig_mapZ_qconfig_mappingZ_is_qatZ_observed_node_names)r   r   r   r   r   r   r   r   rj   rj   rk   r^     s    
F)
r   r   r   r   r   r   _equalization_configrp   r   rb   c	                 C   s  |dkrt  }|dkrt }t|tr:td t|}t|trXtd t|}t|trvtd t |}t|trtd t|}t|tstt|tstt	
|}t	
|}i }	|dkrt }t|}	t|	}	t|}
t| | t| | t|}t| ||  |r<t|}t| | t|i  t| jdd}t| || j||}t| || j||}t| | t|j }t|j }t|j }t!| j||	|
|||}i }|" D ] \}}||| f}|||< q|j#}|j$}t%| ||||| t& }t'| |||| j|||||||}t(| ||||||| t&|j)}t*| | j|} |r|dk	sjtt|j+d t,std	t-.|| _/t-.|| _0| S )
aQ   standalone_module means it a submodule that is not inlined in
    parent module, and will be quantized separately as one unit.

    How the standalone module is observed is specified by `input_quantized_idxs` and
    `output_quantized_idxs` in the prepare_custom_config for the standalone module
    Args:
        node_name_to_scope: mapping from node name to the scope of the module which contains the node.
        The scope is a tuple of fully qualified path of the module and the type of the module
    Returns:
        model(GraphModule): prepared standalone module
        attributes:
            _standalone_module_input_quantized_idxs(List[Int]): a list of
                indexes for the graph input that is expected to be quantized,
                same as input_quantized_idxs configuration provided
                for the standalone module
            _standalone_module_output_quantized_idxs(List[Int]): a list of
                indexs for the graph output that is quantized
                same as input_quantized_idxs configuration provided
                for the standalone module
    NzPassing a QConfig dictionary to prepare is deprecated and will not be supported in a future version. Please pass in a QConfigMapping instead.zPassing a QConfig dictionary to prepare for equalization is deprecated and will not be supported in a future version. Please pass in a QConfigMapping instead.zPassing a prepare_custom_config_dict to prepare is deprecated and will not be supported in a future version. Please pass in a PrepareCustomConfig instead.zPassing a backend_config_dict to prepare is deprecated and will not be supported in a future version. Please pass in a BackendConfig instead.Fr   r   zVstandalone module only supports returning simple value currently(not tuple, dict etc.))1r:   r   rd   r=   r   r   	from_dictr6   r   copydeepcopyr8   r9   r   r5   r   r   r   Zto_dictr4   r[   r   r   r   r   r   r   rz   r   r   r   r%   r   r   r   r   Zoutput_quantized_indexesr]   r   rM   r^   r   r   r   r   re   ZtensorZ'_standalone_module_input_quantized_idxsZ(_standalone_module_output_quantized_idxs)r   r   r   r   r   r   r   rp   r   Zpattern_to_quantize_handlerZroot_node_getter_mappingZflattened_qconfig_dictr   ra   r   r   r   r   Zcustom_module_classesZmatches_without_qconfigr   r   Zmatch_without_qconfigZmatch_with_qconfigr   r   r   Zresult_noder   rj   rj   rk   rY     s    













	    

              
    
)NNNF)r   re   r   r   Ztorch.fxr   Ztorch.fx.graphr   r   Ztorch.fx.noder   quantizer   r   r   rn   r	   r
   r   r   r   r   r   r   Zqconfig_mapping_utilsr   r   r   r   Zquantization_patternsr   Z(torch.ao.quantization.quantization_typesr   r   Ztorch.ao.quantizationr   Z	_equalizer   r   Zgraph_moduler   r   Zpattern_utilsr   Zmatch_utilsr   r   utilsr    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   Ztorch.ao.quantization.quantizer-   r.   r/   r0   r1   r2   Zbackend_config.utilsr3   r4   r5   rp   r6   r7   r8   Zbackend_config_utilsr9   Zcustom_configr:   r;   typingr<   r=   r>   r?   r@   rA   rB   rC   collectionsrD   __all__intr   r   rE   rh   r   r   rN   r   r   rO   rQ   rP   r   rK   r[   rF   r   rL   rJ   rI   rH   rG   rS   rT   rR   rV   rU   rX   rZ   rW   r\   r_   rM   r   r]   r^   rY   rj   rj   rj   rk   <module>   s  8( (0(
($ 
  ( (&
( (8(.
(5(
f(
(
&^


  (/

    
 