from typing import Any, Dict, Optional, Set, Tuple, Union

import warnings

import torch
from torch.fx import GraphModule
from .fx.tracer import QuantizationTracer
from .fx import fuse  # noqa: F401
from .fx import prepare  # noqa: F401
from .fx.convert import convert
from .backend_config import (  # noqa: F401
    BackendConfig,
    get_tensorrt_backend_config,
)
from .fx.graph_module import ObservedGraphModule  # noqa: F401
from .fx.custom_config import (
    ConvertCustomConfig,
    FuseCustomConfig,
    PrepareCustomConfig,
)
from .fx.utils import graph_pretty_str  # noqa: F401
from .fx.utils import get_custom_module_class_keys  # noqa: F401
from .fx.utils import get_skipped_module_name_and_classes
from .qconfig_mapping import QConfigMapping


def _check_is_graph_module(model: torch.nn.Module) -> None:
    if not isinstance(model, GraphModule):
        raise ValueError(
            "input model must be a GraphModule, "
            + "Got type:"
            + str(type(model))
            + " Please make "
            + "sure to follow the tutorials."
        )


def _swap_ff_with_fxff(model: torch.nn.Module) -> None:
    r""" Swap FloatFunctional with FXFloatFunctional
    """
    modules_to_swap = []
    for name, module in model.named_children():
        if isinstance(module, torch.ao.nn.quantized.FloatFunctional):
            modules_to_swap.append(name)
        else:
            _swap_ff_with_fxff(module)

    for name in modules_to_swap:
        del model._modules[name]
        model._modules[name] = torch.ao.nn.quantized.FXFloatFunctional()

def _fuse_fx(
    graph_module: GraphModule,
    is_qat: bool,
    fuse_custom_config: Union[FuseCustomConfig, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> GraphModule:
    r""" Internal helper function to fuse modules in preparation for quantization

    Args:
        graph_module: GraphModule object from symbolic tracing (torch.fx.symbolic_trace)
    """
    _check_is_graph_module(graph_module)
    return fuse(graph_module, is_qat, fuse_custom_config, backend_config)


class Scope(object):
    r""" Scope object that records the module path and the module type
    of a module. Scope is used to track the information of the module
    that contains a Node in a Graph of GraphModule. For example::

        class Sub(torch.nn.Module):
            def forward(self, x):
                # This will be a call_method Node in GraphModule,
                # scope for this would be (module_path="sub", module_type=Sub)
                return x.transpose(1, 2)

        class M(torch.nn.Module):
            def __init__(self):
                self.sub = Sub()

            def forward(self, x):
                # This will be a call_method Node as well,
                # scope for this would be (module_path="", None)
                x = x.transpose(1, 2)
                x = self.sub(x)
                return x

    """

    def __init__(self, module_path: str, module_type: Any):
        super().__init__()
        self.module_path = module_path
        self.module_type = module_type


class ScopeContextManager(object):
    r""" A context manager to track the Scope of Node during symbolic tracing.
    When entering a forward function of a Module, we'll update the scope information of
    the current module, and when we exit, we'll restore the previous scope information.
    """

    def __init__(
        self, scope: Scope, current_module: torch.nn.Module, current_module_path: str
    ):
        super().__init__()
        self.prev_module_type = scope.module_type
        self.prev_module_path = scope.module_path
        self.scope = scope
        self.scope.module_path = current_module_path
        self.scope.module_type = type(current_module)

    def __enter__(self):
        return

    def __exit__(self, *args):
        self.scope.module_path = self.prev_module_path
        self.scope.module_type = self.prev_module_type
        return

zScopeContextManager.__exit__)r6   r7   r8   r9   r-   r!   r"   Moduler   r2   rA   rC   r:   r   r   r4   r   r;   a   s     
def _prepare_fx(
    model: torch.nn.Module,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any]],
    is_qat: bool,
    example_inputs: Tuple[Any, ...],
    prepare_custom_config: Union[PrepareCustomConfig, Dict[str, Any], None] = None,
    _equalization_config: Optional[Union[QConfigMapping, Dict[str, Any]]] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
    is_standalone_module: bool = False,
) -> ObservedGraphModule:
    r""" Internal helper function for prepare_fx
    Args:
      `model`, `qconfig_mapping`, `prepare_custom_config`, `_equalization_config`:
      see docs for :func:`~torch.ao.quantization.prepare_fx`
      `is_standalone_module`: a boolean flag indicates whether we are
      quantizing a standalone module or not, a standalone module
      is a submodule of the parent module that is not inlined in the
      forward graph of the parent module,
      the way we quantize standalone module is described in:
      :func:`~torch.ao.quantization._prepare_standalone_module_fx`
    """
    if prepare_custom_config is None:
        prepare_custom_config = PrepareCustomConfig()
    if _equalization_config is None:
        _equalization_config = QConfigMapping()

    if isinstance(prepare_custom_config, Dict):
        warnings.warn(
            "Passing a prepare_custom_config_dict to prepare is deprecated and will not be supported "
            "in a future version. Please pass in a PrepareCustomConfig instead.")
        prepare_custom_config = PrepareCustomConfig.from_dict(prepare_custom_config)

    # swap FloatFunctional with FXFloatFunctional so that functional ops can be traced
    _swap_ff_with_fxff(model)

    skipped_module_names, skipped_module_classes = \
        get_skipped_module_name_and_classes(prepare_custom_config, is_standalone_module)
    preserved_attributes = prepare_custom_config.preserved_attributes

    # symbolically trace the model
    tracer = QuantizationTracer(skipped_module_names, skipped_module_classes)
    graph_module = GraphModule(model, tracer.trace(model))
    for attr_name in preserved_attributes:
        setattr(graph_module, attr_name, getattr(model, attr_name))

    fuse_custom_config = FuseCustomConfig().set_preserved_attributes(list(preserved_attributes))
    graph_module = _fuse_fx(
        graph_module,
        is_qat,
        fuse_custom_config,
        backend_config,
    )
    prepared = prepare(
        graph_module,
        qconfig_mapping,
        is_qat,
        tracer.node_name_to_scope,
        example_inputs=example_inputs,
        prepare_custom_config=prepare_custom_config,
        _equalization_config=_equalization_config,
        backend_config=backend_config,
        is_standalone_module=is_standalone_module,
    )

    for attr_name in preserved_attributes:
        setattr(prepared, attr_name, getattr(model, attr_name))
    return prepared


def _prepare_standalone_module_fx(
    model: torch.nn.Module,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any]],
    is_qat: bool,
    example_inputs: Tuple[Any, ...],
    prepare_custom_config: Union[PrepareCustomConfig, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> GraphModule:
    r""" [Internal use only] Prepare a standalone module, so that it can be used when quantizing the
    parent module.
    standalone_module means it is a submodule that is not inlined in the parent module,
    and will be quantized separately as one unit.

    How the standalone module is observed is specified by `input_quantized_idxs` and
    `output_quantized_idxs` in the prepare_custom_config for the standalone module

    Returns:

        * model(GraphModule): prepared standalone module. It has these attributes:

            * `_standalone_module_input_quantized_idxs(List[Int])`: a list of
              indexes for the graph input that is expected to be quantized,
              same as input_quantized_idxs configuration provided
              for the standalone module
            * `_standalone_module_output_quantized_idxs(List[Int])`: a list of
              indexes for the graph output that is quantized,
              same as output_quantized_idxs configuration provided
              for the standalone module

    """
    return _prepare_fx(
        model,
        qconfig_mapping,
        is_qat,
        example_inputs,
        prepare_custom_config,
        backend_config=backend_config,
        is_standalone_module=True,
    )


def fuse_fx(
    model: torch.nn.Module,
    fuse_custom_config: Union[FuseCustomConfig, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> GraphModule:
    r""" Fuse modules like conv+bn, conv+bn+relu etc, model must be in eval mode.
    Fusion rules are defined in torch.quantization.fx.fusion_pattern.py

    Args:

        * `model` (torch.nn.Module): a torch.nn.Module model
        * `fuse_custom_config` (FuseCustomConfig): custom configurations for fuse_fx.
            See :class:`~torch.ao.quantization.fx.custom_config.FuseCustomConfig` for more details
    Example::

        from torch.ao.quantization import fuse_fx
        m = Model().eval()
        m = fuse_fx(m)
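        # (optional) a FuseCustomConfig can be passed as well; a sketch, the attribute
        # name "preserved_attr" is made up for illustration:
        # fuse_custom_config = FuseCustomConfig().set_preserved_attributes(["preserved_attr"])
        # m = fuse_fx(m, fuse_custom_config)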

    """
    if fuse_custom_config is None:
        fuse_custom_config = FuseCustomConfig()

    if isinstance(fuse_custom_config, Dict):
        warnings.warn(
            "Passing a fuse_custom_config_dict to fuse is deprecated and will not be supported "
            "in a future version. Please pass in a FuseCustomConfig instead.")
        fuse_custom_config = FuseCustomConfig.from_dict(fuse_custom_config)

    torch._C._log_api_usage_once("quantization_api.quantize_fx.fuse_fx")
    graph_module = torch.fx.symbolic_trace(model)
    preserved_attributes: Set[str] = set(fuse_custom_config.preserved_attributes)
    for attr_name in preserved_attributes:
        setattr(graph_module, attr_name, getattr(model, attr_name))
    return _fuse_fx(graph_module, False, fuse_custom_config, backend_config)


def prepare_fx(
    model: torch.nn.Module,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any]],
    example_inputs: Tuple[Any, ...],
    prepare_custom_config: Union[PrepareCustomConfig, Dict[str, Any], None] = None,
    _equalization_config: Optional[Union[QConfigMapping, Dict[str, Any]]] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> ObservedGraphModule:
    r""" Prepare a model for post training static quantization

    Args:
      * `model` (torch.nn.Module): torch.nn.Module model

      * `qconfig_mapping` (QConfigMapping): QConfigMapping object to configure how a model is
         quantized, see :class:`~torch.ao.quantization.qconfig_mapping.QConfigMapping`
         for more details

      * `example_inputs` (Tuple[Any, ...]): Example inputs for forward function of the model,
         Tuple of positional args (keyword args can be passed as positional args as well)

      * `prepare_custom_config` (PrepareCustomConfig): customization configuration for quantization tool.
          See :class:`~torch.ao.quantization.fx.custom_config.PrepareCustomConfig` for more details

      * `_equalization_config`: config for specifying how to perform equalization on the model

      * `backend_config` (BackendConfig): config that specifies how operators are quantized
         in a backend, this includes how the operators are observed,
         supported fusion patterns, how quantize/dequantize ops are
         inserted, supported dtypes etc. See :class:`~torch.ao.quantization.backend_config.BackendConfig` for more details

    Return:
      A GraphModule with observer (configured by qconfig_mapping), ready for calibration

    Example::

        import torch
        from torch.ao.quantization import get_default_qconfig_mapping
        from torch.ao.quantization import prepare_fx

        class Submodule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 5)
            def forward(self, x):
                x = self.linear(x)
                return x

        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 5)
                self.sub = Submodule()

            def forward(self, x):
                x = self.linear(x)
                x = self.sub(x) + x
                return x

        # initialize a floating point model
        float_model = M().eval()

        # define calibration function
        def calibrate(model, data_loader):
            model.eval()
            with torch.no_grad():
                for image, target in data_loader:
                    model(image)

        # qconfig is the configuration for how we insert observers for a particular
        # operator
        # qconfig = get_default_qconfig("fbgemm")
        # Example of customizing qconfig:
        # qconfig = torch.ao.quantization.QConfig(
        #    activation=MinMaxObserver.with_args(dtype=torch.qint8),
        #    weight=MinMaxObserver.with_args(dtype=torch.qint8))
        # `activation` and `weight` are constructors of observer module

        # qconfig_mapping is a collection of quantization configurations, user can
        # set the qconfig for each operator (torch op calls, functional calls, module calls)
        # in the model through qconfig_mapping
        # the following call will get the qconfig_mapping that works best for models
        # that target "fbgemm" backend
        qconfig_mapping = get_default_qconfig_mapping("fbgemm")

        # We can customize qconfig_mapping in different ways.
        # e.g. set the global qconfig, which means we will use the same qconfig for
        # all operators in the model, this can be overwritten by other settings
        # qconfig_mapping = QConfigMapping().set_global(qconfig)
        # e.g. quantize the linear submodule with a specific qconfig
        # qconfig_mapping = QConfigMapping().set_module_name("linear", qconfig)
        # e.g. quantize all nn.Linear modules with a specific qconfig
        # qconfig_mapping = QConfigMapping().set_object_type(torch.nn.Linear, qconfig)
        # for a more complete list, please see the docstring for :class:`torch.ao.quantization.QConfigMapping`
        # argument

        # example_inputs is a tuple of inputs, that is used to infer the type of the
        # outputs in the model
        # currently it's not used, but please make sure model(*example_inputs) runs
        example_inputs = (torch.randn(1, 3, 224, 224),)

        # TODO: add backend_config after we split the backend_config for fbgemm and qnnpack
        # e.g. backend_config = get_default_backend_config("fbgemm")
        # `prepare_fx` inserts observers in the model based on qconfig_mapping and
        # backend_config. If the configuration for an operator in qconfig_mapping
        # is supported in the backend_config (meaning it's supported by the target
        # hardware), we'll insert observer modules according to the qconfig_mapping
        # otherwise the configuration in qconfig_mapping will be ignored
        #
        # Example:
        # in qconfig_mapping, user sets linear module to be quantized with quint8 for
        # activation and qint8 for weight:
        # qconfig = torch.ao.quantization.QConfig(
        #     observer=MinMaxObserver.with_args(dtype=torch.quint8),
        #     weight=MinMaxObserver.with_args(dtype=torch.qint8))
        # Note: current qconfig api does not support setting output observer, but
        # we may extend this to support these more fine grained control in the
        # future
        #
        # qconfig_mapping = QConfigMapping().set_object_type(torch.nn.Linear, qconfig)
        # in backend config, linear module also supports in this configuration:
        # weighted_int8_dtype_config = DTypeConfig(
        #   input_dtype=torch.quint8,
        #   output_dtype=torch.quint8,
        #   weight_dtype=torch.qint8,
        #   bias_type=torch.float)

        # linear_pattern_config = BackendPatternConfig(torch.nn.Linear) \
        #    .set_observation_type(ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT) \
        #    .add_dtype_config(weighted_int8_dtype_config) \
        #    ...

        # backend_config = BackendConfig().set_backend_pattern_config(linear_pattern_config)
        # `prepare_fx` will check that the setting requested by user in qconfig_mapping
        # is supported by the backend_config and insert observers and fake quant modules
        # in the model
        prepared_model = prepare_fx(float_model, qconfig_mapping, example_inputs)
        # Run calibration
        calibrate(prepared_model, sample_inference_data)
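        # after calibration, the observed model is typically lowered with convert_fx
        # (defined later in this module); a minimal follow-up sketch:
        # quantized_model = convert_fx(prepared_model)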
    """
    torch._C._log_api_usage_once("quantization_api.quantize_fx.prepare_fx")
    return _prepare_fx(
        model,
        qconfig_mapping,
        False,  # is_qat
        example_inputs,
        prepare_custom_config,
        _equalization_config,
        backend_config,
    )


def prepare_qat_fx(
    model: torch.nn.Module,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any]],
    example_inputs: Tuple[Any, ...],
    prepare_custom_config: Union[PrepareCustomConfig, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> ObservedGraphModule:
    r""" Prepare a model for quantization aware training

    Args:
      * `model` (torch.nn.Module): torch.nn.Module model
      * `qconfig_mapping` (QConfigMapping): see :func:`~torch.ao.quantization.prepare_fx`
      * `example_inputs` (Tuple[Any, ...]): see :func:`~torch.ao.quantization.prepare_fx`
      * `prepare_custom_config` (PrepareCustomConfig): see :func:`~torch.ao.quantization.prepare_fx`
      * `backend_config` (BackendConfig): see :func:`~torch.ao.quantization.prepare_fx`

    Return:
      A GraphModule with fake quant modules (configured by qconfig_mapping and backend_config), ready for
      quantization aware training

    Example::

        import torch
        from torch.ao.quantization import get_default_qat_qconfig_mapping
        from torch.ao.quantization import prepare_fx

        class Submodule(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 5)
            def forward(self, x):
                x = self.linear(x)
                return x

        class M(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.linear = torch.nn.Linear(5, 5)
                self.sub = Submodule()

            def forward(self, x):
                x = self.linear(x)
                x = self.sub(x) + x
                return x

        # initialize a floating point model
        float_model = M().train()
        # (optional, but preferred) load the weights from pretrained model
        # float_model.load_weights(...)

        # define the training loop for quantization aware training
        def train_loop(model, train_data):
            model.train()
            for image, target in train_data:
                ...

        # qconfig is the configuration for how we insert observers for a particular
        # operator
        # qconfig = get_default_qconfig("fbgemm")
        # Example of customizing qconfig:
        # qconfig = torch.ao.quantization.QConfig(
        #    activation=FakeQuantize.with_args(observer=MinMaxObserver.with_args(dtype=torch.qint8)),
        #    weight=FakeQuantize.with_args(observer=MinMaxObserver.with_args(dtype=torch.qint8)))
        # `activation` and `weight` are constructors of observer module

        # qconfig_mapping is a collection of quantization configurations, user can
        # set the qconfig for each operator (torch op calls, functional calls, module calls)
        # in the model through qconfig_mapping
        # the following call will get the qconfig_mapping that works best for models
        # that target "fbgemm" backend
        qconfig_mapping = get_default_qat_qconfig_mapping("fbgemm")

        # We can customize qconfig_mapping in different ways, please take a look at
        # the docstring for :func:`~torch.ao.quantization.prepare_fx` for different ways
        # to configure this

        # example_inputs is a tuple of inputs, that is used to infer the type of the
        # outputs in the model
        # currently it's not used, but please make sure model(*example_inputs) runs
        example_inputs = (torch.randn(1, 3, 224, 224),)

        # TODO: add backend_config after we split the backend_config for fbgemm and qnnpack
        # e.g. backend_config = get_default_backend_config("fbgemm")
        # `prepare_qat_fx` inserts observers in the model based on qconfig_mapping and
        # backend_config, if the configuration for an operator in qconfig_mapping
        # is supported in the backend_config (meaning it's supported by the target
        # hardware), we'll insert fake_quantize modules according to the qconfig_mapping
        # otherwise the configuration in qconfig_mapping will be ignored
        # see :func:`~torch.ao.quantization.prepare_fx` for a detailed explanation of
        # how qconfig_mapping interacts with backend_config
        prepared_model = prepare_qat_fx(float_model, qconfig_mapping, example_inputs)
        # Run training
        train_loop(prepared_model, train_data)
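        # after training, the usual next step is to convert the trained model
        # (see convert_fx below); a sketch:
        # prepared_model.eval()
        # quantized_model = convert_fx(prepared_model)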

    """
    torch._C._log_api_usage_once("quantization_api.quantize_fx.prepare_qat_fx")
    return _prepare_fx(
        model,
        qconfig_mapping,
        True,  # is_qat
        example_inputs,
        prepare_custom_config,
        backend_config=backend_config,
    )


def _convert_fx(
    graph_module: GraphModule,
    is_reference: bool,
    convert_custom_config: Union[ConvertCustomConfig, Dict[str, Any], None] = None,
    is_standalone_module: bool = False,
    _remove_qconfig: bool = True,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> torch.nn.Module:
    """ `is_standalone_module`: see docs in :func:`~torch.ao.quantization.prepare_standalone_module_fx`
    """
    if convert_custom_config is None:
        convert_custom_config = ConvertCustomConfig()

    if isinstance(convert_custom_config, Dict):
        warnings.warn(
            "Passing a convert_custom_config_dict to convert is deprecated and will not be supported "
            "in a future version. Please pass in a ConvertCustomConfig instead.")
        convert_custom_config = ConvertCustomConfig.from_dict(convert_custom_config)

    _check_is_graph_module(graph_module)

    quantized = convert(
        graph_module,
        is_reference,
        convert_custom_config,
        is_standalone_module,
        _remove_qconfig_flag=_remove_qconfig,
        qconfig_mapping=qconfig_mapping,
        backend_config=backend_config,
    )

    preserved_attributes = convert_custom_config.preserved_attributes
    for attr_name in preserved_attributes:
        setattr(quantized, attr_name, getattr(graph_module, attr_name))
    return quantized


def convert_fx(
    graph_module: GraphModule,
    convert_custom_config: Union[ConvertCustomConfig, Dict[str, Any], None] = None,
    _remove_qconfig: bool = True,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> torch.nn.Module:
    r""" Convert a calibrated or trained model to a quantized model

    Args:
        * `graph_module` (torch.fx.GraphModule): A prepared and calibrated/trained model (GraphModule)

        * `convert_custom_config` (ConvertCustomConfig): custom configurations for convert function.
            See :class:`~torch.ao.quantization.fx.custom_config.ConvertCustomConfig` for more details

        * `_remove_qconfig` (bool): Option to remove the qconfig attributes in the model after convert.

        * `qconfig_mapping` (QConfigMapping): config for specifying how to convert a model for quantization.

           The keys must include the ones in the qconfig_mapping passed to `prepare_fx` or `prepare_qat_fx`,
           with the same values or `None`. Additional keys can be specified with values set to `None`.

          For each entry whose value is set to None, we skip quantizing that entry in the model::

            qconfig_mapping = QConfigMapping
                .set_global(qconfig_from_prepare)
                .set_object_type(torch.nn.functional.add, None)  # skip quantizing torch.nn.functional.add
                .set_object_type(torch.nn.functional.linear, qconfig_from_prepare)
                .set_module_name("foo.bar", None)  # skip quantizing module "foo.bar"

         * `backend_config` (BackendConfig): A configuration for the backend which describes how
            operators should be quantized in the backend, this includes quantization
            mode support (static/dynamic/weight_only), dtype support (quint8/qint8 etc.),
            observer placement for each operator and fused operators.
            See :class:`~torch.ao.quantization.backend_config.BackendConfig` for more details

    Return:
        A quantized model (torch.nn.Module)

    Example::

        # prepared_model: the model after prepare_fx/prepare_qat_fx and calibration/training
        # convert_fx converts a calibrated/trained model to a quantized model for the
        # target hardware, this includes converting the model first to a reference
        # quantized model, and then lower the reference quantized model to a backend
        # Currently, the supported backends are fbgemm (onednn), qnnpack (xnnpack) and
        # they share the same set of quantized operators, so we are using the same
        # lowering procedure
        #
        # backend_config defines the corresponding reference quantized module for
        # the weighted modules in the model, e.g. nn.Linear
        # TODO: add backend_config after we split the backend_config for fbgemm and qnnpack
        # e.g. backend_config = get_default_backend_config("fbgemm")
        quantized_model = convert_fx(prepared_model)
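        # (optional) attributes of the prepared model can be carried through convert with
        # a ConvertCustomConfig; a sketch, the attribute name "preserved_attr" is made up:
        # convert_custom_config = ConvertCustomConfig().set_preserved_attributes(["preserved_attr"])
        # quantized_model = convert_fx(prepared_model, convert_custom_config)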

    """
    torch._C._log_api_usage_once("quantization_api.quantize_fx.convert_fx")
    return _convert_fx(
        graph_module,
        is_reference=False,
        convert_custom_config=convert_custom_config,
        _remove_qconfig=_remove_qconfig,
        qconfig_mapping=qconfig_mapping,
        backend_config=backend_config,
    )


def convert_to_reference_fx(
    graph_module: GraphModule,
    convert_custom_config: Union[ConvertCustomConfig, Dict[str, Any], None] = None,
    _remove_qconfig: bool = True,
    qconfig_mapping: Union[QConfigMapping, Dict[str, Any], None] = None,
    backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
) -> torch.nn.Module:
    r""" Convert a calibrated or trained model to a reference quantized model,
    see https://github.com/pytorch/rfcs/blob/master/RFC-0019-Extending-PyTorch-Quantization-to-Custom-Backends.md for more details,
    reference quantized model is a standard representation of a quantized model provided
    by FX Graph Mode Quantization, it can be further lowered to run on the target
    hardware, like accelerators

    Args:
        * `graph_module` (GraphModule): A prepared and calibrated/trained model (GraphModule)

        * `convert_custom_config` (ConvertCustomConfig): custom configurations for convert function.
            See :func:`~torch.ao.quantization.quantize_fx.convert_fx` for more details.

        * `_remove_qconfig` (bool): Option to remove the qconfig attributes in the model after convert.

        * `qconfig_mapping` (QConfigMapping): config for specifying how to convert a model for quantization.
            See :func:`~torch.ao.quantization.quantize_fx.convert_fx` for more details.

         * `backend_config` (BackendConfig): A configuration for the backend which describes how
            operators should be quantized in the backend. See
            :func:`~torch.ao.quantization.quantize_fx.convert_fx` for more details.

    Return:
        A reference quantized model (GraphModule)

    Example::

        # prepared_model: the model after prepare_fx/prepare_qat_fx and calibration/training
        # TODO: add backend_config after we split the backend_config for fbgemm and qnnpack
        # e.g. backend_config = get_default_backend_config("fbgemm")
        reference_quantized_model = convert_to_reference_fx(prepared_model)

    """
    torch._C._log_api_usage_once("quantization_api.quantize_fx.convert_to_reference_fx")
    return _convert_fx(
        graph_module,
        is_reference=True,
        convert_custom_config=convert_custom_config,
        _remove_qconfig=_remove_qconfig,
        qconfig_mapping=qconfig_mapping,
        backend_config=backend_config,
    )


def _convert_standalone_module_fx(
    graph_module: GraphModule,
    is_reference: bool = False,
    convert_custom_config: Union[ConvertCustomConfig, Dict[str, Any], None] = None,
) -> torch.nn.Module:
    r""" [Internal use only] Convert a model produced by :func:`~torch.ao.quantization.prepare_standalone_module_fx`
    and convert it to a quantized model

    Returns a quantized standalone module, whether input/output is quantized is
    specified by prepare_custom_config, with
    input_quantized_idxs, output_quantized_idxs, please
    see docs for prepare_fx for details
    """
    return _convert_fx(
        graph_module,
        is_reference,
        convert_custom_config,
        is_standalone_module=True,
    )