| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- import onnx
- from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
- from .base_operator import QuantOperatorBase
- from .qdq_base_operator import QDQOperatorBase
class QLinearActivation(QuantOperatorBase):
    """QLinearOps-mode quantizer for activation nodes.

    ``Relu`` and ``Clip`` are handled by aliasing their output to the already
    quantized input. Every other op type routed here is replaced by a
    ``QLinear<op_type>`` node in the ``ms_domain`` domain.
    """

    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def QuantizeClipRelu(self):  # noqa: N802
        """Fold a ``Relu``/``Clip`` node into the quantized value feeding it.

        Falls back to the base-class ``quantize()`` when the input has no entry
        in ``quantized_value_map`` or when activations are symmetric.
        """
        node = self.node
        assert node.op_type in ("Relu", "Clip")

        quantizer = self.quantizer
        value_map = quantizer.quantized_value_map

        # In QLinearOps mode the output quantization params are calculated from
        # the outputs of activation nodes, so such a node can be dropped when
        # it follows a quantized op. Keep the node when its input is not
        # quantized, and leave it unquantized when activation is symmetric.
        input_is_quantized = node.input[0] in value_map
        if not input_is_quantized or quantizer.is_activation_symmetric:
            return super().quantize()

        # The output reuses the quantized value recorded for the input.
        value_map[node.output[0]] = value_map[node.input[0]]

    def quantize(self):
        """Quantize the wrapped activation node.

        ``Relu`` and ``Clip`` are delegated to ``QuantizeClipRelu``. For other
        op types a ``QLinear<op_type>`` node is appended to the quantizer's
        ``new_nodes`` and its output is registered in ``quantized_value_map``.
        If no quantization params are found for the output, or the input could
        not be quantized, the base-class ``quantize()`` result is returned.
        """
        node = self.node
        if node.op_type in ("Relu", "Clip"):
            self.QuantizeClipRelu()
            return None

        quantizer = self.quantizer

        # The "extra.Sigmoid.nnapi" option pins the Sigmoid output to
        # scale 1/256 and zero point 0; otherwise nothing is forced.
        nnapi_sigmoid_option = "extra.Sigmoid.nnapi"
        forced_scale = None
        forced_zero_point = None
        if (
            node.op_type == "Sigmoid"
            and nnapi_sigmoid_option in quantizer.extra_options
            and quantizer.extra_options[nnapi_sigmoid_option]
        ):
            forced_scale = 1 / 256.0
            forced_zero_point = 0

        # No assert on op_type as it is controlled by the registry.
        # Only try to quantize when quantization parameters are given for it.
        params_found, out_scale_name, out_zp_name, _, _ = quantizer._get_quantization_params(
            node.output[0], forced_scale, forced_zero_point
        )
        quantized_inputs, input_zp_names, input_scale_names, pending_nodes = quantizer.quantize_activation(
            node, [0]
        )
        if not params_found or quantized_inputs is None:
            return super().quantize()

        quantized_output_name = node.output[0] + TENSOR_NAME_QUANT_SUFFIX
        quantized_node_name = node.name + "_quant" if node.name else ""

        # Carry over the original attributes and place the node in ms_domain.
        node_kwargs = {}
        for attribute in node.attribute:
            node_kwargs.update(attribute_to_kwarg(attribute))
        node_kwargs["domain"] = ms_domain

        quantized_node = onnx.helper.make_node(
            "QLinear" + node.op_type,
            [
                quantized_inputs[0],
                input_scale_names[0],
                input_zp_names[0],
                out_scale_name,
                out_zp_name,
            ],
            [quantized_output_name],
            quantized_node_name,
            **node_kwargs,
        )

        # Create an entry for this quantized value.
        quantizer.quantized_value_map[node.output[0]] = QuantizedValue(
            node.output[0],
            quantized_output_name,
            out_scale_name,
            out_zp_name,
            QuantizedValueType.Input,
        )

        pending_nodes.append(quantized_node)
        quantizer.new_nodes += pending_nodes
class QDQRemovableActivation(QDQOperatorBase):
    """QDQ-mode handler for activation nodes that may be removed from the graph."""

    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        """Remove the activation node if possible, otherwise quantize its tensors.

        Nothing is done when the node's input tensor is not quantized. The node
        is removed only when activations are not symmetric, removable
        activations are not being kept, and the upstream output could be
        replaced with this node's output. Otherwise the input tensor is
        quantized, and the output tensor too unless
        ``disable_qdq_for_node_output`` is set.
        """
        node = self.node
        quantizer = self.quantizer
        input_name = node.input[0]

        # If the input to this node is not quantized then keep this node.
        if not quantizer.is_tensor_quantized(input_name):
            return

        removable = not (
            quantizer.is_activation_symmetric or quantizer.qdq_keep_removable_activations
        ) and quantizer.try_replacing_upstream_output(input_name, node.output[0])
        if removable:
            quantizer.remove_node(node)
            return

        quantizer.quantize_activation_tensor(input_name)
        if not self.disable_qdq_for_node_output:
            quantizer.quantize_activation_tensor(node.output[0])
|