pad.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # --------------------------------------------------------------------------
  2. # Copyright (c) Microsoft Corporation. All rights reserved.
  3. # Licensed under the MIT License.
  4. # --------------------------------------------------------------------------
  5. from __future__ import annotations
  6. from typing import Any
  7. import numpy as np
  8. import onnx
  9. from ..quant_utils import (
  10. TENSOR_NAME_QUANT_SUFFIX,
  11. QuantizedValue,
  12. QuantizedValueType,
  13. attribute_to_kwarg,
  14. quantize_nparray,
  15. )
  16. from .base_operator import QuantOperatorBase
  17. from .qdq_base_operator import QDQOperatorBase
  18. class QPad(QuantOperatorBase):
  19. def __init__(self, onnx_quantizer, onnx_node):
  20. super().__init__(onnx_quantizer, onnx_node)
  21. def quantize(self):
  22. node = self.node
  23. assert node.op_type == "Pad"
  24. # Only after version 11, it has the optional constant_value
  25. # If input[0] is not quantized, do not quanitize this node
  26. if (self.quantizer.opset_version < 11) or (node.input[0] not in self.quantizer.quantized_value_map):
  27. super().quantize()
  28. return
  29. quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]
  30. kwargs = {}
  31. for attribute in node.attribute:
  32. kv = attribute_to_kwarg(attribute)
  33. kwargs.update(kv)
  34. if "mode" not in kwargs or kwargs["mode"] == b"constant":
  35. if len(node.input) > 2 and node.input[2] != "": # There is 3rd input 'constant_value'
  36. zp_tensor = self.quantizer.model.get_initializer(quantized_input_value.zp_name)
  37. scale_tensor = self.quantizer.model.get_initializer(quantized_input_value.scale_name)
  38. if zp_tensor is None or scale_tensor is None:
  39. super().quantize()
  40. return
  41. padding_constant_initializer = self.quantizer.model.get_initializer(node.input[2])
  42. if padding_constant_initializer is not None:
  43. zp_array = onnx.numpy_helper.to_array(zp_tensor)
  44. zp_value = zp_array.item() if zp_array.ndim == 0 else zp_array[0]
  45. scale_array = onnx.numpy_helper.to_array(scale_tensor)
  46. scale_value = scale_array.item() if scale_array.ndim == 0 else scale_array[0]
  47. padding_constant_array = onnx.numpy_helper.to_array(padding_constant_initializer)
  48. quantized_padding_constant_array = quantize_nparray(
  49. self.quantizer.activation_qType,
  50. padding_constant_array,
  51. scale_value,
  52. zp_value,
  53. )
  54. quantized_padding_constant_name = node.input[2] + TENSOR_NAME_QUANT_SUFFIX
  55. quantized_padding_constant_initializer = onnx.numpy_helper.from_array(
  56. quantized_padding_constant_array,
  57. quantized_padding_constant_name,
  58. )
  59. # Suppose this padding constant initializer only used by the node
  60. self.quantizer.model.remove_initializer(padding_constant_initializer)
  61. self.quantizer.model.add_initializer(quantized_padding_constant_initializer)
  62. node.input[2] = quantized_padding_constant_name
  63. else:
  64. # TODO: check quantize_inputs after sub graph is supported
  65. pad_value_qnodes = self.quantizer._get_quantize_input_nodes(
  66. node,
  67. 2,
  68. self.quantizer.activation_qType,
  69. quantized_input_value.scale_name,
  70. quantized_input_value.zp_name,
  71. initial_type=scale_tensor.data_type,
  72. )
  73. self.quantizer.new_nodes.extend(pad_value_qnodes)
  74. node.input[2] = pad_value_qnodes[0].output[0]
  75. else:
  76. # In quantized format, the `zero` before quantization is mapped
  77. # to quantized_input_value.zp_name. Thus, padding 0 to
  78. # original tensor should become padding zero point to quantized
  79. # tensor.
  80. if len(node.input) == 2:
  81. # Feed quantization's zero point to padding node.
  82. node.input.append(quantized_input_value.zp_name)
  83. else:
  84. # Assign quantization's zero point to padding node.
  85. assert node.input[2] == ""
  86. node.input[2] = quantized_input_value.zp_name
  87. # Create an entry for output quantized value
  88. quantized_output_value = QuantizedValue(
  89. node.output[0],
  90. node.output[0] + TENSOR_NAME_QUANT_SUFFIX,
  91. quantized_input_value.scale_name,
  92. quantized_input_value.zp_name,
  93. QuantizedValueType.Input,
  94. )
  95. self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value
  96. node.input[0] = quantized_input_value.q_name
  97. node.output[0] = quantized_output_value.q_name
  98. self.quantizer.new_nodes += [node]
  99. class QDQPad(QDQOperatorBase):
  100. def __init__(self, onnx_quantizer, onnx_node):
  101. super().__init__(onnx_quantizer, onnx_node)
  102. def _get_pad_const_val(self, attrs_dict: dict[str, Any]) -> np.ndarray | None:
  103. """
  104. Returns the Pad's constant padding value. Returns `None` if the padding value is
  105. not constant (i.e., comes from a dynamic input).
  106. """
  107. const_val = None
  108. onnx_tensor_type = self.quantizer.model.get_tensor_type(self.node.input[0])
  109. if onnx_tensor_type is None:
  110. return None
  111. np_dtype = onnx.helper.tensor_dtype_to_np_dtype(onnx_tensor_type.elem_type)
  112. if self.quantizer.opset_version < 11:
  113. const_val = np.array(attrs_dict.get("value", 0), dtype=np_dtype)
  114. elif len(self.node.input) >= 3 and self.node.input[2]:
  115. const_val = self.quantizer.model.get_constant_value(self.node.input[2])
  116. else:
  117. const_val = np.array(0, dtype=np_dtype)
  118. return const_val
  119. def _should_quantize_output_same_as_input(self) -> bool:
  120. """
  121. Returns true if Pad's output should use the same quantization parameters as input[0]
  122. """
  123. attrs_dict = {}
  124. for attribute in self.node.attribute:
  125. kv = attribute_to_kwarg(attribute)
  126. attrs_dict.update(kv)
  127. pad_mode = attrs_dict.get("mode", b"constant")
  128. if pad_mode in (b"reflect", b"edge", b"wrap"):
  129. # These modes pad the output with a value that already exists in the input.
  130. # So, we can quantize the output the same as the input.
  131. return True
  132. # For 'constant' mode, if padding with 0, we can also quantize the output the same as the input
  133. # because our quantization floating-point range always includes 0.
  134. if pad_mode == b"constant":
  135. pad_val = self._get_pad_const_val(attrs_dict)
  136. if pad_val is not None and pad_val.dtype in (np.float32, np.float16):
  137. return float(pad_val.item()) == 0
  138. return False
  139. def quantize(self):
  140. assert self.node.op_type == "Pad"
  141. for input_name in self.node.input:
  142. if input_name:
  143. self.quantizer.quantize_activation_tensor(input_name)
  144. if not self.disable_qdq_for_node_output:
  145. if self._should_quantize_output_same_as_input():
  146. self.quantizer.quantize_output_same_as_input(self.node.output[0], self.node.input[0], self.node.name)
  147. else:
  148. self.quantizer.quantize_activation_tensor(self.node.output[0])