Quantizer.h 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
  2. #pragma once
  3. #include <c10/core/QScheme.h>
  4. #include <c10/core/MemoryFormat.h>
  5. #include <c10/macros/Macros.h>
  6. #include <c10/util/Exception.h>
  7. #include <c10/util/intrusive_ptr.h>
  8. #include <c10/core/ScalarType.h>
  9. #include <c10/core/TensorOptions.h>
  10. #include <ATen/Tensor.h>
  11. #include <ATen/TensorUtils.h>
  12. #include <ATen/core/QuantizerBase.h>
  13. #include <cmath>
  14. #include <memory>
  15. #include <utility>
  16. namespace at {
  17. /**
  18. * UnknownQuantizer is a placeholder quantizer for functions that implement
  19. * quantization in a two step process. First a tensor is allocated but with
  20. * unknown quantizer, and then the quantization kernel decides what the final
  21. * quantizer will be.
  22. */
  23. struct TORCH_API UnknownQuantizer : public Quantizer {
  24. explicit UnknownQuantizer(ScalarType scalar_type)
  25. : Quantizer(scalar_type) {}
  26. Tensor quantize(const Tensor& tensor) override;
  27. Tensor dequantize(const Tensor& qtensor) override;
  28. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  29. QScheme qscheme() const override;
  30. bool equalTo(QuantizerPtr other) const override;
  31. };
  32. /**
  33. * UniformQuantizer is the parent class for all uniform quantizers.
  34. * These quantization scheme will map float value uniformly to
  35. * the quantized value. For example, affine quantizer is
  36. * the most commonly used scheme in this category.
  37. */
  38. struct TORCH_API UniformQuantizer : public Quantizer {
  39. explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
  40. };
  41. /**
  42. * NonUniformQuantizer is the parent class for all non-uniform quantizers.
  43. * These quantization scheme may map float value non-uniformly to the quantized
  44. * value. K-means quantization is a representative example in this category.
  45. */
  46. struct TORCH_API NonUniformQuantizer : public Quantizer {
  47. explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
  48. };
  49. // There is also StochasticQuantizer which is uniform but not affine
  50. /**
  51. * AffineQuantizer uses affine transformation to do quantization.
  52. *
  53. * For quantize:
  54. * Y = clamp(round(X / scale + zero_point), min, max)
  55. * For dequantize:
  56. * X = (Y - zero_point) * scale
  57. */
  58. struct TORCH_API AffineQuantizer : public UniformQuantizer {
  59. explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {}
  60. };
  61. // Note that we will not have Symmetric Quantizer in backend to reduce
  62. // complications in quantized kernel implementation.
  63. /**
  64. * PerTensorAffineQuantizer stores a scale and a zero_point, which is used for
  65. * all the values in the Tensor.
  66. */
  67. struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer {
  68. explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point)
  69. : AffineQuantizer(scalar_type),
  70. scale_(scale),
  71. zero_point_(zero_point) {}
  72. Tensor quantize(const Tensor& tensor) override;
  73. Tensor dequantize(const Tensor& qtensor) override;
  74. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  75. QScheme qscheme() const override {
  76. return kPerTensorAffine;
  77. }
  78. double scale() const {
  79. return scale_;
  80. }
  81. int64_t zero_point() const {
  82. return zero_point_;
  83. }
  84. bool equalTo(QuantizerPtr other) const override {
  85. if (!other.get() || other->qscheme() != kPerTensorAffine) {
  86. return false;
  87. }
  88. auto* other_per_tensor_affine =
  89. static_cast<PerTensorAffineQuantizer*>(other.get());
  90. return scalar_type() == other_per_tensor_affine->scalar_type() &&
  91. scale() == other_per_tensor_affine->scale() &&
  92. zero_point() == other_per_tensor_affine->zero_point();
  93. }
  94. private:
  95. const double scale_;
  96. // We use int64_t for consistency with Python
  97. const int64_t zero_point_;
  98. };
  99. /**
  100. * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer
  101. * except that we have an independent scale and zero_point parameter
  102. * for each channel.
  103. *
  104. * Also note that per channel quantization is mostly applied to output channels
  105. * of weights since per-input channel of weight quantization or per-channel
  106. * quantization for activations can't be efficiently supported in most of
  107. * processors since it requires each multiplication result within a single
  108. * dot-product to have a different scale.
  109. */
  110. struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer {
  111. explicit PerChannelAffineQuantizer(
  112. ScalarType scalar_type,
  113. Tensor scales,
  114. Tensor zero_points,
  115. int64_t axis)
  116. : AffineQuantizer(scalar_type),
  117. scales_(std::move(scales)),
  118. zero_points_(std::move(zero_points)),
  119. axis_(axis) {}
  120. QScheme qscheme() const override {
  121. return kPerChannelAffine;
  122. }
  123. Tensor scales() const {
  124. return scales_;
  125. }
  126. Tensor zero_points() const {
  127. return zero_points_;
  128. }
  129. int64_t axis() const {
  130. return axis_;
  131. }
  132. Tensor quantize(const Tensor& tensor) override;
  133. Tensor dequantize(const Tensor& qtensor) override;
  134. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  135. bool equalTo(QuantizerPtr other) const override {
  136. if (!other.get() || other->qscheme() != kPerChannelAffine) {
  137. return false;
  138. }
  139. auto* other_per_channel_affine =
  140. static_cast<PerChannelAffineQuantizer*>(other.get());
  141. return scalar_type() == other_per_channel_affine->scalar_type() &&
  142. scales().equal(other_per_channel_affine->scales()) &&
  143. zero_points().equal(other_per_channel_affine->zero_points()) &&
  144. axis() == other_per_channel_affine->axis();
  145. }
  146. protected:
  147. Tensor scales_;
  148. Tensor zero_points_;
  149. const int64_t axis_;
  150. };
  151. /**
  152. * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer
  153. * except that it expects both scale and zero point to be floating point values.
  154. *
  155. * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of
  156. * kPerChannelAffine.
  157. *
  158. * The quantize equation in this case looks like -
  159. * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale
  160. *
  161. * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to
  162. * be exactly represented in the quantized space. We can get additional precision by
  163. * using floating point values for zero point.
  164. */
  165. struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer {
  166. explicit PerChannelAffineFloatQParamsQuantizer(
  167. ScalarType scalar_type,
  168. Tensor scales,
  169. Tensor zero_points,
  170. int64_t axis)
  171. : PerChannelAffineQuantizer(scalar_type,
  172. scales,
  173. zero_points,
  174. axis) {}
  175. QScheme qscheme() const override {
  176. return kPerChannelAffineFloatQParams;
  177. }
  178. Tensor quantize(const Tensor& tensor) override;
  179. Tensor dequantize(const Tensor& qtensor) override;
  180. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  181. bool equalTo(QuantizerPtr other) const override {
  182. if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) {
  183. return false;
  184. }
  185. auto* other_per_channel_float_qparams =
  186. static_cast<PerChannelAffineFloatQParamsQuantizer*>(other.get());
  187. return scalar_type() == other_per_channel_float_qparams->scalar_type() &&
  188. scales().equal(other_per_channel_float_qparams->scales()) &&
  189. zero_points().equal(other_per_channel_float_qparams->zero_points()) &&
  190. axis() == other_per_channel_float_qparams->axis();
  191. }
  192. };
  193. // This is an internal utility function for getting at the QTensorImpl,
  194. // You should only use this for writing low level
  195. // setters/getters for QTensorImpl fields; otherwise, you should use
  196. // the low level setters/getters that were implemented using this.
  197. // This may be called repeatedly, so make sure it's pretty cheap.
  198. TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self);
  199. // double and int64_t are because of the native function API, we only have these
  200. // argument types right now in native functions
  201. TORCH_API QuantizerPtr
  202. make_per_tensor_affine_quantizer(
  203. double scale, int64_t zero_point, ScalarType scalar_type);
  204. TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
  205. const Tensor& scales,
  206. const Tensor& zero_points,
  207. int64_t axis,
  208. ScalarType scalar_type);
  209. TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);
  210. // Create a Quantized Tensor given arguments for normal Tensor and a quantizer
  211. TORCH_API Tensor new_qtensor(
  212. IntArrayRef sizes,
  213. const TensorOptions& options,
  214. QuantizerPtr quantizer);
  215. TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);
  216. TORCH_API Tensor from_blob_quantized_per_tensor_affine(
  217. void* data,
  218. IntArrayRef sizes,
  219. IntArrayRef strides,
  220. std::function<void(void*)> deleter,
  221. const float scale,
  222. const int64_t zeroPoint,
  223. const TensorOptions& options);
  224. TORCH_API Tensor from_blob_quantized_per_tensor_affine(
  225. void* data,
  226. IntArrayRef sizes,
  227. std::function<void(void*)> deleter,
  228. const float scale,
  229. const int64_t zeroPoint,
  230. const TensorOptions& options);
  231. TORCH_API Tensor from_blob_quantized_per_channel_affine(
  232. void* data,
  233. IntArrayRef sizes,
  234. std::function<void(void*)> deleter,
  235. const Tensor& scales,
  236. const Tensor& zero_points,
  237. const int64_t axis,
  238. const TensorOptions& options);
  239. } // namespace at
  240. #else
  241. #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
  242. #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)