Tosa/Utils/QuantUtils.cpp

b2812113SSuraj Sudhir//===- QuantUtils.cpp -----------------------------------------------------===//
b2812113SSuraj Sudhir//
b2812113SSuraj Sudhir// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
b2812113SSuraj Sudhir// See https://llvm.org/LICENSE.txt for license information.
b2812113SSuraj Sudhir// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
b2812113SSuraj Sudhir//
b2812113SSuraj Sudhir//===----------------------------------------------------------------------===//
b2812113SSuraj Sudhir//
b2812113SSuraj Sudhir// This file contains TOSA numerical support functions and quantization
b2812113SSuraj Sudhir// attribute builders.
b2812113SSuraj Sudhir//
b2812113SSuraj Sudhir//===----------------------------------------------------------------------===//
b2812113SSuraj Sudhir
#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
b2812113SSuraj Sudhir
b2812113SSuraj Sudhirusing namespace mlir;
b2812113SSuraj Sudhirusing namespace mlir::tosa;
b2812113SSuraj Sudhir
/// Decomposes `scale` into a multiplier and a right-shift amount for 16-bit
/// scaling, such that scale ~= multiplier * 2^(-shift).
///
/// std::frexp splits the scale into mantissa * 2^exponent, where the mantissa
/// has magnitude in [0.5, 1.0) (or is 0 for a zero scale). The mantissa is
/// rounded to a fixed-point value with 15 fractional bits and written to
/// `multiplier`; the exponent is folded into `shift`, which is capped at 62.
static void computeMultiplierAndShiftTosaScale16(double scale,
                                                 int32_t &multiplier,
                                                 int32_t &shift) {
  // frexp leaves the exponent unspecified for infinities and NaN, and the
  // narrowing conversion below would be undefined for them.
  assert(std::isfinite(scale) && "Scale must be a finite value");

  // std::frexp takes an `int *`; use a local `int` instead of aliasing the
  // `int32_t` output, which is not guaranteed to be the same type.
  int exponent = 0;
  const double mantissa = std::frexp(scale, &exponent);
  double shiftedM = std::round(mantissa * (int64_t(1) << 15));

  // Can't be greater than 1.0.
  assert(shiftedM <= (int64_t(1) << 15) &&
         "Shifted mantissa exceeds 16 signed bits");

  // Rounding may have carried the mantissa up to exactly 1.0; halve it and
  // bump the exponent so the value is unchanged.
  if (shiftedM == (int64_t(1) << 15)) {
    shiftedM /= 2;
    ++exponent;
  }

  // TOSA expects right shift to be positive and embed (1 << 15) into right
  // shift bits.
  shift = 15 - exponent;

  assert(shiftedM <= std::numeric_limits<int32_t>::max() &&
         "Shifted mantissa exceeds 32-bit signed output type");

  multiplier = static_cast<int32_t>(shiftedM);

  // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
  // The limit of 62 on shift allows the shift to be decomposed as
  // two right shifts of 31.
  if (shift > 62) {
    // Cap the multiplier shift at 31 so it stays below the width of the
    // 32-bit multiplier.
    multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
    shift = 62;
  }
}
b2812113SSuraj Sudhir
/// Decomposes `scale` into a multiplier and a right-shift amount for 32-bit
/// scaling, such that scale ~= multiplier * 2^(-shift).
///
/// std::frexp splits the scale into mantissa * 2^exponent, where the mantissa
/// has magnitude in [0.5, 1.0) (or is 0 for a zero scale). The mantissa is
/// rounded to a fixed-point value with 31 fractional bits and written to
/// `multiplier`; the exponent is folded into `shift`, which is capped at 62.
static void computeMultiplierAndShiftTosaScale32(double scale,
                                                 int32_t &multiplier,
                                                 int32_t &shift) {
  // frexp leaves the exponent unspecified for infinities and NaN, and the
  // narrowing conversion below would be undefined for them.
  assert(std::isfinite(scale) && "Scale must be a finite value");

  // std::frexp takes an `int *`; use a local `int` instead of aliasing the
  // `int32_t` output, which is not guaranteed to be the same type.
  int exponent = 0;
  const double mantissa = std::frexp(scale, &exponent);
  double shiftedM = std::round(mantissa * (int64_t(1) << 31));

  // Can't be greater than 1.0.
  assert(shiftedM <= (int64_t(1) << 31) &&
         "Shifted mantissa exceeds 32 signed bits");

  // Rounding may have carried the mantissa up to exactly 1.0; halve it and
  // bump the exponent so the value is unchanged and fits in int32_t.
  if (shiftedM == (int64_t(1) << 31)) {
    shiftedM /= 2;
    ++exponent;
  }

  // TOSA expects right shift to be positive, and embed (1 << 31) into right
  // shift bits.
  shift = 31 - exponent;

  assert(shiftedM <= std::numeric_limits<int32_t>::max() &&
         "Shifted mantissa exceeds 32-bit signed output type");

  multiplier = static_cast<int32_t>(shiftedM);

  // Shifting tops out at 62 bits. Right shift to make 62 bits the max.
  // The limit of 62 on shift allows the shift to be decomposed as
  // two right shifts of 31.
  if (shift > 62) {
    // Cap the multiplier shift at 31 so it stays below the width of the
    // 32-bit multiplier.
    multiplier = multiplier >> std::min<int32_t>(31, shift - 62);
    shift = 62;
  }
}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Generates a quantized multiplier/shift from double.
ac3587f2SStella Laurenzovoid mlir::tosa::computeMultiplierAndShift(double scale, int32_t &multiplier,
b2812113SSuraj Sudhir                                           int32_t &shift, int32_t scaleWidth) {
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  switch (scaleWidth) {
b2812113SSuraj Sudhir  case 16:
b2812113SSuraj Sudhir    computeMultiplierAndShiftTosaScale16(scale, multiplier, shift);
b2812113SSuraj Sudhir    return;
b2812113SSuraj Sudhir  case 32:
b2812113SSuraj Sudhir    computeMultiplierAndShiftTosaScale32(scale, multiplier, shift);
b2812113SSuraj Sudhir    return;
b2812113SSuraj Sudhir  default:
b2812113SSuraj Sudhir    assert(0 && "Unsupported Tosa quantized_scale regime specified!");
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
// Helpers that view the element type of a shaped type as a quantized type.
// Each expands to a dyn_cast, so the result is null when the element type is
// not of the requested kind.
#define GET_UQTYPE(shapedType)                                                 \
  (llvm::dyn_cast<quant::UniformQuantizedType>((shapedType).getElementType()))
#define GET_QTYPE(shapedType)                                                  \
  (llvm::dyn_cast<quant::QuantizedType>((shapedType).getElementType()))
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Method to build ConvOpQuantizationAttr, called from
b2812113SSuraj Sudhir/// ConvOpQuantInfoBuilder/TransConvOpQuantInfoBuilder:
b2812113SSuraj Sudhir/// input_zp: input zeropoint
b2812113SSuraj Sudhir/// weight_zp: weight zeropoint.
ac3587f2SStella LaurenzoConvOpQuantizationAttr
ac3587f2SStella Laurenzomlir::tosa::buildConvOpQuantizationAttr(OpBuilder &builder, Value input,
ac3587f2SStella Laurenzo                                        Value weight) {
b2812113SSuraj Sudhir
5550c821STres Popp  auto inputType = dyn_cast<ShapedType>(input.getType());
5550c821STres Popp  auto weightType = dyn_cast<ShapedType>(weight.getType());
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (!inputType || !weightType)
b2812113SSuraj Sudhir    return nullptr;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  auto inputQType = GET_UQTYPE(inputType);
b2812113SSuraj Sudhir  auto weightPerTensorQType = GET_UQTYPE(weightType);
5550c821STres Popp  auto weightPerAxisQType =
5550c821STres Popp      dyn_cast<quant::UniformQuantizedPerAxisType>(weightType.getElementType());
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  // Weights must be either per-tensor quantized or per-axis quantized.
b2812113SSuraj Sudhir  assert(!((bool)weightPerTensorQType && (bool)weightPerAxisQType) &&
b2812113SSuraj Sudhir         "Weights must be either per-tensor or per-axis quantized");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  // Either all quantized or all not quantized.
b2812113SSuraj Sudhir  assert(!((bool)inputQType ^
b2812113SSuraj Sudhir           ((bool)weightPerTensorQType || (bool)weightPerAxisQType)) &&
b2812113SSuraj Sudhir         "Inputs and weights must be all quantized or all not quantized");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (inputQType) {
b2812113SSuraj Sudhir    int64_t inputZp = inputQType.getZeroPoint();
b2812113SSuraj Sudhir    int64_t weightZp = 0;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir    if (weightPerTensorQType) {
b2812113SSuraj Sudhir      weightZp = weightPerTensorQType.getZeroPoint();
b2812113SSuraj Sudhir    } else if (weightPerAxisQType) {
b2812113SSuraj Sudhir      weightZp = weightPerAxisQType.getZeroPoints().front();
b2812113SSuraj Sudhir    }
b2812113SSuraj Sudhir
f1182bd6SMogball    return builder.getAttr<tosa::ConvOpQuantizationAttr>(inputZp, weightZp);
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  return nullptr;
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Builds MatMulOpQuantizationAttr, called from
b2812113SSuraj Sudhir/// MatMulOpQuantInfoBuilder:
b2812113SSuraj Sudhir/// aZp: input a zeropoint
b2812113SSuraj Sudhir/// bZp: input b zeropoint.
ac3587f2SStella LaurenzoMatMulOpQuantizationAttr
ac3587f2SStella Laurenzomlir::tosa::buildMatMulOpQuantizationAttr(OpBuilder &builder, Value a,
ac3587f2SStella Laurenzo                                          Value b) {
b2812113SSuraj Sudhir
5550c821STres Popp  auto aType = dyn_cast<ShapedType>(a.getType());
5550c821STres Popp  auto bType = dyn_cast<ShapedType>(b.getType());
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (!aType || !bType)
b2812113SSuraj Sudhir    return nullptr;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  auto aQType = GET_UQTYPE(aType);
b2812113SSuraj Sudhir  auto bQType = GET_UQTYPE(bType);
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  // A and B are either all quantized or all not quantized.
b2812113SSuraj Sudhir  assert(!((bool)aQType ^ (bool)bQType) &&
b2812113SSuraj Sudhir         "Matmul operands must be all quantized or all not quantized");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (aQType) {
f1182bd6SMogball    return builder.getAttr<tosa::MatMulOpQuantizationAttr>(
f1182bd6SMogball        aQType.getZeroPoint(), bQType.getZeroPoint());
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  return nullptr;
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Builds UnaryOpQuantizationAttr
b2812113SSuraj Sudhir/// UnaryOpQuantInfoBuilder:
b2812113SSuraj Sudhir/// inputZp: input zeropoint
b2812113SSuraj Sudhir/// outputZp: output zeropoint.
ac3587f2SStella LaurenzoUnaryOpQuantizationAttr
ac3587f2SStella Laurenzomlir::tosa::buildUnaryOpQuantizationAttr(OpBuilder &builder, Value input,
b2812113SSuraj Sudhir                                         Type outputRawType) {
b2812113SSuraj Sudhir
5550c821STres Popp  auto inputType = dyn_cast<ShapedType>(input.getType());
5550c821STres Popp  auto outputType = dyn_cast<ShapedType>(outputRawType);
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (!inputType || !outputType)
b2812113SSuraj Sudhir    return nullptr;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  auto inputQType = GET_UQTYPE(inputType);
b2812113SSuraj Sudhir  auto outputQType = GET_UQTYPE(outputType);
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  // Either all quantized or all not quantized.
b2812113SSuraj Sudhir  assert(!((bool)inputQType ^ (bool)outputQType) &&
b2812113SSuraj Sudhir         "Unary inputs/outputs must be all quantized or all not quantized");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (inputQType) {
f1182bd6SMogball    return builder.getAttr<UnaryOpQuantizationAttr>(inputQType.getZeroPoint(),
f1182bd6SMogball                                                    outputQType.getZeroPoint());
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  return nullptr;
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Builds PadOpQuantizationAttr, called from PadOpQuantInfoBuilder:
b2812113SSuraj Sudhir/// inputZp: input zeropoint.
ac3587f2SStella LaurenzoPadOpQuantizationAttr mlir::tosa::buildPadOpQuantizationAttr(OpBuilder &builder,
b2812113SSuraj Sudhir                                                             Value input) {
b2812113SSuraj Sudhir
5550c821STres Popp  auto inputType = dyn_cast<ShapedType>(input.getType());
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (!inputType)
b2812113SSuraj Sudhir    return nullptr;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  auto inputQType = GET_UQTYPE(inputType);
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (inputQType) {
f1182bd6SMogball    return builder.getAttr<tosa::PadOpQuantizationAttr>(
f1182bd6SMogball        inputQType.getZeroPoint());
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  return nullptr;
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Builds output type for a quantized ConvOp with the right bitwidth.
b2812113SSuraj Sudhir/// This is called by the builder when dealing with quantized content.
ac3587f2SStella LaurenzoType mlir::tosa::buildConvOpResultTypeInfo(OpBuilder &builder, Type outputType,
ac3587f2SStella Laurenzo                                           Value input, Value weight) {
b2812113SSuraj Sudhir
5550c821STres Popp  auto inputType = dyn_cast<ShapedType>(input.getType());
5550c821STres Popp  auto weightType = dyn_cast<ShapedType>(weight.getType());
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  assert(inputType && weightType &&
b2812113SSuraj Sudhir         "Could not extract input or weight tensors from Conv op");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  auto inputQType = GET_QTYPE(inputType);
b2812113SSuraj Sudhir  auto weightQType = GET_QTYPE(weightType);
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  assert(inputQType && weightQType &&
b2812113SSuraj Sudhir         "Could not extract input or weight tensor types from Conv op");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  unsigned inputBits = inputQType.getStorageTypeIntegralWidth();
b2812113SSuraj Sudhir  unsigned weightBits = weightQType.getStorageTypeIntegralWidth();
b2812113SSuraj Sudhir
5550c821STres Popp  auto outputShapedType = dyn_cast<ShapedType>(outputType);
b2812113SSuraj Sudhir  assert(outputShapedType &&
b2812113SSuraj Sudhir         "Could not extract output shape type from Conv op");
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  IntegerType accElementType;
b2812113SSuraj Sudhir  if (inputBits == 16 && weightBits == 8)
b2812113SSuraj Sudhir    accElementType = builder.getIntegerType(48);
b2812113SSuraj Sudhir  else
b2812113SSuraj Sudhir    accElementType = builder.getI32Type();
8662a2f2SRob Suderman  auto accType = outputShapedType.clone(accElementType);
b2812113SSuraj Sudhir  return accType;
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Builds Tosa quantization attributes from min/max values.
ac3587f2SStella LaurenzoType mlir::tosa::buildQTypeFromMinMax(OpBuilder builder, Type inputDType,
ac3587f2SStella Laurenzo                                      Attribute minAttr, Attribute maxAttr,
ac3587f2SStella Laurenzo                                      IntegerAttr quantBits, int filterQuantDim,
ac3587f2SStella Laurenzo                                      bool isSigned, BoolAttr narrowRange) {
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  quant::QuantizedType retType;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  auto convfunc =
b2812113SSuraj Sudhir      quant::ExpressedToQuantizedConverter::forInputType(inputDType);
b2812113SSuraj Sudhir
5550c821STres Popp  auto minElems = dyn_cast<DenseFPElementsAttr>(minAttr);
5550c821STres Popp  auto maxElems = dyn_cast<DenseFPElementsAttr>(maxAttr);
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  SmallVector<double, 2> min, max;
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  // At least one is per-axis quantized elementsattr.
b2812113SSuraj Sudhir  if (minElems || maxElems) {
b2812113SSuraj Sudhir    // Must have the same number of elements.
b2812113SSuraj Sudhir    if (minElems.getNumElements() != maxElems.getNumElements())
b2812113SSuraj Sudhir      return {};
b2812113SSuraj Sudhir    min.reserve(minElems.getNumElements());
b2812113SSuraj Sudhir    max.reserve(maxElems.getNumElements());
b2812113SSuraj Sudhir    for (auto i : minElems)
b2812113SSuraj Sudhir      min.push_back(FloatAttr::getValueAsDouble(i));
b2812113SSuraj Sudhir    for (auto i : maxElems)
b2812113SSuraj Sudhir      max.push_back(FloatAttr::getValueAsDouble(i));
b2812113SSuraj Sudhir  } else { // Just a single FP value.
5550c821STres Popp    auto minVal = dyn_cast<FloatAttr>(minAttr);
b2812113SSuraj Sudhir    if (minVal)
b2812113SSuraj Sudhir      min.push_back(minVal.getValueAsDouble());
b2812113SSuraj Sudhir    else
b2812113SSuraj Sudhir      return {};
5550c821STres Popp    auto maxVal = dyn_cast<FloatAttr>(maxAttr);
b2812113SSuraj Sudhir    if (maxVal)
b2812113SSuraj Sudhir      max.push_back(maxVal.getValueAsDouble());
b2812113SSuraj Sudhir    else
b2812113SSuraj Sudhir      return {};
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (min.size() == max.size()) {
b2812113SSuraj Sudhir    if (min.size() == 1) { // Per-tensor quantization with one min/max pair.
b2812113SSuraj Sudhir      retType = quant::fakeQuantAttrsToType(
b2812113SSuraj Sudhir          builder.getUnknownLoc(), quantBits.getInt(), min[0], max[0],
b2812113SSuraj Sudhir          narrowRange.getValue(), convfunc.expressedType, isSigned);
b2812113SSuraj Sudhir    } else if (min.size() > 1) { // Per-axis quant on filterQuantDim.
5550c821STres Popp      auto shape = dyn_cast<ShapedType>(inputDType);
b2812113SSuraj Sudhir      if (!shape)
b2812113SSuraj Sudhir        return {};
b2812113SSuraj Sudhir      if ((filterQuantDim) >= 0 && (shape.getRank() > filterQuantDim)) {
b2812113SSuraj Sudhir        retType = quant::fakeQuantAttrsToType(
b2812113SSuraj Sudhir            builder.getUnknownLoc(), quantBits.getInt(), filterQuantDim, min[0],
b2812113SSuraj Sudhir            max[0], narrowRange.getValue(), convfunc.expressedType, isSigned);
b2812113SSuraj Sudhir      }
b2812113SSuraj Sudhir    } else {
b2812113SSuraj Sudhir      return {};
b2812113SSuraj Sudhir    }
b2812113SSuraj Sudhir  } else {
b2812113SSuraj Sudhir    return {};
b2812113SSuraj Sudhir  }
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  if (!retType)
b2812113SSuraj Sudhir    return {};
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  return convfunc.convert(retType);
b2812113SSuraj Sudhir}
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir/// Builds Tosa quantization attributes from min/max values.
ac3587f2SStella LaurenzoTypeAttr
ac3587f2SStella Laurenzomlir::tosa::buildQTypeAttrFromMinMax(OpBuilder builder, Type inputDtype,
b2812113SSuraj Sudhir                                     Attribute minAttr, Attribute maxAttr,
b2812113SSuraj Sudhir                                     IntegerAttr quantBits, int filterQuantDim,
b2812113SSuraj Sudhir                                     bool isSigned, BoolAttr narrowRange) {
b2812113SSuraj Sudhir
b2812113SSuraj Sudhir  return TypeAttr::get(buildQTypeFromMinMax(builder, inputDtype, minAttr,
b2812113SSuraj Sudhir                                            maxAttr, quantBits, filterQuantDim,
b2812113SSuraj Sudhir                                            isSigned, narrowRange));
b2812113SSuraj Sudhir}