1 //===- LowerQuantOps.cpp - Lower 'quant' dialect ops ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Transforms `quant.dcast` and `quant.qcast` into lower-level ops. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "mlir/Dialect/Arith/IR/Arith.h" 14 #include "mlir/Dialect/Func/IR/FuncOps.h" 15 #include "mlir/Dialect/Linalg/IR/Linalg.h" 16 #include "mlir/Dialect/Quant/IR/Quant.h" 17 #include "mlir/Dialect/Quant/IR/QuantTypes.h" 18 #include "mlir/Dialect/Quant/Transforms/Passes.h" 19 #include "mlir/Dialect/Shape/IR/Shape.h" 20 #include "mlir/Dialect/Tensor/IR/Tensor.h" 21 #include "mlir/IR/Matchers.h" 22 #include "mlir/IR/PatternMatch.h" 23 #include "mlir/Transforms/DialectConversion.h" 24 25 namespace mlir { 26 namespace quant { 27 28 #define GEN_PASS_DEF_LOWERQUANTOPS 29 #include "mlir/Dialect/Quant/Transforms/Passes.h.inc" 30 31 namespace { 32 33 // If 'inputType' is a tensor, return its element type. If it is a scalar, 34 // return it as is. 35 Type getScalarType(Type inputType) { 36 if (auto tensorType = dyn_cast<TensorType>(inputType)) 37 return tensorType.getElementType(); 38 return inputType; 39 } 40 41 // Return the shape of an input value as a list of attributes (static dimensions) 42 // and values (dynamic dimensions). If 'input' is a scalar, an empty list is 43 // returned. If 'input' is a tensor, its shape is returned. 44 SmallVector<OpFoldResult> 45 getScalarOrTensorShape(OpBuilder &builder, Location loc, Value input) { 46 if (isa<TensorType>(input.getType())) 47 return tensor::getMixedSizes(builder, loc, input); 48 return {}; 49 } 50 51 // If 'referenceType' is a scalar, return 'elementType' as is. 
If 52 // 'referenceType' is a tensor, return another tensor with the same shape and 53 // elements of type 'elementType'. 54 Type getScalarOrTensorType(Type elementType, Type referenceType) { 55 if (auto tensorType = dyn_cast<TensorType>(referenceType)) 56 return tensorType.clone(elementType); 57 return elementType; 58 } 59 60 // Return a constant with the given value. If 'referenceType' is a tensor, a 61 // tensor splat of shape 'referenceShape' is returned. If 'referenceType' is a 62 // scalar, 'referenceShape' is ignored and a scalar constant is returned. 63 Value getScalarOrTensorConstant(OpBuilder &builder, Location loc, Value scalar, 64 Type referenceType, 65 ArrayRef<OpFoldResult> referenceShape) { 66 // If the result type is a scalar, return the unmodified scalar constant. 67 auto tensorType = dyn_cast<TensorType>(referenceType); 68 if (!tensorType) { 69 assert(referenceShape.empty()); 70 return scalar; 71 } 72 73 // Create tensor splat 74 auto tensorConstant = 75 builder.create<tensor::SplatOp>(loc, scalar, referenceShape); 76 return tensorConstant; 77 } 78 79 // Reshape an unranked tensor into a 1D ranked tensor. 80 // 81 // - input 82 // Unranked tensor. 83 // 84 // Return values: 85 // 86 // - flatInput 87 // 1D ranked, dynamically shaped tensor. 88 // 89 // - inputShape 90 // 1D extent tensor containing the shape of the original unranked input. 
91 // 92 std::pair<Value, Value> flattenUnrankedTensor(OpBuilder &builder, Location loc, 93 Value input) { 94 // Get unranked input shape and total size 95 auto *context = builder.getContext(); 96 auto shapeType = shape::getExtentTensorType(context); 97 auto inputShape = builder.create<shape::ShapeOfOp>(loc, shapeType, input); 98 Value inputSize = builder.create<shape::NumElementsOp>( 99 loc, builder.getIndexType(), inputShape); 100 101 // Turn input size into 1D tensor 102 auto flatShapeType = shape::getExtentTensorType(context, 1); 103 auto flatInputShape = builder.create<tensor::FromElementsOp>( 104 loc, flatShapeType, inputSize); 105 106 // Reshape input tensor into 1D 107 auto inputType = cast<UnrankedTensorType>(input.getType()); 108 auto elementType = inputType.getElementType(); 109 auto flatInputType = 110 RankedTensorType::get({ShapedType::kDynamic}, elementType); 111 auto flatInput = builder.create<tensor::ReshapeOp>( 112 loc, flatInputType, input, flatInputShape); 113 return std::make_pair(flatInput, inputShape); 114 } 115 116 // Reshape an unranked tensor into a 3D ranked tensor where the central 117 // dimension of the result tensor corresponds to dimension 'axis' of the input 118 // tensor. 119 // 120 // - input 121 // Unranked tensor. 122 // 123 // - axis 124 // Index of the input dimension around which other input dimiensions will be 125 // collapsed. 126 // 127 // - axisSize 128 // Size of input dimension 'axis'. 129 // 130 // Return values: 131 // 132 // - flatInput 133 // 3D ranked tensor of shape [?, axisSize, ?]. 134 // 135 // - inputShape 136 // 1D extent tensor containing the shape of the original unranked input. 
//
std::pair<Value, Value> flattenUnrankedTensorAroundAxis(OpBuilder &builder,
                                                        Location loc,
                                                        Value input,
                                                        int64_t axis,
                                                        int64_t axisSize) {
  // Get full tensor shape as a 1D extent tensor
  auto *context = builder.getContext();
  auto indexType = builder.getIndexType();
  auto shapeType = shape::getExtentTensorType(context);
  auto inputShape = builder.create<shape::ShapeOfOp>(loc, shapeType, input);

  // Get shape and sizes on left and right of axis. 'shape.split_at' at index
  // 'axis' yields the extents before the axis (result 0); splitting at
  // 'axis + 1' yields the extents after the axis (result 1).
  auto axisValue = builder.create<arith::ConstantIndexOp>(loc, axis);
  auto axisNextValue = builder.create<arith::ConstantIndexOp>(loc, axis + 1);
  auto shapeLeft =
      builder
          .create<shape::SplitAtOp>(loc, TypeRange{shapeType, shapeType},
                                    inputShape, axisValue)
          .getResult(0);
  auto sizeLeft =
      builder.create<shape::NumElementsOp>(loc, indexType, shapeLeft);
  auto shapeRight =
      builder
          .create<shape::SplitAtOp>(loc, TypeRange{shapeType, shapeType},
                                    inputShape, axisNextValue)
          .getResult(1);
  auto sizeRight =
      builder.create<shape::NumElementsOp>(loc, indexType, shapeRight);

  // Compute flat input shape as a 3-element 1D tensor:
  // [product(left dims), axisSize, product(right dims)]
  auto axisSizeValue = builder.create<arith::ConstantIndexOp>(loc, axisSize);
  auto flatShapeType = shape::getExtentTensorType(context, 3);
  auto flatInputShape = builder.create<tensor::FromElementsOp>(
      loc, flatShapeType, ValueRange{sizeLeft, axisSizeValue, sizeRight});

  // Reshape input to a 3D tensor of shape [?, axisSize, ?]
  auto inputType = cast<UnrankedTensorType>(input.getType());
  auto elementType = inputType.getElementType();
  auto flatInputType = RankedTensorType::get(
      {ShapedType::kDynamic, axisSize, ShapedType::kDynamic}, elementType);
  auto flatInput = builder.create<tensor::ReshapeOp>(loc, flatInputType, input,
                                                     flatInputShape);

  return std::make_pair(flatInput, inputShape);
}

// Reshape an input tensor into its original unranked shape.
//
// - input
//   Ranked tensor.
184 // 185 // - inputShape 186 // 1D extent tensor. 187 // 188 Value restoreUnrankedTensorShape(OpBuilder &builder, Location loc, Value input, 189 Value inputShape) { 190 auto inputType = cast<RankedTensorType>(input.getType()); 191 auto elementType = inputType.getElementType(); 192 auto unrankedType = UnrankedTensorType::get(elementType); 193 return builder.create<tensor::ReshapeOp>(loc, unrankedType, input, inputShape); 194 } 195 196 // Create a tensor constant containing all scales in a per-channel quantized 197 // type. Example: 198 // 199 // !quant.uniform<i8:f32:1, {2.0:10, 3.0:20}> 200 // 201 // produces 202 // 203 // %cst = arith.constant dense<[2.0, 3.0]> : tensor<2xf32> 204 // 205 Value materializePerChannelScales(OpBuilder &builder, Location loc, 206 UniformQuantizedPerAxisType quantizedType) { 207 auto scales = quantizedType.getScales(); 208 auto expressedType = quantizedType.getExpressedType(); 209 auto scaleAttrs = llvm::map_to_vector(scales, [&](double scale) -> Attribute { 210 return builder.getFloatAttr(expressedType, scale); 211 }); 212 auto tensorType = RankedTensorType::get({(int64_t) scales.size()}, expressedType); 213 auto scalesAttr = DenseElementsAttr::get(tensorType, scaleAttrs); 214 return builder.create<arith::ConstantOp>(loc, tensorType, scalesAttr); 215 } 216 217 // Create a tensor constant containing all zero points in a per-channel 218 // quantized type. 
Example: 219 // 220 // !quant.uniform<i8:f32:1, {2.0:10, 3.0:20}> 221 // 222 // produces 223 // 224 // %cst = arith.constant dense<[10, 20]> : tensor<2xi8> 225 // 226 Value materializePerChannelZeroPoints( 227 OpBuilder &builder, Location loc, 228 UniformQuantizedPerAxisType quantizedType) { 229 auto zeroPoints = quantizedType.getZeroPoints(); 230 auto storageType = quantizedType.getStorageType(); 231 auto zeroPointAttrs = llvm::map_to_vector( 232 zeroPoints, 233 [&](int64_t zeroPoint) -> Attribute { 234 return builder.getIntegerAttr(storageType, zeroPoint); 235 }); 236 auto tensorType = 237 RankedTensorType::get({(int64_t)zeroPoints.size()}, storageType); 238 auto zeroPointsAttr = DenseElementsAttr::get(tensorType, zeroPointAttrs); 239 return builder.create<arith::ConstantOp>(loc, tensorType, zeroPointsAttr); 240 } 241 242 // Clamp the given scalar or tensor input using the storage bounds encoded in 243 // the given quantized type, if present. 244 // 245 // - input 246 // Scalar or ranked tensor input. The element type must match the storage type 247 // of 'quantizedType'. 248 // 249 // - inputShape 250 // If 'input' is a tensor, combination of attributes/values representing its 251 // static/dynamic dimensions. If 'input' is a scalar, empty list. 252 // 253 // - quantizedType 254 // Per-axis or per-channel quantized type. 255 Value clampScalarOrTensor(OpBuilder &builder, Location loc, Value input, 256 ArrayRef<OpFoldResult> inputShape, 257 QuantizedType quantizedType) { 258 // If quantized type does not narrow down the storage type range, there is 259 // nothing to do. 
260 if (!quantizedType.hasStorageTypeBounds()) 261 return input; 262 263 // Materialize bounds 264 auto inputType = input.getType(); 265 auto storageType = quantizedType.getStorageType(); 266 auto storageMinScalar = builder.create<arith::ConstantIntOp>( 267 loc, quantizedType.getStorageTypeMin(), storageType); 268 auto storageMaxScalar = builder.create<arith::ConstantIntOp>( 269 loc, quantizedType.getStorageTypeMax(), storageType); 270 auto storageMin = getScalarOrTensorConstant(builder, loc, storageMinScalar, 271 inputType, inputShape); 272 auto storageMax = getScalarOrTensorConstant(builder, loc, storageMaxScalar, 273 inputType, inputShape); 274 275 // Clamp 276 if (quantizedType.isSigned()) { 277 input = builder.create<arith::MaxSIOp>(loc, input, storageMin); 278 input = builder.create<arith::MinSIOp>(loc, input, storageMax); 279 } else { 280 input = builder.create<arith::MaxUIOp>(loc, input, storageMin); 281 input = builder.create<arith::MinUIOp>(loc, input, storageMax); 282 } 283 return input; 284 } 285 286 // Emit op 'arith.fptosi' or 'arith.fptoui'. 287 Value convertFloatToInteger(OpBuilder &builder, Location loc, Value input, 288 Type resultType, bool isSigned) { 289 if (isSigned) 290 return builder.create<arith::FPToSIOp>(loc, resultType, input); 291 return builder.create<arith::FPToUIOp>(loc, resultType, input); 292 } 293 294 // Emit op 'arith.sitofp' or 'arith.uitofp'. 295 Value convertIntegerToFloat(OpBuilder &builder, Location loc, Value input, 296 Type resultType, bool isSigned) { 297 if (isSigned) 298 return builder.create<arith::SIToFPOp>(loc, resultType, input); 299 return builder.create<arith::UIToFPOp>(loc, resultType, input); 300 } 301 302 // Quantize a scalar or ranked tensor value. The stored value is clamped using 303 // the storage bounds encoded in the given quantized type. 304 // 305 // See function 'convertRanked()' below for a description of the arguments. 
306 Value quantizeValue(OpBuilder &builder, Location loc, Value input, 307 ArrayRef<OpFoldResult> inputShape, Value scale, 308 Value zeroPoint, QuantizedType quantizedType) { 309 // Convert scale to tensor if necessary 310 auto inputType = input.getType(); 311 scale = getScalarOrTensorConstant( 312 builder, loc, scale, inputType, inputShape); 313 314 // Scale input 315 auto scaledValue = builder.create<arith::DivFOp>(loc, input, scale); 316 317 // Skip unnecessary computations if no zero point is given 318 Value storedValueFloat = scaledValue; 319 if (!matchPattern(zeroPoint, m_Zero())) { 320 // Convert zero point to tensor if necessary 321 zeroPoint = getScalarOrTensorConstant(builder, loc, zeroPoint, inputType, 322 inputShape); 323 324 // Convert zero point from storage to expressed type 325 zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, 326 scale.getType(), 327 quantizedType.isSigned()); 328 329 // Add zero point to stored value 330 storedValueFloat = 331 builder.create<arith::AddFOp>(loc, scaledValue, zeroPoint); 332 } 333 334 // Convert stored value to storage type 335 auto storageScalarOrTensorType = 336 getScalarOrTensorType(quantizedType.getStorageType(), inputType); 337 auto storedValueInt = convertFloatToInteger( 338 builder, loc, storedValueFloat, storageScalarOrTensorType, 339 quantizedType.isSigned()); 340 341 // Clamp stored value it if the storage type is bound 342 auto storedValueClamped = clampScalarOrTensor(builder, loc, storedValueInt, 343 inputShape, quantizedType); 344 return storedValueClamped; 345 } 346 347 // Dequantize a scalar or ranked tensor input. 348 // 349 // See function 'convertRanked()' below for a description of the arguments. 
350 Value dequantizeValue(OpBuilder &builder, Location loc, Value input, 351 ArrayRef<OpFoldResult> inputShape, Value scale, 352 Value zeroPoint, QuantizedType quantizedType) { 353 // Convert scale to tensor if necessary 354 auto inputType = input.getType(); 355 scale = getScalarOrTensorConstant( 356 builder, loc, scale, inputType, inputShape); 357 358 // Convert stored value to float 359 auto result = convertIntegerToFloat( 360 builder, loc, input, scale.getType(), quantizedType.isSigned()); 361 362 // Skip unnecessary computations if no zero point is given 363 if (!matchPattern(zeroPoint, m_Zero())) { 364 // Convert zero point to tensor if necessary 365 zeroPoint = getScalarOrTensorConstant(builder, loc, zeroPoint, inputType, 366 inputShape); 367 368 // Convert zero point from storage to expressed type 369 zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, 370 scale.getType(), 371 quantizedType.isSigned()); 372 373 // Subtract zero point to stored value 374 result = builder.create<arith::SubFOp>(loc, result, zeroPoint); 375 } 376 377 // Multiply by scale 378 result = builder.create<arith::MulFOp>(loc, result, scale); 379 return result; 380 } 381 382 // Convert a scalar or ranked tensor input with the given scale and zero point 383 // values. 384 // 385 // - input 386 // Scalar or ranked tensor value. 387 // 388 // - inputShape 389 // If 'input' is a tensor, combination or attributes/values representing its 390 // static/dynamic dimensions. If 'input' is a scalar, empty list. 391 // 392 // - scale 393 // Scale as a floating-point scalar value. 394 // 395 // - zeroPoint 396 // Zero point as an integer scalar value. 397 // 398 // - quantizedType 399 // Scalar quantized type of the result ('quant.qcast') or of the input 400 // ('quant.dcast'). 
401 // 402 Value convertRanked(OpBuilder &builder, Location loc, Operation *op, 403 Value input, ArrayRef<OpFoldResult> inputShape, Value scale, 404 Value zeroPoint, QuantizedType quantizedType) { 405 if (isa<QuantizeCastOp>(op)) 406 return quantizeValue(builder, loc, input, inputShape, scale, zeroPoint, 407 quantizedType); 408 if (isa<DequantizeCastOp>(op)) 409 return dequantizeValue(builder, loc, input, inputShape, scale, zeroPoint, 410 quantizedType); 411 llvm_unreachable("unexpected quant op"); 412 } 413 414 // Convert an operation using per-layer quantization with a scalar or ranked 415 // tensor input. 416 // 417 // - op 418 // 'quant.dcast' or 'quant.qcast' op. 419 // 420 // - input 421 // Scalar or ranked tensor. 422 // 423 // - quantizedType 424 // Per-layer quantized type. 425 // 426 Value convertPerLayerRanked(OpBuilder &builder, Location loc, Operation *op, 427 Value input, UniformQuantizedType quantizedType) { 428 // Create scale and zero point constants 429 auto expressedType = quantizedType.getExpressedType(); 430 auto storageType = quantizedType.getStorageType(); 431 auto scaleAttr = 432 builder.getFloatAttr(expressedType, quantizedType.getScale()); 433 auto scale = builder.create<arith::ConstantOp>(loc, expressedType, scaleAttr); 434 auto zeroPointAttr = 435 builder.getIntegerAttr(storageType, quantizedType.getZeroPoint()); 436 auto zeroPoint = 437 builder.create<arith::ConstantOp>(loc, storageType, zeroPointAttr); 438 439 auto inputShape = getScalarOrTensorShape(builder, loc, input); 440 return convertRanked(builder, loc, op, input, inputShape, scale, zeroPoint, 441 quantizedType); 442 } 443 444 // Convert an operation using per-layer quantization. 445 // 446 // - op 447 // 'quant.dcast' or 'quant.qcast' op. 448 // 449 // - input 450 // Scalar, ranked tensor, or unranked tensor. 451 // 452 // - quantizedType 453 // Per-layer quantized type. 
//
Value convertPerLayer(OpBuilder &builder, Location loc, Operation *op,
                      Value input, UniformQuantizedType quantizedType) {
  // Flatten input if unranked, remembering its original shape so it can be
  // restored afterwards
  bool isUnranked = isa<UnrankedTensorType>(input.getType());
  Value inputShape;
  if (isUnranked)
    std::tie(input, inputShape) = flattenUnrankedTensor(builder, loc, input);

  // Process ranked tensor
  auto result = convertPerLayerRanked(builder, loc, op, input, quantizedType);

  // Restore original shape if unranked
  if (isUnranked)
    result = restoreUnrankedTensorShape(builder, loc, result, inputShape);

  return result;
}

// Convert an operation using per-channel quantization and a scalar or ranked
// tensor as an input.
//
// - op
//   'quant.dcast' or 'quant.qcast' op.
//
// - input
//   Scalar or ranked tensor.
//
// - quantizedType
//   Per-channel quantized type.
//
Value convertPerChannelRanked(OpBuilder &builder, Location loc, Operation *op,
                              Value input,
                              UniformQuantizedPerAxisType quantizedType,
                              int64_t channelAxis) {
  auto *context = builder.getContext();

  auto inputType = cast<RankedTensorType>(input.getType());
  auto inputRank = inputType.getRank();

  auto scales = materializePerChannelScales(builder, loc, quantizedType);
  auto zeroPoints =
      materializePerChannelZeroPoints(builder, loc, quantizedType);

  // Result element type is the opposite side of the conversion: a float
  // input ('quant.qcast') produces storage-typed elements, while an integer
  // input ('quant.dcast') produces expressed-typed elements.
  auto elementType = isa<FloatType>(inputType.getElementType())
                         ? quantizedType.getStorageType()
                         : quantizedType.getExpressedType();
  auto initShape = tensor::getMixedSizes(builder, loc, input);
  Value init = builder.create<tensor::EmptyOp>(loc, initShape, elementType);

  // All dimensions iterate in parallel; scales and zero points are indexed by
  // the channel dimension only (1D operands mapped through 'channelAxis').
  SmallVector<utils::IteratorType> iteratorTypes(
      inputRank, utils::IteratorType::parallel);
  auto channelAxisAffineMap = AffineMap::get(
      inputRank, 0, builder.getAffineDimExpr(channelAxis), context);
  SmallVector<AffineMap> indexingMaps{
    builder.getMultiDimIdentityMap(inputRank),
    channelAxisAffineMap,
    channelAxisAffineMap,
    builder.getMultiDimIdentityMap(inputRank)
  };
  auto result = builder.create<linalg::GenericOp>(
      loc,
      init.getType(),  // resultType
      ValueRange{input, scales, zeroPoints},  // inputs
      ValueRange{init},  // outputs
      indexingMaps,
      iteratorTypes,
      [&](OpBuilder& builder, Location loc, ValueRange args) {
        // Region args: input element, scale, zero point, and the init element
        // (unused).
        assert(args.size() == 4);
        auto input = args[0];
        auto scale = args[1];
        auto zeroPoint = args[2];

        // Convert the scalar element using the per-channel scale/zero point;
        // the empty shape list marks the input as scalar.
        auto result = convertRanked(builder, loc, op, input, {}, scale,
                                    zeroPoint, quantizedType);

        builder.create<linalg::YieldOp>(loc, result);
      })
      .getResult(0);

  return result;
}

// Convert an operation using per-channel quantization.
//
// - op
//   'quant.dcast' or 'quant.qcast' op.
//
// - input
//   Scalar, ranked tensor, or unranked tensor.
//
// - quantizedType
//   Per-channel quantized type.
547 // 548 Value convertPerChannel(OpBuilder &builder, Location loc, Operation *op, 549 Value input, 550 UniformQuantizedPerAxisType quantizedType) { 551 // Flatten unranked tensor into a 3D ranked tensor if necessary 552 bool isUnranked = isa<UnrankedTensorType>(input.getType()); 553 int64_t channelAxis = quantizedType.getQuantizedDimension(); 554 int64_t channelAxisSize = (int64_t) quantizedType.getScales().size(); 555 Value inputShape; 556 if (isUnranked) { 557 std::tie(input, inputShape) = flattenUnrankedTensorAroundAxis( 558 builder, loc, input, channelAxis, channelAxisSize); 559 channelAxis = 1; 560 } 561 562 // Work on a ranked tensor 563 auto result = convertPerChannelRanked(builder, loc, op, input, quantizedType, 564 channelAxis); 565 566 // Restore original tensor shape if unranked 567 if (isUnranked) 568 result = restoreUnrankedTensorShape(builder, loc, result, inputShape); 569 570 return result; 571 } 572 573 // Convert a quantization operation. 574 // 575 // - op 576 // 'quant.dcast' or 'quant.qcast' op. 577 // 578 // - input 579 // Scalar, ranked tensor, or unranked tensor. The element type matches 580 // the storage type (quant.dcast) or expressed type (quant.qcast) of 581 // 'quantizedType'. 582 // 583 // - quantizedType 584 // Per-layer or per-channel quantized type. 
//
Value convertQuantized(OpBuilder &builder, Location loc, Operation *op,
                       Value input, Type quantizedType) {
  if (auto uniformQuantizedType =
          dyn_cast<UniformQuantizedType>(quantizedType))
    return convertPerLayer(builder, loc, op, input, uniformQuantizedType);

  if (auto uniformQuantizedPerAxisType =
          dyn_cast<UniformQuantizedPerAxisType>(quantizedType))
    return convertPerChannel(builder, loc, op, input,
                             uniformQuantizedPerAxisType);

  llvm_unreachable("unexpected quantized type");
}

// Lowering pattern for 'quant.dcast'
struct DequantizeCastOpConversion
    : public OpConversionPattern<quant::DequantizeCastOp> {
  using OpConversionPattern<quant::DequantizeCastOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(quant::DequantizeCastOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op.getLoc();
    auto input = op.getInput();
    // The scalar element type of the input is the quantized type to lower.
    auto quantizedType =
        cast<QuantizedType>(getScalarType(op.getInput().getType()));

    // Convert quantized input to storage type
    auto storageScalarOrTensorType =
        getScalarOrTensorType(quantizedType.getStorageType(), input.getType());
    input = rewriter.create<quant::StorageCastOp>(
        loc, storageScalarOrTensorType, input);

    auto result = convertQuantized(rewriter, loc, op, input, quantizedType);

    rewriter.replaceOp(op, result);
    return success();
  }
};

// Lowering pattern for 'quant.qcast'
struct QuantizeCastOpConversion
    : public OpConversionPattern<quant::QuantizeCastOp> {
  using OpConversionPattern<quant::QuantizeCastOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(quant::QuantizeCastOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op.getLoc();
    auto input = op.getInput();
    // The scalar element type of the result is the quantized type to lower.
    auto quantizedType = getScalarType(op.getResult().getType());

    // Lower to core-dialect ops; handles scalar, ranked, and unranked inputs
    // (any flattening of unranked tensors happens inside convertQuantized).
    auto result = convertQuantized(rewriter, loc, op, input, quantizedType);

    // Cast stored value to result quantized value
    rewriter.replaceOpWithNewOp<quant::StorageCastOp>(
        op, op.getResult().getType(), result);
    return success();
  }
};

// Pass that converts all 'quant' dialect ops (except 'quant.scast') into
// arith/linalg/shape/tensor ops.
struct LowerQuantOps : public impl::LowerQuantOpsBase<LowerQuantOps> {
  void runOnOperation() override {
    RewritePatternSet patterns(&getContext());
    populateLowerQuantOpsPatterns(patterns);

    ConversionTarget target(getContext());
    target.addLegalOp<quant::StorageCastOp>();
    target.addIllegalDialect<quant::QuantDialect>();
    target.addLegalDialect<
      arith::ArithDialect,
      linalg::LinalgDialect,
      shape::ShapeDialect,
      tensor::TensorDialect
    >();

    if (failed(applyPartialConversion(getOperation(), target,
                                      std::move(patterns))))
      signalPassFailure();
  }
};

} // namespace

// Populate 'patterns' with the lowerings for 'quant.dcast' and 'quant.qcast'.
void populateLowerQuantOpsPatterns(RewritePatternSet &patterns) {
  patterns.add<
    DequantizeCastOpConversion,
    QuantizeCastOpConversion
  >(patterns.getContext());
}

} // namespace quant
} // namespace mlir