ArmSVE/Transforms/LegalizeForLLVMExport.cpp

//===- LegalizeForLLVMExport.cpp - Prepare ArmSVE for LLVM translation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
b739badaSJavier Setoain
75e5f0aaSAlex Zinenko#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
75e5f0aaSAlex Zinenko#include "mlir/Conversion/LLVMCommon/Pattern.h"
7bbfd2aeSBenjamin Maxwell#include "mlir/Dialect/ArmSVE/IR/ArmSVEDialect.h"
7bbfd2aeSBenjamin Maxwell#include "mlir/Dialect/ArmSVE/Transforms/Transforms.h"
1f971e23SRiver Riddle#include "mlir/Dialect/Func/IR/FuncOps.h"
b739badaSJavier Setoain#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
b833bcb5SBenjamin Maxwell#include "mlir/Dialect/Utils/IndexingUtils.h"
b833bcb5SBenjamin Maxwell#include "mlir/Dialect/Vector/IR/VectorOps.h"
b739badaSJavier Setoain#include "mlir/IR/BuiltinOps.h"
b739badaSJavier Setoain#include "mlir/IR/PatternMatch.h"
b739badaSJavier Setoain
b739badaSJavier Setoainusing namespace mlir;
b739badaSJavier Setoainusing namespace mlir::arm_sve;
b739badaSJavier Setoain
b739badaSJavier Setoainusing SdotOpLowering = OneToOneConvertToLLVMPattern<SdotOp, SdotIntrOp>;
b739badaSJavier Setoainusing SmmlaOpLowering = OneToOneConvertToLLVMPattern<SmmlaOp, SmmlaIntrOp>;
b739badaSJavier Setoainusing UdotOpLowering = OneToOneConvertToLLVMPattern<UdotOp, UdotIntrOp>;
b739badaSJavier Setoainusing UmmlaOpLowering = OneToOneConvertToLLVMPattern<UmmlaOp, UmmlaIntrOp>;
95861216SJavier Setoainusing ScalableMaskedAddIOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedAddIOp,
95861216SJavier Setoain                                 ScalableMaskedAddIIntrOp>;
95861216SJavier Setoainusing ScalableMaskedAddFOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedAddFOp,
95861216SJavier Setoain                                 ScalableMaskedAddFIntrOp>;
95861216SJavier Setoainusing ScalableMaskedSubIOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedSubIOp,
95861216SJavier Setoain                                 ScalableMaskedSubIIntrOp>;
95861216SJavier Setoainusing ScalableMaskedSubFOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedSubFOp,
95861216SJavier Setoain                                 ScalableMaskedSubFIntrOp>;
95861216SJavier Setoainusing ScalableMaskedMulIOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedMulIOp,
95861216SJavier Setoain                                 ScalableMaskedMulIIntrOp>;
95861216SJavier Setoainusing ScalableMaskedMulFOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedMulFOp,
95861216SJavier Setoain                                 ScalableMaskedMulFIntrOp>;
95861216SJavier Setoainusing ScalableMaskedSDivIOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedSDivIOp,
95861216SJavier Setoain                                 ScalableMaskedSDivIIntrOp>;
95861216SJavier Setoainusing ScalableMaskedUDivIOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedUDivIOp,
95861216SJavier Setoain                                 ScalableMaskedUDivIIntrOp>;
95861216SJavier Setoainusing ScalableMaskedDivFOpLowering =
95861216SJavier Setoain    OneToOneConvertToLLVMPattern<ScalableMaskedDivFOp,
95861216SJavier Setoain                                 ScalableMaskedDivFIntrOp>;
b739badaSJavier Setoain
b833bcb5SBenjamin Maxwellnamespace {
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell/// Unrolls a conversion to/from equivalent vector types, to allow using a
b833bcb5SBenjamin Maxwell/// conversion intrinsic that only supports 1-D vector types.
b833bcb5SBenjamin Maxwell///
b833bcb5SBenjamin Maxwell/// Example:
b833bcb5SBenjamin Maxwell/// ```
b833bcb5SBenjamin Maxwell/// %result = arm_sve.convert_to_svbool %source : vector<2x[4]xi1>
b833bcb5SBenjamin Maxwell/// ```
b833bcb5SBenjamin Maxwell/// is rewritten into:
b833bcb5SBenjamin Maxwell/// ```
b833bcb5SBenjamin Maxwell/// %cst = arith.constant dense<false> : vector<2x[16]xi1>
b833bcb5SBenjamin Maxwell/// %1 = vector.extract %source[0] : vector<[4]xi1> from vector<2x[4]xi1>
b833bcb5SBenjamin Maxwell/// %2 = "arm_sve.intr.convert.to.svbool"(%1)
b833bcb5SBenjamin Maxwell///                : (vector<[4]xi1>) -> vector<[16]xi1>
b833bcb5SBenjamin Maxwell/// %3 = vector.insert %2, %cst[0] : vector<[16]xi1> into vector<2x[16]xi1>
b833bcb5SBenjamin Maxwell/// %4 = vector.extract %source[1] : vector<[4]xi1> from vector<2x[4]xi1>
b833bcb5SBenjamin Maxwell/// %5 = "arm_sve.intr.convert.to.svbool"(%4)
b833bcb5SBenjamin Maxwell///                : (vector<[4]xi1>) -> vector<[16]xi1>
b833bcb5SBenjamin Maxwell/// %result = vector.insert %5, %3[1] : vector<[16]xi1> into vector<2x[16]xi1>
b833bcb5SBenjamin Maxwell/// ```
b833bcb5SBenjamin Maxwelltemplate <typename Op, typename IntrOp>
b833bcb5SBenjamin Maxwellstruct SvboolConversionOpLowering : public ConvertOpToLLVMPattern<Op> {
b833bcb5SBenjamin Maxwell  using ConvertOpToLLVMPattern<Op>::ConvertOpToLLVMPattern;
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell  LogicalResult
b833bcb5SBenjamin Maxwell  matchAndRewrite(Op convertOp, typename Op::Adaptor,
b833bcb5SBenjamin Maxwell                  ConversionPatternRewriter &rewriter) const override {
b833bcb5SBenjamin Maxwell    auto loc = convertOp.getLoc();
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell    auto source = convertOp.getSource();
b833bcb5SBenjamin Maxwell    VectorType sourceType = source.getType();
b833bcb5SBenjamin Maxwell    VectorType resultType = convertOp.getResult().getType();
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell    Value result = rewriter.create<arith::ConstantOp>(
b833bcb5SBenjamin Maxwell        loc, resultType, rewriter.getZeroAttr(resultType));
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell    // We want to iterate over the input vector in steps of the trailing
b833bcb5SBenjamin Maxwell    // dimension. So this creates tile shape where all leading dimensions are 1,
b833bcb5SBenjamin Maxwell    // and the trailing dimension step is the size of the dimension.
b833bcb5SBenjamin Maxwell    SmallVector<int64_t> tileShape(sourceType.getRank(), 1);
b833bcb5SBenjamin Maxwell    tileShape.back() = sourceType.getShape().back();
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell    // Iterate over all scalable mask/predicate slices of the source vector.
b833bcb5SBenjamin Maxwell    for (SmallVector<int64_t> index :
b833bcb5SBenjamin Maxwell         StaticTileOffsetRange(sourceType.getShape(), tileShape)) {
b833bcb5SBenjamin Maxwell      auto extractOrInsertPosition = ArrayRef(index).drop_back();
b833bcb5SBenjamin Maxwell      auto sourceVector = rewriter.create<vector::ExtractOp>(
b833bcb5SBenjamin Maxwell          loc, source, extractOrInsertPosition);
b44b3494SBenjamin Maxwell      VectorType convertedType =
b833bcb5SBenjamin Maxwell          VectorType::Builder(llvm::cast<VectorType>(sourceVector.getType()))
b833bcb5SBenjamin Maxwell              .setDim(0, resultType.getShape().back());
b833bcb5SBenjamin Maxwell      auto convertedVector =
b833bcb5SBenjamin Maxwell          rewriter.create<IntrOp>(loc, TypeRange{convertedType}, sourceVector);
b833bcb5SBenjamin Maxwell      result = rewriter.create<vector::InsertOp>(loc, convertedVector, result,
b833bcb5SBenjamin Maxwell                                                 extractOrInsertPosition);
b833bcb5SBenjamin Maxwell    }
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwell    rewriter.replaceOp(convertOp, result);
b833bcb5SBenjamin Maxwell    return success();
b833bcb5SBenjamin Maxwell  }
b833bcb5SBenjamin Maxwell};
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwellusing ConvertToSvboolOpLowering =
b833bcb5SBenjamin Maxwell    SvboolConversionOpLowering<ConvertToSvboolOp, ConvertToSvboolIntrOp>;
b833bcb5SBenjamin Maxwell
b833bcb5SBenjamin Maxwellusing ConvertFromSvboolOpLowering =
b833bcb5SBenjamin Maxwell    SvboolConversionOpLowering<ConvertFromSvboolOp, ConvertFromSvboolIntrOp>;
b833bcb5SBenjamin Maxwell
7dcca621SBenjamin Maxwellusing ZipX2OpLowering = OneToOneConvertToLLVMPattern<ZipX2Op, ZipX2IntrOp>;
7dcca621SBenjamin Maxwellusing ZipX4OpLowering = OneToOneConvertToLLVMPattern<ZipX4Op, ZipX4IntrOp>;
7dcca621SBenjamin Maxwell
78113303SBenjamin Maxwell/// Lower `arm_sve.psel` to LLVM intrinsics. This is almost a 1-to-1 conversion
78113303SBenjamin Maxwell/// but first input (P1) and result predicates need conversion to/from svbool.
78113303SBenjamin Maxwellstruct PselOpLowering : public ConvertOpToLLVMPattern<PselOp> {
78113303SBenjamin Maxwell  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
78113303SBenjamin Maxwell
78113303SBenjamin Maxwell  LogicalResult
78113303SBenjamin Maxwell  matchAndRewrite(PselOp pselOp, PselOp::Adaptor adaptor,
78113303SBenjamin Maxwell                  ConversionPatternRewriter &rewriter) const override {
78113303SBenjamin Maxwell    auto svboolType = VectorType::get(16, rewriter.getI1Type(), true);
78113303SBenjamin Maxwell    auto loc = pselOp.getLoc();
78113303SBenjamin Maxwell    auto svboolP1 = rewriter.create<ConvertToSvboolIntrOp>(loc, svboolType,
78113303SBenjamin Maxwell                                                           adaptor.getP1());
78113303SBenjamin Maxwell    auto indexI32 = rewriter.create<arith::IndexCastOp>(
78113303SBenjamin Maxwell        loc, rewriter.getI32Type(), pselOp.getIndex());
78113303SBenjamin Maxwell    auto pselIntr = rewriter.create<PselIntrOp>(loc, svboolType, svboolP1,
78113303SBenjamin Maxwell                                                pselOp.getP2(), indexI32);
78113303SBenjamin Maxwell    rewriter.replaceOpWithNewOp<ConvertFromSvboolIntrOp>(
78113303SBenjamin Maxwell        pselOp, adaptor.getP1().getType(), pselIntr);
78113303SBenjamin Maxwell    return success();
78113303SBenjamin Maxwell  }
78113303SBenjamin Maxwell};
78113303SBenjamin Maxwell
657ec732SBenjamin Maxwell/// Converts `vector.create_mask` ops that match the size of an SVE predicate
657ec732SBenjamin Maxwell/// to the `whilelt` intrinsic. This produces more canonical codegen than the
657ec732SBenjamin Maxwell/// generic LLVM lowering, see https://github.com/llvm/llvm-project/issues/81840
657ec732SBenjamin Maxwell/// for more details. Note that we can't use (the more general) active.lane.mask
657ec732SBenjamin Maxwell/// as its semantics don't neatly map on to `vector.create_mask`, as it does an
657ec732SBenjamin Maxwell/// unsigned comparison (whereas `create_mask` is signed), and is UB/posion if
657ec732SBenjamin Maxwell/// `n` is zero (whereas `create_mask` just returns an all-false mask).
657ec732SBenjamin Maxwellstruct CreateMaskOpLowering
657ec732SBenjamin Maxwell    : public ConvertOpToLLVMPattern<vector::CreateMaskOp> {
657ec732SBenjamin Maxwell  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
657ec732SBenjamin Maxwell
657ec732SBenjamin Maxwell  LogicalResult
657ec732SBenjamin Maxwell  matchAndRewrite(vector::CreateMaskOp createMaskOp,
657ec732SBenjamin Maxwell                  vector::CreateMaskOp::Adaptor adaptor,
657ec732SBenjamin Maxwell                  ConversionPatternRewriter &rewriter) const override {
657ec732SBenjamin Maxwell    auto maskType = createMaskOp.getVectorType();
657ec732SBenjamin Maxwell    if (maskType.getRank() != 1 || !maskType.isScalable())
657ec732SBenjamin Maxwell      return rewriter.notifyMatchFailure(createMaskOp, "not 1-D and scalable");
657ec732SBenjamin Maxwell
657ec732SBenjamin Maxwell    // TODO: Support masks which are multiples of SVE predicates.
657ec732SBenjamin Maxwell    auto maskBaseSize = maskType.getDimSize(0);
657ec732SBenjamin Maxwell    if (maskBaseSize < 2 || maskBaseSize > 16 ||
657ec732SBenjamin Maxwell        !llvm::isPowerOf2_32(uint32_t(maskBaseSize)))
657ec732SBenjamin Maxwell      return rewriter.notifyMatchFailure(createMaskOp,
657ec732SBenjamin Maxwell                                         "not SVE predicate-sized");
657ec732SBenjamin Maxwell
657ec732SBenjamin Maxwell    auto loc = createMaskOp.getLoc();
657ec732SBenjamin Maxwell    auto zero = rewriter.create<LLVM::ZeroOp>(loc, rewriter.getI64Type());
657ec732SBenjamin Maxwell    rewriter.replaceOpWithNewOp<WhileLTIntrOp>(createMaskOp, maskType, zero,
657ec732SBenjamin Maxwell                                               adaptor.getOperands()[0]);
657ec732SBenjamin Maxwell    return success();
657ec732SBenjamin Maxwell  }
657ec732SBenjamin Maxwell};
657ec732SBenjamin Maxwell
b833bcb5SBenjamin Maxwell} // namespace
b833bcb5SBenjamin Maxwell
b739badaSJavier Setoain/// Populate the given list with patterns that convert from ArmSVE to LLVM.
b739badaSJavier Setoainvoid mlir::populateArmSVELegalizeForLLVMExportPatterns(
*206fad0eSMatthias Springer    const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
b739badaSJavier Setoain  // Populate conversion patterns
b739badaSJavier Setoain
b739badaSJavier Setoain  // clang-format off
b739badaSJavier Setoain  patterns.add<SdotOpLowering,
b739badaSJavier Setoain               SmmlaOpLowering,
b739badaSJavier Setoain               UdotOpLowering,
b739badaSJavier Setoain               UmmlaOpLowering,
95861216SJavier Setoain               ScalableMaskedAddIOpLowering,
95861216SJavier Setoain               ScalableMaskedAddFOpLowering,
95861216SJavier Setoain               ScalableMaskedSubIOpLowering,
95861216SJavier Setoain               ScalableMaskedSubFOpLowering,
95861216SJavier Setoain               ScalableMaskedMulIOpLowering,
95861216SJavier Setoain               ScalableMaskedMulFOpLowering,
95861216SJavier Setoain               ScalableMaskedSDivIOpLowering,
95861216SJavier Setoain               ScalableMaskedUDivIOpLowering,
b833bcb5SBenjamin Maxwell               ScalableMaskedDivFOpLowering,
b833bcb5SBenjamin Maxwell               ConvertToSvboolOpLowering,
7dcca621SBenjamin Maxwell               ConvertFromSvboolOpLowering,
7dcca621SBenjamin Maxwell               ZipX2OpLowering,
78113303SBenjamin Maxwell               ZipX4OpLowering,
78113303SBenjamin Maxwell               PselOpLowering>(converter);
657ec732SBenjamin Maxwell  // Add vector.create_mask conversion with a high benefit as it produces much
657ec732SBenjamin Maxwell  // nicer code than the generic lowering.
657ec732SBenjamin Maxwell  patterns.add<CreateMaskOpLowering>(converter, /*benefit=*/4096);
b739badaSJavier Setoain  // clang-format on
b739badaSJavier Setoain}
b739badaSJavier Setoain
b739badaSJavier Setoainvoid mlir::configureArmSVELegalizeForExportTarget(
b739badaSJavier Setoain    LLVMConversionTarget &target) {
95861216SJavier Setoain  // clang-format off
95861216SJavier Setoain  target.addLegalOp<SdotIntrOp,
95861216SJavier Setoain                    SmmlaIntrOp,
95861216SJavier Setoain                    UdotIntrOp,
95861216SJavier Setoain                    UmmlaIntrOp,
95861216SJavier Setoain                    ScalableMaskedAddIIntrOp,
95861216SJavier Setoain                    ScalableMaskedAddFIntrOp,
95861216SJavier Setoain                    ScalableMaskedSubIIntrOp,
95861216SJavier Setoain                    ScalableMaskedSubFIntrOp,
95861216SJavier Setoain                    ScalableMaskedMulIIntrOp,
95861216SJavier Setoain                    ScalableMaskedMulFIntrOp,
95861216SJavier Setoain                    ScalableMaskedSDivIIntrOp,
95861216SJavier Setoain                    ScalableMaskedUDivIIntrOp,
b833bcb5SBenjamin Maxwell                    ScalableMaskedDivFIntrOp,
b833bcb5SBenjamin Maxwell                    ConvertToSvboolIntrOp,
7dcca621SBenjamin Maxwell                    ConvertFromSvboolIntrOp,
7dcca621SBenjamin Maxwell                    ZipX2IntrOp,
657ec732SBenjamin Maxwell                    ZipX4IntrOp,
78113303SBenjamin Maxwell                    PselIntrOp,
657ec732SBenjamin Maxwell                    WhileLTIntrOp>();
95861216SJavier Setoain  target.addIllegalOp<SdotOp,
95861216SJavier Setoain                      SmmlaOp,
95861216SJavier Setoain                      UdotOp,
95861216SJavier Setoain                      UmmlaOp,
95861216SJavier Setoain                      ScalableMaskedAddIOp,
95861216SJavier Setoain                      ScalableMaskedAddFOp,
95861216SJavier Setoain                      ScalableMaskedSubIOp,
95861216SJavier Setoain                      ScalableMaskedSubFOp,
95861216SJavier Setoain                      ScalableMaskedMulIOp,
95861216SJavier Setoain                      ScalableMaskedMulFOp,
95861216SJavier Setoain                      ScalableMaskedSDivIOp,
95861216SJavier Setoain                      ScalableMaskedUDivIOp,
b833bcb5SBenjamin Maxwell                      ScalableMaskedDivFOp,
b833bcb5SBenjamin Maxwell                      ConvertToSvboolOp,
7dcca621SBenjamin Maxwell                      ConvertFromSvboolOp,
7dcca621SBenjamin Maxwell                      ZipX2Op,
7dcca621SBenjamin Maxwell                      ZipX4Op>();
95861216SJavier Setoain  // clang-format on
b739badaSJavier Setoain}