106c3fb27SDimitry Andric //===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass implements IR expansion for reduction intrinsics, allowing targets 10e8d8bef9SDimitry Andric // to enable the intrinsics until just before codegen. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/CodeGen/ExpandReductions.h" 150b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 160b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h" 170b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 180b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h" 190b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 200b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 21480093f4SDimitry Andric #include "llvm/InitializePasses.h" 220b57cec5SDimitry Andric #include "llvm/Pass.h" 230b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric using namespace llvm; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric namespace { 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric bool expandReductions(Function &F, const TargetTransformInfo *TTI) { 300b57cec5SDimitry Andric bool Changed = false; 310b57cec5SDimitry Andric SmallVector<IntrinsicInst *, 4> Worklist; 32480093f4SDimitry Andric for (auto &I : instructions(F)) { 33480093f4SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 34480093f4SDimitry Andric switch (II->getIntrinsicID()) { 35480093f4SDimitry Andric default: break; 36e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 37e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: 38e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 39e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 40e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_and: 41e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_or: 42e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 43e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 44e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 45e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 46e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 47e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 48e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 49480093f4SDimitry Andric if (TTI->shouldExpandReduction(II)) 500b57cec5SDimitry Andric Worklist.push_back(II); 510b57cec5SDimitry Andric 52480093f4SDimitry Andric break; 53480093f4SDimitry Andric } 54480093f4SDimitry Andric } 55480093f4SDimitry Andric } 560b57cec5SDimitry Andric 57480093f4SDimitry Andric for (auto *II : Worklist) { 580b57cec5SDimitry Andric FastMathFlags FMF = 590b57cec5SDimitry Andric isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; 600b57cec5SDimitry Andric Intrinsic::ID ID = II->getIntrinsicID(); 61*0fca6ea1SDimitry Andric RecurKind RK = getMinMaxReductionRecurKind(ID); 62*0fca6ea1SDimitry Andric TargetTransformInfo::ReductionShuffle RS = 63*0fca6ea1SDimitry Andric TTI->getPreferredExpandedReductionShuffle(II); 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric Value *Rdx = nullptr; 660b57cec5SDimitry Andric IRBuilder<> Builder(II); 670b57cec5SDimitry Andric IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 680b57cec5SDimitry Andric Builder.setFastMathFlags(FMF); 690b57cec5SDimitry Andric switch (ID) { 70480093f4SDimitry Andric default: llvm_unreachable("Unexpected intrinsic!"); 71e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 72e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: { 730b57cec5SDimitry Andric // FMFs must be attached to the call, otherwise it's an ordered reduction 740b57cec5SDimitry Andric // and it can't be handled by generating a shuffle sequence. 750b57cec5SDimitry Andric Value *Acc = II->getArgOperand(0); 760b57cec5SDimitry Andric Value *Vec = II->getArgOperand(1); 77*0fca6ea1SDimitry Andric unsigned RdxOpcode = getArithmeticReductionInstruction(ID); 780b57cec5SDimitry Andric if (!FMF.allowReassoc()) 79*0fca6ea1SDimitry Andric Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK); 800b57cec5SDimitry Andric else { 815ffd83dbSDimitry Andric if (!isPowerOf2_32( 825ffd83dbSDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements())) 83480093f4SDimitry Andric continue; 84*0fca6ea1SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); 85*0fca6ea1SDimitry Andric Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx, 86*0fca6ea1SDimitry Andric "bin.rdx"); 870b57cec5SDimitry Andric } 88480093f4SDimitry Andric break; 89480093f4SDimitry Andric } 9006c3fb27SDimitry Andric case Intrinsic::vector_reduce_and: 9106c3fb27SDimitry Andric case Intrinsic::vector_reduce_or: { 9206c3fb27SDimitry Andric // Canonicalize logical or/and reductions: 9306c3fb27SDimitry Andric // Or reduction for i1 is represented as: 9406c3fb27SDimitry Andric // %val = bitcast <ReduxWidth x i1> to iReduxWidth 9506c3fb27SDimitry Andric // %res = cmp ne iReduxWidth %val, 0 9606c3fb27SDimitry Andric // And reduction for i1 is represented as: 9706c3fb27SDimitry Andric // %val = bitcast <ReduxWidth x i1> to iReduxWidth 9806c3fb27SDimitry Andric // %res = cmp eq iReduxWidth %val, 11111 9906c3fb27SDimitry Andric Value *Vec = II->getArgOperand(0); 10006c3fb27SDimitry Andric auto *FTy = cast<FixedVectorType>(Vec->getType()); 10106c3fb27SDimitry Andric unsigned NumElts = FTy->getNumElements(); 10206c3fb27SDimitry Andric if (!isPowerOf2_32(NumElts)) 10306c3fb27SDimitry Andric continue; 10406c3fb27SDimitry Andric 10506c3fb27SDimitry Andric if (FTy->getElementType() == Builder.getInt1Ty()) { 10606c3fb27SDimitry Andric Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts)); 10706c3fb27SDimitry Andric if (ID == Intrinsic::vector_reduce_and) { 10806c3fb27SDimitry Andric Rdx = Builder.CreateICmpEQ( 10906c3fb27SDimitry Andric Rdx, ConstantInt::getAllOnesValue(Rdx->getType())); 11006c3fb27SDimitry Andric } else { 11106c3fb27SDimitry Andric assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction."); 11206c3fb27SDimitry Andric Rdx = Builder.CreateIsNotNull(Rdx); 11306c3fb27SDimitry Andric } 11406c3fb27SDimitry Andric break; 11506c3fb27SDimitry Andric } 116*0fca6ea1SDimitry Andric unsigned RdxOpcode = getArithmeticReductionInstruction(ID); 117*0fca6ea1SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); 11806c3fb27SDimitry Andric break; 11906c3fb27SDimitry Andric } 120e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 121e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 122e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 123e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 124e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 125e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 126e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: { 1270b57cec5SDimitry Andric Value *Vec = II->getArgOperand(0); 1285ffd83dbSDimitry Andric if (!isPowerOf2_32( 1295ffd83dbSDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements())) 1300b57cec5SDimitry Andric continue; 131*0fca6ea1SDimitry Andric unsigned RdxOpcode = getArithmeticReductionInstruction(ID); 132*0fca6ea1SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); 133e8d8bef9SDimitry Andric break; 134e8d8bef9SDimitry Andric } 135e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 136e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: { 137fe6060f1SDimitry Andric // We require "nnan" to use a shuffle reduction; "nsz" is implied by the 138fe6060f1SDimitry Andric // semantics of the reduction. 139e8d8bef9SDimitry Andric Value *Vec = II->getArgOperand(0); 140e8d8bef9SDimitry Andric if (!isPowerOf2_32( 141e8d8bef9SDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements()) || 142fe6060f1SDimitry Andric !FMF.noNaNs()) 143e8d8bef9SDimitry Andric continue; 144*0fca6ea1SDimitry Andric unsigned RdxOpcode = getArithmeticReductionInstruction(ID); 145*0fca6ea1SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK); 146480093f4SDimitry Andric break; 147480093f4SDimitry Andric } 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric II->replaceAllUsesWith(Rdx); 1500b57cec5SDimitry Andric II->eraseFromParent(); 1510b57cec5SDimitry Andric Changed = true; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric return Changed; 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric class ExpandReductions : public FunctionPass { 1570b57cec5SDimitry Andric public: 1580b57cec5SDimitry Andric static char ID; 1590b57cec5SDimitry Andric ExpandReductions() : FunctionPass(ID) { 1600b57cec5SDimitry Andric initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric bool runOnFunction(Function &F) override { 1640b57cec5SDimitry Andric const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 1650b57cec5SDimitry Andric return expandReductions(F, TTI); 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1690b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 1700b57cec5SDimitry Andric AU.setPreservesCFG(); 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric }; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric char ExpandReductions::ID; 1760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", 1770b57cec5SDimitry Andric "Expand reduction intrinsics", false, false) 1780b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 1790b57cec5SDimitry Andric INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", 1800b57cec5SDimitry Andric "Expand reduction intrinsics", false, false) 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric FunctionPass *llvm::createExpandReductionsPass() { 1830b57cec5SDimitry Andric return new ExpandReductions(); 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric PreservedAnalyses ExpandReductionsPass::run(Function &F, 1870b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 1880b57cec5SDimitry Andric const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 1890b57cec5SDimitry Andric if (!expandReductions(F, &TTI)) 1900b57cec5SDimitry Andric return PreservedAnalyses::all(); 1910b57cec5SDimitry Andric PreservedAnalyses PA; 1920b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>(); 1930b57cec5SDimitry Andric return PA; 1940b57cec5SDimitry Andric } 195