xref: /llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp (revision 4e9778e346f27b09724f39f92b34dd7336c2147a)
1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/ExpandReductions.h"
15 #include "llvm/Analysis/TargetTransformInfo.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/Pass.h"
24 #include "llvm/Transforms/Utils/LoopUtils.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 
30 unsigned getOpcode(Intrinsic::ID ID) {
31   switch (ID) {
32   case Intrinsic::experimental_vector_reduce_v2_fadd:
33     return Instruction::FAdd;
34   case Intrinsic::experimental_vector_reduce_v2_fmul:
35     return Instruction::FMul;
36   case Intrinsic::experimental_vector_reduce_add:
37     return Instruction::Add;
38   case Intrinsic::experimental_vector_reduce_mul:
39     return Instruction::Mul;
40   case Intrinsic::experimental_vector_reduce_and:
41     return Instruction::And;
42   case Intrinsic::experimental_vector_reduce_or:
43     return Instruction::Or;
44   case Intrinsic::experimental_vector_reduce_xor:
45     return Instruction::Xor;
46   case Intrinsic::experimental_vector_reduce_smax:
47   case Intrinsic::experimental_vector_reduce_smin:
48   case Intrinsic::experimental_vector_reduce_umax:
49   case Intrinsic::experimental_vector_reduce_umin:
50     return Instruction::ICmp;
51   case Intrinsic::experimental_vector_reduce_fmax:
52   case Intrinsic::experimental_vector_reduce_fmin:
53     return Instruction::FCmp;
54   default:
55     llvm_unreachable("Unexpected ID");
56   }
57 }
58 
59 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
60   switch (ID) {
61   case Intrinsic::experimental_vector_reduce_smax:
62     return RecurrenceDescriptor::MRK_SIntMax;
63   case Intrinsic::experimental_vector_reduce_smin:
64     return RecurrenceDescriptor::MRK_SIntMin;
65   case Intrinsic::experimental_vector_reduce_umax:
66     return RecurrenceDescriptor::MRK_UIntMax;
67   case Intrinsic::experimental_vector_reduce_umin:
68     return RecurrenceDescriptor::MRK_UIntMin;
69   case Intrinsic::experimental_vector_reduce_fmax:
70     return RecurrenceDescriptor::MRK_FloatMax;
71   case Intrinsic::experimental_vector_reduce_fmin:
72     return RecurrenceDescriptor::MRK_FloatMin;
73   default:
74     return RecurrenceDescriptor::MRK_Invalid;
75   }
76 }
77 
78 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
79   bool Changed = false;
80   SmallVector<IntrinsicInst *, 4> Worklist;
81   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
82     if (auto II = dyn_cast<IntrinsicInst>(&*I))
83       Worklist.push_back(II);
84 
85   for (auto *II : Worklist) {
86     if (!TTI->shouldExpandReduction(II))
87       continue;
88 
89     FastMathFlags FMF =
90         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
91     Intrinsic::ID ID = II->getIntrinsicID();
92     RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
93 
94     Value *Rdx = nullptr;
95     IRBuilder<> Builder(II);
96     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
97     Builder.setFastMathFlags(FMF);
98     switch (ID) {
99     case Intrinsic::experimental_vector_reduce_v2_fadd:
100     case Intrinsic::experimental_vector_reduce_v2_fmul: {
101       // FMFs must be attached to the call, otherwise it's an ordered reduction
102       // and it can't be handled by generating a shuffle sequence.
103       Value *Acc = II->getArgOperand(0);
104       Value *Vec = II->getArgOperand(1);
105       if (!FMF.allowReassoc())
106         Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
107       else {
108         if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
109           continue;
110 
111         Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
112         Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
113                                   Acc, Rdx, "bin.rdx");
114       }
115     } break;
116     case Intrinsic::experimental_vector_reduce_add:
117     case Intrinsic::experimental_vector_reduce_mul:
118     case Intrinsic::experimental_vector_reduce_and:
119     case Intrinsic::experimental_vector_reduce_or:
120     case Intrinsic::experimental_vector_reduce_xor:
121     case Intrinsic::experimental_vector_reduce_smax:
122     case Intrinsic::experimental_vector_reduce_smin:
123     case Intrinsic::experimental_vector_reduce_umax:
124     case Intrinsic::experimental_vector_reduce_umin:
125     case Intrinsic::experimental_vector_reduce_fmax:
126     case Intrinsic::experimental_vector_reduce_fmin: {
127       Value *Vec = II->getArgOperand(0);
128       if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
129         continue;
130 
131       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
132     } break;
133     default:
134       continue;
135     }
136     II->replaceAllUsesWith(Rdx);
137     II->eraseFromParent();
138     Changed = true;
139   }
140   return Changed;
141 }
142 
143 class ExpandReductions : public FunctionPass {
144 public:
145   static char ID;
146   ExpandReductions() : FunctionPass(ID) {
147     initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
148   }
149 
150   bool runOnFunction(Function &F) override {
151     const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
152     return expandReductions(F, TTI);
153   }
154 
155   void getAnalysisUsage(AnalysisUsage &AU) const override {
156     AU.addRequired<TargetTransformInfoWrapperPass>();
157     AU.setPreservesCFG();
158   }
159 };
160 }
161 
162 char ExpandReductions::ID;
163 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
164                       "Expand reduction intrinsics", false, false)
165 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
166 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
167                     "Expand reduction intrinsics", false, false)
168 
169 FunctionPass *llvm::createExpandReductionsPass() {
170   return new ExpandReductions();
171 }
172 
173 PreservedAnalyses ExpandReductionsPass::run(Function &F,
174                                             FunctionAnalysisManager &AM) {
175   const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
176   if (!expandReductions(F, &TTI))
177     return PreservedAnalyses::all();
178   PreservedAnalyses PA;
179   PA.preserveSet<CFGAnalyses>();
180   return PA;
181 }
182