xref: /llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp (revision 05da2fe52162c80dfa18aedf70cf73cb11201811)
1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/ExpandReductions.h"
15 #include "llvm/Analysis/TargetTransformInfo.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/InitializePasses.h"
24 #include "llvm/Pass.h"
25 #include "llvm/Transforms/Utils/LoopUtils.h"
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 unsigned getOpcode(Intrinsic::ID ID) {
32   switch (ID) {
33   case Intrinsic::experimental_vector_reduce_v2_fadd:
34     return Instruction::FAdd;
35   case Intrinsic::experimental_vector_reduce_v2_fmul:
36     return Instruction::FMul;
37   case Intrinsic::experimental_vector_reduce_add:
38     return Instruction::Add;
39   case Intrinsic::experimental_vector_reduce_mul:
40     return Instruction::Mul;
41   case Intrinsic::experimental_vector_reduce_and:
42     return Instruction::And;
43   case Intrinsic::experimental_vector_reduce_or:
44     return Instruction::Or;
45   case Intrinsic::experimental_vector_reduce_xor:
46     return Instruction::Xor;
47   case Intrinsic::experimental_vector_reduce_smax:
48   case Intrinsic::experimental_vector_reduce_smin:
49   case Intrinsic::experimental_vector_reduce_umax:
50   case Intrinsic::experimental_vector_reduce_umin:
51     return Instruction::ICmp;
52   case Intrinsic::experimental_vector_reduce_fmax:
53   case Intrinsic::experimental_vector_reduce_fmin:
54     return Instruction::FCmp;
55   default:
56     llvm_unreachable("Unexpected ID");
57   }
58 }
59 
60 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
61   switch (ID) {
62   case Intrinsic::experimental_vector_reduce_smax:
63     return RecurrenceDescriptor::MRK_SIntMax;
64   case Intrinsic::experimental_vector_reduce_smin:
65     return RecurrenceDescriptor::MRK_SIntMin;
66   case Intrinsic::experimental_vector_reduce_umax:
67     return RecurrenceDescriptor::MRK_UIntMax;
68   case Intrinsic::experimental_vector_reduce_umin:
69     return RecurrenceDescriptor::MRK_UIntMin;
70   case Intrinsic::experimental_vector_reduce_fmax:
71     return RecurrenceDescriptor::MRK_FloatMax;
72   case Intrinsic::experimental_vector_reduce_fmin:
73     return RecurrenceDescriptor::MRK_FloatMin;
74   default:
75     return RecurrenceDescriptor::MRK_Invalid;
76   }
77 }
78 
79 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
80   bool Changed = false;
81   SmallVector<IntrinsicInst *, 4> Worklist;
82   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
83     if (auto II = dyn_cast<IntrinsicInst>(&*I))
84       Worklist.push_back(II);
85 
86   for (auto *II : Worklist) {
87     if (!TTI->shouldExpandReduction(II))
88       continue;
89 
90     FastMathFlags FMF =
91         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
92     Intrinsic::ID ID = II->getIntrinsicID();
93     RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
94 
95     Value *Rdx = nullptr;
96     IRBuilder<> Builder(II);
97     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
98     Builder.setFastMathFlags(FMF);
99     switch (ID) {
100     case Intrinsic::experimental_vector_reduce_v2_fadd:
101     case Intrinsic::experimental_vector_reduce_v2_fmul: {
102       // FMFs must be attached to the call, otherwise it's an ordered reduction
103       // and it can't be handled by generating a shuffle sequence.
104       Value *Acc = II->getArgOperand(0);
105       Value *Vec = II->getArgOperand(1);
106       if (!FMF.allowReassoc())
107         Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
108       else {
109         if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
110           continue;
111 
112         Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
113         Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
114                                   Acc, Rdx, "bin.rdx");
115       }
116     } break;
117     case Intrinsic::experimental_vector_reduce_add:
118     case Intrinsic::experimental_vector_reduce_mul:
119     case Intrinsic::experimental_vector_reduce_and:
120     case Intrinsic::experimental_vector_reduce_or:
121     case Intrinsic::experimental_vector_reduce_xor:
122     case Intrinsic::experimental_vector_reduce_smax:
123     case Intrinsic::experimental_vector_reduce_smin:
124     case Intrinsic::experimental_vector_reduce_umax:
125     case Intrinsic::experimental_vector_reduce_umin:
126     case Intrinsic::experimental_vector_reduce_fmax:
127     case Intrinsic::experimental_vector_reduce_fmin: {
128       Value *Vec = II->getArgOperand(0);
129       if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
130         continue;
131 
132       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
133     } break;
134     default:
135       continue;
136     }
137     II->replaceAllUsesWith(Rdx);
138     II->eraseFromParent();
139     Changed = true;
140   }
141   return Changed;
142 }
143 
144 class ExpandReductions : public FunctionPass {
145 public:
146   static char ID;
147   ExpandReductions() : FunctionPass(ID) {
148     initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
149   }
150 
151   bool runOnFunction(Function &F) override {
152     const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
153     return expandReductions(F, TTI);
154   }
155 
156   void getAnalysisUsage(AnalysisUsage &AU) const override {
157     AU.addRequired<TargetTransformInfoWrapperPass>();
158     AU.setPreservesCFG();
159   }
160 };
161 }
162 
163 char ExpandReductions::ID;
164 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
165                       "Expand reduction intrinsics", false, false)
166 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
167 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
168                     "Expand reduction intrinsics", false, false)
169 
170 FunctionPass *llvm::createExpandReductionsPass() {
171   return new ExpandReductions();
172 }
173 
174 PreservedAnalyses ExpandReductionsPass::run(Function &F,
175                                             FunctionAnalysisManager &AM) {
176   const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
177   if (!expandReductions(F, &TTI))
178     return PreservedAnalyses::all();
179   PreservedAnalyses PA;
180   PA.preserveSet<CFGAnalyses>();
181   return PA;
182 }
183