xref: /llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp (revision ed8019d9fbed2e6a6b08f8f73e9fa54a24f3ed52)
1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
11 //
12 // Roughly, the following rules are recognized:
13 //
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24 //
25 // The pass works in 4 steps:
26 //
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
29 //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
30 //    a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions whose both operands have been already
32 //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
33 //    as a queue of instructions to check.
34 // 3. All mapped instructions that are considered worthy of conversion (ie.
35 //    replacing them will actually simplify the final code) are replaced
36 //    with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
38 //
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "SystemZ.h"
47 #include "SystemZSubtarget.h"
48 #include "llvm/ADT/MapVector.h"
49 #include "llvm/CodeGen/TargetPassConfig.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicsS390.h"
55 #include "llvm/IR/LegacyPassManager.h"
56 #include "llvm/IR/Module.h"
57 #include "llvm/Target/TargetMachine.h"
58 #include <set>
59 
60 using namespace llvm;
61 
62 namespace {
63 
64 class SystemZTDCPass : public FunctionPass {
65 public:
66   static char ID;
67   SystemZTDCPass() : FunctionPass(ID) {
68     initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
69   }
70 
71   bool runOnFunction(Function &F) override;
72 
73   void getAnalysisUsage(AnalysisUsage &AU) const override {
74     AU.addRequired<TargetPassConfig>();
75  }
76 
77 private:
78   // Maps seen instructions that can be mapped to a TDC, values are
79   // (TDC operand, TDC mask, worthy flag) triples.
80   MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
81   // The queue of and/or/xor i1 instructions to be potentially folded.
82   std::vector<BinaryOperator *> LogicOpsWorklist;
83   // Instructions matched while folding, to be removed at the end if unused.
84   std::set<Instruction *> PossibleJunk;
85 
86   // Tries to convert a fcmp instruction.
87   void convertFCmp(CmpInst &I);
88 
89   // Tries to convert an icmp instruction.
90   void convertICmp(CmpInst &I);
91 
92   // Tries to convert an i1 and/or/xor instruction, whose both operands
93   // have been already converted.
94   void convertLogicOp(BinaryOperator &I);
95 
96   // Marks an instruction as converted - adds it to ConvertedInsts and adds
97   // any and/or/xor i1 users to the queue.
98   void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
99     ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
100     auto &M = *I->getFunction()->getParent();
101     auto &Ctx = M.getContext();
102     for (auto *U : I->users()) {
103       auto *LI = dyn_cast<BinaryOperator>(U);
104       if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
105           (LI->getOpcode() == Instruction::And ||
106            LI->getOpcode() == Instruction::Or ||
107            LI->getOpcode() == Instruction::Xor)) {
108         LogicOpsWorklist.push_back(LI);
109       }
110     }
111   }
112 };
113 
114 } // end anonymous namespace
115 
116 char SystemZTDCPass::ID = 0;
117 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
118                 "SystemZ Test Data Class optimization", false, false)
119 
120 FunctionPass *llvm::createSystemZTDCPass() {
121   return new SystemZTDCPass();
122 }
123 
124 void SystemZTDCPass::convertFCmp(CmpInst &I) {
125   Value *Op0 = I.getOperand(0);
126   auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
127   auto Pred = I.getPredicate();
128   // Only comparisons with consts are interesting.
129   if (!Const)
130     return;
131   // Compute the smallest normal number (and its negation).
132   auto &Sem = Op0->getType()->getFltSemantics();
133   APFloat Smallest = APFloat::getSmallestNormalized(Sem);
134   APFloat NegSmallest = Smallest;
135   NegSmallest.changeSign();
136   // Check if Const is one of our recognized consts.
137   int WhichConst;
138   if (Const->isZero()) {
139     // All comparisons with 0 can be converted.
140     WhichConst = 0;
141   } else if (Const->isInfinity()) {
142     // Likewise for infinities.
143     WhichConst = Const->isNegative() ? 2 : 1;
144   } else if (Const->isExactlyValue(Smallest)) {
145     // For Smallest, we cannot do EQ separately from GT.
146     if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
147         (Pred & CmpInst::FCMP_OGE) != 0)
148       return;
149     WhichConst = 3;
150   } else if (Const->isExactlyValue(NegSmallest)) {
151     // Likewise for NegSmallest, we cannot do EQ separately from LT.
152     if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
153         (Pred & CmpInst::FCMP_OLE) != 0)
154       return;
155     WhichConst = 4;
156   } else {
157     // Not one of our special constants.
158     return;
159   }
160   // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
161   static const int Masks[][4] = {
162     { // 0
163       SystemZ::TDCMASK_ZERO,              // eq
164       SystemZ::TDCMASK_POSITIVE,          // gt
165       SystemZ::TDCMASK_NEGATIVE,          // lt
166       SystemZ::TDCMASK_NAN,               // un
167     },
168     { // inf
169       SystemZ::TDCMASK_INFINITY_PLUS,     // eq
170       0,                                  // gt
171       (SystemZ::TDCMASK_ZERO |
172        SystemZ::TDCMASK_NEGATIVE |
173        SystemZ::TDCMASK_NORMAL_PLUS |
174        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
175       SystemZ::TDCMASK_NAN,               // un
176     },
177     { // -inf
178       SystemZ::TDCMASK_INFINITY_MINUS,    // eq
179       (SystemZ::TDCMASK_ZERO |
180        SystemZ::TDCMASK_POSITIVE |
181        SystemZ::TDCMASK_NORMAL_MINUS |
182        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
183       0,                                  // lt
184       SystemZ::TDCMASK_NAN,               // un
185     },
186     { // minnorm
187       0,                                  // eq (unsupported)
188       (SystemZ::TDCMASK_NORMAL_PLUS |
189        SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
190       (SystemZ::TDCMASK_ZERO |
191        SystemZ::TDCMASK_NEGATIVE |
192        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
193       SystemZ::TDCMASK_NAN,               // un
194     },
195     { // -minnorm
196       0,                                  // eq (unsupported)
197       (SystemZ::TDCMASK_ZERO |
198        SystemZ::TDCMASK_POSITIVE |
199        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
200       (SystemZ::TDCMASK_NORMAL_MINUS |
201        SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
202       SystemZ::TDCMASK_NAN,               // un
203     }
204   };
205   // Construct the mask as a combination of the partial masks.
206   int Mask = 0;
207   if (Pred & CmpInst::FCMP_OEQ)
208     Mask |= Masks[WhichConst][0];
209   if (Pred & CmpInst::FCMP_OGT)
210     Mask |= Masks[WhichConst][1];
211   if (Pred & CmpInst::FCMP_OLT)
212     Mask |= Masks[WhichConst][2];
213   if (Pred & CmpInst::FCMP_UNO)
214     Mask |= Masks[WhichConst][3];
215   // A lone fcmp is unworthy of tdc conversion on its own, but may become
216   // worthy if combined with fabs.
217   bool Worthy = false;
218   if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
219     Function *F = CI->getCalledFunction();
220     if (F && F->getIntrinsicID() == Intrinsic::fabs) {
221       // Fold with fabs - adjust the mask appropriately.
222       Mask &= SystemZ::TDCMASK_PLUS;
223       Mask |= Mask >> 1;
224       Op0 = CI->getArgOperand(0);
225       // A combination of fcmp with fabs is a win, unless the constant
226       // involved is 0 (which is handled by later passes).
227       Worthy = WhichConst != 0;
228       PossibleJunk.insert(CI);
229     }
230   }
231   converted(&I, Op0, Mask, Worthy);
232 }
233 
234 void SystemZTDCPass::convertICmp(CmpInst &I) {
235   Value *Op0 = I.getOperand(0);
236   auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
237   auto Pred = I.getPredicate();
238   // All our icmp rules involve comparisons with consts.
239   if (!Const)
240     return;
241   if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
242     // Check for icmp+bitcast used for signbit.
243     if (!Cast->getSrcTy()->isFloatTy() &&
244         !Cast->getSrcTy()->isDoubleTy() &&
245         !Cast->getSrcTy()->isFP128Ty())
246       return;
247     Value *V = Cast->getOperand(0);
248     int Mask;
249     if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
250       // icmp slt (bitcast X), 0 - set if sign bit true
251       Mask = SystemZ::TDCMASK_MINUS;
252     } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
253       // icmp sgt (bitcast X), -1 - set if sign bit false
254       Mask = SystemZ::TDCMASK_PLUS;
255     } else {
256       // Not a sign bit check.
257       return;
258     }
259     PossibleJunk.insert(Cast);
260     converted(&I, V, Mask, true);
261   } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
262     // Check if this is a pre-existing call of our tdc intrinsic.
263     Function *F = CI->getCalledFunction();
264     if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
265       return;
266     if (!Const->isZero())
267       return;
268     Value *V = CI->getArgOperand(0);
269     auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
270     // Bail if the mask is not a constant.
271     if (!MaskC)
272       return;
273     int Mask = MaskC->getZExtValue();
274     Mask &= SystemZ::TDCMASK_ALL;
275     if (Pred == CmpInst::ICMP_NE) {
276       // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
277     } else if (Pred == CmpInst::ICMP_EQ) {
278       // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
279       Mask ^= SystemZ::TDCMASK_ALL;
280     } else {
281       // An unknown comparison - ignore.
282       return;
283     }
284     PossibleJunk.insert(CI);
285     converted(&I, V, Mask, false);
286   }
287 }
288 
289 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
290   Value *Op0, *Op1;
291   int Mask0, Mask1;
292   bool Worthy0, Worthy1;
293   std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
294   std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
295   if (Op0 != Op1)
296     return;
297   int Mask;
298   switch (I.getOpcode()) {
299     case Instruction::And:
300       Mask = Mask0 & Mask1;
301       break;
302     case Instruction::Or:
303       Mask = Mask0 | Mask1;
304       break;
305     case Instruction::Xor:
306       Mask = Mask0 ^ Mask1;
307       break;
308     default:
309       llvm_unreachable("Unknown op in convertLogicOp");
310   }
311   converted(&I, Op0, Mask, true);
312 }
313 
314 bool SystemZTDCPass::runOnFunction(Function &F) {
315   auto &TPC = getAnalysis<TargetPassConfig>();
316   if (TPC.getTM<TargetMachine>()
317           .getSubtarget<SystemZSubtarget>(F)
318           .hasSoftFloat())
319     return false;
320 
321   ConvertedInsts.clear();
322   LogicOpsWorklist.clear();
323   PossibleJunk.clear();
324 
325   // Look for icmp+fcmp instructions.
326   for (auto &I : instructions(F)) {
327     if (I.getOpcode() == Instruction::FCmp)
328       convertFCmp(cast<CmpInst>(I));
329     else if (I.getOpcode() == Instruction::ICmp)
330       convertICmp(cast<CmpInst>(I));
331   }
332 
333   // If none found, bail already.
334   if (ConvertedInsts.empty())
335     return false;
336 
337   // Process the queue of logic instructions.
338   while (!LogicOpsWorklist.empty()) {
339     BinaryOperator *Op = LogicOpsWorklist.back();
340     LogicOpsWorklist.pop_back();
341     // If both operands mapped, and the instruction itself not yet mapped,
342     // convert it.
343     if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
344         ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
345         !ConvertedInsts.count(Op))
346       convertLogicOp(*Op);
347   }
348 
349   // Time to actually replace the instructions.  Do it in the reverse order
350   // of finding them, since there's a good chance the earlier ones will be
351   // unused (due to being folded into later ones).
352   Module &M = *F.getParent();
353   auto &Ctx = M.getContext();
354   Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
355   bool MadeChange = false;
356   for (auto &It : reverse(ConvertedInsts)) {
357     Instruction *I = It.first;
358     Value *V;
359     int Mask;
360     bool Worthy;
361     std::tie(V, Mask, Worthy) = It.second;
362     if (!I->user_empty()) {
363       // If used and unworthy of conversion, skip it.
364       if (!Worthy)
365         continue;
366       // Call the intrinsic, compare result with 0.
367       IRBuilder<> IRB(I);
368       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
369       Instruction *TDC =
370           IRB.CreateIntrinsic(Intrinsic::s390_tdc, V->getType(), {V, MaskVal});
371       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
372       I->replaceAllUsesWith(ICmp);
373     }
374     // If unused, or used and converted, remove it.
375     I->eraseFromParent();
376     MadeChange = true;
377   }
378 
379   if (!MadeChange)
380     return false;
381 
382   // We've actually done something - now clear misc accumulated junk (fabs,
383   // bitcast).
384   for (auto *I : PossibleJunk)
385     if (I->user_empty())
386       I->eraseFromParent();
387 
388   return true;
389 }
390