1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass looks for instructions that can be replaced by a Test Data Class 10 // instruction, and replaces them when profitable. 11 // 12 // Roughly, the following rules are recognized: 13 // 14 // 1: fcmp pred X, 0 -> tdc X, mask 15 // 2: fcmp pred X, +-inf -> tdc X, mask 16 // 3: fcmp pred X, +-minnorm -> tdc X, mask 17 // 4: tdc (fabs X), mask -> tdc X, newmask 18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] 19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask 20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask 21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) 22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) 23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) 24 // 25 // The pass works in 4 steps: 26 // 27 // 1. All fcmp and icmp instructions in a function are checked for a match 28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in 29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is 30 // a fabs, it's also folded according to rule 4. 31 // 2. All and/or/xor i1 instructions whose both operands have been already 32 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used 33 // as a queue of instructions to check. 34 // 3. All mapped instructions that are considered worthy of conversion (ie. 35 // replacing them will actually simplify the final code) are replaced 36 // with a call to the s390.tdc intrinsic. 37 // 4. All intermediate results of replaced instructions are removed if unused. 38 // 39 // Instructions that match rules 1-3 are considered unworthy of conversion 40 // on their own (since a comparison instruction is superior), but are mapped 41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing 42 // the original comparison in the process). 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "SystemZ.h" 47 #include "SystemZSubtarget.h" 48 #include "llvm/ADT/MapVector.h" 49 #include "llvm/CodeGen/TargetPassConfig.h" 50 #include "llvm/IR/Constants.h" 51 #include "llvm/IR/IRBuilder.h" 52 #include "llvm/IR/InstIterator.h" 53 #include "llvm/IR/Instructions.h" 54 #include "llvm/IR/IntrinsicsS390.h" 55 #include "llvm/IR/LegacyPassManager.h" 56 #include "llvm/IR/Module.h" 57 #include "llvm/Target/TargetMachine.h" 58 #include <set> 59 60 using namespace llvm; 61 62 namespace { 63 64 class SystemZTDCPass : public FunctionPass { 65 public: 66 static char ID; 67 SystemZTDCPass() : FunctionPass(ID) { 68 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); 69 } 70 71 bool runOnFunction(Function &F) override; 72 73 void getAnalysisUsage(AnalysisUsage &AU) const override { 74 AU.addRequired<TargetPassConfig>(); 75 } 76 77 private: 78 // Maps seen instructions that can be mapped to a TDC, values are 79 // (TDC operand, TDC mask, worthy flag) triples. 80 MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; 81 // The queue of and/or/xor i1 instructions to be potentially folded. 82 std::vector<BinaryOperator *> LogicOpsWorklist; 83 // Instructions matched while folding, to be removed at the end if unused. 84 std::set<Instruction *> PossibleJunk; 85 86 // Tries to convert a fcmp instruction. 87 void convertFCmp(CmpInst &I); 88 89 // Tries to convert an icmp instruction. 90 void convertICmp(CmpInst &I); 91 92 // Tries to convert an i1 and/or/xor instruction, whose both operands 93 // have been already converted. 94 void convertLogicOp(BinaryOperator &I); 95 96 // Marks an instruction as converted - adds it to ConvertedInsts and adds 97 // any and/or/xor i1 users to the queue. 98 void converted(Instruction *I, Value *V, int Mask, bool Worthy) { 99 ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); 100 auto &M = *I->getFunction()->getParent(); 101 auto &Ctx = M.getContext(); 102 for (auto *U : I->users()) { 103 auto *LI = dyn_cast<BinaryOperator>(U); 104 if (LI && LI->getType() == Type::getInt1Ty(Ctx) && 105 (LI->getOpcode() == Instruction::And || 106 LI->getOpcode() == Instruction::Or || 107 LI->getOpcode() == Instruction::Xor)) { 108 LogicOpsWorklist.push_back(LI); 109 } 110 } 111 } 112 }; 113 114 } // end anonymous namespace 115 116 char SystemZTDCPass::ID = 0; 117 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", 118 "SystemZ Test Data Class optimization", false, false) 119 120 FunctionPass *llvm::createSystemZTDCPass() { 121 return new SystemZTDCPass(); 122 } 123 124 void SystemZTDCPass::convertFCmp(CmpInst &I) { 125 Value *Op0 = I.getOperand(0); 126 auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); 127 auto Pred = I.getPredicate(); 128 // Only comparisons with consts are interesting. 129 if (!Const) 130 return; 131 // Compute the smallest normal number (and its negation). 132 auto &Sem = Op0->getType()->getFltSemantics(); 133 APFloat Smallest = APFloat::getSmallestNormalized(Sem); 134 APFloat NegSmallest = Smallest; 135 NegSmallest.changeSign(); 136 // Check if Const is one of our recognized consts. 137 int WhichConst; 138 if (Const->isZero()) { 139 // All comparisons with 0 can be converted. 140 WhichConst = 0; 141 } else if (Const->isInfinity()) { 142 // Likewise for infinities. 143 WhichConst = Const->isNegative() ? 2 : 1; 144 } else if (Const->isExactlyValue(Smallest)) { 145 // For Smallest, we cannot do EQ separately from GT. 146 if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && 147 (Pred & CmpInst::FCMP_OGE) != 0) 148 return; 149 WhichConst = 3; 150 } else if (Const->isExactlyValue(NegSmallest)) { 151 // Likewise for NegSmallest, we cannot do EQ separately from LT. 152 if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && 153 (Pred & CmpInst::FCMP_OLE) != 0) 154 return; 155 WhichConst = 4; 156 } else { 157 // Not one of our special constants. 158 return; 159 } 160 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. 161 static const int Masks[][4] = { 162 { // 0 163 SystemZ::TDCMASK_ZERO, // eq 164 SystemZ::TDCMASK_POSITIVE, // gt 165 SystemZ::TDCMASK_NEGATIVE, // lt 166 SystemZ::TDCMASK_NAN, // un 167 }, 168 { // inf 169 SystemZ::TDCMASK_INFINITY_PLUS, // eq 170 0, // gt 171 (SystemZ::TDCMASK_ZERO | 172 SystemZ::TDCMASK_NEGATIVE | 173 SystemZ::TDCMASK_NORMAL_PLUS | 174 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt 175 SystemZ::TDCMASK_NAN, // un 176 }, 177 { // -inf 178 SystemZ::TDCMASK_INFINITY_MINUS, // eq 179 (SystemZ::TDCMASK_ZERO | 180 SystemZ::TDCMASK_POSITIVE | 181 SystemZ::TDCMASK_NORMAL_MINUS | 182 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt 183 0, // lt 184 SystemZ::TDCMASK_NAN, // un 185 }, 186 { // minnorm 187 0, // eq (unsupported) 188 (SystemZ::TDCMASK_NORMAL_PLUS | 189 SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) 190 (SystemZ::TDCMASK_ZERO | 191 SystemZ::TDCMASK_NEGATIVE | 192 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt 193 SystemZ::TDCMASK_NAN, // un 194 }, 195 { // -minnorm 196 0, // eq (unsupported) 197 (SystemZ::TDCMASK_ZERO | 198 SystemZ::TDCMASK_POSITIVE | 199 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt 200 (SystemZ::TDCMASK_NORMAL_MINUS | 201 SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) 202 SystemZ::TDCMASK_NAN, // un 203 } 204 }; 205 // Construct the mask as a combination of the partial masks. 206 int Mask = 0; 207 if (Pred & CmpInst::FCMP_OEQ) 208 Mask |= Masks[WhichConst][0]; 209 if (Pred & CmpInst::FCMP_OGT) 210 Mask |= Masks[WhichConst][1]; 211 if (Pred & CmpInst::FCMP_OLT) 212 Mask |= Masks[WhichConst][2]; 213 if (Pred & CmpInst::FCMP_UNO) 214 Mask |= Masks[WhichConst][3]; 215 // A lone fcmp is unworthy of tdc conversion on its own, but may become 216 // worthy if combined with fabs. 217 bool Worthy = false; 218 if (CallInst *CI = dyn_cast<CallInst>(Op0)) { 219 Function *F = CI->getCalledFunction(); 220 if (F && F->getIntrinsicID() == Intrinsic::fabs) { 221 // Fold with fabs - adjust the mask appropriately. 222 Mask &= SystemZ::TDCMASK_PLUS; 223 Mask |= Mask >> 1; 224 Op0 = CI->getArgOperand(0); 225 // A combination of fcmp with fabs is a win, unless the constant 226 // involved is 0 (which is handled by later passes). 227 Worthy = WhichConst != 0; 228 PossibleJunk.insert(CI); 229 } 230 } 231 converted(&I, Op0, Mask, Worthy); 232 } 233 234 void SystemZTDCPass::convertICmp(CmpInst &I) { 235 Value *Op0 = I.getOperand(0); 236 auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); 237 auto Pred = I.getPredicate(); 238 // All our icmp rules involve comparisons with consts. 239 if (!Const) 240 return; 241 if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { 242 // Check for icmp+bitcast used for signbit. 243 if (!Cast->getSrcTy()->isFloatTy() && 244 !Cast->getSrcTy()->isDoubleTy() && 245 !Cast->getSrcTy()->isFP128Ty()) 246 return; 247 Value *V = Cast->getOperand(0); 248 int Mask; 249 if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { 250 // icmp slt (bitcast X), 0 - set if sign bit true 251 Mask = SystemZ::TDCMASK_MINUS; 252 } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { 253 // icmp sgt (bitcast X), -1 - set if sign bit false 254 Mask = SystemZ::TDCMASK_PLUS; 255 } else { 256 // Not a sign bit check. 257 return; 258 } 259 PossibleJunk.insert(Cast); 260 converted(&I, V, Mask, true); 261 } else if (auto *CI = dyn_cast<CallInst>(Op0)) { 262 // Check if this is a pre-existing call of our tdc intrinsic. 263 Function *F = CI->getCalledFunction(); 264 if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) 265 return; 266 if (!Const->isZero()) 267 return; 268 Value *V = CI->getArgOperand(0); 269 auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); 270 // Bail if the mask is not a constant. 271 if (!MaskC) 272 return; 273 int Mask = MaskC->getZExtValue(); 274 Mask &= SystemZ::TDCMASK_ALL; 275 if (Pred == CmpInst::ICMP_NE) { 276 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC 277 } else if (Pred == CmpInst::ICMP_EQ) { 278 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask 279 Mask ^= SystemZ::TDCMASK_ALL; 280 } else { 281 // An unknown comparison - ignore. 282 return; 283 } 284 PossibleJunk.insert(CI); 285 converted(&I, V, Mask, false); 286 } 287 } 288 289 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { 290 Value *Op0, *Op1; 291 int Mask0, Mask1; 292 bool Worthy0, Worthy1; 293 std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; 294 std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; 295 if (Op0 != Op1) 296 return; 297 int Mask; 298 switch (I.getOpcode()) { 299 case Instruction::And: 300 Mask = Mask0 & Mask1; 301 break; 302 case Instruction::Or: 303 Mask = Mask0 | Mask1; 304 break; 305 case Instruction::Xor: 306 Mask = Mask0 ^ Mask1; 307 break; 308 default: 309 llvm_unreachable("Unknown op in convertLogicOp"); 310 } 311 converted(&I, Op0, Mask, true); 312 } 313 314 bool SystemZTDCPass::runOnFunction(Function &F) { 315 auto &TPC = getAnalysis<TargetPassConfig>(); 316 if (TPC.getTM<TargetMachine>() 317 .getSubtarget<SystemZSubtarget>(F) 318 .hasSoftFloat()) 319 return false; 320 321 ConvertedInsts.clear(); 322 LogicOpsWorklist.clear(); 323 PossibleJunk.clear(); 324 325 // Look for icmp+fcmp instructions. 326 for (auto &I : instructions(F)) { 327 if (I.getOpcode() == Instruction::FCmp) 328 convertFCmp(cast<CmpInst>(I)); 329 else if (I.getOpcode() == Instruction::ICmp) 330 convertICmp(cast<CmpInst>(I)); 331 } 332 333 // If none found, bail already. 334 if (ConvertedInsts.empty()) 335 return false; 336 337 // Process the queue of logic instructions. 338 while (!LogicOpsWorklist.empty()) { 339 BinaryOperator *Op = LogicOpsWorklist.back(); 340 LogicOpsWorklist.pop_back(); 341 // If both operands mapped, and the instruction itself not yet mapped, 342 // convert it. 343 if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && 344 ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && 345 !ConvertedInsts.count(Op)) 346 convertLogicOp(*Op); 347 } 348 349 // Time to actually replace the instructions. Do it in the reverse order 350 // of finding them, since there's a good chance the earlier ones will be 351 // unused (due to being folded into later ones). 352 Module &M = *F.getParent(); 353 auto &Ctx = M.getContext(); 354 Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); 355 bool MadeChange = false; 356 for (auto &It : reverse(ConvertedInsts)) { 357 Instruction *I = It.first; 358 Value *V; 359 int Mask; 360 bool Worthy; 361 std::tie(V, Mask, Worthy) = It.second; 362 if (!I->user_empty()) { 363 // If used and unworthy of conversion, skip it. 364 if (!Worthy) 365 continue; 366 // Call the intrinsic, compare result with 0. 367 IRBuilder<> IRB(I); 368 Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); 369 Instruction *TDC = 370 IRB.CreateIntrinsic(Intrinsic::s390_tdc, V->getType(), {V, MaskVal}); 371 Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); 372 I->replaceAllUsesWith(ICmp); 373 } 374 // If unused, or used and converted, remove it. 375 I->eraseFromParent(); 376 MadeChange = true; 377 } 378 379 if (!MadeChange) 380 return false; 381 382 // We've actually done something - now clear misc accumulated junk (fabs, 383 // bitcast). 384 for (auto *I : PossibleJunk) 385 if (I->user_empty()) 386 I->eraseFromParent(); 387 388 return true; 389 } 390