1 //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass turns chains of integer comparisons into memcmp (the memcmp is 11 // later typically inlined as a chain of efficient hardware comparisons). This 12 // typically benefits c++ member or nonmember operator==(). 13 // 14 // The basic idea is to replace a larger chain of integer comparisons loaded 15 // from contiguous memory locations into a smaller chain of such integer 16 // comparisons. Benefits are double: 17 // - There are less jumps, and therefore less opportunities for mispredictions 18 // and I-cache misses. 19 // - Code size is smaller, both because jumps are removed and because the 20 // encoding of a 2*n byte compare is smaller than that of two n-byte 21 // compares. 22 23 //===----------------------------------------------------------------------===// 24 25 #include <algorithm> 26 #include <numeric> 27 #include <utility> 28 #include <vector> 29 #include "llvm/Analysis/Loads.h" 30 #include "llvm/Analysis/TargetLibraryInfo.h" 31 #include "llvm/Analysis/TargetTransformInfo.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/IR/IRBuilder.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Transforms/Scalar.h" 36 #include "llvm/Transforms/Utils/BuildLibCalls.h" 37 38 using namespace llvm; 39 40 namespace { 41 42 #define DEBUG_TYPE "mergeicmps" 43 44 // A BCE atom. 45 struct BCEAtom { 46 BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {} 47 48 const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; } 49 50 bool operator<(const BCEAtom &O) const { 51 assert(Base() && "invalid atom"); 52 assert(O.Base() && "invalid atom"); 53 // Just ordering by (Base(), Offset) is sufficient. However because this 54 // means that the ordering will depend on the addresses of the base 55 // values, which are not reproducible from run to run. To guarantee 56 // stability, we use the names of the values if they exist; we sort by: 57 // (Base.getName(), Base(), Offset). 58 const int NameCmp = Base()->getName().compare(O.Base()->getName()); 59 if (NameCmp == 0) { 60 if (Base() == O.Base()) { 61 return Offset.slt(O.Offset); 62 } 63 return Base() < O.Base(); 64 } 65 return NameCmp < 0; 66 } 67 68 GetElementPtrInst *GEP; 69 LoadInst *LoadI; 70 APInt Offset; 71 }; 72 73 // If this value is a load from a constant offset w.r.t. a base address, and 74 // there are no other users of the load or address, returns the base address and 75 // the offset. 76 BCEAtom visitICmpLoadOperand(Value *const Val) { 77 BCEAtom Result; 78 if (auto *const LoadI = dyn_cast<LoadInst>(Val)) { 79 LLVM_DEBUG(dbgs() << "load\n"); 80 if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) { 81 LLVM_DEBUG(dbgs() << "used outside of block\n"); 82 return {}; 83 } 84 if (LoadI->isVolatile()) { 85 LLVM_DEBUG(dbgs() << "volatile\n"); 86 return {}; 87 } 88 Value *const Addr = LoadI->getOperand(0); 89 if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) { 90 LLVM_DEBUG(dbgs() << "GEP\n"); 91 if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) { 92 LLVM_DEBUG(dbgs() << "used outside of block\n"); 93 return {}; 94 } 95 const auto &DL = GEP->getModule()->getDataLayout(); 96 if (!isDereferenceablePointer(GEP, DL)) { 97 LLVM_DEBUG(dbgs() << "not dereferenceable\n"); 98 // We need to make sure that we can do comparison in any order, so we 99 // require memory to be unconditionnally dereferencable. 100 return {}; 101 } 102 Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0); 103 if (GEP->accumulateConstantOffset(DL, Result.Offset)) { 104 Result.GEP = GEP; 105 Result.LoadI = LoadI; 106 } 107 } 108 } 109 return Result; 110 } 111 112 // A basic block with a comparison between two BCE atoms. 113 // The block might do extra work besides the atom comparison, in which case 114 // doesOtherWork() returns true. Under some conditions, the block can be 115 // split into the atom comparison part and the "other work" part 116 // (see canSplit()). 117 // Note: the terminology is misleading: the comparison is symmetric, so there 118 // is no real {l/r}hs. What we want though is to have the same base on the 119 // left (resp. right), so that we can detect consecutive loads. To ensure this 120 // we put the smallest atom on the left. 121 class BCECmpBlock { 122 public: 123 BCECmpBlock() {} 124 125 BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits) 126 : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) { 127 if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_); 128 } 129 130 bool IsValid() const { 131 return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr; 132 } 133 134 // Assert the block is consistent: If valid, it should also have 135 // non-null members besides Lhs_ and Rhs_. 136 void AssertConsistent() const { 137 if (IsValid()) { 138 assert(BB); 139 assert(CmpI); 140 assert(BranchI); 141 } 142 } 143 144 const BCEAtom &Lhs() const { return Lhs_; } 145 const BCEAtom &Rhs() const { return Rhs_; } 146 int SizeBits() const { return SizeBits_; } 147 148 // Returns true if the block does other works besides comparison. 149 bool doesOtherWork() const; 150 151 // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp 152 // instructions in the block. 153 bool canSplit() const; 154 155 // Return true if this all the relevant instructions in the BCE-cmp-block can 156 // be sunk below this instruction. By doing this, we know we can separate the 157 // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the 158 // block. 159 bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &) const; 160 161 // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block 162 // instructions. Split the old block and move all non-BCE-cmp-insts into the 163 // new parent block. 164 void split(BasicBlock *NewParent) const; 165 166 // The basic block where this comparison happens. 167 BasicBlock *BB = nullptr; 168 // The ICMP for this comparison. 169 ICmpInst *CmpI = nullptr; 170 // The terminating branch. 171 BranchInst *BranchI = nullptr; 172 // The block requires splitting. 173 bool RequireSplit = false; 174 175 private: 176 BCEAtom Lhs_; 177 BCEAtom Rhs_; 178 int SizeBits_ = 0; 179 }; 180 181 bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, 182 DenseSet<Instruction *> &BlockInsts) const { 183 // If this instruction has side effects and its in middle of the BCE cmp block 184 // instructions, then bail for now. 185 // TODO: use alias analysis to tell whether there is real interference. 186 if (Inst->mayHaveSideEffects()) 187 return false; 188 // Make sure this instruction does not use any of the BCE cmp block 189 // instructions as operand. 190 for (auto BI : BlockInsts) { 191 if (is_contained(Inst->operands(), BI)) 192 return false; 193 } 194 return true; 195 } 196 197 void BCECmpBlock::split(BasicBlock *NewParent) const { 198 DenseSet<Instruction *> BlockInsts( 199 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); 200 llvm::SmallVector<Instruction *, 4> OtherInsts; 201 for (Instruction &Inst : *BB) { 202 if (BlockInsts.count(&Inst)) 203 continue; 204 assert(canSinkBCECmpInst(&Inst, BlockInsts) && "Split unsplittable block"); 205 // This is a non-BCE-cmp-block instruction. And it can be separated 206 // from the BCE-cmp-block instruction. 207 OtherInsts.push_back(&Inst); 208 } 209 210 // Do the actual spliting. 211 for (Instruction *Inst : reverse(OtherInsts)) { 212 Inst->moveBefore(&*NewParent->begin()); 213 } 214 } 215 216 bool BCECmpBlock::canSplit() const { 217 DenseSet<Instruction *> BlockInsts( 218 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); 219 for (Instruction &Inst : *BB) { 220 if (!BlockInsts.count(&Inst)) { 221 if (!canSinkBCECmpInst(&Inst, BlockInsts)) 222 return false; 223 } 224 } 225 return true; 226 } 227 228 bool BCECmpBlock::doesOtherWork() const { 229 AssertConsistent(); 230 // All the instructions we care about in the BCE cmp block. 231 DenseSet<Instruction *> BlockInsts( 232 {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); 233 // TODO(courbet): Can we allow some other things ? This is very conservative. 234 // We might be able to get away with anything does not have any side 235 // effects outside of the basic block. 236 // Note: The GEPs and/or loads are not necessarily in the same block. 237 for (const Instruction &Inst : *BB) { 238 if (!BlockInsts.count(&Inst)) 239 return true; 240 } 241 return false; 242 } 243 244 // Visit the given comparison. If this is a comparison between two valid 245 // BCE atoms, returns the comparison. 246 BCECmpBlock visitICmp(const ICmpInst *const CmpI, 247 const ICmpInst::Predicate ExpectedPredicate) { 248 // The comparison can only be used once: 249 // - For intermediate blocks, as a branch condition. 250 // - For the final block, as an incoming value for the Phi. 251 // If there are any other uses of the comparison, we cannot merge it with 252 // other comparisons as we would create an orphan use of the value. 253 if (!CmpI->hasOneUse()) { 254 LLVM_DEBUG(dbgs() << "cmp has several uses\n"); 255 return {}; 256 } 257 if (CmpI->getPredicate() == ExpectedPredicate) { 258 LLVM_DEBUG(dbgs() << "cmp " 259 << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne") 260 << "\n"); 261 auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0)); 262 if (!Lhs.Base()) return {}; 263 auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1)); 264 if (!Rhs.Base()) return {}; 265 return BCECmpBlock(std::move(Lhs), std::move(Rhs), 266 CmpI->getOperand(0)->getType()->getScalarSizeInBits()); 267 } 268 return {}; 269 } 270 271 // Visit the given comparison block. If this is a comparison between two valid 272 // BCE atoms, returns the comparison. 273 BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block, 274 const BasicBlock *const PhiBlock) { 275 if (Block->empty()) return {}; 276 auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator()); 277 if (!BranchI) return {}; 278 LLVM_DEBUG(dbgs() << "branch\n"); 279 if (BranchI->isUnconditional()) { 280 // In this case, we expect an incoming value which is the result of the 281 // comparison. This is the last link in the chain of comparisons (note 282 // that this does not mean that this is the last incoming value, blocks 283 // can be reordered). 284 auto *const CmpI = dyn_cast<ICmpInst>(Val); 285 if (!CmpI) return {}; 286 LLVM_DEBUG(dbgs() << "icmp\n"); 287 auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ); 288 Result.CmpI = CmpI; 289 Result.BranchI = BranchI; 290 return Result; 291 } else { 292 // In this case, we expect a constant incoming value (the comparison is 293 // chained). 294 const auto *const Const = dyn_cast<ConstantInt>(Val); 295 LLVM_DEBUG(dbgs() << "const\n"); 296 if (!Const->isZero()) return {}; 297 LLVM_DEBUG(dbgs() << "false\n"); 298 auto *const CmpI = dyn_cast<ICmpInst>(BranchI->getCondition()); 299 if (!CmpI) return {}; 300 LLVM_DEBUG(dbgs() << "icmp\n"); 301 assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch"); 302 BasicBlock *const FalseBlock = BranchI->getSuccessor(1); 303 auto Result = visitICmp( 304 CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE); 305 Result.CmpI = CmpI; 306 Result.BranchI = BranchI; 307 return Result; 308 } 309 return {}; 310 } 311 312 static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons, 313 BCECmpBlock &Comparison) { 314 LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName() 315 << "': Found cmp of " << Comparison.SizeBits() 316 << " bits between " << Comparison.Lhs().Base() << " + " 317 << Comparison.Lhs().Offset << " and " 318 << Comparison.Rhs().Base() << " + " 319 << Comparison.Rhs().Offset << "\n"); 320 LLVM_DEBUG(dbgs() << "\n"); 321 Comparisons.push_back(Comparison); 322 } 323 324 // A chain of comparisons. 325 class BCECmpChain { 326 public: 327 BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi); 328 329 int size() const { return Comparisons_.size(); } 330 331 #ifdef MERGEICMPS_DOT_ON 332 void dump() const; 333 #endif // MERGEICMPS_DOT_ON 334 335 bool simplify(const TargetLibraryInfo *const TLI); 336 337 private: 338 static bool IsContiguous(const BCECmpBlock &First, 339 const BCECmpBlock &Second) { 340 return First.Lhs().Base() == Second.Lhs().Base() && 341 First.Rhs().Base() == Second.Rhs().Base() && 342 First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset && 343 First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; 344 } 345 346 // Merges the given comparison blocks into one memcmp block and update 347 // branches. Comparisons are assumed to be continguous. If NextBBInChain is 348 // null, the merged block will link to the phi block. 349 void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons, 350 BasicBlock *const NextBBInChain, PHINode &Phi, 351 const TargetLibraryInfo *const TLI); 352 353 PHINode &Phi_; 354 std::vector<BCECmpBlock> Comparisons_; 355 // The original entry block (before sorting); 356 BasicBlock *EntryBlock_; 357 }; 358 359 BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi) 360 : Phi_(Phi) { 361 assert(!Blocks.empty() && "a chain should have at least one block"); 362 // Now look inside blocks to check for BCE comparisons. 363 std::vector<BCECmpBlock> Comparisons; 364 for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) { 365 BasicBlock *const Block = Blocks[BlockIdx]; 366 assert(Block && "invalid block"); 367 BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block), 368 Block, Phi.getParent()); 369 Comparison.BB = Block; 370 if (!Comparison.IsValid()) { 371 LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n"); 372 return; 373 } 374 if (Comparison.doesOtherWork()) { 375 LLVM_DEBUG(dbgs() << "block '" << Comparison.BB->getName() 376 << "' does extra work besides compare\n"); 377 if (Comparisons.empty()) { 378 // This is the initial block in the chain, in case this block does other 379 // work, we can try to split the block and move the irrelevant 380 // instructions to the predecessor. 381 // 382 // If this is not the initial block in the chain, splitting it wont 383 // work. 384 // 385 // As once split, there will still be instructions before the BCE cmp 386 // instructions that do other work in program order, i.e. within the 387 // chain before sorting. Unless we can abort the chain at this point 388 // and start anew. 389 // 390 // NOTE: we only handle block with single predecessor for now. 391 if (Comparison.canSplit()) { 392 LLVM_DEBUG(dbgs() 393 << "Split initial block '" << Comparison.BB->getName() 394 << "' that does extra work besides compare\n"); 395 Comparison.RequireSplit = true; 396 enqueueBlock(Comparisons, Comparison); 397 } else { 398 LLVM_DEBUG(dbgs() 399 << "ignoring initial block '" << Comparison.BB->getName() 400 << "' that does extra work besides compare\n"); 401 } 402 continue; 403 } 404 // TODO(courbet): Right now we abort the whole chain. We could be 405 // merging only the blocks that don't do other work and resume the 406 // chain from there. For example: 407 // if (a[0] == b[0]) { // bb1 408 // if (a[1] == b[1]) { // bb2 409 // some_value = 3; //bb3 410 // if (a[2] == b[2]) { //bb3 411 // do a ton of stuff //bb4 412 // } 413 // } 414 // } 415 // 416 // This is: 417 // 418 // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+ 419 // \ \ \ \ 420 // ne ne ne \ 421 // \ \ \ v 422 // +------------+-----------+----------> bb_phi 423 // 424 // We can only merge the first two comparisons, because bb3* does 425 // "other work" (setting some_value to 3). 426 // We could still merge bb1 and bb2 though. 427 return; 428 } 429 enqueueBlock(Comparisons, Comparison); 430 } 431 432 // It is possible we have no suitable comparison to merge. 433 if (Comparisons.empty()) { 434 LLVM_DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n"); 435 return; 436 } 437 EntryBlock_ = Comparisons[0].BB; 438 Comparisons_ = std::move(Comparisons); 439 #ifdef MERGEICMPS_DOT_ON 440 errs() << "BEFORE REORDERING:\n\n"; 441 dump(); 442 #endif // MERGEICMPS_DOT_ON 443 // Reorder blocks by LHS. We can do that without changing the 444 // semantics because we are only accessing dereferencable memory. 445 llvm::sort(Comparisons_.begin(), Comparisons_.end(), 446 [](const BCECmpBlock &a, const BCECmpBlock &b) { 447 return a.Lhs() < b.Lhs(); 448 }); 449 #ifdef MERGEICMPS_DOT_ON 450 errs() << "AFTER REORDERING:\n\n"; 451 dump(); 452 #endif // MERGEICMPS_DOT_ON 453 } 454 455 #ifdef MERGEICMPS_DOT_ON 456 void BCECmpChain::dump() const { 457 errs() << "digraph dag {\n"; 458 errs() << " graph [bgcolor=transparent];\n"; 459 errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n"; 460 errs() << " edge [color=black];\n"; 461 for (size_t I = 0; I < Comparisons_.size(); ++I) { 462 const auto &Comparison = Comparisons_[I]; 463 errs() << " \"" << I << "\" [label=\"%" 464 << Comparison.Lhs().Base()->getName() << " + " 465 << Comparison.Lhs().Offset << " == %" 466 << Comparison.Rhs().Base()->getName() << " + " 467 << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8) 468 << " bytes)\"];\n"; 469 const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB); 470 if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n"; 471 errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n"; 472 } 473 errs() << " \"Phi\" [label=\"Phi\"];\n"; 474 errs() << "}\n\n"; 475 } 476 #endif // MERGEICMPS_DOT_ON 477 478 bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) { 479 // First pass to check if there is at least one merge. If not, we don't do 480 // anything and we keep analysis passes intact. 481 { 482 bool AtLeastOneMerged = false; 483 for (size_t I = 1; I < Comparisons_.size(); ++I) { 484 if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { 485 AtLeastOneMerged = true; 486 break; 487 } 488 } 489 if (!AtLeastOneMerged) return false; 490 } 491 492 // Remove phi references to comparison blocks, they will be rebuilt as we 493 // merge the blocks. 494 for (const auto &Comparison : Comparisons_) { 495 Phi_.removeIncomingValue(Comparison.BB, false); 496 } 497 498 // If entry block is part of the chain, we need to make the first block 499 // of the chain the new entry block of the function. 500 BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock(); 501 for (size_t I = 1; I < Comparisons_.size(); ++I) { 502 if (Entry == Comparisons_[I].BB) { 503 BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "", 504 Entry->getParent(), Entry); 505 BranchInst::Create(Entry, NEntryBB); 506 break; 507 } 508 } 509 510 // Point the predecessors of the chain to the first comparison block (which is 511 // the new entry point) and update the entry block of the chain. 512 if (EntryBlock_ != Comparisons_[0].BB) { 513 EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB); 514 EntryBlock_ = Comparisons_[0].BB; 515 } 516 517 // Effectively merge blocks. 518 int NumMerged = 1; 519 for (size_t I = 1; I < Comparisons_.size(); ++I) { 520 if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { 521 ++NumMerged; 522 } else { 523 // Merge all previous comparisons and start a new merge block. 524 mergeComparisons( 525 makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged), 526 Comparisons_[I].BB, Phi_, TLI); 527 NumMerged = 1; 528 } 529 } 530 mergeComparisons(makeArrayRef(Comparisons_) 531 .slice(Comparisons_.size() - NumMerged, NumMerged), 532 nullptr, Phi_, TLI); 533 534 return true; 535 } 536 537 void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons, 538 BasicBlock *const NextBBInChain, 539 PHINode &Phi, 540 const TargetLibraryInfo *const TLI) { 541 assert(!Comparisons.empty()); 542 const auto &FirstComparison = *Comparisons.begin(); 543 BasicBlock *const BB = FirstComparison.BB; 544 LLVMContext &Context = BB->getContext(); 545 546 if (Comparisons.size() >= 2) { 547 // If there is one block that requires splitting, we do it now, i.e. 548 // just before we know we will collapse the chain. The instructions 549 // can be executed before any of the instructions in the chain. 550 auto C = std::find_if(Comparisons.begin(), Comparisons.end(), 551 [](const BCECmpBlock &B) { return B.RequireSplit; }); 552 if (C != Comparisons.end()) 553 C->split(EntryBlock_); 554 555 LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); 556 const auto TotalSize = 557 std::accumulate(Comparisons.begin(), Comparisons.end(), 0, 558 [](int Size, const BCECmpBlock &C) { 559 return Size + C.SizeBits(); 560 }) / 561 8; 562 563 // Incoming edges do not need to be updated, and both GEPs are already 564 // computing the right address, we just need to: 565 // - replace the two loads and the icmp with the memcmp 566 // - update the branch 567 // - update the incoming values in the phi. 568 FirstComparison.BranchI->eraseFromParent(); 569 FirstComparison.CmpI->eraseFromParent(); 570 FirstComparison.Lhs().LoadI->eraseFromParent(); 571 FirstComparison.Rhs().LoadI->eraseFromParent(); 572 573 IRBuilder<> Builder(BB); 574 const auto &DL = Phi.getModule()->getDataLayout(); 575 Value *const MemCmpCall = emitMemCmp( 576 FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, 577 ConstantInt::get(DL.getIntPtrType(Context), TotalSize), 578 Builder, DL, TLI); 579 Value *const MemCmpIsZero = Builder.CreateICmpEQ( 580 MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); 581 582 // Add a branch to the next basic block in the chain. 583 if (NextBBInChain) { 584 Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent()); 585 Phi.addIncoming(ConstantInt::getFalse(Context), BB); 586 } else { 587 Builder.CreateBr(Phi.getParent()); 588 Phi.addIncoming(MemCmpIsZero, BB); 589 } 590 591 // Delete merged blocks. 592 for (size_t I = 1; I < Comparisons.size(); ++I) { 593 BasicBlock *CBB = Comparisons[I].BB; 594 CBB->replaceAllUsesWith(BB); 595 CBB->eraseFromParent(); 596 } 597 } else { 598 assert(Comparisons.size() == 1); 599 // There are no blocks to merge, but we still need to update the branches. 600 LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); 601 if (NextBBInChain) { 602 if (FirstComparison.BranchI->isConditional()) { 603 LLVM_DEBUG(dbgs() << "conditional -> conditional\n"); 604 // Just update the "true" target, the "false" target should already be 605 // the phi block. 606 assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent()); 607 FirstComparison.BranchI->setSuccessor(0, NextBBInChain); 608 Phi.addIncoming(ConstantInt::getFalse(Context), BB); 609 } else { 610 LLVM_DEBUG(dbgs() << "unconditional -> conditional\n"); 611 // Replace the unconditional branch by a conditional one. 612 FirstComparison.BranchI->eraseFromParent(); 613 IRBuilder<> Builder(BB); 614 Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain, 615 Phi.getParent()); 616 Phi.addIncoming(FirstComparison.CmpI, BB); 617 } 618 } else { 619 if (FirstComparison.BranchI->isConditional()) { 620 LLVM_DEBUG(dbgs() << "conditional -> unconditional\n"); 621 // Replace the conditional branch by an unconditional one. 622 FirstComparison.BranchI->eraseFromParent(); 623 IRBuilder<> Builder(BB); 624 Builder.CreateBr(Phi.getParent()); 625 Phi.addIncoming(FirstComparison.CmpI, BB); 626 } else { 627 LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n"); 628 Phi.addIncoming(FirstComparison.CmpI, BB); 629 } 630 } 631 } 632 } 633 634 std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi, 635 BasicBlock *const LastBlock, 636 int NumBlocks) { 637 // Walk up from the last block to find other blocks. 638 std::vector<BasicBlock *> Blocks(NumBlocks); 639 assert(LastBlock && "invalid last block"); 640 BasicBlock *CurBlock = LastBlock; 641 for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) { 642 if (CurBlock->hasAddressTaken()) { 643 // Somebody is jumping to the block through an address, all bets are 644 // off. 645 LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex 646 << " has its address taken\n"); 647 return {}; 648 } 649 Blocks[BlockIndex] = CurBlock; 650 auto *SinglePredecessor = CurBlock->getSinglePredecessor(); 651 if (!SinglePredecessor) { 652 // The block has two or more predecessors. 653 LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex 654 << " has two or more predecessors\n"); 655 return {}; 656 } 657 if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) { 658 // The block does not link back to the phi. 659 LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex 660 << " does not link back to the phi\n"); 661 return {}; 662 } 663 CurBlock = SinglePredecessor; 664 } 665 Blocks[0] = CurBlock; 666 return Blocks; 667 } 668 669 bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) { 670 LLVM_DEBUG(dbgs() << "processPhi()\n"); 671 if (Phi.getNumIncomingValues() <= 1) { 672 LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n"); 673 return false; 674 } 675 // We are looking for something that has the following structure: 676 // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+ 677 // \ \ \ \ 678 // ne ne ne \ 679 // \ \ \ v 680 // +------------+-----------+----------> bb_phi 681 // 682 // - The last basic block (bb4 here) must branch unconditionally to bb_phi. 683 // It's the only block that contributes a non-constant value to the Phi. 684 // - All other blocks (b1, b2, b3) must have exactly two successors, one of 685 // them being the phi block. 686 // - All intermediate blocks (bb2, bb3) must have only one predecessor. 687 // - Blocks cannot do other work besides the comparison, see doesOtherWork() 688 689 // The blocks are not necessarily ordered in the phi, so we start from the 690 // last block and reconstruct the order. 691 BasicBlock *LastBlock = nullptr; 692 for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) { 693 if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue; 694 if (LastBlock) { 695 // There are several non-constant values. 696 LLVM_DEBUG(dbgs() << "skip: several non-constant values\n"); 697 return false; 698 } 699 if (!isa<ICmpInst>(Phi.getIncomingValue(I)) || 700 cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() != 701 Phi.getIncomingBlock(I)) { 702 // Non-constant incoming value is not from a cmp instruction or not 703 // produced by the last block. We could end up processing the value 704 // producing block more than once. 705 // 706 // This is an uncommon case, so we bail. 707 LLVM_DEBUG( 708 dbgs() 709 << "skip: non-constant value not from cmp or not from last block.\n"); 710 return false; 711 } 712 LastBlock = Phi.getIncomingBlock(I); 713 } 714 if (!LastBlock) { 715 // There is no non-constant block. 716 LLVM_DEBUG(dbgs() << "skip: no non-constant block\n"); 717 return false; 718 } 719 if (LastBlock->getSingleSuccessor() != Phi.getParent()) { 720 LLVM_DEBUG(dbgs() << "skip: last block non-phi successor\n"); 721 return false; 722 } 723 724 const auto Blocks = 725 getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues()); 726 if (Blocks.empty()) return false; 727 BCECmpChain CmpChain(Blocks, Phi); 728 729 if (CmpChain.size() < 2) { 730 LLVM_DEBUG(dbgs() << "skip: only one compare block\n"); 731 return false; 732 } 733 734 return CmpChain.simplify(TLI); 735 } 736 737 class MergeICmps : public FunctionPass { 738 public: 739 static char ID; 740 741 MergeICmps() : FunctionPass(ID) { 742 initializeMergeICmpsPass(*PassRegistry::getPassRegistry()); 743 } 744 745 bool runOnFunction(Function &F) override { 746 if (skipFunction(F)) return false; 747 const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 748 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 749 auto PA = runImpl(F, &TLI, &TTI); 750 return !PA.areAllPreserved(); 751 } 752 753 private: 754 void getAnalysisUsage(AnalysisUsage &AU) const override { 755 AU.addRequired<TargetLibraryInfoWrapperPass>(); 756 AU.addRequired<TargetTransformInfoWrapperPass>(); 757 } 758 759 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, 760 const TargetTransformInfo *TTI); 761 }; 762 763 PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI, 764 const TargetTransformInfo *TTI) { 765 LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); 766 767 // We only try merging comparisons if the target wants to expand memcmp later. 768 // The rationale is to avoid turning small chains into memcmp calls. 769 if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all(); 770 771 // If we don't have memcmp avaiable we can't emit calls to it. 772 if (!TLI->has(LibFunc_memcmp)) 773 return PreservedAnalyses::all(); 774 775 bool MadeChange = false; 776 777 for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { 778 // A Phi operation is always first in a basic block. 779 if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin())) 780 MadeChange |= processPhi(*Phi, TLI); 781 } 782 783 if (MadeChange) return PreservedAnalyses::none(); 784 return PreservedAnalyses::all(); 785 } 786 787 } // namespace 788 789 char MergeICmps::ID = 0; 790 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", 791 "Merge contiguous icmps into a memcmp", false, false) 792 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 793 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 794 INITIALIZE_PASS_END(MergeICmps, "mergeicmps", 795 "Merge contiguous icmps into a memcmp", false, false) 796 797 Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); } 798