10b57cec5SDimitry Andric //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass turns chains of integer comparisons into memcmp (the memcmp is 100b57cec5SDimitry Andric // later typically inlined as a chain of efficient hardware comparisons). This 110b57cec5SDimitry Andric // typically benefits c++ member or nonmember operator==(). 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The basic idea is to replace a longer chain of integer comparisons loaded 140b57cec5SDimitry Andric // from contiguous memory locations into a shorter chain of larger integer 150b57cec5SDimitry Andric // comparisons. Benefits are double: 160b57cec5SDimitry Andric // - There are less jumps, and therefore less opportunities for mispredictions 170b57cec5SDimitry Andric // and I-cache misses. 180b57cec5SDimitry Andric // - Code size is smaller, both because jumps are removed and because the 190b57cec5SDimitry Andric // encoding of a 2*n byte compare is smaller than that of two n-byte 200b57cec5SDimitry Andric // compares. 210b57cec5SDimitry Andric // 220b57cec5SDimitry Andric // Example: 230b57cec5SDimitry Andric // 240b57cec5SDimitry Andric // struct S { 250b57cec5SDimitry Andric // int a; 260b57cec5SDimitry Andric // char b; 270b57cec5SDimitry Andric // char c; 280b57cec5SDimitry Andric // uint16_t d; 290b57cec5SDimitry Andric // bool operator==(const S& o) const { 300b57cec5SDimitry Andric // return a == o.a && b == o.b && c == o.c && d == o.d; 310b57cec5SDimitry Andric // } 320b57cec5SDimitry Andric // }; 330b57cec5SDimitry Andric // 340b57cec5SDimitry Andric // Is optimized as : 350b57cec5SDimitry Andric // 360b57cec5SDimitry Andric // bool S::operator==(const S& o) const { 370b57cec5SDimitry Andric // return memcmp(this, &o, 8) == 0; 380b57cec5SDimitry Andric // } 390b57cec5SDimitry Andric // 400b57cec5SDimitry Andric // Which will later be expanded (ExpandMemCmp) as a single 8-bytes icmp. 410b57cec5SDimitry Andric // 420b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/MergeICmps.h" 4506c3fb27SDimitry Andric #include "llvm/ADT/SmallString.h" 460b57cec5SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 470b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h" 480b57cec5SDimitry Andric #include "llvm/Analysis/Loads.h" 490b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 500b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 510b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 520b57cec5SDimitry Andric #include "llvm/IR/Function.h" 5306c3fb27SDimitry Andric #include "llvm/IR/Instruction.h" 540b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 55480093f4SDimitry Andric #include "llvm/InitializePasses.h" 560b57cec5SDimitry Andric #include "llvm/Pass.h" 570b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 580b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 590b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BuildLibCalls.h" 600b57cec5SDimitry Andric #include <algorithm> 610b57cec5SDimitry Andric #include <numeric> 620b57cec5SDimitry Andric #include <utility> 630b57cec5SDimitry Andric #include <vector> 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric using namespace llvm; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric namespace { 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric #define DEBUG_TYPE "mergeicmps" 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric // A BCE atom "Binary Compare Expression Atom" represents an integer load 720b57cec5SDimitry Andric // that is a constant offset from a base value, e.g. `a` or `o.c` in the example 730b57cec5SDimitry Andric // at the top. 740b57cec5SDimitry Andric struct BCEAtom { 750b57cec5SDimitry Andric BCEAtom() = default; 760b57cec5SDimitry Andric BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset) 77*0fca6ea1SDimitry Andric : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(std::move(Offset)) {} 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric BCEAtom(const BCEAtom &) = delete; 800b57cec5SDimitry Andric BCEAtom &operator=(const BCEAtom &) = delete; 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric BCEAtom(BCEAtom &&that) = default; 830b57cec5SDimitry Andric BCEAtom &operator=(BCEAtom &&that) { 840b57cec5SDimitry Andric if (this == &that) 850b57cec5SDimitry Andric return *this; 860b57cec5SDimitry Andric GEP = that.GEP; 870b57cec5SDimitry Andric LoadI = that.LoadI; 880b57cec5SDimitry Andric BaseId = that.BaseId; 890b57cec5SDimitry Andric Offset = std::move(that.Offset); 900b57cec5SDimitry Andric return *this; 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric // We want to order BCEAtoms by (Base, Offset). However we cannot use 940b57cec5SDimitry Andric // the pointer values for Base because these are non-deterministic. 950b57cec5SDimitry Andric // To make sure that the sort order is stable, we first assign to each atom 960b57cec5SDimitry Andric // base value an index based on its order of appearance in the chain of 970b57cec5SDimitry Andric // comparisons. We call this index `BaseOrdering`. For example, for: 980b57cec5SDimitry Andric // b[3] == c[2] && a[1] == d[1] && b[4] == c[3] 990b57cec5SDimitry Andric // | block 1 | | block 2 | | block 3 | 1000b57cec5SDimitry Andric // b gets assigned index 0 and a index 1, because b appears as LHS in block 1, 1010b57cec5SDimitry Andric // which is before block 2. 1020b57cec5SDimitry Andric // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable. 1030b57cec5SDimitry Andric bool operator<(const BCEAtom &O) const { 1040b57cec5SDimitry Andric return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric GetElementPtrInst *GEP = nullptr; 1080b57cec5SDimitry Andric LoadInst *LoadI = nullptr; 1090b57cec5SDimitry Andric unsigned BaseId = 0; 1100b57cec5SDimitry Andric APInt Offset; 1110b57cec5SDimitry Andric }; 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric // A class that assigns increasing ids to values in the order in which they are 1140b57cec5SDimitry Andric // seen. See comment in `BCEAtom::operator<()``. 1150b57cec5SDimitry Andric class BaseIdentifier { 1160b57cec5SDimitry Andric public: 1170b57cec5SDimitry Andric // Returns the id for value `Base`, after assigning one if `Base` has not been 1180b57cec5SDimitry Andric // seen before. 1190b57cec5SDimitry Andric int getBaseId(const Value *Base) { 1200b57cec5SDimitry Andric assert(Base && "invalid base"); 1210b57cec5SDimitry Andric const auto Insertion = BaseToIndex.try_emplace(Base, Order); 1220b57cec5SDimitry Andric if (Insertion.second) 1230b57cec5SDimitry Andric ++Order; 1240b57cec5SDimitry Andric return Insertion.first->second; 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric private: 1280b57cec5SDimitry Andric unsigned Order = 1; 1290b57cec5SDimitry Andric DenseMap<const Value*, int> BaseToIndex; 1300b57cec5SDimitry Andric }; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // If this value is a load from a constant offset w.r.t. a base address, and 1330b57cec5SDimitry Andric // there are no other users of the load or address, returns the base address and 1340b57cec5SDimitry Andric // the offset. 1350b57cec5SDimitry Andric BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) { 1360b57cec5SDimitry Andric auto *const LoadI = dyn_cast<LoadInst>(Val); 1370b57cec5SDimitry Andric if (!LoadI) 1380b57cec5SDimitry Andric return {}; 1390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "load\n"); 1400b57cec5SDimitry Andric if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) { 1410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "used outside of block\n"); 1420b57cec5SDimitry Andric return {}; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric // Do not optimize atomic loads to non-atomic memcmp 1450b57cec5SDimitry Andric if (!LoadI->isSimple()) { 1460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "volatile or atomic\n"); 1470b57cec5SDimitry Andric return {}; 1480b57cec5SDimitry Andric } 14981ad6265SDimitry Andric Value *Addr = LoadI->getOperand(0); 150349cc55cSDimitry Andric if (Addr->getType()->getPointerAddressSpace() != 0) { 151349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n"); 152349cc55cSDimitry Andric return {}; 153349cc55cSDimitry Andric } 154*0fca6ea1SDimitry Andric const auto &DL = LoadI->getDataLayout(); 15581ad6265SDimitry Andric if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) { 1560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "not dereferenceable\n"); 1570b57cec5SDimitry Andric // We need to make sure that we can do comparison in any order, so we 158bdd1243dSDimitry Andric // require memory to be unconditionally dereferenceable. 1590b57cec5SDimitry Andric return {}; 1600b57cec5SDimitry Andric } 16181ad6265SDimitry Andric 16206c3fb27SDimitry Andric APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0); 16381ad6265SDimitry Andric Value *Base = Addr; 16481ad6265SDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(Addr); 16581ad6265SDimitry Andric if (GEP) { 16681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "GEP\n"); 16781ad6265SDimitry Andric if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) { 16881ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "used outside of block\n"); 16981ad6265SDimitry Andric return {}; 17081ad6265SDimitry Andric } 1710b57cec5SDimitry Andric if (!GEP->accumulateConstantOffset(DL, Offset)) 1720b57cec5SDimitry Andric return {}; 17381ad6265SDimitry Andric Base = GEP->getPointerOperand(); 17481ad6265SDimitry Andric } 17581ad6265SDimitry Andric return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset); 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric 178fe6060f1SDimitry Andric // A comparison between two BCE atoms, e.g. `a == o.a` in the example at the 179fe6060f1SDimitry Andric // top. 1800b57cec5SDimitry Andric // Note: the terminology is misleading: the comparison is symmetric, so there 1810b57cec5SDimitry Andric // is no real {l/r}hs. What we want though is to have the same base on the 1820b57cec5SDimitry Andric // left (resp. right), so that we can detect consecutive loads. To ensure this 1830b57cec5SDimitry Andric // we put the smallest atom on the left. 184fe6060f1SDimitry Andric struct BCECmp { 185fe6060f1SDimitry Andric BCEAtom Lhs; 186fe6060f1SDimitry Andric BCEAtom Rhs; 187fe6060f1SDimitry Andric int SizeBits; 188fe6060f1SDimitry Andric const ICmpInst *CmpI; 189fe6060f1SDimitry Andric 190fe6060f1SDimitry Andric BCECmp(BCEAtom L, BCEAtom R, int SizeBits, const ICmpInst *CmpI) 191fe6060f1SDimitry Andric : Lhs(std::move(L)), Rhs(std::move(R)), SizeBits(SizeBits), CmpI(CmpI) { 192fe6060f1SDimitry Andric if (Rhs < Lhs) std::swap(Rhs, Lhs); 193fe6060f1SDimitry Andric } 194fe6060f1SDimitry Andric }; 195fe6060f1SDimitry Andric 196fe6060f1SDimitry Andric // A basic block with a comparison between two BCE atoms. 197fe6060f1SDimitry Andric // The block might do extra work besides the atom comparison, in which case 198fe6060f1SDimitry Andric // doesOtherWork() returns true. Under some conditions, the block can be 199fe6060f1SDimitry Andric // split into the atom comparison part and the "other work" part 200fe6060f1SDimitry Andric // (see canSplit()). 2010b57cec5SDimitry Andric class BCECmpBlock { 2020b57cec5SDimitry Andric public: 203fe6060f1SDimitry Andric typedef SmallDenseSet<const Instruction *, 8> InstructionSet; 2040b57cec5SDimitry Andric 205fe6060f1SDimitry Andric BCECmpBlock(BCECmp Cmp, BasicBlock *BB, InstructionSet BlockInsts) 206fe6060f1SDimitry Andric : BB(BB), BlockInsts(std::move(BlockInsts)), Cmp(std::move(Cmp)) {} 2070b57cec5SDimitry Andric 208fe6060f1SDimitry Andric const BCEAtom &Lhs() const { return Cmp.Lhs; } 209fe6060f1SDimitry Andric const BCEAtom &Rhs() const { return Cmp.Rhs; } 210fe6060f1SDimitry Andric int SizeBits() const { return Cmp.SizeBits; } 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric // Returns true if the block does other works besides comparison. 2130b57cec5SDimitry Andric bool doesOtherWork() const; 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp 2160b57cec5SDimitry Andric // instructions in the block. 2170b57cec5SDimitry Andric bool canSplit(AliasAnalysis &AA) const; 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric // Return true if this all the relevant instructions in the BCE-cmp-block can 2200b57cec5SDimitry Andric // be sunk below this instruction. By doing this, we know we can separate the 2210b57cec5SDimitry Andric // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the 2220b57cec5SDimitry Andric // block. 223fe6060f1SDimitry Andric bool canSinkBCECmpInst(const Instruction *, AliasAnalysis &AA) const; 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block 2260b57cec5SDimitry Andric // instructions. Split the old block and move all non-BCE-cmp-insts into the 2270b57cec5SDimitry Andric // new parent block. 2280b57cec5SDimitry Andric void split(BasicBlock *NewParent, AliasAnalysis &AA) const; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric // The basic block where this comparison happens. 231fe6060f1SDimitry Andric BasicBlock *BB; 232fe6060f1SDimitry Andric // Instructions relating to the BCECmp and branch. 233fe6060f1SDimitry Andric InstructionSet BlockInsts; 2340b57cec5SDimitry Andric // The block requires splitting. 2350b57cec5SDimitry Andric bool RequireSplit = false; 236349cc55cSDimitry Andric // Original order of this block in the chain. 237349cc55cSDimitry Andric unsigned OrigOrder = 0; 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric private: 240fe6060f1SDimitry Andric BCECmp Cmp; 2410b57cec5SDimitry Andric }; 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, 2440b57cec5SDimitry Andric AliasAnalysis &AA) const { 245fe6060f1SDimitry Andric // If this instruction may clobber the loads and is in middle of the BCE cmp 246fe6060f1SDimitry Andric // block instructions, then bail for now. 247fe6060f1SDimitry Andric if (Inst->mayWriteToMemory()) { 248349cc55cSDimitry Andric auto MayClobber = [&](LoadInst *LI) { 249349cc55cSDimitry Andric // If a potentially clobbering instruction comes before the load, 250349cc55cSDimitry Andric // we can still safely sink the load. 25181ad6265SDimitry Andric return (Inst->getParent() != LI->getParent() || !Inst->comesBefore(LI)) && 252349cc55cSDimitry Andric isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI))); 253349cc55cSDimitry Andric }; 254349cc55cSDimitry Andric if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI)) 2550b57cec5SDimitry Andric return false; 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric // Make sure this instruction does not use any of the BCE cmp block 2580b57cec5SDimitry Andric // instructions as operand. 259fe6060f1SDimitry Andric return llvm::none_of(Inst->operands(), [&](const Value *Op) { 260fe6060f1SDimitry Andric const Instruction *OpI = dyn_cast<Instruction>(Op); 261fe6060f1SDimitry Andric return OpI && BlockInsts.contains(OpI); 262fe6060f1SDimitry Andric }); 2630b57cec5SDimitry Andric } 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const { 2660b57cec5SDimitry Andric llvm::SmallVector<Instruction *, 4> OtherInsts; 2670b57cec5SDimitry Andric for (Instruction &Inst : *BB) { 2680b57cec5SDimitry Andric if (BlockInsts.count(&Inst)) 2690b57cec5SDimitry Andric continue; 270fe6060f1SDimitry Andric assert(canSinkBCECmpInst(&Inst, AA) && "Split unsplittable block"); 2710b57cec5SDimitry Andric // This is a non-BCE-cmp-block instruction. And it can be separated 2720b57cec5SDimitry Andric // from the BCE-cmp-block instruction. 2730b57cec5SDimitry Andric OtherInsts.push_back(&Inst); 2740b57cec5SDimitry Andric } 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric // Do the actual spliting. 27781ad6265SDimitry Andric for (Instruction *Inst : reverse(OtherInsts)) 2785f757f3fSDimitry Andric Inst->moveBeforePreserving(*NewParent, NewParent->begin()); 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric bool BCECmpBlock::canSplit(AliasAnalysis &AA) const { 2820b57cec5SDimitry Andric for (Instruction &Inst : *BB) { 2830b57cec5SDimitry Andric if (!BlockInsts.count(&Inst)) { 284fe6060f1SDimitry Andric if (!canSinkBCECmpInst(&Inst, AA)) 2850b57cec5SDimitry Andric return false; 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric return true; 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric bool BCECmpBlock::doesOtherWork() const { 2920b57cec5SDimitry Andric // TODO(courbet): Can we allow some other things ? This is very conservative. 2930b57cec5SDimitry Andric // We might be able to get away with anything does not have any side 2940b57cec5SDimitry Andric // effects outside of the basic block. 2950b57cec5SDimitry Andric // Note: The GEPs and/or loads are not necessarily in the same block. 2960b57cec5SDimitry Andric for (const Instruction &Inst : *BB) { 2970b57cec5SDimitry Andric if (!BlockInsts.count(&Inst)) 2980b57cec5SDimitry Andric return true; 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric return false; 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric // Visit the given comparison. If this is a comparison between two valid 3040b57cec5SDimitry Andric // BCE atoms, returns the comparison. 305bdd1243dSDimitry Andric std::optional<BCECmp> visitICmp(const ICmpInst *const CmpI, 3060b57cec5SDimitry Andric const ICmpInst::Predicate ExpectedPredicate, 3070b57cec5SDimitry Andric BaseIdentifier &BaseId) { 3080b57cec5SDimitry Andric // The comparison can only be used once: 3090b57cec5SDimitry Andric // - For intermediate blocks, as a branch condition. 3100b57cec5SDimitry Andric // - For the final block, as an incoming value for the Phi. 3110b57cec5SDimitry Andric // If there are any other uses of the comparison, we cannot merge it with 3120b57cec5SDimitry Andric // other comparisons as we would create an orphan use of the value. 3130b57cec5SDimitry Andric if (!CmpI->hasOneUse()) { 3140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "cmp has several uses\n"); 315bdd1243dSDimitry Andric return std::nullopt; 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric if (CmpI->getPredicate() != ExpectedPredicate) 318bdd1243dSDimitry Andric return std::nullopt; 3190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "cmp " 3200b57cec5SDimitry Andric << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne") 3210b57cec5SDimitry Andric << "\n"); 3220b57cec5SDimitry Andric auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId); 3230b57cec5SDimitry Andric if (!Lhs.BaseId) 324bdd1243dSDimitry Andric return std::nullopt; 3250b57cec5SDimitry Andric auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId); 3260b57cec5SDimitry Andric if (!Rhs.BaseId) 327bdd1243dSDimitry Andric return std::nullopt; 328*0fca6ea1SDimitry Andric const auto &DL = CmpI->getDataLayout(); 329fe6060f1SDimitry Andric return BCECmp(std::move(Lhs), std::move(Rhs), 330fe6060f1SDimitry Andric DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()), CmpI); 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric // Visit the given comparison block. If this is a comparison between two valid 3340b57cec5SDimitry Andric // BCE atoms, returns the comparison. 335bdd1243dSDimitry Andric std::optional<BCECmpBlock> visitCmpBlock(Value *const Val, 336bdd1243dSDimitry Andric BasicBlock *const Block, 3370b57cec5SDimitry Andric const BasicBlock *const PhiBlock, 3380b57cec5SDimitry Andric BaseIdentifier &BaseId) { 339bdd1243dSDimitry Andric if (Block->empty()) 340bdd1243dSDimitry Andric return std::nullopt; 3410b57cec5SDimitry Andric auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator()); 342bdd1243dSDimitry Andric if (!BranchI) 343bdd1243dSDimitry Andric return std::nullopt; 3440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "branch\n"); 345fe6060f1SDimitry Andric Value *Cond; 346fe6060f1SDimitry Andric ICmpInst::Predicate ExpectedPredicate; 3470b57cec5SDimitry Andric if (BranchI->isUnconditional()) { 3480b57cec5SDimitry Andric // In this case, we expect an incoming value which is the result of the 3490b57cec5SDimitry Andric // comparison. This is the last link in the chain of comparisons (note 3500b57cec5SDimitry Andric // that this does not mean that this is the last incoming value, blocks 3510b57cec5SDimitry Andric // can be reordered). 352fe6060f1SDimitry Andric Cond = Val; 353fe6060f1SDimitry Andric ExpectedPredicate = ICmpInst::ICMP_EQ; 3540b57cec5SDimitry Andric } else { 3550b57cec5SDimitry Andric // In this case, we expect a constant incoming value (the comparison is 3560b57cec5SDimitry Andric // chained). 357e8d8bef9SDimitry Andric const auto *const Const = cast<ConstantInt>(Val); 3580b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "const\n"); 359bdd1243dSDimitry Andric if (!Const->isZero()) 360bdd1243dSDimitry Andric return std::nullopt; 3610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "false\n"); 3620b57cec5SDimitry Andric assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch"); 3630b57cec5SDimitry Andric BasicBlock *const FalseBlock = BranchI->getSuccessor(1); 364fe6060f1SDimitry Andric Cond = BranchI->getCondition(); 365fe6060f1SDimitry Andric ExpectedPredicate = 366fe6060f1SDimitry Andric FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; 3670b57cec5SDimitry Andric } 368fe6060f1SDimitry Andric 369fe6060f1SDimitry Andric auto *CmpI = dyn_cast<ICmpInst>(Cond); 370bdd1243dSDimitry Andric if (!CmpI) 371bdd1243dSDimitry Andric return std::nullopt; 372fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "icmp\n"); 373fe6060f1SDimitry Andric 374bdd1243dSDimitry Andric std::optional<BCECmp> Result = visitICmp(CmpI, ExpectedPredicate, BaseId); 375fe6060f1SDimitry Andric if (!Result) 376bdd1243dSDimitry Andric return std::nullopt; 377fe6060f1SDimitry Andric 378fe6060f1SDimitry Andric BCECmpBlock::InstructionSet BlockInsts( 37981ad6265SDimitry Andric {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI}); 38081ad6265SDimitry Andric if (Result->Lhs.GEP) 38181ad6265SDimitry Andric BlockInsts.insert(Result->Lhs.GEP); 38281ad6265SDimitry Andric if (Result->Rhs.GEP) 38381ad6265SDimitry Andric BlockInsts.insert(Result->Rhs.GEP); 384fe6060f1SDimitry Andric return BCECmpBlock(std::move(*Result), Block, BlockInsts); 3850b57cec5SDimitry Andric } 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons, 3880b57cec5SDimitry Andric BCECmpBlock &&Comparison) { 3890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName() 3900b57cec5SDimitry Andric << "': Found cmp of " << Comparison.SizeBits() 3910b57cec5SDimitry Andric << " bits between " << Comparison.Lhs().BaseId << " + " 3920b57cec5SDimitry Andric << Comparison.Lhs().Offset << " and " 3930b57cec5SDimitry Andric << Comparison.Rhs().BaseId << " + " 3940b57cec5SDimitry Andric << Comparison.Rhs().Offset << "\n"); 3950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 396349cc55cSDimitry Andric Comparison.OrigOrder = Comparisons.size(); 3970b57cec5SDimitry Andric Comparisons.push_back(std::move(Comparison)); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric // A chain of comparisons. 4010b57cec5SDimitry Andric class BCECmpChain { 4020b57cec5SDimitry Andric public: 403349cc55cSDimitry Andric using ContiguousBlocks = std::vector<BCECmpBlock>; 404349cc55cSDimitry Andric 4050b57cec5SDimitry Andric BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, 4060b57cec5SDimitry Andric AliasAnalysis &AA); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, 4090b57cec5SDimitry Andric DomTreeUpdater &DTU); 4100b57cec5SDimitry Andric 411349cc55cSDimitry Andric bool atLeastOneMerged() const { 412349cc55cSDimitry Andric return any_of(MergedBlocks_, 413349cc55cSDimitry Andric [](const auto &Blocks) { return Blocks.size() > 1; }); 414349cc55cSDimitry Andric } 415349cc55cSDimitry Andric 4160b57cec5SDimitry Andric private: 417349cc55cSDimitry Andric PHINode &Phi_; 418349cc55cSDimitry Andric // The list of all blocks in the chain, grouped by contiguity. 419349cc55cSDimitry Andric std::vector<ContiguousBlocks> MergedBlocks_; 420349cc55cSDimitry Andric // The original entry block (before sorting); 421349cc55cSDimitry Andric BasicBlock *EntryBlock_; 422349cc55cSDimitry Andric }; 423349cc55cSDimitry Andric 424349cc55cSDimitry Andric static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) { 4250b57cec5SDimitry Andric return First.Lhs().BaseId == Second.Lhs().BaseId && 4260b57cec5SDimitry Andric First.Rhs().BaseId == Second.Rhs().BaseId && 4270b57cec5SDimitry Andric First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset && 4280b57cec5SDimitry Andric First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 431349cc55cSDimitry Andric static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) { 432349cc55cSDimitry Andric unsigned MinOrigOrder = std::numeric_limits<unsigned>::max(); 433349cc55cSDimitry Andric for (const BCECmpBlock &Block : Blocks) 434349cc55cSDimitry Andric MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder); 435349cc55cSDimitry Andric return MinOrigOrder; 436349cc55cSDimitry Andric } 437349cc55cSDimitry Andric 438349cc55cSDimitry Andric /// Given a chain of comparison blocks, groups the blocks into contiguous 439349cc55cSDimitry Andric /// ranges that can be merged together into a single comparison. 440349cc55cSDimitry Andric static std::vector<BCECmpChain::ContiguousBlocks> 441349cc55cSDimitry Andric mergeBlocks(std::vector<BCECmpBlock> &&Blocks) { 442349cc55cSDimitry Andric std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks; 443349cc55cSDimitry Andric 444349cc55cSDimitry Andric // Sort to detect continuous offsets. 445349cc55cSDimitry Andric llvm::sort(Blocks, 446349cc55cSDimitry Andric [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) { 447349cc55cSDimitry Andric return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) < 448349cc55cSDimitry Andric std::tie(RhsBlock.Lhs(), RhsBlock.Rhs()); 449349cc55cSDimitry Andric }); 450349cc55cSDimitry Andric 451349cc55cSDimitry Andric BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr; 452349cc55cSDimitry Andric for (BCECmpBlock &Block : Blocks) { 453349cc55cSDimitry Andric if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) { 454349cc55cSDimitry Andric MergedBlocks.emplace_back(); 455349cc55cSDimitry Andric LastMergedBlock = &MergedBlocks.back(); 456349cc55cSDimitry Andric } else { 457349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into " 458349cc55cSDimitry Andric << LastMergedBlock->back().BB->getName() << "\n"); 459349cc55cSDimitry Andric } 460349cc55cSDimitry Andric LastMergedBlock->push_back(std::move(Block)); 461349cc55cSDimitry Andric } 462349cc55cSDimitry Andric 463349cc55cSDimitry Andric // While we allow reordering for merging, do not reorder unmerged comparisons. 464349cc55cSDimitry Andric // Doing so may introduce branch on poison. 465349cc55cSDimitry Andric llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks, 466349cc55cSDimitry Andric const BCECmpChain::ContiguousBlocks &RhsBlocks) { 467349cc55cSDimitry Andric return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks); 468349cc55cSDimitry Andric }); 469349cc55cSDimitry Andric 470349cc55cSDimitry Andric return MergedBlocks; 471349cc55cSDimitry Andric } 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi, 4740b57cec5SDimitry Andric AliasAnalysis &AA) 4750b57cec5SDimitry Andric : Phi_(Phi) { 4760b57cec5SDimitry Andric assert(!Blocks.empty() && "a chain should have at least one block"); 4770b57cec5SDimitry Andric // Now look inside blocks to check for BCE comparisons. 4780b57cec5SDimitry Andric std::vector<BCECmpBlock> Comparisons; 4790b57cec5SDimitry Andric BaseIdentifier BaseId; 480fe6060f1SDimitry Andric for (BasicBlock *const Block : Blocks) { 4810b57cec5SDimitry Andric assert(Block && "invalid block"); 482bdd1243dSDimitry Andric std::optional<BCECmpBlock> Comparison = visitCmpBlock( 483fe6060f1SDimitry Andric Phi.getIncomingValueForBlock(Block), Block, Phi.getParent(), BaseId); 484fe6060f1SDimitry Andric if (!Comparison) { 4850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n"); 4860b57cec5SDimitry Andric return; 4870b57cec5SDimitry Andric } 488fe6060f1SDimitry Andric if (Comparison->doesOtherWork()) { 489fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "block '" << Comparison->BB->getName() 4900b57cec5SDimitry Andric << "' does extra work besides compare\n"); 4910b57cec5SDimitry Andric if (Comparisons.empty()) { 4920b57cec5SDimitry Andric // This is the initial block in the chain, in case this block does other 4930b57cec5SDimitry Andric // work, we can try to split the block and move the irrelevant 4940b57cec5SDimitry Andric // instructions to the predecessor. 4950b57cec5SDimitry Andric // 4960b57cec5SDimitry Andric // If this is not the initial block in the chain, splitting it wont 4970b57cec5SDimitry Andric // work. 4980b57cec5SDimitry Andric // 4990b57cec5SDimitry Andric // As once split, there will still be instructions before the BCE cmp 5000b57cec5SDimitry Andric // instructions that do other work in program order, i.e. within the 5010b57cec5SDimitry Andric // chain before sorting. Unless we can abort the chain at this point 5020b57cec5SDimitry Andric // and start anew. 5030b57cec5SDimitry Andric // 5040b57cec5SDimitry Andric // NOTE: we only handle blocks a with single predecessor for now. 505fe6060f1SDimitry Andric if (Comparison->canSplit(AA)) { 5060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 507fe6060f1SDimitry Andric << "Split initial block '" << Comparison->BB->getName() 5080b57cec5SDimitry Andric << "' that does extra work besides compare\n"); 509fe6060f1SDimitry Andric Comparison->RequireSplit = true; 510fe6060f1SDimitry Andric enqueueBlock(Comparisons, std::move(*Comparison)); 5110b57cec5SDimitry Andric } else { 5120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 513fe6060f1SDimitry Andric << "ignoring initial block '" << Comparison->BB->getName() 5140b57cec5SDimitry Andric << "' that does extra work besides compare\n"); 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric continue; 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric // TODO(courbet): Right now we abort the whole chain. We could be 5190b57cec5SDimitry Andric // merging only the blocks that don't do other work and resume the 5200b57cec5SDimitry Andric // chain from there. For example: 5210b57cec5SDimitry Andric // if (a[0] == b[0]) { // bb1 5220b57cec5SDimitry Andric // if (a[1] == b[1]) { // bb2 5230b57cec5SDimitry Andric // some_value = 3; //bb3 5240b57cec5SDimitry Andric // if (a[2] == b[2]) { //bb3 5250b57cec5SDimitry Andric // do a ton of stuff //bb4 5260b57cec5SDimitry Andric // } 5270b57cec5SDimitry Andric // } 5280b57cec5SDimitry Andric // } 5290b57cec5SDimitry Andric // 5300b57cec5SDimitry Andric // This is: 5310b57cec5SDimitry Andric // 5320b57cec5SDimitry Andric // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+ 5330b57cec5SDimitry Andric // \ \ \ \ 5340b57cec5SDimitry Andric // ne ne ne \ 5350b57cec5SDimitry Andric // \ \ \ v 5360b57cec5SDimitry Andric // +------------+-----------+----------> bb_phi 5370b57cec5SDimitry Andric // 5380b57cec5SDimitry Andric // We can only merge the first two comparisons, because bb3* does 5390b57cec5SDimitry Andric // "other work" (setting some_value to 3). 5400b57cec5SDimitry Andric // We could still merge bb1 and bb2 though. 5410b57cec5SDimitry Andric return; 5420b57cec5SDimitry Andric } 543fe6060f1SDimitry Andric enqueueBlock(Comparisons, std::move(*Comparison)); 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric // It is possible we have no suitable comparison to merge. 5470b57cec5SDimitry Andric if (Comparisons.empty()) { 5480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n"); 5490b57cec5SDimitry Andric return; 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric EntryBlock_ = Comparisons[0].BB; 552349cc55cSDimitry Andric MergedBlocks_ = mergeBlocks(std::move(Comparisons)); 5530b57cec5SDimitry Andric } 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric namespace { 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric // A class to compute the name of a set of merged basic blocks. 5580b57cec5SDimitry Andric // This is optimized for the common case of no block names. 5590b57cec5SDimitry Andric class MergedBlockName { 5600b57cec5SDimitry Andric // Storage for the uncommon case of several named blocks. 5610b57cec5SDimitry Andric SmallString<16> Scratch; 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric public: 5640b57cec5SDimitry Andric explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons) 5650b57cec5SDimitry Andric : Name(makeName(Comparisons)) {} 5660b57cec5SDimitry Andric const StringRef Name; 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric private: 5690b57cec5SDimitry Andric StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) { 5700b57cec5SDimitry Andric assert(!Comparisons.empty() && "no basic block"); 5710b57cec5SDimitry Andric // Fast path: only one block, or no names at all. 5720b57cec5SDimitry Andric if (Comparisons.size() == 1) 5730b57cec5SDimitry Andric return Comparisons[0].BB->getName(); 5740b57cec5SDimitry Andric const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0, 5750b57cec5SDimitry Andric [](int i, const BCECmpBlock &Cmp) { 5760b57cec5SDimitry Andric return i + Cmp.BB->getName().size(); 5770b57cec5SDimitry Andric }); 5780b57cec5SDimitry Andric if (size == 0) 5790b57cec5SDimitry Andric return StringRef("", 0); 5800b57cec5SDimitry Andric 5810b57cec5SDimitry Andric // Slow path: at least two blocks, at least one block with a name. 5820b57cec5SDimitry Andric Scratch.clear(); 5830b57cec5SDimitry Andric // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for 5840b57cec5SDimitry Andric // separators. 5850b57cec5SDimitry Andric Scratch.reserve(size + Comparisons.size() - 1); 5860b57cec5SDimitry Andric const auto append = [this](StringRef str) { 5870b57cec5SDimitry Andric Scratch.append(str.begin(), str.end()); 5880b57cec5SDimitry Andric }; 5890b57cec5SDimitry Andric append(Comparisons[0].BB->getName()); 5900b57cec5SDimitry Andric for (int I = 1, E = Comparisons.size(); I < E; ++I) { 5910b57cec5SDimitry Andric const BasicBlock *const BB = Comparisons[I].BB; 5920b57cec5SDimitry Andric if (!BB->getName().empty()) { 5930b57cec5SDimitry Andric append("+"); 5940b57cec5SDimitry Andric append(BB->getName()); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric } 597fe6060f1SDimitry Andric return Scratch.str(); 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric }; 6000b57cec5SDimitry Andric } // namespace 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric // Merges the given contiguous comparison blocks into one memcmp block. 6030b57cec5SDimitry Andric static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons, 6040b57cec5SDimitry Andric BasicBlock *const InsertBefore, 6050b57cec5SDimitry Andric BasicBlock *const NextCmpBlock, 6060b57cec5SDimitry Andric PHINode &Phi, const TargetLibraryInfo &TLI, 6070b57cec5SDimitry Andric AliasAnalysis &AA, DomTreeUpdater &DTU) { 6080b57cec5SDimitry Andric assert(!Comparisons.empty() && "merging zero comparisons"); 6090b57cec5SDimitry Andric LLVMContext &Context = NextCmpBlock->getContext(); 6100b57cec5SDimitry Andric const BCECmpBlock &FirstCmp = Comparisons[0]; 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric // Create a new cmp block before next cmp block. 6130b57cec5SDimitry Andric BasicBlock *const BB = 6140b57cec5SDimitry Andric BasicBlock::Create(Context, MergedBlockName(Comparisons).Name, 6150b57cec5SDimitry Andric NextCmpBlock->getParent(), InsertBefore); 6160b57cec5SDimitry Andric IRBuilder<> Builder(BB); 6170b57cec5SDimitry Andric // Add the GEPs from the first BCECmpBlock. 61881ad6265SDimitry Andric Value *Lhs, *Rhs; 61981ad6265SDimitry Andric if (FirstCmp.Lhs().GEP) 62081ad6265SDimitry Andric Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone()); 62181ad6265SDimitry Andric else 62281ad6265SDimitry Andric Lhs = FirstCmp.Lhs().LoadI->getPointerOperand(); 62381ad6265SDimitry Andric if (FirstCmp.Rhs().GEP) 62481ad6265SDimitry Andric Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone()); 62581ad6265SDimitry Andric else 62681ad6265SDimitry Andric Rhs = FirstCmp.Rhs().LoadI->getPointerOperand(); 6270b57cec5SDimitry Andric 6280b57cec5SDimitry Andric Value *IsEqual = nullptr; 6290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> " 6300b57cec5SDimitry Andric << BB->getName() << "\n"); 631e8d8bef9SDimitry Andric 632e8d8bef9SDimitry Andric // If there is one block that requires splitting, we do it now, i.e. 633e8d8bef9SDimitry Andric // just before we know we will collapse the chain. The instructions 634e8d8bef9SDimitry Andric // can be executed before any of the instructions in the chain. 635e8d8bef9SDimitry Andric const auto ToSplit = llvm::find_if( 636e8d8bef9SDimitry Andric Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; }); 637e8d8bef9SDimitry Andric if (ToSplit != Comparisons.end()) { 638e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); 639e8d8bef9SDimitry Andric ToSplit->split(BB, AA); 640e8d8bef9SDimitry Andric } 641e8d8bef9SDimitry Andric 6420b57cec5SDimitry Andric if (Comparisons.size() == 1) { 6430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); 64406c3fb27SDimitry Andric // Use clone to keep the metadata 64506c3fb27SDimitry Andric Instruction *const LhsLoad = Builder.Insert(FirstCmp.Lhs().LoadI->clone()); 64606c3fb27SDimitry Andric Instruction *const RhsLoad = Builder.Insert(FirstCmp.Rhs().LoadI->clone()); 64706c3fb27SDimitry Andric LhsLoad->replaceUsesOfWith(LhsLoad->getOperand(0), Lhs); 64806c3fb27SDimitry Andric RhsLoad->replaceUsesOfWith(RhsLoad->getOperand(0), Rhs); 6490b57cec5SDimitry Andric // There are no blocks to merge, just do the comparison. 6500b57cec5SDimitry Andric IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); 6510b57cec5SDimitry Andric } else { 6520b57cec5SDimitry Andric const unsigned TotalSizeBits = std::accumulate( 6530b57cec5SDimitry Andric Comparisons.begin(), Comparisons.end(), 0u, 6540b57cec5SDimitry Andric [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); }); 6550b57cec5SDimitry Andric 656bdd1243dSDimitry Andric // memcmp expects a 'size_t' argument and returns 'int'. 657bdd1243dSDimitry Andric unsigned SizeTBits = TLI.getSizeTSize(*Phi.getModule()); 658bdd1243dSDimitry Andric unsigned IntBits = TLI.getIntSize(); 659bdd1243dSDimitry Andric 6600b57cec5SDimitry Andric // Create memcmp() == 0. 661*0fca6ea1SDimitry Andric const auto &DL = Phi.getDataLayout(); 6620b57cec5SDimitry Andric Value *const MemCmpCall = emitMemCmp( 6630b57cec5SDimitry Andric Lhs, Rhs, 664bdd1243dSDimitry Andric ConstantInt::get(Builder.getIntNTy(SizeTBits), TotalSizeBits / 8), 665bdd1243dSDimitry Andric Builder, DL, &TLI); 6660b57cec5SDimitry Andric IsEqual = Builder.CreateICmpEQ( 667bdd1243dSDimitry Andric MemCmpCall, ConstantInt::get(Builder.getIntNTy(IntBits), 0)); 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric BasicBlock *const PhiBB = Phi.getParent(); 6710b57cec5SDimitry Andric // Add a branch to the next basic block in the chain. 6720b57cec5SDimitry Andric if (NextCmpBlock == PhiBB) { 6730b57cec5SDimitry Andric // Continue to phi, passing it the comparison result. 6740b57cec5SDimitry Andric Builder.CreateBr(PhiBB); 6750b57cec5SDimitry Andric Phi.addIncoming(IsEqual, BB); 6760b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}}); 6770b57cec5SDimitry Andric } else { 6780b57cec5SDimitry Andric // Continue to next block if equal, exit to phi else. 6790b57cec5SDimitry Andric Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB); 6800b57cec5SDimitry Andric Phi.addIncoming(ConstantInt::getFalse(Context), BB); 6810b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock}, 6820b57cec5SDimitry Andric {DominatorTree::Insert, BB, PhiBB}}); 6830b57cec5SDimitry Andric } 6840b57cec5SDimitry Andric return BB; 6850b57cec5SDimitry Andric } 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, 6880b57cec5SDimitry Andric DomTreeUpdater &DTU) { 689349cc55cSDimitry Andric assert(atLeastOneMerged() && "simplifying trivial BCECmpChain"); 6900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block " 6910b57cec5SDimitry Andric << EntryBlock_->getName() << "\n"); 6920b57cec5SDimitry Andric 6930b57cec5SDimitry Andric // Effectively merge blocks. We go in the reverse direction from the phi block 6940b57cec5SDimitry Andric // so that the next block is always available to branch to. 695349cc55cSDimitry Andric BasicBlock *InsertBefore = EntryBlock_; 6960b57cec5SDimitry Andric BasicBlock *NextCmpBlock = Phi_.getParent(); 697349cc55cSDimitry Andric for (const auto &Blocks : reverse(MergedBlocks_)) { 698349cc55cSDimitry Andric InsertBefore = NextCmpBlock = mergeComparisons( 699349cc55cSDimitry Andric Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU); 7000b57cec5SDimitry Andric } 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric // Replace the original cmp chain with the new cmp chain by pointing all 7030b57cec5SDimitry Andric // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp 7040b57cec5SDimitry Andric // blocks in the old chain unreachable. 7050b57cec5SDimitry Andric while (!pred_empty(EntryBlock_)) { 7060b57cec5SDimitry Andric BasicBlock* const Pred = *pred_begin(EntryBlock_); 7070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName() 7080b57cec5SDimitry Andric << "\n"); 7090b57cec5SDimitry Andric Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock); 7100b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_}, 7110b57cec5SDimitry Andric {DominatorTree::Insert, Pred, NextCmpBlock}}); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric // If the old cmp chain was the function entry, we need to update the function 7150b57cec5SDimitry Andric // entry. 716fe6060f1SDimitry Andric const bool ChainEntryIsFnEntry = EntryBlock_->isEntryBlock(); 7170b57cec5SDimitry Andric if (ChainEntryIsFnEntry && DTU.hasDomTree()) { 7180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Changing function entry from " 7190b57cec5SDimitry Andric << EntryBlock_->getName() << " to " 7200b57cec5SDimitry Andric << NextCmpBlock->getName() << "\n"); 7210b57cec5SDimitry Andric DTU.getDomTree().setNewRoot(NextCmpBlock); 7220b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}}); 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric EntryBlock_ = nullptr; 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric // Delete merged blocks. This also removes incoming values in phi. 7270b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> DeadBlocks; 728349cc55cSDimitry Andric for (const auto &Blocks : MergedBlocks_) { 729349cc55cSDimitry Andric for (const BCECmpBlock &Block : Blocks) { 730349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName() 731349cc55cSDimitry Andric << "\n"); 732349cc55cSDimitry Andric DeadBlocks.push_back(Block.BB); 733349cc55cSDimitry Andric } 7340b57cec5SDimitry Andric } 7350b57cec5SDimitry Andric DeleteDeadBlocks(DeadBlocks, &DTU); 7360b57cec5SDimitry Andric 737349cc55cSDimitry Andric MergedBlocks_.clear(); 7380b57cec5SDimitry Andric return true; 7390b57cec5SDimitry Andric } 7400b57cec5SDimitry Andric 7410b57cec5SDimitry Andric std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi, 7420b57cec5SDimitry Andric BasicBlock *const LastBlock, 7430b57cec5SDimitry Andric int NumBlocks) { 7440b57cec5SDimitry Andric // Walk up from the last block to find other blocks. 7450b57cec5SDimitry Andric std::vector<BasicBlock *> Blocks(NumBlocks); 7460b57cec5SDimitry Andric assert(LastBlock && "invalid last block"); 7470b57cec5SDimitry Andric BasicBlock *CurBlock = LastBlock; 7480b57cec5SDimitry Andric for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) { 7490b57cec5SDimitry Andric if (CurBlock->hasAddressTaken()) { 7500b57cec5SDimitry Andric // Somebody is jumping to the block through an address, all bets are 7510b57cec5SDimitry Andric // off. 7520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex 7530b57cec5SDimitry Andric << " has its address taken\n"); 7540b57cec5SDimitry Andric return {}; 7550b57cec5SDimitry Andric } 7560b57cec5SDimitry Andric Blocks[BlockIndex] = CurBlock; 7570b57cec5SDimitry Andric auto *SinglePredecessor = CurBlock->getSinglePredecessor(); 7580b57cec5SDimitry Andric if (!SinglePredecessor) { 7590b57cec5SDimitry Andric // The block has two or more predecessors. 7600b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex 7610b57cec5SDimitry Andric << " has two or more predecessors\n"); 7620b57cec5SDimitry Andric return {}; 7630b57cec5SDimitry Andric } 7640b57cec5SDimitry Andric if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) { 7650b57cec5SDimitry Andric // The block does not link back to the phi. 7660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex 7670b57cec5SDimitry Andric << " does not link back to the phi\n"); 7680b57cec5SDimitry Andric return {}; 7690b57cec5SDimitry Andric } 7700b57cec5SDimitry Andric CurBlock = SinglePredecessor; 7710b57cec5SDimitry Andric } 7720b57cec5SDimitry Andric Blocks[0] = CurBlock; 7730b57cec5SDimitry Andric return Blocks; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric 7760b57cec5SDimitry Andric bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA, 7770b57cec5SDimitry Andric DomTreeUpdater &DTU) { 7780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "processPhi()\n"); 7790b57cec5SDimitry Andric if (Phi.getNumIncomingValues() <= 1) { 7800b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n"); 7810b57cec5SDimitry Andric return false; 7820b57cec5SDimitry Andric } 7830b57cec5SDimitry Andric // We are looking for something that has the following structure: 7840b57cec5SDimitry Andric // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+ 7850b57cec5SDimitry Andric // \ \ \ \ 7860b57cec5SDimitry Andric // ne ne ne \ 7870b57cec5SDimitry Andric // \ \ \ v 7880b57cec5SDimitry Andric // +------------+-----------+----------> bb_phi 7890b57cec5SDimitry Andric // 7900b57cec5SDimitry Andric // - The last basic block (bb4 here) must branch unconditionally to bb_phi. 7910b57cec5SDimitry Andric // It's the only block that contributes a non-constant value to the Phi. 7920b57cec5SDimitry Andric // - All other blocks (b1, b2, b3) must have exactly two successors, one of 7930b57cec5SDimitry Andric // them being the phi block. 7940b57cec5SDimitry Andric // - All intermediate blocks (bb2, bb3) must have only one predecessor. 7950b57cec5SDimitry Andric // - Blocks cannot do other work besides the comparison, see doesOtherWork() 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric // The blocks are not necessarily ordered in the phi, so we start from the 7980b57cec5SDimitry Andric // last block and reconstruct the order. 7990b57cec5SDimitry Andric BasicBlock *LastBlock = nullptr; 8000b57cec5SDimitry Andric for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) { 8010b57cec5SDimitry Andric if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue; 8020b57cec5SDimitry Andric if (LastBlock) { 8030b57cec5SDimitry Andric // There are several non-constant values. 8040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: several non-constant values\n"); 8050b57cec5SDimitry Andric return false; 8060b57cec5SDimitry Andric } 8070b57cec5SDimitry Andric if (!isa<ICmpInst>(Phi.getIncomingValue(I)) || 8080b57cec5SDimitry Andric cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() != 8090b57cec5SDimitry Andric Phi.getIncomingBlock(I)) { 8100b57cec5SDimitry Andric // Non-constant incoming value is not from a cmp instruction or not 8110b57cec5SDimitry Andric // produced by the last block. We could end up processing the value 8120b57cec5SDimitry Andric // producing block more than once. 8130b57cec5SDimitry Andric // 8140b57cec5SDimitry Andric // This is an uncommon case, so we bail. 8150b57cec5SDimitry Andric LLVM_DEBUG( 8160b57cec5SDimitry Andric dbgs() 8170b57cec5SDimitry Andric << "skip: non-constant value not from cmp or not from last block.\n"); 8180b57cec5SDimitry Andric return false; 8190b57cec5SDimitry Andric } 8200b57cec5SDimitry Andric LastBlock = Phi.getIncomingBlock(I); 8210b57cec5SDimitry Andric } 8220b57cec5SDimitry Andric if (!LastBlock) { 8230b57cec5SDimitry Andric // There is no non-constant block. 8240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: no non-constant block\n"); 8250b57cec5SDimitry Andric return false; 8260b57cec5SDimitry Andric } 8270b57cec5SDimitry Andric if (LastBlock->getSingleSuccessor() != Phi.getParent()) { 8280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: last block non-phi successor\n"); 8290b57cec5SDimitry Andric return false; 8300b57cec5SDimitry Andric } 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric const auto Blocks = 8330b57cec5SDimitry Andric getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues()); 8340b57cec5SDimitry Andric if (Blocks.empty()) return false; 8350b57cec5SDimitry Andric BCECmpChain CmpChain(Blocks, Phi, AA); 8360b57cec5SDimitry Andric 837349cc55cSDimitry Andric if (!CmpChain.atLeastOneMerged()) { 838349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "skip: nothing merged\n"); 8390b57cec5SDimitry Andric return false; 8400b57cec5SDimitry Andric } 8410b57cec5SDimitry Andric 8420b57cec5SDimitry Andric return CmpChain.simplify(TLI, AA, DTU); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric static bool runImpl(Function &F, const TargetLibraryInfo &TLI, 8460b57cec5SDimitry Andric const TargetTransformInfo &TTI, AliasAnalysis &AA, 8470b57cec5SDimitry Andric DominatorTree *DT) { 8480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n"); 8490b57cec5SDimitry Andric 8500b57cec5SDimitry Andric // We only try merging comparisons if the target wants to expand memcmp later. 8510b57cec5SDimitry Andric // The rationale is to avoid turning small chains into memcmp calls. 8520b57cec5SDimitry Andric if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true)) 8530b57cec5SDimitry Andric return false; 8540b57cec5SDimitry Andric 8550b57cec5SDimitry Andric // If we don't have memcmp avaiable we can't emit calls to it. 8560b57cec5SDimitry Andric if (!TLI.has(LibFunc_memcmp)) 8570b57cec5SDimitry Andric return false; 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr, 8600b57cec5SDimitry Andric DomTreeUpdater::UpdateStrategy::Eager); 8610b57cec5SDimitry Andric 8620b57cec5SDimitry Andric bool MadeChange = false; 8630b57cec5SDimitry Andric 864349cc55cSDimitry Andric for (BasicBlock &BB : llvm::drop_begin(F)) { 8650b57cec5SDimitry Andric // A Phi operation is always first in a basic block. 866349cc55cSDimitry Andric if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin())) 8670b57cec5SDimitry Andric MadeChange |= processPhi(*Phi, TLI, AA, DTU); 8680b57cec5SDimitry Andric } 8690b57cec5SDimitry Andric 8700b57cec5SDimitry Andric return MadeChange; 8710b57cec5SDimitry Andric } 8720b57cec5SDimitry Andric 8730b57cec5SDimitry Andric class MergeICmpsLegacyPass : public FunctionPass { 8740b57cec5SDimitry Andric public: 8750b57cec5SDimitry Andric static char ID; 8760b57cec5SDimitry Andric 8770b57cec5SDimitry Andric MergeICmpsLegacyPass() : FunctionPass(ID) { 8780b57cec5SDimitry Andric initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry()); 8790b57cec5SDimitry Andric } 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric bool runOnFunction(Function &F) override { 8820b57cec5SDimitry Andric if (skipFunction(F)) return false; 8838bcb0991SDimitry Andric const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 8840b57cec5SDimitry Andric const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 8850b57cec5SDimitry Andric // MergeICmps does not need the DominatorTree, but we update it if it's 8860b57cec5SDimitry Andric // already available. 8870b57cec5SDimitry Andric auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 8880b57cec5SDimitry Andric auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 8890b57cec5SDimitry Andric return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr); 8900b57cec5SDimitry Andric } 8910b57cec5SDimitry Andric 8920b57cec5SDimitry Andric private: 8930b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 8940b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 8950b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 8960b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 8970b57cec5SDimitry Andric AU.addPreserved<GlobalsAAWrapperPass>(); 8980b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 8990b57cec5SDimitry Andric } 9000b57cec5SDimitry Andric }; 9010b57cec5SDimitry Andric 9020b57cec5SDimitry Andric } // namespace 9030b57cec5SDimitry Andric 9040b57cec5SDimitry Andric char MergeICmpsLegacyPass::ID = 0; 9050b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps", 9060b57cec5SDimitry Andric "Merge contiguous icmps into a memcmp", false, false) 9070b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 9080b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 9090b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 9100b57cec5SDimitry Andric INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps", 9110b57cec5SDimitry Andric "Merge contiguous icmps into a memcmp", false, false) 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); } 9140b57cec5SDimitry Andric 9150b57cec5SDimitry Andric PreservedAnalyses MergeICmpsPass::run(Function &F, 9160b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 9170b57cec5SDimitry Andric auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 9180b57cec5SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 9190b57cec5SDimitry Andric auto &AA = AM.getResult<AAManager>(F); 9200b57cec5SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); 9210b57cec5SDimitry Andric const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT); 9220b57cec5SDimitry Andric if (!MadeChanges) 9230b57cec5SDimitry Andric return PreservedAnalyses::all(); 9240b57cec5SDimitry Andric PreservedAnalyses PA; 9250b57cec5SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 9260b57cec5SDimitry Andric return PA; 9270b57cec5SDimitry Andric } 928