Lines Matching +full:block +full:- +full:offset
1 //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
16 // - There are less jumps, and therefore less opportunities for mispredictions
17 // and I-cache misses.
18 // - Code size is smaller, both because jumps are removed and because the
19 // encoding of a 2*n byte compare is smaller than that of two n-byte
40 // Which will later be expanded (ExpandMemCmp) as a single 8-bytes icmp.
42 //===----------------------------------------------------------------------===//
72 // that is a constant offset from a base value, e.g. `a` or `o.c` in the example
76 BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
77 : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(std::move(Offset)) {}
89 Offset = std::move(that.Offset);
93 // We want to order BCEAtoms by (Base, Offset). However we cannot use
94 // the pointer values for Base because these are non-deterministic.
99 // | block 1 | | block 2 | | block 3 |
100 // b gets assigned index 0 and a index 1, because b appears as LHS in block 1,
101 // which is before block 2.
102 // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable.
104 return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
110 APInt Offset;
124 return Insertion.first->second;
132 // If this value is a load from a constant offset w.r.t. a base address, and
134 // the offset.
140 if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
141 LLVM_DEBUG(dbgs() << "used outside of block\n");
144 // Do not optimize atomic loads to non-atomic memcmp
145 if (!LoadI->isSimple()) {
149 Value *Addr = LoadI->getOperand(0);
150 if (Addr->getType()->getPointerAddressSpace() != 0) {
151 LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
154 const auto &DL = LoadI->getDataLayout();
155 if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) {
162 APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0);
167 if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
168 LLVM_DEBUG(dbgs() << "used outside of block\n");
171 if (!GEP->accumulateConstantOffset(DL, Offset))
173 Base = GEP->getPointerOperand();
175 return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset);
196 // A basic block with a comparison between two BCE atoms.
197 // The block might do extra work besides the atom comparison, in which case
198 // doesOtherWork() returns true. Under some conditions, the block can be
212 // Returns true if the block does other works besides comparison.
215 // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
216 // instructions in the block.
219 // Return true if this all the relevant instructions in the BCE-cmp-block can
221 // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
222 // block.
225 // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
226 // instructions. Split the old block and move all non-BCE-cmp-insts into the
227 // new parent block.
230 // The basic block where this comparison happens.
234 // The block requires splitting.
236 // Original order of this block in the chain.
246 // block instructions, then bail for now.
247 if (Inst->mayWriteToMemory()) {
251 return (Inst->getParent() != LI->getParent() || !Inst->comesBefore(LI)) &&
257 // Make sure this instruction does not use any of the BCE cmp block
259 return llvm::none_of(Inst->operands(), [&](const Value *Op) {
270 assert(canSinkBCECmpInst(&Inst, AA) && "Split unsplittable block");
271 // This is a non-BCE-cmp-block instruction. And it can be separated
272 // from the BCE-cmp-block instruction.
278 Inst->moveBeforePreserving(*NewParent, NewParent->begin());
294 // effects outside of the basic block.
295 // Note: The GEPs and/or loads are not necessarily in the same block.
309 // - For intermediate blocks, as a branch condition.
310 // - For the final block, as an incoming value for the Phi.
313 if (!CmpI->hasOneUse()) {
317 if (CmpI->getPredicate() != ExpectedPredicate)
322 auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
325 auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
328 const auto &DL = CmpI->getDataLayout();
330 DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()), CmpI);
333 // Visit the given comparison block. If this is a comparison between two valid
336 BasicBlock *const Block,
339 if (Block->empty())
341 auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
347 if (BranchI->isUnconditional()) {
359 if (!Const->isZero())
362 assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
363 BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
364 Cond = BranchI->getCondition();
379 {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI});
380 if (Result->Lhs.GEP)
381 BlockInsts.insert(Result->Lhs.GEP);
382 if (Result->Rhs.GEP)
383 BlockInsts.insert(Result->Rhs.GEP);
384 return BCECmpBlock(std::move(*Result), Block, BlockInsts);
389 LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
392 << Comparison.Lhs().Offset << " and "
394 << Comparison.Rhs().Offset << "\n");
420 // The original entry block (before sorting);
427 First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
428 First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
433 for (const BCECmpBlock &Block : Blocks)
434 MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder);
452 for (BCECmpBlock &Block : Blocks) {
453 if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) {
457 LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into "
458 << LastMergedBlock->back().BB->getName() << "\n");
460 LastMergedBlock->push_back(std::move(Block));
476 assert(!Blocks.empty() && "a chain should have at least one block");
480 for (BasicBlock *const Block : Blocks) {
481 assert(Block && "invalid block");
483 Phi.getIncomingValueForBlock(Block), Block, Phi.getParent(), BaseId);
488 if (Comparison->doesOtherWork()) {
489 LLVM_DEBUG(dbgs() << "block '" << Comparison->BB->getName()
492 // This is the initial block in the chain, in case this block does other
493 // work, we can try to split the block and move the irrelevant
496 // If this is not the initial block in the chain, splitting it wont
505 if (Comparison->canSplit(AA)) {
507 << "Split initial block '" << Comparison->BB->getName()
509 Comparison->RequireSplit = true;
513 << "ignoring initial block '" << Comparison->BB->getName()
532 // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+
536 // +------------+-----------+----------> bb_phi
558 // This is optimized for the common case of no block names.
570 assert(!Comparisons.empty() && "no basic block");
571 // Fast path: only one block, or no names at all.
573 return Comparisons[0].BB->getName();
576 return i + Cmp.BB->getName().size();
581 // Slow path: at least two blocks, at least one block with a name.
583 // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
585 Scratch.reserve(size + Comparisons.size() - 1);
589 append(Comparisons[0].BB->getName());
592 if (!BB->getName().empty()) {
594 append(BB->getName());
602 // Merges the given contiguous comparison blocks into one memcmp block.
609 LLVMContext &Context = NextCmpBlock->getContext();
612 // Create a new cmp block before next cmp block.
615 NextCmpBlock->getParent(), InsertBefore);
620 Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
622 Lhs = FirstCmp.Lhs().LoadI->getPointerOperand();
624 Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
626 Rhs = FirstCmp.Rhs().LoadI->getPointerOperand();
629 LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
630 << BB->getName() << "\n");
632 // If there is one block that requires splitting, we do it now, i.e.
639 ToSplit->split(BB, AA);
645 Instruction *const LhsLoad = Builder.Insert(FirstCmp.Lhs().LoadI->clone());
646 Instruction *const RhsLoad = Builder.Insert(FirstCmp.Rhs().LoadI->clone());
647 LhsLoad->replaceUsesOfWith(LhsLoad->getOperand(0), Lhs);
648 RhsLoad->replaceUsesOfWith(RhsLoad->getOperand(0), Rhs);
671 // Add a branch to the next basic block in the chain.
678 // Continue to next block if equal, exit to phi else.
690 LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
691 << EntryBlock_->getName() << "\n");
693 // Effectively merge blocks. We go in the reverse direction from the phi block
694 // so that the next block is always available to branch to.
707 LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
709 Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
716 const bool ChainEntryIsFnEntry = EntryBlock_->isEntryBlock();
719 << EntryBlock_->getName() << " to "
720 << NextCmpBlock->getName() << "\n");
729 for (const BCECmpBlock &Block : Blocks) {
730 LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName()
732 DeadBlocks.push_back(Block.BB);
744 // Walk up from the last block to find other blocks.
746 assert(LastBlock && "invalid last block");
748 for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
749 if (CurBlock->hasAddressTaken()) {
750 // Somebody is jumping to the block through an address, all bets are
752 LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
757 auto *SinglePredecessor = CurBlock->getSinglePredecessor();
759 // The block has two or more predecessors.
760 LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
765 // The block does not link back to the phi.
766 LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
784 // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+
788 // +------------+-----------+----------> bb_phi
790 // - The last basic block (bb4 here) must branch unconditionally to bb_phi.
791 // It's the only block that contributes a non-constant value to the Phi.
792 // - All other blocks (b1, b2, b3) must have exactly two successors, one of
793 // them being the phi block.
794 // - All intermediate blocks (bb2, bb3) must have only one predecessor.
795 // - Blocks cannot do other work besides the comparison, see doesOtherWork()
798 // last block and reconstruct the order.
803 // There are several non-constant values.
804 LLVM_DEBUG(dbgs() << "skip: several non-constant values\n");
808 cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() !=
810 // Non-constant incoming value is not from a cmp instruction or not
811 // produced by the last block. We could end up processing the value
812 // producing block more than once.
817 << "skip: non-constant value not from cmp or not from last block.\n");
823 // There is no non-constant block.
824 LLVM_DEBUG(dbgs() << "skip: no non-constant block\n");
827 if (LastBlock->getSingleSuccessor() != Phi.getParent()) {
828 LLVM_DEBUG(dbgs() << "skip: last block non-phi successor\n");
865 // A Phi operation is always first in a basic block.
889 return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);