//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memsets.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "memcpyopt"

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");

namespace {

/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
struct MemsetRange {
  // Start/End - A semi-open range that describes the span this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  MaybeAlign Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<Instruction *, 16> TheStores;

  bool isProfitableToUseMemset(const DataLayout &DL) const;
};

} // end anonymous namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found at least 4 stores to merge, or 16 bytes covered, use memset.
  if (TheStores.size() >= 4 || End - Start >= 16)
    return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2)
    return false;

  // If any of the stores are a memset, then it is always good to extend the
  // memset.
  for (Instruction *SI : TheStores)
    if (!isa<StoreInst>(SI))
      return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() == 2)
    return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is almost always
  // useful. However, merging 2 32-bit stores isn't useful on a 32-bit
  // architecture (the memset will be split into 2 32-bit stores anyway) and
  // doing so can pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the largest legal integer
  // size. If so, check to see whether we will end up actually reducing the
  // number of stores used.
  unsigned Bytes = unsigned(End - Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  if (MaxIntSize == 0)
    MaxIntSize = 1;
  unsigned NumPointerStores = Bytes / MaxIntSize;

  // Assume the remaining bytes, if any, are done a byte at a time.
  unsigned NumByteStores = Bytes % MaxIntSize;

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
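  //
  // For illustration (hypothetical 32-bit target, MaxIntSize == 4): three
  // stores (an i16 and two i8s) covering [0, 4) give NumPointerStores == 1
  // and NumByteStores == 0, and 3 > 1, so we merge them into one i32-sized
  // memset. With MaxIntSize == 8 the same range counts as 0 + 4 stores under
  // this model, 3 > 4 fails, and the original stores are kept.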
  return TheStores.size() > NumPointerStores + NumByteStores;
}

namespace {

class MemsetRanges {
  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;

  /// A sorted list of the memset ranges.
  SmallVector<MemsetRange, 8> Ranges;

  const DataLayout &DL;

public:
  MemsetRanges(const DataLayout &DL) : DL(DL) {}

  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;

  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      addStore(OffsetFromFirst, SI);
    else
      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
    TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
    assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
    addRange(OffsetFromFirst, StoreSize.getFixedValue(),
             SI->getPointerOperand(), SI->getAlign(), SI);
  }

  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
  }

  void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
                Instruction *Inst);
};

} // end anonymous namespace

/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            MaybeAlign Alignment, Instruction *Inst) {
  int64_t End = Start + Size;

  range_iterator I = partition_point(
      Ranges, [=](const MemsetRange &O) { return O.End < Start; });

  // We now know that either I == E, in which case we didn't find anything to
  // merge with, or that Start <= I->End. If End < I->Start or I == E, we need
  // to insert a new range. Handle this now.
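  //
  // For example, given existing ranges [0, 2) and [4, 6): adding [2, 3) lands
  // on [0, 2) and grows it to [0, 3); adding [3, 4) afterwards grows that to
  // [0, 4) and then folds [4, 6) into it, leaving the single range [0, 6).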
  if (I == Ranges.end() || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start = Start;
    R.End = End;
    R.StartPtr = Ptr;
    R.Alignment = Alignment;
    R.TheStores.push_back(Inst);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(Inst);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if this store extends the start of the range. In this case, it
  // couldn't possibly cause it to join the prior range, because otherwise we
  // would have stopped on *it*.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = Ptr;
    I->Alignment = Alignment;
  }

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start. Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != Ranges.end() && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}

//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//

// Check whether V may be visible to the caller through unwinding: this returns
// false if V is either not accessible by the caller, or if unwinding cannot
// occur between Start and End.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                         Instruction *End) {
  assert(Start->getParent() == End->getParent() && "Must be in same block");
  // Function can't unwind, so it also can't be visible through unwinding.
  if (Start->getFunction()->doesNotThrow())
    return false;

  // Object is not visible on unwind.
  // TODO: Support RequiresNoCaptureBeforeUnwind case.
  bool RequiresNoCaptureBeforeUnwind;
  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
                           RequiresNoCaptureBeforeUnwind) &&
      !RequiresNoCaptureBeforeUnwind)
    return false;

  // Check whether there are any unwinding instructions in the range.
  return any_of(make_range(Start->getIterator(), End->getIterator()),
                [](const Instruction &I) { return I.mayThrow(); });
}

void MemCpyOptPass::eraseInstruction(Instruction *I) {
  MSSAU->removeMemoryAccess(I);
  I->eraseFromParent();
}

// Check for mod or ref of Loc between Start and End, excluding both
// boundaries. Start and End must be in the same block.
// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
// intrinsic and store it inside SkippedLifetimeStart.
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
                            const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End,
                            Instruction **SkippedLifetimeStart = nullptr) {
  assert(Start->getBlock() == End->getBlock() && "Only local supported");
  for (const MemoryAccess &MA :
       make_range(++Start->getIterator(), End->getIterator())) {
    Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
    if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
          SkippedLifetimeStart && !*SkippedLifetimeStart) {
        *SkippedLifetimeStart = I;
        continue;
      }

      return true;
    }
  }
  return false;
}

// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                           MemoryLocation Loc, const MemoryUseOrDef *Start,
                           const MemoryUseOrDef *End) {
  if (isa<MemoryUse>(End)) {
    // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
    // If Start and End are in the same block, manually check the accesses
    // between them for clobbers; otherwise conservatively assume Loc is
    // clobbered.
    return Start->getBlock() != End->getBlock() ||
           any_of(
               make_range(std::next(Start->getIterator()), End->getIterator()),
               [&AA, Loc](const MemoryAccess &Acc) {
                 if (isa<MemoryUse>(&Acc))
                   return false;
                 Instruction *AccInst =
                     cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
                 return isModSet(AA.getModRefInfo(AccInst, Loc));
               });
  }

  // TODO: Only walk until we hit Start.
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      End->getDefiningAccess(), Loc, AA);
  return !MSSA->dominates(Clobber, Start);
}

// Update AA metadata.
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
  // handled here, but combineMetadata doesn't support them yet.
  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                         LLVMContext::MD_noalias,
                         LLVMContext::MD_invariant_group,
                         LLVMContext::MD_access_group};
  combineMetadata(ReplInst, I, KnownIDs, true);
}

/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) {
  const DataLayout &DL = StartInst->getDataLayout();

  // We can't track scalable types.
  if (auto *SI = dyn_cast<StoreInst>(StartInst))
    if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
      return nullptr;

  // Okay, so we now have a single store that can be splattable. Scan to find
  // all subsequent stores of the same value at offsets from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous
  // blocks are stored.
  MemsetRanges Ranges(DL);

  BasicBlock::iterator BI(StartInst);

  // Keeps track of the last memory use or def before the insertion point for
  // the new memset. The new MemoryDef for the inserted memsets will be
  // inserted after MemInsertPoint.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  for (++BI; !BI->isTerminator(); ++BI) {
    auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
        MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
    if (CurrentAcc)
      MemInsertPoint = CurrentAcc;

    // Calls that only access inaccessible memory do not block merging
    // accessible stores.
    if (auto *CB = dyn_cast<CallBase>(BI)) {
      if (CB->onlyAccessesInaccessibleMemory())
        continue;
    }

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple())
        break;

      Value *StoredVal = NextStore->getValueOperand();

      // Don't convert stores of non-integral pointer types to memsets (which
      // store integers).
      if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
        break;

      // We can't track ranges involving scalable types.
      if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
        break;

      // Check to see if this stored value is the same byte-splattable value.
      Value *StoredByte = isBytewiseValue(StoredVal, DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addStore(*Offset, NextStore);
    } else {
      auto *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addMemSet(*Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in. This is a very common case of course.
  if (Ranges.empty())
    return nullptr;

  // If we had at least one store that could be merged in, add the starting
  // store as well. We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put them right before the first instruction
  // that isn't part of the memset block. This ensures that the memset is
  // dominated by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(&*BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memsets for anything big enough to be worthwhile.
  Instruction *AMemSet = nullptr;
  for (const MemsetRange &Range : Ranges) {
    if (Range.TheStores.size() == 1)
      continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this! Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                                   Range.Alignment);
    AMemSet->mergeDIAssignID(Range.TheStores);

    LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
                                                   : Range.TheStores) dbgs()
                                              << *SI << '\n';
               dbgs() << "With: " << *AMemSet << '\n');
    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    auto *NewDef = cast<MemoryDef>(
        MemInsertPoint->getMemoryInst() == &*BI
            ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
            : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
    MSSAU->insertDef(NewDef, /*RenameUses=*/true);
    MemInsertPoint = NewDef;

    // Zap all the stores.
    for (Instruction *SI : Range.TheStores)
      eraseInstruction(SI);

    ++NumMemSetInfer;
  }

  return AMemSet;
}

// This method tries to lift a store instruction before position P.
// It will lift the store and its arguments, plus anything else that
// may alias with these.
// The method returns true if it was successful.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  // If the store aliases this position, bail out early.
  MemoryLocation StoreLoc = MemoryLocation::get(SI);
  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
    return false;

  // Keep track of the arguments of all instructions we plan to lift
  // so we can make sure to lift them as well if appropriate.
  DenseSet<Instruction *> Args;
  auto AddArg = [&](Value *Arg) {
    auto *I = dyn_cast<Instruction>(Arg);
    if (I && I->getParent() == SI->getParent()) {
      // Cannot hoist user of P above P.
      if (I == P)
        return false;
      Args.insert(I);
    }
    return true;
  };
  if (!AddArg(SI->getPointerOperand()))
    return false;

  // Instructions to lift before P.
  SmallVector<Instruction *, 8> ToLift{SI};

  // Memory locations of lifted instructions.
  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};

  // Lifted calls.
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;

    // Make sure hoisting does not perform a store that was not guaranteed to
    // happen.
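    // (If C may throw or may not return, the store originally executed only
    // when C completed; lifting it above P, and hence above C, would make it
    // execute unconditionally.)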
    if (!isGuaranteedToTransferExecutionToSuccessor(C))
      return false;

    bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));

    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    else if (MayAlias) {
      NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
        return isModOrRefSet(AA->getModRefInfo(C, ML));
      });

      if (!NeedLift)
        NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
          return isModOrRefSet(AA->getModRefInfo(C, Call));
        });
    }

    if (!NeedLift)
      continue;

    if (MayAlias) {
      // Since LI is implicitly moved downwards past the lifted instructions,
      // none of them may modify its source.
      if (isModSet(AA->getModRefInfo(C, LoadLoc)))
        return false;
      else if (const auto *Call = dyn_cast<CallBase>(C)) {
        // If we can't lift this before P, it's game over.
        if (isModOrRefSet(AA->getModRefInfo(P, Call)))
          return false;

        Calls.push_back(Call);
      } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
        // If we can't lift this before P, it's game over.
        auto ML = MemoryLocation::get(C);
        if (isModOrRefSet(AA->getModRefInfo(P, ML)))
          return false;

        MemLocs.push_back(ML);
      } else
        // We don't know how to lift this instruction.
        return false;
    }

    ToLift.push_back(C);
    for (Value *Op : C->operands())
      if (!AddArg(Op))
        return false;
  }

  // Find MSSA insertion point. Normally P will always have a corresponding
  // memory access before which we can insert. However, with non-standard AA
  // pipelines, there may be a mismatch between AA and MSSA, in which case we
  // will scan for a memory access before P. In either case, we know for sure
  // that at least the load will have a memory access.
  // TODO: Simplify this once P is determined by MSSA, in which case the
  // discrepancy can no longer occur.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
    MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  } else {
    const Instruction *ConstP = P;
    for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
                                           ++LI->getReverseIterator())) {
      if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
        MemInsertPoint = MA;
        break;
      }
    }
  }

  // We made it, we need to lift.
  for (auto *I : llvm::reverse(ToLift)) {
    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
    I->moveBefore(P);
    assert(MemInsertPoint && "Must have found insert point");
    if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
      MSSAU->moveAfter(MA, MemInsertPoint);
      MemInsertPoint = MA;
    }
  }

  return true;
}
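
// A load/store pair over an aggregate can be rewritten as a memory transfer.
// Illustrative IR (types and names are hypothetical):
//   %v = load %struct.S, ptr %src
//   store %struct.S %v, ptr %dst
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 <size>, i1 false)
// or an @llvm.memmove call if the source and destination may alias.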
bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) {
  if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
    return false;

  auto *T = LI->getType();
  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
  // the corresponding libcalls are not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (T->isAggregateType() &&
      (EnableMemCpyOptWithoutLibcalls ||
       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
    MemoryLocation LoadLoc = MemoryLocation::get(LI);

    // We use alias analysis to check if an instruction may store to
    // the memory we load from in between the load and the store. If
    // such an instruction is found, we try to promote there instead
    // of at the store position.
    // TODO: Can use MSSA for this.
    Instruction *P = SI;
    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
      if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
        P = &I;
        break;
      }
    }

    // We found an instruction that may write to the loaded memory.
    // We can try to promote at this position instead of the store
    // position if nothing aliases the store memory after this and the store
    // destination is not in the range.
    if (P && P != SI) {
      if (!moveUp(SI, P, LI))
        P = nullptr;
    }

    // If a valid insertion position is found, then we can promote
    // the load/store pair to a memcpy.
    if (P) {
      // If we load from memory that may alias the memory we store to,
      // memmove must be used to preserve semantics. If not, memcpy can
      // be used. Also, if we load from constant memory, memcpy can be used
      // as the constant memory won't be modified.
      bool UseMemMove = false;
      if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
        UseMemMove = true;

      IRBuilder<> Builder(P);
      Value *Size =
          Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
      Instruction *M;
      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(), Size);
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
                        << "\n");

      auto *LastDef =
          cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);
      ++NumMemCpyInstr;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  BatchAAResults BAA(*AA);
  auto GetCall = [&]() -> CallInst * {
    // We defer this expensive clobber walk until the cheap checks
    // have been done on the source inside performCallSlotOptzn.
    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
            MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
      return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
    return nullptr;
  };

  bool Changed = performCallSlotOptzn(
      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
      LI->getPointerOperand()->stripPointerCasts(),
      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
      std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
  if (Changed) {
    eraseInstruction(SI);
    eraseInstruction(LI);
    ++NumMemCpyInstr;
    return true;
  }

  // If this is a load-store pair from a stack slot to a stack slot, we
  // might be able to perform the stack-move optimization just as we do for
  // memcpys from an alloca to an alloca.
  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
                                DL.getTypeStoreSize(T), BAA)) {
        // Avoid invalidating the iterator.
        BBI = SI->getNextNonDebugInstruction()->getIterator();
        eraseInstruction(SI);
        eraseInstruction(LI);
        ++NumMemCpyInstr;
        return true;
      }
    }
  }

  return false;
}

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple())
    return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could propagate the !nontemporal metadata to memset
  // calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getDataLayout();

  Value *StoredVal = SI->getValueOperand();

  // Not all the transforms below are correct for non-integral pointers; bail
  // until we've audited the individual pieces.
  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
    return false;

  // Load-to-store forwarding can be interpreted as a memcpy.
  if (auto *LI = dyn_cast<LoadInst>(StoredVal))
    return processStoreOfLoad(SI, LI, DL, BBI);

  // The following code creates memset intrinsics out of thin air. Don't do
  // this if the corresponding libfunc is not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
    return false;

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset. Right now we only handle memset.

  // Ensure that the value being stored is memset'able a byte at a time, like
  // "0" or "-1" of any width, as well as things like 0xA0A0A0A0 and 0.0.
  auto *V = SI->getOperand(0);
  if (Value *ByteVal = isBytewiseValue(V, DL)) {
    if (Instruction *I =
            tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

    // If we have an aggregate, we try to promote it to memset regardless
    // of opportunity for merging as it can expose optimization opportunities
    // in subsequent passes.
    auto *T = V->getType();
    if (T->isAggregateType()) {
      uint64_t Size = DL.getTypeStoreSize(T);
      IRBuilder<> Builder(SI);
      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
                                     SI->getAlign());
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");

      // The newly inserted memset is immediately overwritten by the original
      // store, so we do not need to rename uses.
      auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

      eraseInstruction(SI);
      NumMemSetInfer++;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  return false;
}

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  // See if there is another memset or store neighboring this memset which
  // allows us to widen out the memset to do a single larger store.
  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
    if (Instruction *I =
            tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}

/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyDestAlign,
                                         BatchAAResults &BAA,
                                         std::function<CallInst *()> GetC) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.
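  //
  // In C terms (illustrative):
  //
  //   struct S tmp; f(&tmp); dest = tmp;   // the assignment is the memcpy
  //
  // can become f(&dest) with the copy dropped, provided dest is writable, is
  // not otherwise accessed between the call and the copy, and cannot be
  // observed early (e.g. through unwinding or from another thread).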

  // We can't optimize scalable types.
  if (cpySize.isScalable())
    return false;

  // Require that src be an alloca. This simplifies the reasoning considerably.
  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpyLoad->getDataLayout();
  TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
  // We can't optimize scalable types.
  if (SrcAllocaSize.isScalable())
    return false;
  uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();

  if (cpySize < srcSize)
    return false;

  CallInst *C = GetC();
  if (!C)
    return false;

  // Lifetime marks shouldn't be operated on.
  if (Function *F = C->getCalledFunction())
    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
      return false;

  if (C->getParent() != cpyStore->getParent()) {
    LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
    return false;
  }

  MemoryLocation DestLoc =
      isa<StoreInst>(cpyStore)
          ? MemoryLocation::get(cpyStore)
          : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));

  // Check that nothing touches the dest of the copy between
  // the call and the store/memcpy.
  Instruction *SkippedLifetimeStart = nullptr;
  if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
                      MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
    return false;
  }

  // If we need to move a lifetime.start above the call, make sure that we can
  // actually do so. If the argument is bitcasted for example, we would have to
  // move the bitcast as well, which we don't handle.
  if (SkippedLifetimeStart) {
    auto *LifetimeArg =
        dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
    if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
        C->comesBefore(LifetimeArg))
      return false;
  }

  // Check that storing to the first srcSize bytes of dest will not cause a
  // trap or data race.
  bool ExplicitlyDereferenceableOnly;
  if (!isWritableObject(getUnderlyingObject(cpyDest),
                        ExplicitlyDereferenceableOnly) ||
      !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
                                          DL, C, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
    return false;
  }

  // Make sure that nothing can observe cpyDest being written early. There are
  // a number of cases to consider:
  //     the transform.
  //  2. C itself may not access cpyDest (prior to the transform). This is
  //     checked further below.
  //  3. If cpyDest is accessible to the caller of this function (potentially
  //     captured and not based on an alloca), we need to ensure that we cannot
  //     unwind between C and cpyStore. This is checked here.
  //  4. If cpyDest is potentially captured, there may be accesses to it from
  //     another thread. In this case, we need to check that cpyStore is
  //     guaranteed to be executed if C is. As it is a non-atomic access, it
  //     renders accesses from other threads undefined.
  //     TODO: This is currently not checked.
  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  Align srcAlign = srcAlloca->getAlign();
  bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy. This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
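  //
  // Putting the pieces together, the shape being matched is (names and the
  // sret signature below are illustrative only):
  //    %src = alloca %T
  //    call void @func(ptr sret(%T) %src)
  //    call void @llvm.memcpy(... %dest, ... %src, ...)
  // and the goal is for @func to write into %dest directly, so that the
  // memcpy (and often %src itself) can be removed.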
  SmallVector<User *, 8> srcUseList(srcAlloca->users());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *G = dyn_cast<GetElementPtrInst>(U);
        G && G->hasAllZeroIndices()) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->isLifetimeStartOrEnd())
        continue;

    if (U != C && U != cpyLoad) {
      LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
      return false;
    }
  }

  // Check whether src is captured by the called function, in which case there
  // may be further indirect uses of src.
  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
    return U->stripPointerCasts() == cpySrc &&
           !C->doesNotCapture(C->getArgOperandNo(&U));
  });

  // If src is captured, then check whether there are any potential uses of
  // src through the captured pointer before the lifetime of src ends, either
  // due to a lifetime.end or a return from the function.
  if (SrcIsCaptured) {
    // Check that dest is not captured before/at the call. We have already
    // checked that src is not captured before it. If either had been captured,
    // then the call might be comparing the argument against the captured dest
    // or src pointer.
    Value *DestObj = getUnderlyingObject(cpyDest);
    if (!isIdentifiedFunctionLocal(DestObj) ||
        PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
                                   /* StoreCaptures */ true, C, DT,
                                   /* IncludeI */ true))
      return false;

    MemoryLocation SrcLoc =
        MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
    for (Instruction &I :
         make_range(++C->getIterator(), C->getParent()->end())) {
      // Lifetime of srcAlloca ends at lifetime.end.
      if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
            II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
            cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
          break;
      }

      // Lifetime of srcAlloca ends at return.
      if (isa<ReturnInst>(&I))
        break;

      // Ignore the direct read of src in the load.
      if (&I == cpyLoad)
        continue;

      // Check whether this instruction may mod/ref src through the captured
      // pointer (we have already handled any direct mod/refs in the loop
      // above). Also bail if we hit a terminator, as we don't want to scan
      // into other blocks.
      if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
        return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  bool NeedMoveGEP = false;
  if (!DT->dominates(cpyDest, C)) {
    // Support moving a constant index GEP before the call.
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    if (GEP && GEP->hasAllConstantIndices() &&
        DT->dominates(GEP->getPointerOperand(), C))
      NeedMoveGEP = true;
    else
      return false;
  }

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest. We rely on AA to figure this out for us.
  MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
  ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
  // If necessary, perform additional analysis.
  if (isModOrRefSet(MR))
    MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
  if (isModOrRefSet(MR))
    return false;

  // We can't create address space casts here because we don't know if they're
  // safe for the target.
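  // (Illustrative note: the two pointers may live in different address
  // spaces, and whether an addrspacecast between them is lossless is
  // inherently target-specific, so we conservatively give up rather than
  // guess.)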
  if (cpySrc->getType() != cpyDest->getType())
    return false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
        cpySrc->getType() != C->getArgOperand(ArgI)->getType())
      return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
      changedArgument = true;
      C->setArgOperand(ArgI, cpyDest);
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  if (NeedMoveGEP) {
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    GEP->moveBefore(C);
  }

  if (SkippedLifetimeStart) {
    SkippedLifetimeStart->moveBefore(C);
    MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
                      MSSA->getMemoryAccess(C));
  }

  combineAAMetadata(C, cpyLoad);
  if (cpyLoad != cpyStore)
    combineAAMetadata(C, cpyStore);

  ++NumCallSlot;
  return true;
}

/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) {
  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction. Just
  // ignore the input and let someone else zap MDep. This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // We can only optimize non-volatile memcpy's.
  if (MDep->isVolatile())
    return false;

  int64_t MForwardOffset = 0;
  const DataLayout &DL = M->getModule()->getDataLayout();
  // We can only transform memcpy's where the dest of one is the source of the
  // other, or where the source of the second is at a known, non-negative
  // offset from the dest of the first.
  if (M->getSource() != MDep->getDest()) {
    std::optional<int64_t> Offset =
        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
    if (!Offset || *Offset < 0)
      return false;
    MForwardOffset = *Offset;
  }

  // The length of the memcpy's must be the same, or the preceding one
  // must be larger than the following one.
  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
    auto *MLen = dyn_cast<ConstantInt>(M->getLength());
    if (!MDepLen || !MLen ||
        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
      return false;
  }

  IRBuilder<> Builder(M);
  auto *CopySource = MDep->getSource();
  Instruction *NewCopySource = nullptr;
  auto CleanupOnRet = llvm::make_scope_exit([&NewCopySource] {
    if (NewCopySource && NewCopySource->use_empty())
      // Safety: It's safe here because we will only allocate more instructions
      // after finishing all BatchAA queries, but we have to be careful if we
      // want to do something like this in another place. Then we'd probably
      // have to delay instruction removal until all transforms on an
      // instruction finished.
      NewCopySource->eraseFromParent();
  });
  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
  // We just need to calculate the actual size of the copy.
  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
      MemoryLocation::getForSource(M).Size);

  // When the forwarding offset is greater than 0, we transform
  //    memcpy(d1 <- s1)
  //    memcpy(d2 <- d1+o)
  // to
  //    memcpy(d2 <- s1+o)
  if (MForwardOffset > 0) {
    // The copy destination of `M` may itself be usable as the source of the
    // copy.
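    // (Sketch: when d2 is exactly s1+o, forwarding would yield
    // memcpy(d2 <- d2); rather than materialize a new GEP we take d2 itself
    // as the source, and the must-alias check further below then deletes M
    // outright.)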
    std::optional<int64_t> MDestOffset =
        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
    if (MDestOffset == MForwardOffset)
      CopySource = M->getDest();
    else {
      CopySource = Builder.CreateInBoundsPtrAdd(
          CopySource, Builder.getInt64(MForwardOffset));
      NewCopySource = dyn_cast<Instruction>(CopySource);
    }
    // We need to update `MCopyLoc` if an offset exists.
    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
    if (CopySourceAlign)
      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
  }

  // Verify that the copied-from memory doesn't change in between the two
  // transfers. For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination "c",
  // then we could still perform the xform by moving M up to the first memcpy.
  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
                     MSSA->getMemoryAccess(M)))
    return false;

  // No need to create `memcpy(a <- a)`.
  if (BAA.isMustAlias(M->getDest(), CopySource)) {
    // Remove the instruction we're replacing.
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap. However, if the source of the first points
  // to constant memory, they won't overlap by definition. Otherwise, we still
  // want to eliminate the intermediate value, but we have to generate a
  // memmove instead of memcpy.
  bool UseMemMove = false;
  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
    // Don't convert llvm.memcpy.inline into memmove because memmove can be
    // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
    // there is no inline version of llvm.memmove)
    if (isa<MemCpyInlineInst>(M))
      return false;
    UseMemMove = true;
  }

  // If all checks passed, then we can transform M.
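  //
  // To recap the rewrite (buffer names and the size 64 are illustrative):
  //    memcpy(b <- a, 64)
  //    memcpy(c <- b, 64)
  // becomes
  //    memcpy(b <- a, 64)
  //    memcpy(c <- a, 64)   ; or a memmove, when UseMemMove is set
  // so the intermediate buffer b becomes dead and DSE can remove the first
  // copy.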
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n'
                    << *M << '\n');

  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  Instruction *NewM;
  if (UseMemMove)
    NewM =
        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
                              CopySourceAlign, M->getLength(), M->isVolatile());
  else if (isa<MemCpyInlineInst>(M)) {
    // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
    // never allowed since that would allow the latter to be lowered as a call
    // to an external function.
    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
                                      CopySource, CopySourceAlign,
                                      M->getLength(), M->isVolatile());
  } else
    NewM =
        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
                             CopySourceAlign, M->getLength(), M->isVolatile());
  NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);

  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  // Remove the instruction we're replacing.
  eraseInstruction(M);
  ++NumMemCpyInstr;
  return true;
}

/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   ...
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   ...
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
/// \endcode
///
/// The memset is sunk to just before the memcpy to ensure that src_size is
/// present when emitting the simplified memset.
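///
/// For instance, with illustrative constant sizes, memset(dst, 0, 100)
/// followed by memcpy(dst, src, 60) becomes memset(dst + 60, 0, 40) followed
/// by memcpy(dst, src, 60): only the 40 trailing bytes still need the memset.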
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) {
  // We can only transform memset/memcpy with the same destination.
  if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
    return false;

  // Don't perform the transform if src_size may be zero. In that case, the
  // transform is essentially a complex no-op and may lead to an infinite
  // loop if BasicAA is smart enough to understand that dst and dst + src_size
  // are still MustAlias after the transform.
  Value *SrcSize = MemCpy->getLength();
  if (!isKnownNonZero(SrcSize,
                      SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
    return false;

  // Check that src and dst of the memcpy aren't the same. While memcpy
  // operands cannot partially overlap, exact equality is allowed.
  if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
    return false;

  // We know that dst up to src_size is not written. We now need to make sure
  // that dst up to dst_size is not accessed. (If we did not move the memset,
  // checking for reads would be sufficient.)
  if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
                      MSSA->getMemoryAccess(MemSet),
                      MSSA->getMemoryAccess(MemCpy)))
    return false;

  // Use the same i8* dest as the memcpy, killing the memset dest if different.
  Value *Dest = MemCpy->getRawDest();
  Value *DestSize = MemSet->getLength();

  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
    return false;

  // If the sizes are the same, simply drop the memset instead of generating
  // a replacement with zero size.
  if (DestSize == SrcSize) {
    eraseInstruction(MemSet);
    return true;
  }

  // By default, create an unaligned memset.
  Align Alignment = Align(1);
  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
  // of the sum.
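  // (Worked example, values purely illustrative: if Dest is 8-byte aligned
  // and SrcSize is 20, then Dest + SrcSize is only guaranteed to be
  // commonAlignment(8, 20) = 4-byte aligned, since 20 is a multiple of 4 but
  // not of 8.)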
  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
                                   MemCpy->getDestAlign().valueOrOne());
  if (DestAlign > 1)
    if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
      Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());

  IRBuilder<> Builder(MemCpy);

  // Preserve the debug location of the old memset for the code emitted here
  // related to the new memset. This is correct according to the rules in
  // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
  // instruction location", given that we move the memset within the basic
  // block.
  assert(MemSet->getParent() == MemCpy->getParent() &&
         "Preserving debug location based on moving memset within BB.");
  Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());

  // If the sizes have different types, zext the smaller one.
  if (DestSize->getType() != SrcSize->getType()) {
    if (DestSize->getType()->getIntegerBitWidth() >
        SrcSize->getType()->getIntegerBitWidth())
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
    else
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  }

  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
  Value *MemsetLen = Builder.CreateSelect(
      Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
  Instruction *NewMemSet =
      Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
                           MemSet->getOperand(1), MemsetLen, Alignment);

  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
         "MemCpy must be a MemoryDef");
  // The new memset is inserted before the memcpy, and it is known that the
  // memcpy's defining access is the memset about to be removed.
  auto *LastDef =
      cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  auto *NewAccess =
      MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  eraseInstruction(MemSet);
  return true;
}

/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
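///
/// For example (illustrative IR): a read from `%p = alloca [32 x i8]` whose
/// clobbering access is the function entry reads undef; likewise a read
/// whose clobbering def is a lifetime.start covering all 32 bytes of %p.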
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
                             MemoryDef *Def, Value *Size) {
  if (MSSA->isLiveOnEntryDef(Def))
    return isa<AllocaInst>(getUnderlyingObject(V));

  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));

      if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
        if (AA.isMustAlias(V, II->getArgOperand(1)) &&
            LTSize->getZExtValue() >= CSize->getZExtValue())
          return true;
      }

      // If the lifetime.start covers a whole alloca (as it almost always
      // does) and we're querying a pointer based on that alloca, then we know
      // the memory is definitely undef, regardless of how exactly we alias.
      // The size also doesn't matter, as an out-of-bounds access would be UB.
      if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
        if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
          const DataLayout &DL = Alloca->getDataLayout();
          if (std::optional<TypeSize> AllocaSize =
                  Alloca->getAllocationSize(DL))
            if (*AllocaSize == LTSize->getValue())
              return true;
        }
      }
    }
  }

  return false;
}

/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// When dst2_size <= dst1_size.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) {
  // Make sure we have the pattern memcpy(..., memset(...), ...), i.e. that we
  // are memsetting and memcpying from the same address. Otherwise it is hard
  // to reason about.
  if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
    return false;

  Value *MemSetSize = MemSet->getLength();
  Value *CopySize = MemCpy->getLength();

  if (MemSetSize != CopySize) {
    // Make sure the memcpy doesn't read any more than what the memset wrote.
    // Don't worry about sizes larger than i64.

    // A known memset size is required.
    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
    if (!CMemSetSize)
      return false;

    // A known memcpy size is also required.
    auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
    if (!CCopySize)
      return false;
    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
      // If the memcpy is larger than the memset, but the memory was undef prior
      // to the memset, we can just ignore the tail. Technically we're only
      // interested in the bytes from MemSetSize..CopySize here, but as we can't
      // easily represent this location, we use the full 0..CopySize range.
      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
      bool CanReduceSize = false;
      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
        if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
          CanReduceSize = true;

      if (!CanReduceSize)
        return false;
      CopySize = MemSetSize;
    }
  }

  IRBuilder<> Builder(MemCpy);
  Instruction *NewM =
      Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                           CopySize, MemCpy->getDestAlign());
  auto *LastDef =
      cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  return true;
}

// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
// move operations in that language.
//
// Once we determine that the optimization is safe to perform, we replace all
// uses of the destination alloca with the source alloca. We also "shrink wrap"
// the lifetime markers of the single merged alloca to before the first use
// and after the last use. Note that the "shrink wrapping" procedure is a safe
// transformation only because we restrict the scope of this optimization to
// allocas that aren't captured.
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
                                          AllocaInst *DestAlloca,
                                          AllocaInst *SrcAlloca, TypeSize Size,
                                          BatchAAResults &BAA) {
  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
                    << *Store << "\n");

  // Make sure the two allocas are in the same address space.
  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
    return false;
  }

  // Check that the copy is full-sized, with a statically known size.
  const DataLayout &DL = DestAlloca->getDataLayout();
  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
  if (!SrcSize || Size != *SrcSize) {
    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
    return false;
  }
  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
  if (!DestSize || Size != *DestSize) {
    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
    return false;
  }

  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
    return false;

  // Check that src and dest are never captured, unescaped allocas. Also
  // find the nearest common dominator and postdominator for all users in
  // order to shrink wrap the lifetimes, and instructions with noalias metadata
  // to remove them.

  SmallVector<Instruction *, 4> LifetimeMarkers;
  SmallSet<Instruction *, 4> NoAliasInstrs;
  bool SrcNotDom = false;

  // Recursively track the users and check whether any mod/ref conflict with
  // the allocas exists.
  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
    bool CanBeNull, CanBeFreed;
    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
  };

  auto CaptureTrackingWithModRef =
      [&](Instruction *AI,
          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
    SmallVector<Instruction *, 8> Worklist;
    Worklist.push_back(AI);
    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
    Worklist.reserve(MaxUsesToExplore);
    SmallSet<const Use *, 20> Visited;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.back();
      Worklist.pop_back();
      for (const Use &U : I->uses()) {
        auto *UI = cast<Instruction>(U.getUser());
        // If any use that isn't dominated by SrcAlloca exists, we move src
        // alloca to the entry before the transformation.
        if (!DT->dominates(SrcAlloca, UI))
          SrcNotDom = true;

        if (Visited.size() >= MaxUsesToExplore) {
          LLVM_DEBUG(
              dbgs()
              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
          return false;
        }
        if (!Visited.insert(&U).second)
          continue;
        switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
        case UseCaptureKind::MAY_CAPTURE:
          return false;
        case UseCaptureKind::PASSTHROUGH:
          // Instructions cannot have non-instruction users.
          Worklist.push_back(UI);
          continue;
        case UseCaptureKind::NO_CAPTURE: {
          if (UI->isLifetimeStartOrEnd()) {
            // We note the locations of these intrinsic calls so that we can
            // delete them later if the optimization succeeds; this is safe
            // because both llvm.lifetime.start and llvm.lifetime.end intrinsics
            // practically fill all the bytes of the alloca with an undefined
            // value, although conceptually marked as alive/dead.
            int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
            if (Size < 0 || Size == DestSize) {
              LifetimeMarkers.push_back(UI);
              continue;
            }
          }
          if (UI->hasMetadata(LLVMContext::MD_noalias))
            NoAliasInstrs.insert(UI);
          if (!ModRefCallback(UI))
            return false;
        }
        }
      }
    }
    return true;
  };

  // Check that dest has no Mod/Ref, from the alloca to the Store, except
  // full-size lifetime intrinsics. Also collect the mod/ref instructions for
  // the reachability check.
  ModRefInfo DestModRef = ModRefInfo::NoModRef;
  MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
  SmallVector<BasicBlock *, 8> ReachabilityWorklist;
  auto DestModRefCallback = [&](Instruction *UI) -> bool {
    // We don't care about the store itself.
    if (UI == Store)
      return true;
    ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
    DestModRef |= Res;
    if (isModOrRefSet(Res)) {
      // Instruction reachability checks.
      // FIXME: adding the Instruction version of isPotentiallyReachableFromMany
      // in lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be
      // helpful.
      if (UI->getParent() == Store->getParent()) {
        // The same block case is special because it's the only time we're
        // looking within a single block to see which instruction comes first.
        // Once we start looking at multiple blocks, the first instruction of
        // the block is reachable, so we only need to determine reachability
        // between whole blocks.
        BasicBlock *BB = UI->getParent();

        // If A comes before B, then B is definitively reachable from A.
        if (UI->comesBefore(Store))
          return false;

        // If the user's parent block is entry, no predecessor exists.
        if (BB->isEntryBlock())
          return true;

        // Otherwise, continue doing the normal per-BB CFG walk.
        ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
      } else {
        ReachabilityWorklist.push_back(UI->getParent());
      }
    }
    return true;
  };

  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
    return false;
  // Bailout if Dest may have any ModRef before Store.
  if (!ReachabilityWorklist.empty() &&
      isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
                                     nullptr, DT, nullptr))
    return false;

  // Check that, from after the Load to the end of the BB,
  //  - if the dest has any Mod, src has no Ref, and
  //  - if the dest has any Ref, src has no Mod except full-sized lifetimes.
  MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));

  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
    // Any ModRef post-dominated by Load doesn't matter, also Load and Store
    // themselves can be ignored.
    if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
      return true;
    ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
    if ((isModSet(DestModRef) && isRefSet(Res)) ||
        (isRefSet(DestModRef) && isModSet(Res)))
      return false;

    return true;
  };

  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
    return false;

  // We can do the transformation. First, move the SrcAlloca to the start of
  // the BB.
  if (SrcNotDom)
    SrcAlloca->moveBefore(*SrcAlloca->getParent(),
                          SrcAlloca->getParent()->getFirstInsertionPt());
  // Align the allocas appropriately.
  SrcAlloca->setAlignment(
      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));

  // Merge the two allocas.
  DestAlloca->replaceAllUsesWith(SrcAlloca);
  eraseInstruction(DestAlloca);

  // Drop metadata on the source alloca.
  SrcAlloca->dropUnknownNonDebugMetadata();

  // TODO: Reconstruct merged lifetime markers.
  // Remove all other lifetime markers, if the original lifetime intrinsics
  // exist.
  if (!LifetimeMarkers.empty()) {
    for (Instruction *I : LifetimeMarkers)
      eraseInstruction(I);
  }

  // As this transformation can cause memory accesses that didn't previously
  // alias to begin to alias one another, we remove !noalias metadata from any
  // uses of either alloca. This is conservative, but more precision doesn't
  // seem worthwhile right now.
  for (Instruction *I : NoAliasInstrs)
    I->setMetadata(LLVMContext::MD_noalias, nullptr);

  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
  NumStackMove++;
  return true;
}

static bool isZeroSize(Value *Size) {
  if (auto *I = dyn_cast<Instruction>(Size))
    if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
      Size = Res;
  // Treat undef/poison size like zero.
  if (auto *C = dyn_cast<Constant>(Size))
    return isa<UndefValue>(C) || C->isNullValue();
  return false;
}

/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile())
    return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  // If the size is zero, remove the memcpy.
  if (isZeroSize(M->getLength())) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
  if (!MA)
    // Degenerate case: memcpy marked as not accessing memory.
    return false;

  // If copying from a constant, try to turn the memcpy into a memset.
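  // (For example, assuming an illustrative global
  //    @g = constant [16 x i8] zeroinitializer
  // a memcpy(%dst <- @g, 16) can become memset(%dst, 0, 16), provided
  // isBytewiseValue shows the initializer is a single repeated byte.)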
  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                           M->getDataLayout())) {
        IRBuilder<> Builder(M);
        Instruction *NewM = Builder.CreateMemSet(
            M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
        auto *LastDef = cast<MemoryDef>(MA);
        auto *NewAccess =
            MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
        MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

        eraseInstruction(M);
        ++NumCpyToSet;
        return true;
      }

  BatchAAResults BAA(*AA);
  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
  MemoryAccess *AnyClobber = MA->getDefiningAccess();
  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
  const MemoryAccess *DestClobber =
      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);

  // Try to turn a partially redundant memset + memcpy into
  // smaller memset + memcpy. We don't need the memcpy size for this.
  // The memcpy must post-dom the memset, so limit this to the same basic
  // block. A non-local generalization is likely not worthwhile.
  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
    if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
      if (DestClobber->getBlock() == M->getParent())
        if (processMemSetMemCpyDependence(M, MDep, BAA))
          return true;

  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
      AnyClobber, MemoryLocation::getForSource(M), BAA);

  // There are five possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started
  //      its lifetime copies undefined data, and we can therefore eliminate
  //      the memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  //   e) elimination of memcpy via stack-move optimization.
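  //
  // As a sketch of case (e), with illustrative IR:
  //    %src = alloca %T
  //    %dst = alloca %T
  //    ; ... %src is initialized ...
  //    call void @llvm.memcpy(... %dst, ... %src, i64 <size of %T>)
  // When the two allocas never conflict, %dst is replaced by %src and the
  // copy goes away; see performStackMoveOptzn() above.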
  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
    if (Instruction *MI = MD->getMemoryInst()) {
      if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
        if (auto *C = dyn_cast<CallInst>(MI)) {
          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                   TypeSize::getFixed(CopySize->getZExtValue()),
                                   M->getDestAlign().valueOrOne(), BAA,
                                   [C]() -> CallInst * { return C; })) {
            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                              << "    call: " << *C << "\n"
                              << "    memcpy: " << *M << "\n");
            eraseInstruction(M);
            ++NumMemCpyInstr;
            return true;
          }
        }
      }
      if (auto *MDep = dyn_cast<MemCpyInst>(MI))
        if (processMemCpyMemCpyDependence(M, MDep, BAA))
          return true;
      if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
        if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
          LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
          eraseInstruction(M);
          ++NumCpyToSet;
          return true;
        }
      }
    }

    if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
      LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
      eraseInstruction(M);
      ++NumMemCpyInstr;
      return true;
    }
  }

  // If the transfer is from a stack slot to a stack slot, then we may be able
  // to perform the stack-move optimization. See the comments in
  // performStackMoveOptzn() for more details.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
  if (!DestAlloca)
    return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
  if (!SrcAlloca)
    return false;
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
  if (Len == nullptr)
    return false;
  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
    // Avoid invalidating the iterator.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
  if (!DestAlloca)
    return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
  if (!SrcAlloca)
    return false;
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
  if (Len == nullptr)
    return false;
  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
    // Avoid invalidating the iterator.
    BBI = M->getNextNonDebugInstruction()->getIterator();
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  return false;
}

/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
  // See if the source could potentially be modified by this memmove.
  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
                    << "\n");

  // If not, then we know we can transform this.
  Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
                     M->getLength()->getType()};
  M->setCalledFunction(
      Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));

  // For MemorySSA nothing really changes (except that memcpy may imply
  // stricter aliasing guarantees).

  ++NumMoveToCpy;
  return true;
}

/// This is called on every byval argument in call sites.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
  const DataLayout &DL = CB.getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CB.getArgOperand(ArgNo);
  Type *ByValTy = CB.getParamByValType(ArgNo);
  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
  // a memcpy, see if we can pass the source of the memcpy byval instead of
  // its destination.
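  // Illustrative shape of the rewrite (hypothetical IR):
  //   call void @llvm.memcpy...(ptr %tmp, ptr %src, i64 N, i1 false)
  //   call void @foo(ptr byval(%T) align A %tmp)
  // becomes, when %src is unmodified in between and suitably aligned:
  //   call void @foo(ptr byval(%T) align A %src)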
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger than or equal to the size of the
  // byval.
  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || !TypeSize::isKnownGE(
                 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
    return false;

  // Get the alignment of the byval. If the call doesn't specify the
  // alignment, then it is some target-specific value that we can't know.
  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
  if (!ByValAlign)
    return false;

  // If it is greater than the memcpy's, then we check to see if we can force
  // the source of the memcpy to the alignment we need. If we fail, we bail
  // out.
  MaybeAlign MemDepAlign = MDep->getSourceAlign();
  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                                 DT) < *ByValAlign)
    return false;

  // The type of the memcpy source must match the byval argument.
  if (MDep->getSource()->getType() != ByValArg->getType())
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy
  // and the byval call.
  //   memcpy(a <- b)
  //   *b = 42;
  //   foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good! Update the byval argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}

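// A sketch of the pattern processImmutArgument() below targets (hypothetical
// IR):
//   %buf = alloca [32 x i8]
//   call void @llvm.memcpy...(ptr %buf, ptr %src, i64 32, i1 false)
//   call void @use(ptr noalias nocapture readonly %buf)
// When the conditions listed below hold, the call can take %src directly,
// leaving the copy (and often the alloca) dead.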
/// This is called on memcpy dest pointer arguments attributed as immutable
/// during the call. Try to use the memcpy source directly if all of the
/// following conditions are satisfied.
/// 1. The memcpy dst is neither modified during the call nor captured by the
///    call. (readonly, noalias and nocapture attributes on the call-site.)
/// 2. The memcpy dst is an alloca with known alignment & size.
///    2-1. The memcpy length == the alloca size, which ensures that the new
///         pointer is dereferenceable for the required range.
///    2-2. The src pointer has alignment >= the alloca alignment or can be
///         enforced so.
/// 3. The memcpy dst and src are not modified between the memcpy and the
///    call. (checked via the MSSA clobber walk.)
/// 4. The memcpy src is not modified during the call. (ModRef check shows no
///    Mod.)
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
  // 1. Ensure the passed argument is immutable during the call.
  if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
        CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
    return false;
  const DataLayout &DL = CB.getDataLayout();
  Value *ImmutArg = CB.getArgOperand(ArgNo);

  // 2. Check that the argument is an alloca.
  // TODO: Even if the arg gets back to branches, we can remove memcpy if all
  // the alloca alignments can be enforced to the source alignment.
  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
  if (!AI)
    return false;

  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
  // Can't handle unknown-size allocas
  // (e.g. variable-length arrays, scalable vectors).
  if (!AllocaSize || AllocaSize->isScalable())
    return false;
  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;

  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
  // a memcpy, check that the arg equals the memcpy dest.
  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
    return false;

  // The type of the memcpy source must match the immut argument.
  if (MDep->getSource()->getType() != ImmutArg->getType())
    return false;

  // 2-1. The length of the memcpy must be equal to the size of the alloca.
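  // (E.g. if the alloca is 32 bytes but the memcpy only wrote 16, forwarding
  // the source would let the callee read 16 bytes the copy never provided.)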
  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  if (!MDepLen || AllocaSize != MDepLen->getValue())
    return false;

  // 2-2. The memcpy source alignment must be larger than or equal to the
  // alloca's alignment. If not, we check to see if we can force the source of
  // the memcpy to the alignment we need. If we fail, we bail out.
  Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
  Align AllocaAlign = AI->getAlign();
  if (MemDepAlign < AllocaAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
                                 DT) < AllocaAlign)
    return false;

  // 3. Verify that the source doesn't change in between the memcpy and
  // the call.
  //   memcpy(a <- b)
  //   *b = 42;
  //   foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  // 4. The memcpy src must not be modified during the call.
  if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good! Update the immut argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}

/// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) {
  bool MadeChange = false;

  // Walk all instructions in the function.
  for (BasicBlock &BB : F) {
    // Skip unreachable blocks. For example, processStore assumes that an
    // instruction in a BB can't be dominated by a later instruction in the
    // same BB (which is a scenario that can happen for an unreachable BB that
    // has itself as a predecessor).
    if (!DT->isReachableFromEntry(&BB))
      continue;

    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
      // Avoid invalidating the iterator.
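      // (BI is advanced past I before any rewrite runs, so erasing I below
      // cannot invalidate the loop's iterator.)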
      Instruction *I = &*BI++;

      bool RepeatInstruction = false;

      if (auto *SI = dyn_cast<StoreInst>(I))
        MadeChange |= processStore(SI, BI);
      else if (auto *M = dyn_cast<MemSetInst>(I))
        RepeatInstruction = processMemSet(M, BI);
      else if (auto *M = dyn_cast<MemCpyInst>(I))
        RepeatInstruction = processMemCpy(M, BI);
      else if (auto *M = dyn_cast<MemMoveInst>(I))
        RepeatInstruction = processMemMove(M);
      else if (auto *CB = dyn_cast<CallBase>(I)) {
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
          if (CB->isByValArgument(i))
            MadeChange |= processByValArgument(*CB, i);
          else if (CB->onlyReadsMemory(i))
            MadeChange |= processImmutArgument(*CB, i);
        }
      }

      // Reprocess the instruction if desired.
      if (RepeatInstruction) {
        if (BI != BB.begin())
          --BI;
        MadeChange = true;
      }
    }
  }

  return MadeChange;
}

PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto *AA = &AM.getResult<AAManager>(F);
  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);

  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
  if (!MadeChange)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                            AliasAnalysis *AA_, AssumptionCache *AC_,
                            DominatorTree *DT_, PostDominatorTree *PDT_,
                            MemorySSA *MSSA_) {
  bool MadeChange = false;
  TLI = TLI_;
  AA = AA_;
  AC = AC_;
  DT = DT_;
  PDT = PDT_;
  MSSA = MSSA_;
  MemorySSAUpdater MSSAU_(MSSA_);
  MSSAU = &MSSAU_;

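  // Iterate to a fixed point: one rewrite (e.g. a memset introduced for a
  // memcpy from a constant global) can expose further opportunities on the
  // next sweep over the function.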
  while (true) {
    if (!iterateOnFunction(F))
      break;
    MadeChange = true;
  }

  if (VerifyMemorySSA)
    MSSA_->verifyMemorySSA();

  return MadeChange;
}
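// Usage note: this pass is registered in the new pass manager as "memcpyopt",
// so it can be exercised in isolation with, e.g.:
//   opt -passes=memcpyopt -S input.ll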