//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

static cl::opt<unsigned> MaxCopiedFromConstantUsers(
    "instcombine-max-copied-from-constant-users", cl::init(300),
    cl::desc("Maximum users to visit in copy from constant transform"),
    cl::Hidden);

namespace llvm {
cl::opt<bool> EnableInferAlignmentPass(
    "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
             "InstCombine"));
}

/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
/// the alloca, and if the source pointer is a pointer to a constant memory
/// location, we can optimize this.
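///
/// For illustration, an assumed IR sketch of the pattern this looks for (names
/// are placeholders, not taken from any test):
///   %A = alloca [16 x i8]
///   call void @llvm.memcpy.p0.p0.i64(ptr %A, ptr @Const, i64 16, i1 false)
///   %v = load i8, ptr %A
/// If @Const is constant memory and %A is never written through otherwise,
/// reads of %A can be served from @Const directly.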
static bool
isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
                               MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the memory location, this lets the caller
  // quickly eliminate the markers.

  using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
  SmallVector<ValueAndIsOffset, 32> Worklist;
  SmallPtrSet<ValueAndIsOffset, 32> Visited;
  Worklist.emplace_back(V, false);
  while (!Worklist.empty()) {
    ValueAndIsOffset Elem = Worklist.pop_back_val();
    if (!Visited.insert(Elem).second)
      continue;
    if (Visited.size() > MaxCopiedFromConstantUsers)
      return false;

    const auto [Value, IsOffset] = Elem;
    for (auto &U : Value->uses()) {
      auto *I = cast<Instruction>(U.getUser());

      if (auto *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads, they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<PHINode, SelectInst>(I)) {
        // We set IsOffset=true, to forbid the memcpy from occurring after the
        // phi: If one of the phi operands is not based on the alloca, we
        // would incorrectly omit a write.
        Worklist.emplace_back(I, true);
        continue;
      }
      if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        Worklist.emplace_back(I, IsOffset);
        continue;
      }
      if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
        // doesn't, it does.
        Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
        continue;
      }

      if (auto *Call = dyn_cast<CallBase>(I)) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (Call->isCallee(&U))
          continue;

        unsigned DataOpNo = Call->getDataOperandNo(&U);
        bool IsArgOperand = Call->isArgOperand(&U);

        // Inalloca arguments are clobbered by the call.
        if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
          return false;

        // If this call site doesn't modify the memory, then we know it is just
        // a load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        bool NoCapture = Call->doesNotCapture(DataOpNo);
        if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
            (Call->onlyReadsMemory(DataOpNo) && NoCapture))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (IsArgOperand && Call->isByValArgument(DataOpNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (I->isLifetimeStartOrEnd()) {
        assert(I->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(I);
        continue;
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is volatile, reject it.
      if (MI->isVolatile())
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1)
        continue;

      // If we have already seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not constant, reject it.
      if (isModSet(AA->getModRefInfoMask(MI->getSource())))
        return false;

      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantMemory - Return the memcpy/memmove if the specified
/// alloca is only modified by such a copy from a constant memory location, and
/// null otherwise. If we can prove this, we can replace any uses of the alloca
/// with uses of the memory location directly.
static MemTransferInst *
isOnlyCopiedFromConstantMemory(AAResults *AA,
                               AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

/// Returns true if V is dereferenceable for the size of the alloca.
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
                                           const DataLayout &DL) {
  if (AI->isArrayAllocation())
    return false;
  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
  if (!AllocaSize)
    return false;
  return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
                                            APInt(64, AllocaSize), DL);
}

static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
                                            AllocaInst &AI, DominatorTree &DT) {
  // Check for array size of 1 (scalar allocation).
  if (!AI.isArrayAllocation()) {
    // i32 1 is the canonical array size for scalar allocations.
    if (AI.getArraySize()->getType()->isIntegerTy(32))
      return nullptr;

    // Canonicalize it.
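    // For example (illustrative sketch), "alloca i32, i64 1" becomes
    // "alloca i32, i32 1" by rewriting the array-size operand.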
    return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
    if (C->getValue().getActiveBits() <= 64) {
      Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
                                                nullptr, AI.getName());
      New->setAlignment(AI.getAlign());
      New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

      replaceAllDbgUsesWith(AI, *New, *New, DT);
      return IC.replaceInstUsesWith(AI, New);
    }
  }

  if (isa<UndefValue>(AI.getArraySize()))
    return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

  // Ensure that the alloca array size argument has type equal to the offset
  // size of the alloca() pointer, which, in the typical case, is intptr_t,
  // so that any casting is exposed early.
  Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
  if (AI.getArraySize()->getType() != PtrIdxTy) {
    Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
    return IC.replaceOperand(AI, 0, V);
  }

  return nullptr;
}

namespace {
// If I and V are pointers in different address spaces, it is not allowed to
// use replaceAllUsesWith since I and V have different types. A
// non-target-specific transformation should not use addrspacecast on V since
// the two address spaces may be disjoint depending on target.
//
// This class chases down uses of the old pointer until reaching the load
// instructions, then replaces the old pointer in the load instructions with
// the new pointer. If during the chasing it sees a bitcast or GEP, it will
// create a new bitcast or GEP with the new pointer and use them in the load
// instruction.
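//
// Illustrative sketch (assumed IR; the address spaces are arbitrary
// placeholders): a chain such as
//   %p = getelementptr i8, ptr addrspace(5) %alloca, i64 4
//   %v = load i8, ptr addrspace(5) %p
// is rewritten by recreating the GEP and load on the new pointer (e.g. a
// constant in another address space) rather than addrspacecast'ing it.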
class PointerReplacer {
public:
  PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
      : IC(IC), Root(Root), FromAS(SrcAS) {}

  bool collectUsers();
  void replacePointer(Value *V);

private:
  bool collectUsersRecursive(Instruction &I);
  void replace(Instruction *I);
  Value *getReplacement(Value *I);
  bool isAvailable(Instruction *I) const {
    return I == &Root || Worklist.contains(I);
  }

  bool isEqualOrValidAddrSpaceCast(const Instruction *I,
                                   unsigned FromAS) const {
    const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
    if (!ASC)
      return false;
    unsigned ToAS = ASC->getDestAddressSpace();
    return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
  }

  SmallPtrSet<Instruction *, 32> ValuesToRevisit;
  SmallSetVector<Instruction *, 4> Worklist;
  MapVector<Value *, Value *> WorkMap;
  InstCombinerImpl &IC;
  Instruction &Root;
  unsigned FromAS;
};
} // end anonymous namespace

bool PointerReplacer::collectUsers() {
  if (!collectUsersRecursive(Root))
    return false;

  // Ensure that all outstanding (indirect) users of Root
  // are inserted into the Worklist. Return false
  // otherwise.
  for (auto *Inst : ValuesToRevisit)
    if (!Worklist.contains(Inst))
      return false;
  return true;
}

bool PointerReplacer::collectUsersRecursive(Instruction &I) {
  for (auto *U : I.users()) {
    auto *Inst = cast<Instruction>(&*U);
    if (auto *Load = dyn_cast<LoadInst>(Inst)) {
      if (Load->isVolatile())
        return false;
      Worklist.insert(Load);
    } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
      // All incoming values must be instructions for replaceability.
      if (any_of(PHI->incoming_values(),
                 [](Value *V) { return !isa<Instruction>(V); }))
        return false;

      // If at least one incoming value of the PHI is not in Worklist,
      // store the PHI for revisiting and skip this iteration of the
      // loop.
      if (any_of(PHI->incoming_values(), [this](Value *V) {
            return !isAvailable(cast<Instruction>(V));
          })) {
        ValuesToRevisit.insert(Inst);
        continue;
      }

      Worklist.insert(PHI);
      if (!collectUsersRecursive(*PHI))
        return false;
    } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
      if (!isa<Instruction>(SI->getTrueValue()) ||
          !isa<Instruction>(SI->getFalseValue()))
        return false;

      if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
          !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
        ValuesToRevisit.insert(Inst);
        continue;
      }
      Worklist.insert(SI);
      if (!collectUsersRecursive(*SI))
        return false;
    } else if (isa<GetElementPtrInst>(Inst)) {
      Worklist.insert(Inst);
      if (!collectUsersRecursive(*Inst))
        return false;
    } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
      if (MI->isVolatile())
        return false;
      Worklist.insert(Inst);
    } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
      Worklist.insert(Inst);
      if (!collectUsersRecursive(*Inst))
        return false;
    } else if (Inst->isLifetimeStartOrEnd()) {
      continue;
    } else {
      // TODO: For arbitrary uses with address space mismatches, should we check
      // if we can introduce a valid addrspacecast?
      LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
      return false;
    }
  }

  return true;
}

Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }

void PointerReplacer::replace(Instruction *I) {
  if (getReplacement(I))
    return;

  if (auto *LT = dyn_cast<LoadInst>(I)) {
    auto *V = getReplacement(LT->getPointerOperand());
    assert(V && "Operand not replaced");
    auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
                              LT->getAlign(), LT->getOrdering(),
                              LT->getSyncScopeID());
    NewI->takeName(LT);
    copyMetadataForLoad(*NewI, *LT);

    IC.InsertNewInstWith(NewI, LT->getIterator());
    IC.replaceInstUsesWith(*LT, NewI);
    WorkMap[LT] = NewI;
  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
    Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
                                   PHI->getName(), PHI->getIterator());
    for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
      NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
                          PHI->getIncomingBlock(I));
    WorkMap[PHI] = NewPHI;
  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *V = getReplacement(GEP->getPointerOperand());
    assert(V && "Operand not replaced");
    SmallVector<Value *, 8> Indices(GEP->indices());
    auto *NewI =
        GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
    IC.InsertNewInstWith(NewI, GEP->getIterator());
    NewI->takeName(GEP);
    NewI->setNoWrapFlags(GEP->getNoWrapFlags());
    WorkMap[GEP] = NewI;
  } else if (auto *SI = dyn_cast<SelectInst>(I)) {
    Value *TrueValue = SI->getTrueValue();
    Value *FalseValue = SI->getFalseValue();
    if (Value *Replacement = getReplacement(TrueValue))
      TrueValue = Replacement;
    if (Value *Replacement = getReplacement(FalseValue))
      FalseValue = Replacement;
    auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
                                     SI->getName(), nullptr, SI);
    IC.InsertNewInstWith(NewSI, SI->getIterator());
    NewSI->takeName(SI);
    WorkMap[SI] = NewSI;
  } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
    auto *DestV = MemCpy->getRawDest();
    auto *SrcV = MemCpy->getRawSource();

    if (auto *DestReplace = getReplacement(DestV))
      DestV = DestReplace;
    if (auto *SrcReplace = getReplacement(SrcV))
      SrcV = SrcReplace;

    IC.Builder.SetInsertPoint(MemCpy);
    auto *NewI = IC.Builder.CreateMemTransferInst(
        MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
        MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
    AAMDNodes AAMD = MemCpy->getAAMetadata();
    if (AAMD)
      NewI->setAAMetadata(AAMD);

    IC.eraseInstFromFunction(*MemCpy);
    WorkMap[MemCpy] = NewI;
  } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
    auto *V = getReplacement(ASC->getPointerOperand());
    assert(V && "Operand not replaced");
    assert(isEqualOrValidAddrSpaceCast(
               ASC, V->getType()->getPointerAddressSpace()) &&
           "Invalid address space cast!");

    if (V->getType()->getPointerAddressSpace() !=
        ASC->getType()->getPointerAddressSpace()) {
      auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
      NewI->takeName(ASC);
      IC.InsertNewInstWith(NewI, ASC->getIterator());
      WorkMap[ASC] = NewI;
    } else {
      WorkMap[ASC] = V;
    }

  } else {
    llvm_unreachable("should never reach here");
  }
}

void PointerReplacer::replacePointer(Value *V) {
#ifndef NDEBUG
  auto *PT = cast<PointerType>(Root.getType());
  auto *NT = cast<PointerType>(V->getType());
  assert(PT != NT && "Invalid usage");
#endif
  WorkMap[&Root] = V;

  for (Instruction *Workitem : Worklist)
    replace(Workitem);
}

Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
    return I;

  if (AI.getAllocatedType()->isSized()) {
    // Move all allocas of zero byte objects to the entry block and merge them
    // together. Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
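    // Illustrative sketch (assumed IR): zero-size allocas such as
    //   %a = alloca {}
    //   %b = alloca [0 x i32]
    // can all be replaced by a single alloca at the top of the entry block.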
    if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation())
        return replaceOperand(AI, 0,
                              ConstantInt::get(AI.getArraySize()->getType(), 1));

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType())
                    .getKnownMinValue() != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
        EntryAI->setAlignment(MaxAlign);
        return replaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  // Check to see if this allocation is only modified by a memcpy/memmove from
  // a memory location whose alignment is equal to or exceeds that of the
  // allocation. If this is the case, we can change all users to use the
  // constant memory location instead. This is commonly produced by the CFE by
  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
  // is only subsequently read.
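  //
  // For example (assumed front-end output; the global name is a placeholder):
  //   %A = alloca [4 x i32]
  //   call void @llvm.memcpy.p0.p0.i64(ptr %A, ptr @__const.foo.A, i64 16, i1 false)
  // followed only by loads of %A, which can then read @__const.foo.A instead.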
  SmallVector<Instruction *, 4> ToDelete;
  if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
    Value *TheSrc = Copy->getSource();
    Align AllocaAlign = AI.getAlign();
    Align SourceAlign = getOrEnforceKnownAlignment(
        TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
    if (AllocaAlign <= SourceAlign &&
        isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
        !isa<Instruction>(TheSrc)) {
      // FIXME: Can we sink instructions without violating dominance when TheSrc
      // is an instruction instead of a constant or argument?
      LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
      LLVM_DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
      unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
      if (AI.getAddressSpace() == SrcAddrSpace) {
        for (Instruction *Delete : ToDelete)
          eraseInstFromFunction(*Delete);

        Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
        eraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }

      PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
      if (PtrReplacer.collectUsers()) {
        for (Instruction *Delete : ToDelete)
          eraseInstFromFunction(*Delete);

        PtrReplacer.replacePointer(TheSrc);
        ++NumGlobalCopies;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}

// Are we allowed to form an atomic load or store of this type?
static bool isSupportedAtomicType(Type *Ty) {
  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
}

/// Helper to combine a load to a new type.
///
/// This just does the work of combining a load to a new type. It handles
/// metadata, etc., and returns the new instruction. The \c NewTy should be the
/// loaded *value* type. This will convert it to a pointer, cast the operand to
/// that pointer type, load it, etc.
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombinerImpl currently is using.
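///
/// For example (an illustrative sketch), given "%v = load i32, ptr %p",
/// calling this with a float NewTy yields "load float, ptr %p" at the same
/// address, with alignment, ordering, and load metadata carried over.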
LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
                                                 const Twine &Suffix) {
  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
         "can't fold an atomic load to requested type");

  LoadInst *NewLoad =
      Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
                                LI.isVolatile(), LI.getName() + Suffix);
  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  copyMetadataForLoad(*NewLoad, LI);
  return NewLoad;
}

/// Combine a store to a new type.
///
/// Returns the newly created store instruction.
static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
                                         Value *V) {
  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
         "can't fold an atomic store of requested type");

  Value *Ptr = SI.getPointerOperand();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);

  StoreInst *NewStore =
      IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a store instruction changing *only its
    // type*. The only metadata it makes sense to drop is metadata which is
    // invalidated when the pointer type changes. This should essentially
    // never be the case in LLVM, but we explicitly switch over only known
    // metadata to be conservatively correct. If you are adding metadata to
    // LLVM which pertains to stores, you almost certainly want to add it
    // here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_DIAssignID:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
    case LLVMContext::MD_access_group:
      // All of these directly apply.
      NewStore->setMetadata(ID, N);
      break;
    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_nonnull:
    case LLVMContext::MD_noundef:
    case LLVMContext::MD_range:
    case LLVMContext::MD_align:
    case LLVMContext::MD_dereferenceable:
    case LLVMContext::MD_dereferenceable_or_null:
      // These don't apply for stores.
      break;
    }
  }

  return NewStore;
}

/// Combine loads to match the type of their uses' value after looking
/// through intervening bitcasts.
///
/// The core idea here is that if the result of a load is used in an operation,
/// we should load the type most conducive to that operation. For example, when
/// loading an integer and converting that immediately to a pointer, we should
/// instead directly load a pointer.
///
/// However, this routine must never change the width of a load or the number of
/// loads as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows loads to more closely model the types
/// of their consuming operations.
///
/// Currently, we also refuse to change the precise type used for an atomic load
/// or a volatile load. This is debatable, and might be reasonable to change
/// later. However, it is risky in case some backend or other part of LLVM is
/// relying on the exact type loaded to select appropriate atomic operations.
static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
                                               LoadInst &Load) {
  // FIXME: We could probably with some care handle both volatile and ordered
  // atomic loads here but it isn't clear that this is important.
  if (!Load.isUnordered())
    return nullptr;

  if (Load.use_empty())
    return nullptr;

  // swifterror values can't be bitcasted.
  if (Load.getPointerOperand()->isSwiftError())
    return nullptr;

  // Fold away bit casts of the loaded value by loading the desired type.
  // Note that we should not do this for pointer<->integer casts,
  // because that would result in type punning.
  if (Load.hasOneUse()) {
    // Don't transform when the type is x86_amx; this keeps the pass that
    // lowers the x86_amx type happy.
    Type *LoadTy = Load.getType();
    if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
      assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
      if (BC->getType()->isX86_AMXTy())
        return nullptr;
    }

    if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
      Type *DestTy = CastUser->getDestTy();
      if (CastUser->isNoopCast(IC.getDataLayout()) &&
          LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
          (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
        LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
        CastUser->replaceAllUsesWith(NewLoad);
        IC.eraseInstFromFunction(*CastUser);
        return &Load;
      }
    }
  }

  // FIXME: We should also canonicalize loads of vectors when their elements are
  // cast to other types.
  return nullptr;
}

static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;

  Type *T = LI.getType();
  if (!T->isAggregateType())
    return nullptr;

  StringRef Name = LI.getName();

  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack.
    auto NumElements = ST->getNumElements();
    if (NumElements == 1) {
      LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
                                                  ".unpack");
      NewLoad->setAAMetadata(LI.getAAMetadata());
      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
                                            PoisonValue::get(T), NewLoad, 0, Name));
    }

    // We don't want to break loads with padding here as we'd lose
    // the knowledge that padding exists for the rest of the pipeline.
    const DataLayout &DL = IC.getDataLayout();
    auto *SL = DL.getStructLayout(ST);

    // Don't unpack structs that contain scalable vectors.
    if (SL->getSizeInBits().isScalable())
      return nullptr;

    if (SL->hasPadding())
      return nullptr;

    const auto Align = LI.getAlign();
    auto *Addr = LI.getPointerOperand();
    auto *IdxType = Type::getInt32Ty(T->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    Value *V = PoisonValue::get(T);
    for (unsigned i = 0; i < NumElements; i++) {
      Value *Indices[2] = {
        Zero,
        ConstantInt::get(IdxType, i),
      };
      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
                                               Name + ".elt");
      auto *L = IC.Builder.CreateAlignedLoad(
          ST->getElementType(i), Ptr,
          commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
      // Propagate AA metadata. It'll still be valid on the narrowed load.
      L->setAAMetadata(LI.getAAMetadata());
      V = IC.Builder.CreateInsertValue(V, L, i);
    }

    V->setName(Name);
    return IC.replaceInstUsesWith(LI, V);
  }

  if (auto *AT = dyn_cast<ArrayType>(T)) {
    auto *ET = AT->getElementType();
    auto NumElements = AT->getNumElements();
    if (NumElements == 1) {
      LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
      NewLoad->setAAMetadata(LI.getAAMetadata());
      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
                                            PoisonValue::get(T), NewLoad, 0, Name));
    }

    // Bail out if the array is too large. Ideally we would like to optimize
    // arrays of arbitrary size but this has a terrible impact on compile time.
    // The threshold here is chosen arbitrarily and may need a little bit of
    // tuning.
    if (NumElements > IC.MaxArraySizeForCombine)
      return nullptr;

    const DataLayout &DL = IC.getDataLayout();
    TypeSize EltSize = DL.getTypeAllocSize(ET);
    const auto Align = LI.getAlign();

    auto *Addr = LI.getPointerOperand();
    auto *IdxType = Type::getInt64Ty(T->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    Value *V = PoisonValue::get(T);
    TypeSize Offset = TypeSize::getZero();
    for (uint64_t i = 0; i < NumElements; i++) {
      Value *Indices[2] = {
        Zero,
        ConstantInt::get(IdxType, i),
      };
      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
                                               Name + ".elt");
      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
      auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
                                             EltAlign, Name + ".unpack");
      L->setAAMetadata(LI.getAAMetadata());
      V = IC.Builder.CreateInsertValue(V, L, i);
      Offset += EltSize;
    }

    V->setName(Name);
    return IC.replaceInstUsesWith(LI, V);
  }

  return nullptr;
}

// If we can determine that all possible objects pointed to by the provided
// pointer value are, not only dereferenceable, but also definitively less than
// or equal to the provided maximum size, then return true (constant global
// values and allocas fall into this category). Otherwise, return false.
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
                                     const DataLayout &DL) {
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist(1, V);

  do {
    Value *P = Worklist.pop_back_val();
    P = P->stripPointerCasts();

    if (!Visited.insert(P).second)
      continue;

    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(P)) {
      append_range(Worklist, PN->incoming_values());
      continue;
    }

    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
      if (GA->isInterposable())
        return false;
      Worklist.push_back(GA->getAliasee());
      continue;
    }

    // If we know how big this object is, and it is less than MaxSize, continue
    // searching. Otherwise, return false.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
      if (!AI->getAllocatedType()->isSized())
        return false;

      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
      if (!CS)
        return false;

      TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
      if (TS.isScalable())
        return false;
      // Make sure that, even if the multiplication below would wrap as a
      // uint64_t, we still do the right thing.
      if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
              .ugt(MaxSize))
        return false;
      continue;
    }

    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
        return false;

      uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
      if (InitSize > MaxSize)
        return false;
      continue;
    }

    return false;
  } while (!Worklist.empty());

  return true;
}

// If we're indexing into an object of a known size, and the outer index is
// not a constant, but having any value but zero would lead to undefined
// behavior, replace it with zero.
//
// For example, if we have:
// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
// ...
// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
// ...
// ... = load i32* %arrayidx, align 4
// Then we know that we can replace %x in the GEP with i64 0.
//
// FIXME: We could fold any GEP index to zero that would cause UB if it were
// not zero. Currently, we only handle the first such index. Also, we could
// search through non-zero constant indices if we kept track of the offsets
// those indices implied.
static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
                                     GetElementPtrInst *GEPI, Instruction *MemI,
                                     unsigned &Idx) {
  if (GEPI->getNumOperands() < 2)
    return false;

  // Find the first non-zero index of a GEP. If all indices are zero, return
  // one past the last index.
  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
    unsigned I = 1;
    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
      Value *V = GEPI->getOperand(I);
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
        if (CI->isZero())
          continue;

      break;
    }

    return I;
  };

  // Skip through initial 'zero' indices, and find the corresponding pointer
  // type. See if the next index is not a constant.
  Idx = FirstNZIdx(GEPI);
  if (Idx == GEPI->getNumOperands())
    return false;
  if (isa<Constant>(GEPI->getOperand(Idx)))
    return false;

  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
  Type *SourceElementType = GEPI->getSourceElementType();
  // Size information about scalable vectors is not available, so we cannot
  // deduce whether indexing at n is undefined behaviour or not. Bail out.
  if (SourceElementType->isScalableTy())
    return false;

  Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
  if (!AllocTy || !AllocTy->isSized())
    return false;
  const DataLayout &DL = IC.getDataLayout();
  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();

  // If there are more indices after the one we might replace with a zero, make
  // sure they're all non-negative. If any of them are negative, the overall
  // address being computed might be before the base address determined by the
  // first non-zero index.
  auto IsAllNonNegative = [&]() {
    for (unsigned i = Idx + 1, e = GEPI->getNumOperands(); i != e; ++i) {
      KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
      if (Known.isNonNegative())
        continue;
      return false;
    }

    return true;
  };

  // FIXME: If the GEP is not inbounds, and there are extra indices after the
  // one we'll replace, those could cause the address computation to wrap
  // (rendering the IsAllNonNegative() check below insufficient). We can do
  // better, ignoring zero indices (and other indices we can prove small
  // enough not to wrap).
  if (Idx + 1 != GEPI->getNumOperands() && !GEPI->isInBounds())
    return false;

  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
  // also known to be dereferenceable.
  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
         IsAllNonNegative();
}

// If we're indexing into an object with a variable index for the memory
// access, but the object has only one element, we can assume that the index
// will always be zero. If we replace the GEP, return it.
965e8d8bef9SDimitry Andric static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, 96606c3fb27SDimitry Andric Instruction &MemI) { 9670b57cec5SDimitry Andric if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) { 9680b57cec5SDimitry Andric unsigned Idx; 9690b57cec5SDimitry Andric if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) { 9700b57cec5SDimitry Andric Instruction *NewGEPI = GEPI->clone(); 9710b57cec5SDimitry Andric NewGEPI->setOperand(Idx, 9720b57cec5SDimitry Andric ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0)); 9735f757f3fSDimitry Andric IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator()); 9740b57cec5SDimitry Andric return NewGEPI; 9750b57cec5SDimitry Andric } 9760b57cec5SDimitry Andric } 9770b57cec5SDimitry Andric 9780b57cec5SDimitry Andric return nullptr; 9790b57cec5SDimitry Andric } 9800b57cec5SDimitry Andric 9810b57cec5SDimitry Andric static bool canSimplifyNullStoreOrGEP(StoreInst &SI) { 9820b57cec5SDimitry Andric if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace())) 9830b57cec5SDimitry Andric return false; 9840b57cec5SDimitry Andric 9850b57cec5SDimitry Andric auto *Ptr = SI.getPointerOperand(); 9860b57cec5SDimitry Andric if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) 9870b57cec5SDimitry Andric Ptr = GEPI->getOperand(0); 9880b57cec5SDimitry Andric return (isa<ConstantPointerNull>(Ptr) && 9890b57cec5SDimitry Andric !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace())); 9900b57cec5SDimitry Andric } 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) { 9930b57cec5SDimitry Andric if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { 9940b57cec5SDimitry Andric const Value *GEPI0 = GEPI->getOperand(0); 9950b57cec5SDimitry Andric if (isa<ConstantPointerNull>(GEPI0) && 9960b57cec5SDimitry Andric !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace())) 9970b57cec5SDimitry Andric return true; 9980b57cec5SDimitry Andric } 9990b57cec5SDimitry Andric if (isa<UndefValue>(Op) || 10000b57cec5SDimitry Andric (isa<ConstantPointerNull>(Op) && 10010b57cec5SDimitry Andric !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace()))) 10020b57cec5SDimitry Andric return true; 10030b57cec5SDimitry Andric return false; 10040b57cec5SDimitry Andric } 10050b57cec5SDimitry Andric 1006e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) { 10070b57cec5SDimitry Andric Value *Op = LI.getOperand(0); 100806c3fb27SDimitry Andric if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI))) 100906c3fb27SDimitry Andric return replaceInstUsesWith(LI, Res); 10100b57cec5SDimitry Andric 10110b57cec5SDimitry Andric // Try to canonicalize the loaded type. 10120b57cec5SDimitry Andric if (Instruction *Res = combineLoadToOperationType(*this, LI)) 10130b57cec5SDimitry Andric return Res; 10140b57cec5SDimitry Andric 10155f757f3fSDimitry Andric if (!EnableInferAlignmentPass) { 10160b57cec5SDimitry Andric // Attempt to improve the alignment. 10175ffd83dbSDimitry Andric Align KnownAlign = getOrEnforceKnownAlignment( 10185ffd83dbSDimitry Andric Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT); 10195ffd83dbSDimitry Andric if (KnownAlign > LI.getAlign()) 10205ffd83dbSDimitry Andric LI.setAlignment(KnownAlign); 10215f757f3fSDimitry Andric } 10220b57cec5SDimitry Andric 10230b57cec5SDimitry Andric // Replace GEP indices if possible. 
102406c3fb27SDimitry Andric   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
102506c3fb27SDimitry Andric     return replaceOperand(LI, 0, NewGEPI);
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric   if (Instruction *Res = unpackLoadToAggregate(*this, LI))
10280b57cec5SDimitry Andric     return Res;
10290b57cec5SDimitry Andric 
10300b57cec5SDimitry Andric   // Do really simple store-to-load forwarding and load CSE, to catch cases
10310b57cec5SDimitry Andric   // where there are several consecutive memory accesses to the same location,
10320b57cec5SDimitry Andric   // separated by a few arithmetic operations.
10330b57cec5SDimitry Andric   bool IsLoadCSE = false;
1034b3edf446SDimitry Andric   BatchAAResults BatchAA(*AA);
1035b3edf446SDimitry Andric   if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
10360b57cec5SDimitry Andric     if (IsLoadCSE)
10370b57cec5SDimitry Andric       combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
10380b57cec5SDimitry Andric 
10390b57cec5SDimitry Andric     return replaceInstUsesWith(
10400b57cec5SDimitry Andric         LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
10410b57cec5SDimitry Andric                                            LI.getName() + ".cast"));
10420b57cec5SDimitry Andric   }
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric   // None of the following transforms are legal for volatile/ordered atomic
10450b57cec5SDimitry Andric   // loads. Most of them do apply for unordered atomics.
10460b57cec5SDimitry Andric   if (!LI.isUnordered()) return nullptr;
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric   // load(gep null, ...) -> unreachable
10490b57cec5SDimitry Andric   // load null/undef -> unreachable
10500b57cec5SDimitry Andric   // TODO: Consider a target hook for valid address spaces for these transforms.
10510b57cec5SDimitry Andric   if (canSimplifyNullLoadOrGEP(LI, Op)) {
105206c3fb27SDimitry Andric     CreateNonTerminatorUnreachable(&LI);
1053fe6060f1SDimitry Andric     return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
10540b57cec5SDimitry Andric   }
10550b57cec5SDimitry Andric 
10560b57cec5SDimitry Andric   if (Op->hasOneUse()) {
10570b57cec5SDimitry Andric     // Change select and PHI nodes to select values instead of addresses: this
10580b57cec5SDimitry Andric     // helps alias analysis out a lot, allows many other simplifications, and
10590b57cec5SDimitry Andric     // exposes redundancy in the code.
10600b57cec5SDimitry Andric     //
10610b57cec5SDimitry Andric     // Note that we cannot do the transformation unless we know that the
10620b57cec5SDimitry Andric     // introduced loads cannot trap! Something like this is valid as long as
10630b57cec5SDimitry Andric     // the condition is always false: load (select bool %C, int* null, int* %G),
10640b57cec5SDimitry Andric     // but it would not be valid if we transformed it to load from null
10650b57cec5SDimitry Andric     // unconditionally.
10660b57cec5SDimitry Andric     //
10670b57cec5SDimitry Andric     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
10680b57cec5SDimitry Andric       // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
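      // A minimal illustration of this transform (hypothetical IR; the value
      // names are invented, and both %a and %b are assumed to be
      // known-dereferenceable):
      //   %p = select i1 %cond, ptr %a, ptr %b
      //   %v = load i32, ptr %p
      // becomes
      //   %a.val = load i32, ptr %a
      //   %b.val = load i32, ptr %b
      //   %v = select i1 %cond, i32 %a.val, i32 %b.val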
10695ffd83dbSDimitry Andric Align Alignment = LI.getAlign(); 10708bcb0991SDimitry Andric if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), 10718bcb0991SDimitry Andric Alignment, DL, SI) && 10728bcb0991SDimitry Andric isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), 10738bcb0991SDimitry Andric Alignment, DL, SI)) { 10740b57cec5SDimitry Andric LoadInst *V1 = 10750b57cec5SDimitry Andric Builder.CreateLoad(LI.getType(), SI->getOperand(1), 10760b57cec5SDimitry Andric SI->getOperand(1)->getName() + ".val"); 10770b57cec5SDimitry Andric LoadInst *V2 = 10780b57cec5SDimitry Andric Builder.CreateLoad(LI.getType(), SI->getOperand(2), 10790b57cec5SDimitry Andric SI->getOperand(2)->getName() + ".val"); 10800b57cec5SDimitry Andric assert(LI.isUnordered() && "implied by above"); 10818bcb0991SDimitry Andric V1->setAlignment(Alignment); 10820b57cec5SDimitry Andric V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); 10838bcb0991SDimitry Andric V2->setAlignment(Alignment); 10840b57cec5SDimitry Andric V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); 10850b57cec5SDimitry Andric return SelectInst::Create(SI->getCondition(), V1, V2); 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric // load (select (cond, null, P)) -> load P 10890b57cec5SDimitry Andric if (isa<ConstantPointerNull>(SI->getOperand(1)) && 10900b57cec5SDimitry Andric !NullPointerIsDefined(SI->getFunction(), 10915ffd83dbSDimitry Andric LI.getPointerAddressSpace())) 10925ffd83dbSDimitry Andric return replaceOperand(LI, 0, SI->getOperand(2)); 10930b57cec5SDimitry Andric 10940b57cec5SDimitry Andric // load (select (cond, P, null)) -> load P 10950b57cec5SDimitry Andric if (isa<ConstantPointerNull>(SI->getOperand(2)) && 10960b57cec5SDimitry Andric !NullPointerIsDefined(SI->getFunction(), 10975ffd83dbSDimitry Andric LI.getPointerAddressSpace())) 10985ffd83dbSDimitry Andric return replaceOperand(LI, 0, SI->getOperand(1)); 10990b57cec5SDimitry Andric } 11000b57cec5SDimitry Andric } 11010b57cec5SDimitry Andric return nullptr; 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric /// Look for extractelement/insertvalue sequence that acts like a bitcast. 11050b57cec5SDimitry Andric /// 11060b57cec5SDimitry Andric /// \returns underlying value that was "cast", or nullptr otherwise. 11070b57cec5SDimitry Andric /// 11080b57cec5SDimitry Andric /// For example, if we have: 11090b57cec5SDimitry Andric /// 11100b57cec5SDimitry Andric /// %E0 = extractelement <2 x double> %U, i32 0 11110b57cec5SDimitry Andric /// %V0 = insertvalue [2 x double] undef, double %E0, 0 11120b57cec5SDimitry Andric /// %E1 = extractelement <2 x double> %U, i32 1 11130b57cec5SDimitry Andric /// %V1 = insertvalue [2 x double] %V0, double %E1, 1 11140b57cec5SDimitry Andric /// 11150b57cec5SDimitry Andric /// and the layout of a <2 x double> is isomorphic to a [2 x double], 11160b57cec5SDimitry Andric /// then %V1 can be safely approximated by a conceptual "bitcast" of %U. 11170b57cec5SDimitry Andric /// Note that %U may contain non-undef values where %V1 has undef. 
1118e8d8bef9SDimitry Andric static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) { 11190b57cec5SDimitry Andric Value *U = nullptr; 11200b57cec5SDimitry Andric while (auto *IV = dyn_cast<InsertValueInst>(V)) { 11210b57cec5SDimitry Andric auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand()); 11220b57cec5SDimitry Andric if (!E) 11230b57cec5SDimitry Andric return nullptr; 11240b57cec5SDimitry Andric auto *W = E->getVectorOperand(); 11250b57cec5SDimitry Andric if (!U) 11260b57cec5SDimitry Andric U = W; 11270b57cec5SDimitry Andric else if (U != W) 11280b57cec5SDimitry Andric return nullptr; 11290b57cec5SDimitry Andric auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand()); 11300b57cec5SDimitry Andric if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin()) 11310b57cec5SDimitry Andric return nullptr; 11320b57cec5SDimitry Andric V = IV->getAggregateOperand(); 11330b57cec5SDimitry Andric } 1134fe6060f1SDimitry Andric if (!match(V, m_Undef()) || !U) 11350b57cec5SDimitry Andric return nullptr; 11360b57cec5SDimitry Andric 11370b57cec5SDimitry Andric auto *UT = cast<VectorType>(U->getType()); 11380b57cec5SDimitry Andric auto *VT = V->getType(); 11390b57cec5SDimitry Andric // Check that types UT and VT are bitwise isomorphic. 11400b57cec5SDimitry Andric const auto &DL = IC.getDataLayout(); 11410b57cec5SDimitry Andric if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) { 11420b57cec5SDimitry Andric return nullptr; 11430b57cec5SDimitry Andric } 11440b57cec5SDimitry Andric if (auto *AT = dyn_cast<ArrayType>(VT)) { 1145e8d8bef9SDimitry Andric if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements()) 11460b57cec5SDimitry Andric return nullptr; 11470b57cec5SDimitry Andric } else { 11480b57cec5SDimitry Andric auto *ST = cast<StructType>(VT); 1149e8d8bef9SDimitry Andric if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements()) 11500b57cec5SDimitry Andric return nullptr; 11510b57cec5SDimitry Andric for (const auto *EltT : ST->elements()) { 11520b57cec5SDimitry Andric if (EltT != UT->getElementType()) 11530b57cec5SDimitry Andric return nullptr; 11540b57cec5SDimitry Andric } 11550b57cec5SDimitry Andric } 11560b57cec5SDimitry Andric return U; 11570b57cec5SDimitry Andric } 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric /// Combine stores to match the type of value being stored. 11600b57cec5SDimitry Andric /// 11610b57cec5SDimitry Andric /// The core idea here is that the memory does not have any intrinsic type and 11620b57cec5SDimitry Andric /// where we can we should match the type of a store to the type of value being 11630b57cec5SDimitry Andric /// stored. 11640b57cec5SDimitry Andric /// 11650b57cec5SDimitry Andric /// However, this routine must never change the width of a store or the number of 11660b57cec5SDimitry Andric /// stores as that would introduce a semantic change. This combine is expected to 11670b57cec5SDimitry Andric /// be a semantic no-op which just allows stores to more closely model the types 11680b57cec5SDimitry Andric /// of their incoming values. 11690b57cec5SDimitry Andric /// 11700b57cec5SDimitry Andric /// Currently, we also refuse to change the precise type used for an atomic or 11710b57cec5SDimitry Andric /// volatile store. This is debatable, and might be reasonable to change later. 
11720b57cec5SDimitry Andric /// However, it is risky in case some backend or other part of LLVM is relying
11730b57cec5SDimitry Andric /// on the exact type stored to select appropriate atomic operations.
11740b57cec5SDimitry Andric ///
11750b57cec5SDimitry Andric /// \returns true if the store was successfully combined away. This indicates
11760b57cec5SDimitry Andric /// the caller must erase the store instruction. We have to let the caller erase
11770b57cec5SDimitry Andric /// the store instruction as otherwise there is no way to signal whether it was
11780b57cec5SDimitry Andric /// combined or not: IC.EraseInstFromFunction returns a null pointer.
1179e8d8bef9SDimitry Andric static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
11800b57cec5SDimitry Andric   // FIXME: We could probably handle both volatile and ordered atomic stores
11810b57cec5SDimitry Andric   // here with some care, but it isn't clear that this is important.
11820b57cec5SDimitry Andric   if (!SI.isUnordered())
11830b57cec5SDimitry Andric     return false;
11840b57cec5SDimitry Andric 
11850b57cec5SDimitry Andric   // swifterror values can't be bitcasted.
11860b57cec5SDimitry Andric   if (SI.getPointerOperand()->isSwiftError())
11870b57cec5SDimitry Andric     return false;
11880b57cec5SDimitry Andric 
11890b57cec5SDimitry Andric   Value *V = SI.getValueOperand();
11900b57cec5SDimitry Andric 
11910b57cec5SDimitry Andric   // Fold away bit casts of the stored value by storing the original type.
11920b57cec5SDimitry Andric   if (auto *BC = dyn_cast<BitCastInst>(V)) {
1193e8d8bef9SDimitry Andric     assert(!BC->getType()->isX86_AMXTy() &&
1194e8d8bef9SDimitry Andric            "store to x86_amx* should not happen!");
11950b57cec5SDimitry Andric     V = BC->getOperand(0);
1196e8d8bef9SDimitry Andric     // Don't transform when the type is x86_amx; this keeps the pass that
1197e8d8bef9SDimitry Andric     // lowers the x86_amx type happy.
1198e8d8bef9SDimitry Andric     if (V->getType()->isX86_AMXTy())
1199e8d8bef9SDimitry Andric       return false;
12000b57cec5SDimitry Andric     if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
12010b57cec5SDimitry Andric       combineStoreToNewValue(IC, SI, V);
12020b57cec5SDimitry Andric       return true;
12030b57cec5SDimitry Andric     }
12040b57cec5SDimitry Andric   }
12050b57cec5SDimitry Andric 
12060b57cec5SDimitry Andric   if (Value *U = likeBitCastFromVector(IC, V))
12070b57cec5SDimitry Andric     if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
12080b57cec5SDimitry Andric       combineStoreToNewValue(IC, SI, U);
12090b57cec5SDimitry Andric       return true;
12100b57cec5SDimitry Andric     }
12110b57cec5SDimitry Andric 
12120b57cec5SDimitry Andric   // FIXME: We should also canonicalize stores of vectors when their elements
12130b57cec5SDimitry Andric   // are cast to other types.
12140b57cec5SDimitry Andric   return false;
12150b57cec5SDimitry Andric }
12160b57cec5SDimitry Andric 
1217e8d8bef9SDimitry Andric static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
12180b57cec5SDimitry Andric   // FIXME: We could probably handle both volatile and atomic stores here with
12190b57cec5SDimitry Andric   // some care, but it isn't clear that this is important.
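  // A rough sketch of the rewrite this function performs (hypothetical IR for
  // a padding-free two-element struct; the value names are invented):
  //   store { i32, i32 } %agg, ptr %p
  // becomes
  //   %agg.elt = extractvalue { i32, i32 } %agg, 0
  //   %p.repack = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 0
  //   store i32 %agg.elt, ptr %p.repack
  //   %agg.elt1 = extractvalue { i32, i32 } %agg, 1
  //   %p.repack1 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 1
  //   store i32 %agg.elt1, ptr %p.repack1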
12200b57cec5SDimitry Andric   if (!SI.isSimple())
12210b57cec5SDimitry Andric     return false;
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric   Value *V = SI.getValueOperand();
12240b57cec5SDimitry Andric   Type *T = V->getType();
12250b57cec5SDimitry Andric 
12260b57cec5SDimitry Andric   if (!T->isAggregateType())
12270b57cec5SDimitry Andric     return false;
12280b57cec5SDimitry Andric 
12290b57cec5SDimitry Andric   if (auto *ST = dyn_cast<StructType>(T)) {
12300b57cec5SDimitry Andric     // If the struct has only one element, we unpack it.
12310b57cec5SDimitry Andric     unsigned Count = ST->getNumElements();
12320b57cec5SDimitry Andric     if (Count == 1) {
12330b57cec5SDimitry Andric       V = IC.Builder.CreateExtractValue(V, 0);
12340b57cec5SDimitry Andric       combineStoreToNewValue(IC, SI, V);
12350b57cec5SDimitry Andric       return true;
12360b57cec5SDimitry Andric     }
12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric     // We don't want to break up stores with padding here, as we'd lose the
12390b57cec5SDimitry Andric     // knowledge that padding exists for the rest of the pipeline.
12400b57cec5SDimitry Andric     const DataLayout &DL = IC.getDataLayout();
12410b57cec5SDimitry Andric     auto *SL = DL.getStructLayout(ST);
124206c3fb27SDimitry Andric 
124306c3fb27SDimitry Andric     // Don't unpack a structure that contains a scalable vector.
124406c3fb27SDimitry Andric     if (SL->getSizeInBits().isScalable())
124506c3fb27SDimitry Andric       return false;
124606c3fb27SDimitry Andric 
12470b57cec5SDimitry Andric     if (SL->hasPadding())
12480b57cec5SDimitry Andric       return false;
12490b57cec5SDimitry Andric 
12505ffd83dbSDimitry Andric     const auto Align = SI.getAlign();
12510b57cec5SDimitry Andric 
12520b57cec5SDimitry Andric     SmallString<16> EltName = V->getName();
12530b57cec5SDimitry Andric     EltName += ".elt";
12540b57cec5SDimitry Andric     auto *Addr = SI.getPointerOperand();
12550b57cec5SDimitry Andric     SmallString<16> AddrName = Addr->getName();
12560b57cec5SDimitry Andric     AddrName += ".repack";
12570b57cec5SDimitry Andric 
12580b57cec5SDimitry Andric     auto *IdxType = Type::getInt32Ty(ST->getContext());
12590b57cec5SDimitry Andric     auto *Zero = ConstantInt::get(IdxType, 0);
12600b57cec5SDimitry Andric     for (unsigned i = 0; i < Count; i++) {
12610b57cec5SDimitry Andric       Value *Indices[2] = {
12620b57cec5SDimitry Andric         Zero,
12630b57cec5SDimitry Andric         ConstantInt::get(IdxType, i),
12640b57cec5SDimitry Andric       };
1265bdd1243dSDimitry Andric       auto *Ptr =
1266bdd1243dSDimitry Andric           IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
12670b57cec5SDimitry Andric       auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
12685ffd83dbSDimitry Andric       auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
12690b57cec5SDimitry Andric       llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1270349cc55cSDimitry Andric       NS->setAAMetadata(SI.getAAMetadata());
12710b57cec5SDimitry Andric     }
12720b57cec5SDimitry Andric 
12730b57cec5SDimitry Andric     return true;
12740b57cec5SDimitry Andric   }
12750b57cec5SDimitry Andric 
12760b57cec5SDimitry Andric   if (auto *AT = dyn_cast<ArrayType>(T)) {
12770b57cec5SDimitry Andric     // If the array has only one element, we unpack it.
12780b57cec5SDimitry Andric auto NumElements = AT->getNumElements(); 12790b57cec5SDimitry Andric if (NumElements == 1) { 12800b57cec5SDimitry Andric V = IC.Builder.CreateExtractValue(V, 0); 12810b57cec5SDimitry Andric combineStoreToNewValue(IC, SI, V); 12820b57cec5SDimitry Andric return true; 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric // Bail out if the array is too large. Ideally we would like to optimize 12860b57cec5SDimitry Andric // arrays of arbitrary size but this has a terrible impact on compile time. 12870b57cec5SDimitry Andric // The threshold here is chosen arbitrarily, maybe needs a little bit of 12880b57cec5SDimitry Andric // tuning. 12890b57cec5SDimitry Andric if (NumElements > IC.MaxArraySizeForCombine) 12900b57cec5SDimitry Andric return false; 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric const DataLayout &DL = IC.getDataLayout(); 12935f757f3fSDimitry Andric TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType()); 12945ffd83dbSDimitry Andric const auto Align = SI.getAlign(); 12950b57cec5SDimitry Andric 12960b57cec5SDimitry Andric SmallString<16> EltName = V->getName(); 12970b57cec5SDimitry Andric EltName += ".elt"; 12980b57cec5SDimitry Andric auto *Addr = SI.getPointerOperand(); 12990b57cec5SDimitry Andric SmallString<16> AddrName = Addr->getName(); 13000b57cec5SDimitry Andric AddrName += ".repack"; 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric auto *IdxType = Type::getInt64Ty(T->getContext()); 13030b57cec5SDimitry Andric auto *Zero = ConstantInt::get(IdxType, 0); 13040b57cec5SDimitry Andric 1305*0fca6ea1SDimitry Andric TypeSize Offset = TypeSize::getZero(); 13060b57cec5SDimitry Andric for (uint64_t i = 0; i < NumElements; i++) { 13070b57cec5SDimitry Andric Value *Indices[2] = { 13080b57cec5SDimitry Andric Zero, 13090b57cec5SDimitry Andric ConstantInt::get(IdxType, i), 13100b57cec5SDimitry Andric }; 1311bdd1243dSDimitry Andric auto *Ptr = 1312bdd1243dSDimitry Andric IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName); 13130b57cec5SDimitry Andric auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); 13145f757f3fSDimitry Andric auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue()); 13150b57cec5SDimitry Andric Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); 1316349cc55cSDimitry Andric NS->setAAMetadata(SI.getAAMetadata()); 13170b57cec5SDimitry Andric Offset += EltSize; 13180b57cec5SDimitry Andric } 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric return true; 13210b57cec5SDimitry Andric } 13220b57cec5SDimitry Andric 13230b57cec5SDimitry Andric return false; 13240b57cec5SDimitry Andric } 13250b57cec5SDimitry Andric 13260b57cec5SDimitry Andric /// equivalentAddressValues - Test if A and B will obviously have the same 13270b57cec5SDimitry Andric /// value. This includes recognizing that %t0 and %t1 will have the same 13280b57cec5SDimitry Andric /// value in code like this: 13290b57cec5SDimitry Andric /// %t0 = getelementptr \@a, 0, 3 13300b57cec5SDimitry Andric /// store i32 0, i32* %t0 13310b57cec5SDimitry Andric /// %t1 = getelementptr \@a, 0, 3 13320b57cec5SDimitry Andric /// %t2 = load i32* %t1 13330b57cec5SDimitry Andric /// 13340b57cec5SDimitry Andric static bool equivalentAddressValues(Value *A, Value *B) { 13350b57cec5SDimitry Andric // Test if the values are trivially equivalent. 
13360b57cec5SDimitry Andric   if (A == B) return true;
13370b57cec5SDimitry Andric 
13380b57cec5SDimitry Andric   // Test if the values come from identical arithmetic instructions.
13390b57cec5SDimitry Andric   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
13400b57cec5SDimitry Andric   // it's only used to compare two uses within the same basic block, which
13410b57cec5SDimitry Andric   // means that they'll always either have the same value or one of them
13420b57cec5SDimitry Andric   // will have an undefined value.
13430b57cec5SDimitry Andric   if (isa<BinaryOperator>(A) ||
13440b57cec5SDimitry Andric       isa<CastInst>(A) ||
13450b57cec5SDimitry Andric       isa<PHINode>(A) ||
13460b57cec5SDimitry Andric       isa<GetElementPtrInst>(A))
13470b57cec5SDimitry Andric     if (Instruction *BI = dyn_cast<Instruction>(B))
13480b57cec5SDimitry Andric       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
13490b57cec5SDimitry Andric         return true;
13500b57cec5SDimitry Andric 
13510b57cec5SDimitry Andric   // Otherwise they may not be equivalent.
13520b57cec5SDimitry Andric   return false;
13530b57cec5SDimitry Andric }
13540b57cec5SDimitry Andric 
1355e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
13560b57cec5SDimitry Andric   Value *Val = SI.getOperand(0);
13570b57cec5SDimitry Andric   Value *Ptr = SI.getOperand(1);
13580b57cec5SDimitry Andric 
13590b57cec5SDimitry Andric   // Try to canonicalize the stored type.
13600b57cec5SDimitry Andric   if (combineStoreToValueType(*this, SI))
13610b57cec5SDimitry Andric     return eraseInstFromFunction(SI);
13620b57cec5SDimitry Andric 
13635f757f3fSDimitry Andric   if (!EnableInferAlignmentPass) {
13640b57cec5SDimitry Andric     // Attempt to improve the alignment.
13655ffd83dbSDimitry Andric     const Align KnownAlign = getOrEnforceKnownAlignment(
13665ffd83dbSDimitry Andric         Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
13675ffd83dbSDimitry Andric     if (KnownAlign > SI.getAlign())
13680b57cec5SDimitry Andric       SI.setAlignment(KnownAlign);
13695f757f3fSDimitry Andric   }
13700b57cec5SDimitry Andric 
13710b57cec5SDimitry Andric   // Try to unpack an aggregate store.
13720b57cec5SDimitry Andric   if (unpackStoreToAggregate(*this, SI))
13730b57cec5SDimitry Andric     return eraseInstFromFunction(SI);
13740b57cec5SDimitry Andric 
13750b57cec5SDimitry Andric   // Replace GEP indices if possible.
137606c3fb27SDimitry Andric   if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
137706c3fb27SDimitry Andric     return replaceOperand(SI, 1, NewGEPI);
13780b57cec5SDimitry Andric 
13790b57cec5SDimitry Andric   // Don't hack volatile/ordered stores.
13800b57cec5SDimitry Andric   // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
13810b57cec5SDimitry Andric   if (!SI.isUnordered()) return nullptr;
13820b57cec5SDimitry Andric 
13830b57cec5SDimitry Andric   // If the RHS is an alloca with a single use, zapify the store, making the
13840b57cec5SDimitry Andric   // alloca dead.
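  // For example (hypothetical IR), if this store is the only use of %buf:
  //   %buf = alloca i32
  //   store i32 %v, ptr %buf
  // then the store is dead, and removing it also makes the alloca dead.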
13850b57cec5SDimitry Andric if (Ptr->hasOneUse()) { 13860b57cec5SDimitry Andric if (isa<AllocaInst>(Ptr)) 13870b57cec5SDimitry Andric return eraseInstFromFunction(SI); 13880b57cec5SDimitry Andric if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { 13890b57cec5SDimitry Andric if (isa<AllocaInst>(GEP->getOperand(0))) { 13900b57cec5SDimitry Andric if (GEP->getOperand(0)->hasOneUse()) 13910b57cec5SDimitry Andric return eraseInstFromFunction(SI); 13920b57cec5SDimitry Andric } 13930b57cec5SDimitry Andric } 13940b57cec5SDimitry Andric } 13950b57cec5SDimitry Andric 13960b57cec5SDimitry Andric // If we have a store to a location which is known constant, we can conclude 13970b57cec5SDimitry Andric // that the store must be storing the constant value (else the memory 13980b57cec5SDimitry Andric // wouldn't be constant), and this must be a noop. 1399bdd1243dSDimitry Andric if (!isModSet(AA->getModRefInfoMask(Ptr))) 14000b57cec5SDimitry Andric return eraseInstFromFunction(SI); 14010b57cec5SDimitry Andric 14020b57cec5SDimitry Andric // Do really simple DSE, to catch cases where there are several consecutive 14030b57cec5SDimitry Andric // stores to the same location, separated by a few arithmetic operations. This 14040b57cec5SDimitry Andric // situation often occurs with bitfield accesses. 14050b57cec5SDimitry Andric BasicBlock::iterator BBI(SI); 14060b57cec5SDimitry Andric for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; 14070b57cec5SDimitry Andric --ScanInsts) { 14080b57cec5SDimitry Andric --BBI; 14090b57cec5SDimitry Andric // Don't count debug info directives, lest they affect codegen, 14100b57cec5SDimitry Andric // and we skip pointer-to-pointer bitcasts, which are NOPs. 14115f757f3fSDimitry Andric if (BBI->isDebugOrPseudoInst()) { 14120b57cec5SDimitry Andric ScanInsts++; 14130b57cec5SDimitry Andric continue; 14140b57cec5SDimitry Andric } 14150b57cec5SDimitry Andric 14160b57cec5SDimitry Andric if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { 14170b57cec5SDimitry Andric // Prev store isn't volatile, and stores to the same location? 141881ad6265SDimitry Andric if (PrevSI->isUnordered() && 141981ad6265SDimitry Andric equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) && 142081ad6265SDimitry Andric PrevSI->getValueOperand()->getType() == 142181ad6265SDimitry Andric SI.getValueOperand()->getType()) { 14220b57cec5SDimitry Andric ++NumDeadStore; 142355e4f9d5SDimitry Andric // Manually add back the original store to the worklist now, so it will 142455e4f9d5SDimitry Andric // be processed after the operands of the removed store, as this may 142555e4f9d5SDimitry Andric // expose additional DSE opportunities. 14265ffd83dbSDimitry Andric Worklist.push(&SI); 14270b57cec5SDimitry Andric eraseInstFromFunction(*PrevSI); 142855e4f9d5SDimitry Andric return nullptr; 14290b57cec5SDimitry Andric } 14300b57cec5SDimitry Andric break; 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric // If this is a load, we have to stop. However, if the loaded value is from 14340b57cec5SDimitry Andric // the pointer we're loading and is producing the pointer we're storing, 14350b57cec5SDimitry Andric // then *this* store is dead (X = load P; store X -> P). 
14360b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { 14370b57cec5SDimitry Andric if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) { 14380b57cec5SDimitry Andric assert(SI.isUnordered() && "can't eliminate ordering operation"); 14390b57cec5SDimitry Andric return eraseInstFromFunction(SI); 14400b57cec5SDimitry Andric } 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andric // Otherwise, this is a load from some other location. Stores before it 14430b57cec5SDimitry Andric // may not be dead. 14440b57cec5SDimitry Andric break; 14450b57cec5SDimitry Andric } 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric // Don't skip over loads, throws or things that can modify memory. 14480b57cec5SDimitry Andric if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow()) 14490b57cec5SDimitry Andric break; 14500b57cec5SDimitry Andric } 14510b57cec5SDimitry Andric 14520b57cec5SDimitry Andric // store X, null -> turns into 'unreachable' in SimplifyCFG 14530b57cec5SDimitry Andric // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG 14540b57cec5SDimitry Andric if (canSimplifyNullStoreOrGEP(SI)) { 1455fe6060f1SDimitry Andric if (!isa<PoisonValue>(Val)) 1456fe6060f1SDimitry Andric return replaceOperand(SI, 0, PoisonValue::get(Val->getType())); 14570b57cec5SDimitry Andric return nullptr; // Do not modify these! 14580b57cec5SDimitry Andric } 14590b57cec5SDimitry Andric 146006c3fb27SDimitry Andric // This is a non-terminator unreachable marker. Don't remove it. 146106c3fb27SDimitry Andric if (isa<UndefValue>(Ptr)) { 14625f757f3fSDimitry Andric // Remove guaranteed-to-transfer instructions before the marker. 14635f757f3fSDimitry Andric if (removeInstructionsBeforeUnreachable(SI)) 146406c3fb27SDimitry Andric return &SI; 14655f757f3fSDimitry Andric 14665f757f3fSDimitry Andric // Remove all instructions after the marker and handle dead blocks this 14675f757f3fSDimitry Andric // implies. 14685f757f3fSDimitry Andric SmallVector<BasicBlock *> Worklist; 14695f757f3fSDimitry Andric handleUnreachableFrom(SI.getNextNode(), Worklist); 14705f757f3fSDimitry Andric handlePotentiallyDeadBlocks(Worklist); 147106c3fb27SDimitry Andric return nullptr; 147206c3fb27SDimitry Andric } 147306c3fb27SDimitry Andric 14740b57cec5SDimitry Andric // store undef, Ptr -> noop 147581ad6265SDimitry Andric // FIXME: This is technically incorrect because it might overwrite a poison 147681ad6265SDimitry Andric // value. Change to PoisonValue once #52930 is resolved. 14770b57cec5SDimitry Andric if (isa<UndefValue>(Val)) 14780b57cec5SDimitry Andric return eraseInstFromFunction(SI); 14790b57cec5SDimitry Andric 14800b57cec5SDimitry Andric return nullptr; 14810b57cec5SDimitry Andric } 14820b57cec5SDimitry Andric 14830b57cec5SDimitry Andric /// Try to transform: 14840b57cec5SDimitry Andric /// if () { *P = v1; } else { *P = v2 } 14850b57cec5SDimitry Andric /// or: 14860b57cec5SDimitry Andric /// *P = v1; if () { *P = v2; } 14870b57cec5SDimitry Andric /// into a phi node with a store in the successor. 1488e8d8bef9SDimitry Andric bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { 14895ffd83dbSDimitry Andric if (!SI.isUnordered()) 14905ffd83dbSDimitry Andric return false; // This code has not been audited for volatile/ordered case. 14910b57cec5SDimitry Andric 14920b57cec5SDimitry Andric // Check if the successor block has exactly 2 incoming edges. 
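  // The two shapes handled here, sketched as hypothetical IR (block and value
  // names are invented):
  //   "if/then/else" (diamond):
  //     StoreBB:  store i32 %v1, ptr %p
  //               br label %DestBB
  //     OtherBB:  store i32 %v2, ptr %p
  //               br label %DestBB
  //   "if/then" (triangle):
  //     OtherBB:  store i32 %v1, ptr %p
  //               br i1 %c, label %StoreBB, label %DestBB
  //     StoreBB:  store i32 %v2, ptr %p
  //               br label %DestBB
  // In both cases DestBB has exactly two predecessors, so the two stores can
  // be replaced by a phi of the stored values and a single store in DestBB.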
14930b57cec5SDimitry Andric BasicBlock *StoreBB = SI.getParent(); 14940b57cec5SDimitry Andric BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); 14950b57cec5SDimitry Andric if (!DestBB->hasNPredecessors(2)) 14960b57cec5SDimitry Andric return false; 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric // Capture the other block (the block that doesn't contain our store). 14990b57cec5SDimitry Andric pred_iterator PredIter = pred_begin(DestBB); 15000b57cec5SDimitry Andric if (*PredIter == StoreBB) 15010b57cec5SDimitry Andric ++PredIter; 15020b57cec5SDimitry Andric BasicBlock *OtherBB = *PredIter; 15030b57cec5SDimitry Andric 15040b57cec5SDimitry Andric // Bail out if all of the relevant blocks aren't distinct. This can happen, 15050b57cec5SDimitry Andric // for example, if SI is in an infinite loop. 15060b57cec5SDimitry Andric if (StoreBB == DestBB || OtherBB == DestBB) 15070b57cec5SDimitry Andric return false; 15080b57cec5SDimitry Andric 15090b57cec5SDimitry Andric // Verify that the other block ends in a branch and is not otherwise empty. 15100b57cec5SDimitry Andric BasicBlock::iterator BBI(OtherBB->getTerminator()); 15110b57cec5SDimitry Andric BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); 15120b57cec5SDimitry Andric if (!OtherBr || BBI == OtherBB->begin()) 15130b57cec5SDimitry Andric return false; 15140b57cec5SDimitry Andric 151506c3fb27SDimitry Andric auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool { 151606c3fb27SDimitry Andric if (!OtherStore || 151706c3fb27SDimitry Andric OtherStore->getPointerOperand() != SI.getPointerOperand()) 151806c3fb27SDimitry Andric return false; 151906c3fb27SDimitry Andric 152006c3fb27SDimitry Andric auto *SIVTy = SI.getValueOperand()->getType(); 152106c3fb27SDimitry Andric auto *OSVTy = OtherStore->getValueOperand()->getType(); 152206c3fb27SDimitry Andric return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) && 152306c3fb27SDimitry Andric SI.hasSameSpecialState(OtherStore); 152406c3fb27SDimitry Andric }; 152506c3fb27SDimitry Andric 15260b57cec5SDimitry Andric // If the other block ends in an unconditional branch, check for the 'if then 15270b57cec5SDimitry Andric // else' case. There is an instruction before the branch. 15280b57cec5SDimitry Andric StoreInst *OtherStore = nullptr; 15290b57cec5SDimitry Andric if (OtherBr->isUnconditional()) { 15300b57cec5SDimitry Andric --BBI; 1531349cc55cSDimitry Andric // Skip over debugging info and pseudo probes. 15325f757f3fSDimitry Andric while (BBI->isDebugOrPseudoInst()) { 15330b57cec5SDimitry Andric if (BBI==OtherBB->begin()) 15340b57cec5SDimitry Andric return false; 15350b57cec5SDimitry Andric --BBI; 15360b57cec5SDimitry Andric } 15370b57cec5SDimitry Andric // If this isn't a store, isn't a store to the same location, or is not the 15380b57cec5SDimitry Andric // right kind of store, bail out. 15390b57cec5SDimitry Andric OtherStore = dyn_cast<StoreInst>(BBI); 154006c3fb27SDimitry Andric if (!OtherStoreIsMergeable(OtherStore)) 15410b57cec5SDimitry Andric return false; 15420b57cec5SDimitry Andric } else { 15430b57cec5SDimitry Andric // Otherwise, the other block ended with a conditional branch. If one of the 15440b57cec5SDimitry Andric // destinations is StoreBB, then we have the if/then case. 
15450b57cec5SDimitry Andric if (OtherBr->getSuccessor(0) != StoreBB && 15460b57cec5SDimitry Andric OtherBr->getSuccessor(1) != StoreBB) 15470b57cec5SDimitry Andric return false; 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an 15500b57cec5SDimitry Andric // if/then triangle. See if there is a store to the same ptr as SI that 15510b57cec5SDimitry Andric // lives in OtherBB. 15520b57cec5SDimitry Andric for (;; --BBI) { 15530b57cec5SDimitry Andric // Check to see if we find the matching store. 155406c3fb27SDimitry Andric OtherStore = dyn_cast<StoreInst>(BBI); 155506c3fb27SDimitry Andric if (OtherStoreIsMergeable(OtherStore)) 15560b57cec5SDimitry Andric break; 155706c3fb27SDimitry Andric 15580b57cec5SDimitry Andric // If we find something that may be using or overwriting the stored 15590b57cec5SDimitry Andric // value, or if we run out of instructions, we can't do the transform. 15600b57cec5SDimitry Andric if (BBI->mayReadFromMemory() || BBI->mayThrow() || 15610b57cec5SDimitry Andric BBI->mayWriteToMemory() || BBI == OtherBB->begin()) 15620b57cec5SDimitry Andric return false; 15630b57cec5SDimitry Andric } 15640b57cec5SDimitry Andric 15650b57cec5SDimitry Andric // In order to eliminate the store in OtherBr, we have to make sure nothing 15660b57cec5SDimitry Andric // reads or overwrites the stored value in StoreBB. 15670b57cec5SDimitry Andric for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { 15680b57cec5SDimitry Andric // FIXME: This should really be AA driven. 15690b57cec5SDimitry Andric if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory()) 15700b57cec5SDimitry Andric return false; 15710b57cec5SDimitry Andric } 15720b57cec5SDimitry Andric } 15730b57cec5SDimitry Andric 15740b57cec5SDimitry Andric // Insert a PHI node now if we need it. 157506c3fb27SDimitry Andric Value *MergedVal = OtherStore->getValueOperand(); 15760b57cec5SDimitry Andric // The debug locations of the original instructions might differ. Merge them. 15770b57cec5SDimitry Andric DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(), 15780b57cec5SDimitry Andric OtherStore->getDebugLoc()); 157906c3fb27SDimitry Andric if (MergedVal != SI.getValueOperand()) { 158006c3fb27SDimitry Andric PHINode *PN = 158106c3fb27SDimitry Andric PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge"); 158206c3fb27SDimitry Andric PN->addIncoming(SI.getValueOperand(), SI.getParent()); 158306c3fb27SDimitry Andric Builder.SetInsertPoint(OtherStore); 158406c3fb27SDimitry Andric PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()), 158506c3fb27SDimitry Andric OtherBB); 15865f757f3fSDimitry Andric MergedVal = InsertNewInstBefore(PN, DestBB->begin()); 15870b57cec5SDimitry Andric PN->setDebugLoc(MergedLoc); 15880b57cec5SDimitry Andric } 15890b57cec5SDimitry Andric 15900b57cec5SDimitry Andric // Advance to a place where it is safe to insert the new store and insert it. 
15910b57cec5SDimitry Andric BBI = DestBB->getFirstInsertionPt(); 15925ffd83dbSDimitry Andric StoreInst *NewSI = 15935ffd83dbSDimitry Andric new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(), 15940b57cec5SDimitry Andric SI.getOrdering(), SI.getSyncScopeID()); 15955f757f3fSDimitry Andric InsertNewInstBefore(NewSI, BBI); 15960b57cec5SDimitry Andric NewSI->setDebugLoc(MergedLoc); 1597bdd1243dSDimitry Andric NewSI->mergeDIAssignID({&SI, OtherStore}); 15980b57cec5SDimitry Andric 15990b57cec5SDimitry Andric // If the two stores had AA tags, merge them. 1600349cc55cSDimitry Andric AAMDNodes AATags = SI.getAAMetadata(); 1601349cc55cSDimitry Andric if (AATags) 1602349cc55cSDimitry Andric NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata())); 16030b57cec5SDimitry Andric 16040b57cec5SDimitry Andric // Nuke the old stores. 16050b57cec5SDimitry Andric eraseInstFromFunction(SI); 16060b57cec5SDimitry Andric eraseInstFromFunction(*OtherStore); 16070b57cec5SDimitry Andric return true; 16080b57cec5SDimitry Andric } 1609