//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This transformation implements the well known scalar replacement of
/// aggregates transformation. It tries to identify promotable elements of an
/// aggregate alloca, and promote them to registers. It will also try to
/// convert uses of an element (or set of elements) of an alloca into a vector
/// or bitfield-style integer scalar if appropriate.
///
/// It works to do this with minimal slicing of the alloca so that regions
/// which are merely transferred in and out of external memory remain unchanged
/// and are not decomposed to scalar code.
///
/// Because this also performs alloca promotion, it can be thought of as also
/// serving the purpose of SSA formation. The algorithm iterates on the
/// function until all opportunities for promotion have been realized.
///
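/// For illustration only (a sketch, not normative documentation): given IR
/// such as
///
///   %agg = alloca { i32, i32 }
///   %f0 = getelementptr { i32, i32 }, ptr %agg, i32 0, i32 0
///   store i32 %a, ptr %f0
///   %v = load i32, ptr %f0
///
/// the pass can slice the alloca per field and promote the slice, after which
/// %v is simply replaced by %a and the alloca, GEP, store, and load all die.
///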
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(
    NumStoresPredicated,
    "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");

/// Disable running mem2reg during SROA in order to test or debug SROA.
static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
                                     cl::Hidden);

namespace {

class AllocaSliceRewriter;
class AllocaSlices;
class Partition;

class SelectHandSpeculativity {
  unsigned char Storage = 0; // None are speculatable by default.
  using TrueVal = Bitfield::Element<bool, 0, 1>;  // Bit 0.
  using FalseVal = Bitfield::Element<bool, 1, 1>; // Bit 1.

public:
  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;
  // For interop as int half of PointerIntPair.
  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));

using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using UnspeculatableStore = StoreInst *;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
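
// For illustration only (hypothetical usage, not code from this pass): a
// select whose true hand was proven safe to load from unconditionally might
// be recorded roughly as
//   SelectHandSpeculativity Spec;
//   Spec.setAsSpeculatable(/*isTrueVal=*/true);
//   RewriteableMemOps Ops;
//   Ops.push_back(PossiblySpeculatableLoad(&LI, Spec));
// where LI is a load sitting on the select's result.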

/// An optimization pass providing Scalar Replacement of Aggregates.
///
/// This pass takes allocations which can be completely analyzed (that is, they
/// don't escape) and tries to turn them into scalar SSA values. There are
/// a few steps to this process.
///
/// 1) It takes allocations of aggregates and analyzes the ways in which they
///    are used to try to split them into smaller allocations, ideally of
///    a single scalar data type. It will split up memcpy and memset accesses
///    as necessary and try to isolate individual scalar accesses.
/// 2) It will transform accesses into forms which are suitable for SSA value
///    promotion. This can be replacing a memset with a scalar store of an
///    integer value, or it can involve speculating operations on a PHI or
///    select to be a PHI or select of the results.
/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
///    onto insert and extract operations on a vector value, and convert them
///    to this form. By doing so, it will enable promotion of vector aggregates
///    to SSA vector values.
class SROA {
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  /// Worklist of alloca instructions to simplify.
  ///
  /// Each alloca in the function is added to this. Each new alloca formed gets
  /// added to it as well to recursively simplify unless that alloca can be
  /// directly promoted. Finally, each time we rewrite a use of an alloca other
  /// than the one being actively rewritten, we add it back onto the list if
  /// not already present to ensure it is re-visited.
  SmallSetVector<AllocaInst *, 16> Worklist;

  /// A collection of instructions to delete.
  /// We try to batch deletions to simplify code and make things a bit more
  /// efficient. We also make sure there are no dangling pointers.
  SmallVector<WeakVH, 8> DeadInsts;

  /// Post-promotion worklist.
  ///
  /// Sometimes we discover an alloca which has a high probability of becoming
  /// viable for SROA after a round of promotion takes place. In those cases,
  /// the alloca is enqueued here for re-processing.
  ///
  /// Note that we have to be very careful to clear allocas out of this list in
  /// the event they are deleted.
  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  /// A collection of alloca instructions we can directly promote.
  std::vector<AllocaInst *> PromotableAllocas;

  /// A worklist of PHIs to speculate prior to promoting allocas.
  ///
  /// All of these PHIs have been checked for the safety of speculation and by
  /// being speculated will allow promoting allocas currently in the promotable
  /// queue.
  SmallSetVector<PHINode *, 8> SpeculatablePHIs;

  /// A worklist of select instructions to rewrite prior to promoting
  /// allocas.
  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  /// Select instructions that use an alloca and are subsequently loaded can be
  /// rewritten to load both input pointers and then select between the result,
  /// allowing the load of the alloca to be promoted.
  /// From this:
  ///   %P2 = select i1 %cond, ptr %Alloca, ptr %Other
  ///   %V = load <type>, ptr %P2
  /// to:
  ///   %V1 = load <type>, ptr %Alloca      -> will be mem2reg'd
  ///   %V2 = load <type>, ptr %Other
  ///   %V = select i1 %cond, <type> %V1, <type> %V2
  ///
  /// We can do this to a select if its only uses are loads
  /// and if either the operand to the select can be loaded unconditionally,
  /// or if we are allowed to perform CFG modifications.
  /// If there is an intervening bitcast whose single use is the load, the
  /// promotion is still allowed.
  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

public:
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  /// Main run method used by both the SROAPass and by the legacy pass.
  std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);

private:
  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas(Function &F);
};

} // end anonymous namespace

/// Calculate the fragment of a variable to use when slicing a store
/// based on the slice dimensions, existing fragment, and base storage
/// fragment.
/// Results:
///   UseFrag   - Use Target as the new fragment.
///   UseNoFrag - The new slice already covers the whole variable.
///   Skip      - The new alloca slice doesn't include this variable.
/// FIXME: Can we use calculateFragmentIntersect instead?
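/// For example (an illustrative sketch of the arithmetic below): with a
/// 64-bit variable whose base storage fragment is (offset 0, size 32) and a
/// new slice of 16 bits at bit offset 8, Target becomes
/// (offset 8 + 0 = 8, size min(16, 32) = 16); it is then accepted only if it
/// lies wholly within the expression's current fragment, if any.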
namespace {
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
} // end anonymous namespace

static FragCalcResult
calculateFragment(DILocalVariable *Variable,
                  uint64_t NewStorageSliceOffsetInBits,
                  uint64_t NewStorageSliceSizeInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment,
                  DIExpression::FragmentInfo &Target) {
  // If the base storage describes part of the variable apply the offset and
  // the size constraint.
  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  // If this slice extracts the entirety of an independent variable from a
  // larger alloca, do not produce a fragment expression, as the variable is
  // not fragmented.
  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      // Treat the current fragment as covering the whole variable.
      CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
      if (Target == CurrentFragment)
        return UseNoFrag;
    }
  }

  // No additional work to do if there isn't a fragment already, or there is
  // but it already exactly describes the new assignment.
  if (!CurrentFragment || *CurrentFragment == Target)
    return UseFrag;

  // Reject the target fragment if it doesn't fit wholly within the current
  // fragment. TODO: We could instead chop up the target to fit in the case of
  // a partial overlap.
  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
    return Skip;

  // Target fits within the current fragment, return it.
  return UseFrag;
}

static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) {
  return DebugVariable(DVI->getVariable(), std::nullopt,
                       DVI->getDebugLoc().getInlinedAt());
}
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
  return DebugVariable(DVR->getVariable(), std::nullopt,
                       DVR->getDebugLoc().getInlinedAt());
}

/// Helpers for handling new and old debug info modes in migrateDebugInfo.
/// These overloads unwrap a DbgInstPtr {Instruction* | DbgRecord*} union based
/// on the \p Unused parameter type.
DbgVariableRecord *UnwrapDbgInstPtr(DbgInstPtr P, DbgVariableRecord *Unused) {
  (void)Unused;
  return static_cast<DbgVariableRecord *>(cast<DbgRecord *>(P));
}
DbgAssignIntrinsic *UnwrapDbgInstPtr(DbgInstPtr P,
                                     DbgAssignIntrinsic *Unused) {
  (void)Unused;
  return static_cast<DbgAssignIntrinsic *>(cast<Instruction *>(P));
}

/// Find linked dbg.assign and generate a new one with the correct
/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
/// value component is copied from the old dbg.assign to the new.
/// \param OldAlloca             Alloca for the variable before splitting.
/// \param IsSplit               True if the store (not necessarily alloca)
///                              is being split.
/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
/// \param SliceSizeInBits       New number of bits being written to.
/// \param OldInst               Instruction that is being split.
/// \param Inst                  New instruction performing this part of the
///                              split store.
/// \param Dest                  Store destination.
/// \param Value                 Stored value.
/// \param DL                    Datalayout.
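///
/// For illustration (a sketch, not exact pass output): splitting one 64-bit
/// store that carries !DIAssignID !1 into two 32-bit stores gives each new
/// store a fresh, distinct !DIAssignID, and each linked dbg.assign is cloned
/// with a DW_OP_LLVM_fragment covering bits [0,32) and [32,64) of the
/// variable respectively, so the debug info keeps tracking both halves.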
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
                             uint64_t OldAllocaOffsetInBits,
                             uint64_t SliceSizeInBits, Instruction *OldInst,
                             Instruction *Inst, Value *Dest, Value *Value,
                             const DataLayout &DL) {
  auto MarkerRange = at::getAssignmentMarkers(OldInst);
  auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
  // Nothing to do if OldInst has no linked dbg.assign intrinsics.
  if (MarkerRange.empty() && DVRAssignMarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << "  migrateDebugInfo\n");
  LLVM_DEBUG(dbgs() << "    OldAlloca: " << *OldAlloca << "\n");
  LLVM_DEBUG(dbgs() << "    IsSplit: " << IsSplit << "\n");
  LLVM_DEBUG(dbgs() << "    OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << "    SliceSizeInBits: " << SliceSizeInBits << "\n");
  LLVM_DEBUG(dbgs() << "    OldInst: " << *OldInst << "\n");
  LLVM_DEBUG(dbgs() << "    Inst: " << *Inst << "\n");
  LLVM_DEBUG(dbgs() << "    Dest: " << *Dest << "\n");
  if (Value)
    LLVM_DEBUG(dbgs() << "    Value: " << *Value << "\n");

  /// Map of aggregate variables to their fragment associated with OldAlloca.
  DenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
      BaseFragments;
  for (auto *DAI : at::getAssignmentMarkers(OldAlloca))
    BaseFragments[getAggregateVariable(DAI)] =
        DAI->getExpression()->getFragmentInfo();
  for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
    BaseFragments[getAggregateVariable(DVR)] =
        DVR->getExpression()->getFragmentInfo();

  // The new inst needs a DIAssignID unique metadata tag (if OldInst has
  // one). It shouldn't already have one: assert this assumption.
  assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
  DIAssignID *NewID = nullptr;
  auto &Ctx = Inst->getContext();
  DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
  assert(OldAlloca->isStaticAlloca());

  auto MigrateDbgAssign = [&](auto *DbgAssign) {
    LLVM_DEBUG(dbgs() << "      existing dbg.assign is: " << *DbgAssign
                      << "\n");
    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    if (IsSplit) {
      std::optional<DIExpression::FragmentInfo> BaseFragment;
      {
        auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
        if (R == BaseFragments.end())
          return;
        BaseFragment = R->second;
      }
      std::optional<DIExpression::FragmentInfo> CurrentFragment =
          Expr->getFragmentInfo();
      DIExpression::FragmentInfo NewFragment;
      FragCalcResult Result = calculateFragment(
          DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
          BaseFragment, CurrentFragment, NewFragment);

      if (Result == Skip)
        return;
      if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
        if (CurrentFragment) {
          // Rewrite NewFragment to be relative to the existing one (this is
          // what createFragmentExpression wants). CalculateFragment has
          // already resolved the size for us. FIXME: Should it return the
          // relative fragment too?
          NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
        }
        // Add the new fragment info to the existing expression if possible.
        if (auto E = DIExpression::createFragmentExpression(
                Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
          Expr = *E;
        } else {
          // Otherwise, add the new fragment info to an empty expression and
          // discard the value component of this dbg.assign as the value
          // cannot be computed with the new fragment.
          Expr = *DIExpression::createFragmentExpression(
              DIExpression::get(Expr->getContext(), std::nullopt),
              NewFragment.OffsetInBits, NewFragment.SizeInBits);
          SetKillLocation = true;
        }
      }
    }

    // If we haven't created a DIAssignID ID do that now and attach it to
    // Inst.
    if (!NewID) {
      NewID = DIAssignID::getDistinct(Ctx);
      Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
    }

    ::Value *NewValue = Value ? Value : DbgAssign->getValue();
    auto *NewAssign = UnwrapDbgInstPtr(
        DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
                            Dest,
                            DIExpression::get(Expr->getContext(), std::nullopt),
                            DbgAssign->getDebugLoc()),
        DbgAssign);

    // If we've updated the value but the original dbg.assign has an arglist
    // then kill it now - we can't use the requested new value.
    // We can't replace the DIArgList with the new value as it'd leave
    // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
    // an arglist). And we can't keep the DIArgList in case the linked store
    // is being split - in which case the DIArgList + expression may no longer
    // be computing the correct value.
    // This should be a very rare situation as it requires the value being
    // stored to differ from the dbg.assign (i.e., the value has been
    // represented differently in the debug intrinsic for some reason).
    SetKillLocation |=
        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());
    if (SetKillLocation)
      NewAssign->setKillLocation();

    // We could use more precision here at the cost of some additional (code)
    // complexity - if the original dbg.assign was adjacent to its store, we
    // could position this new dbg.assign adjacent to its store rather than
    // the old dbg.assign. That would result in interleaved dbg.assigns
    // rather than what we get now:
    //   split store !1
    //   split store !2
    //   dbg.assign !1
    //   dbg.assign !2
    // The current behaviour results in debug assignments being noted as
    // slightly offset (in code) from the store. In practice this should have
    // little effect on the debugging experience due to the fact that all the
    // split stores should get the same line number.
    NewAssign->moveBefore(DbgAssign);

    NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
  };

  for_each(MarkerRange, MigrateDbgAssign);
  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
}

namespace {

/// A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
  std::string Prefix;

  Twine getNameWithPrefix(const Twine &Name) const {
    return Name.isTriviallyEmpty() ? Name : Prefix + Name;
  }

public:
  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
                    BasicBlock::iterator InsertPt) const override {
    IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
                                           InsertPt);
  }
};

/// Provide a type for IRBuilder that drops names in release builds.
using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;

/// A used slice of an alloca.
///
/// This structure represents a slice of an alloca used by some instruction. It
/// stores both the begin and end offsets of this use, a pointer to the use
/// itself, and a flag indicating whether we can classify the use as splittable
/// or not when forming partitions of the alloca.
class Slice {
  /// The beginning offset of the range.
  uint64_t BeginOffset = 0;

  /// The ending offset, not included in the range.
  uint64_t EndOffset = 0;

  /// Storage for both the use of this slice and whether it can be
  /// split.
  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

public:
  Slice() = default;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  /// Support for ordering ranges.
  ///
  /// This provides an ordering over ranges such that start offsets are
  /// always increasing, and within equal start offsets, the end offsets are
  /// decreasing. Thus the spanning range comes first in a cluster with the
  /// same start position.
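  ///
  /// For example (illustrative), slices written as (begin, end, splittable)
  /// sort as:
  ///   (0, 24, unsplittable), (0, 16, unsplittable), (0, 8, splittable)
  /// since unsplittable slices precede splittable ones at the same start,
  /// and within the same splittability the longer (spanning) slice comes
  /// first.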
  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }

  /// Support comparison with a single offset to allow binary searches.
  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
                                              uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
                                              const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

  bool operator==(const Slice &RHS) const {
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
  }
  bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};

/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
/// various uses.
/// If a pointer escapes, we can't fully build a representation for the slices
/// used and we reflect that in this structure. The uses are stored, sorted by
/// increasing beginning offset and with unsplittable slices starting at a
/// particular offset before splittable slices.
class AllocaSlices {
public:
  /// Construct the slices of a particular alloca.
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  /// Test whether a pointer to the allocation escapes our analysis.
  ///
  /// If this is true, the slices are never fully built and should be
  /// ignored.
  bool isEscaped() const { return PointerEscapingInstr; }

  /// Support for iterating over the slices.
  /// @{
  using iterator = SmallVectorImpl<Slice>::iterator;
  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_iterator = SmallVectorImpl<Slice>::const_iterator;
  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }
  /// @}

  /// Erase a range of slices.
  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

  /// Insert new slices for this alloca.
  ///
  /// This moves the slices into the alloca's slices collection, and re-sorts
  /// everything so that the usual ordering properties of the alloca's slices
  /// hold.
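  ///
  /// For example (illustrative), because the ordering is restored, a binary
  /// search such as std::lower_bound(begin(), end(), Offset) still finds the
  /// first slice beginning at or after Offset afterwards.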
  void insert(ArrayRef<Slice> NewSlices) {
    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
  }

  // Forward declare the iterator and range accessor for walking the
  // partitions.
  class partition_iterator;
  iterator_range<partition_iterator> partitions();

  /// Access the dead users for this alloca.
  ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }

  /// Access Uses that should be dropped if the alloca is promotable.
  ArrayRef<Use *> getDeadUsesIfPromotable() const {
    return DeadUseIfPromotable;
  }

  /// Access the dead operands referring to this alloca.
  ///
  /// These are operands which cannot actually be used to refer to the alloca
  /// as they are outside its range and the user doesn't correct for that.
  /// These mostly consist of PHI node inputs and the like which we just need
  /// to replace with poison.
  ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;
  void dump() const;
#endif

private:
  template <typename DerivedT, typename RetT = void> class BuilderBase;
  class SliceBuilder;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Handle to alloca instruction to simplify method interfaces.
  AllocaInst &AI;
#endif

  /// The instruction responsible for this alloca not having a known set
  /// of slices.
  ///
  /// When an instruction (potentially) escapes the pointer to the alloca, we
  /// store a pointer to that here and abort trying to form slices of the
  /// alloca. This will be null if the alloca slices are analyzed successfully.
  Instruction *PointerEscapingInstr;

  /// The slices of the alloca.
  ///
  /// We store a vector of the slices formed by uses of the alloca here. This
  /// vector is sorted by increasing begin offset, and then the unsplittable
  /// slices before the splittable ones. See the Slice inner class for more
  /// details.
  SmallVector<Slice, 8> Slices;

  /// Instructions which will become dead if we rewrite the alloca.
  ///
  /// Note that these are not separated by slice. This is because we expect an
  /// alloca to be completely rewritten or not rewritten at all.
  /// If rewritten, all these instructions can simply be removed and replaced
  /// with poison as they come from outside of the allocated space.
  SmallVector<Instruction *, 8> DeadUsers;

  /// Uses which will become dead if we can promote the alloca.
  SmallVector<Use *, 8> DeadUseIfPromotable;

  /// Operands which will become dead if we rewrite the alloca.
  ///
  /// These are operands that in their particular use can be replaced with
  /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
  /// to PHI nodes and the like. They aren't entirely dead (there might be
  /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
  /// want to swap this particular input for poison to simplify the use lists
  /// of the alloca.
  SmallVector<Use *, 8> DeadOperands;
};

/// A partition of the slices.
///
/// An ephemeral representation for a range of slices which can be viewed as
/// a partition of the alloca. This range represents a span of the alloca's
/// memory which cannot be split, and provides access to all of the slices
/// overlapping some part of the partition.
///
/// Objects of this type are produced by traversing the alloca's slices, but
/// are only ephemeral and not persistent.
class Partition {
private:
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  /// The beginning and ending offsets of the alloca for this
  /// partition.
  uint64_t BeginOffset = 0, EndOffset = 0;

  /// The start and end iterators of this partition.
  iterator SI, SJ;

  /// A collection of split slice tails overlapping the partition.
  SmallVector<Slice *, 4> SplitTails;

  /// Raw constructor builds an empty partition starting and ending at
  /// the given iterator.
  Partition(iterator SI) : SI(SI), SJ(SI) {}

public:
  /// The start offset of this partition.
  ///
  /// All of the contained slices start at or after this offset.
  uint64_t beginOffset() const { return BeginOffset; }

  /// The end offset of this partition.
  ///
  /// All of the contained slices end at or before this offset.
  uint64_t endOffset() const { return EndOffset; }

  /// The size of the partition.
  ///
  /// Note that this can never be zero.
  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  /// Test whether this partition contains no slices, and merely spans
  /// a region occupied by split slices.
  bool empty() const { return SI == SJ; }

  /// \name Iterate slices that start within the partition.
  /// These may be splittable or unsplittable. They have a begin offset >= the
  /// partition begin offset.
  /// @{
  // FIXME: We should probably define a "concat_iterator" helper and use that
  // to stitch together pointee_iterators over the split tails and the
  // contiguous iterators of the partition. That would give a much nicer
  // interface here. We could then additionally expose filtered iterators for
  // split, unsplit, and unsplittable slices based on the usage patterns.
  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
  /// @}

  /// Get the sequence of split slice tails.
  ///
  /// These tails are of slices which start before this partition but are
  /// split and overlap into the partition. We accumulate these while forming
  /// partitions.
  ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};

} // end anonymous namespace

/// An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
/// slices. It is a forward iterator as we don't support backtracking for
/// efficiency reasons, and re-use a single storage area to maintain the
/// current set of split slices.
///
/// It is templated on the slice iterator type to use so that it can operate
/// with either const or non-const slice iterators.
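///
/// For example (an illustrative walk-through): given sorted slices
/// [0,8) unsplittable, [4,12) unsplittable, and [8,32) splittable, the first
/// partition formed is [0,12) containing all three slices, and the next is an
/// "empty" partition [12,32) covering only the split tail of the splittable
/// slice.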
class AllocaSlices::partition_iterator
    : public iterator_facade_base<partition_iterator,
                                  std::forward_iterator_tag, Partition> {
  friend class AllocaSlices;

  /// Most of the state for walking the partitions is held in a class
  /// with a nice interface for examining them.
  Partition P;

  /// We need to keep the end of the slices to know when to stop.
  AllocaSlices::iterator SE;

  /// We also need to keep track of the maximum split end offset seen.
  /// FIXME: Do we really?
  uint64_t MaxSplitSliceEndOffset = 0;

  /// Sets the partition to be empty at given iterator, and sets the
  /// end iterator.
  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
      : P(SI), SE(SE) {
    // If not already at the end, advance our state to form the initial
    // partition.
    if (SI != SE)
      advance();
  }

  /// Advance the iterator to the next partition.
  ///
  /// Requires that the iterator not be at the end of the slices.
  void advance() {
    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    // Clear out any split uses which have ended.
    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        // If we've finished all splits, this is easy.
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;
      } else {
        // Remove the uses which have ended in the prior partition. This
        // cannot change the max split slice end because we just checked that
        // the prior partition ended prior to that max.
        llvm::erase_if(P.SplitTails,
                       [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
        assert(llvm::any_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() == MaxSplitSliceEndOffset;
                            }) &&
               "Could not find the current max split slice offset!");
        assert(llvm::all_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() <= MaxSplitSliceEndOffset;
                            }) &&
               "Max split slice end offset is not actually the max!");
      }
    }

    // If P.SI is already at the end, then we've cleared the split tail and
    // now have an end iterator.
    if (P.SI == SE) {
      assert(P.SplitTails.empty() && "Failed to clear the split slices!");
      return;
    }

    // If we had a non-empty partition previously, set up the state for
    // subsequent partitions.
    if (P.SI != P.SJ) {
      // Accumulate all the splittable slices which started in the old
      // partition into the split list.
      for (Slice &S : P)
        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);
        }

      // Start from the end of the previous partition.
      P.SI = P.SJ;

      // If P.SI is now at the end, we at most have a tail of split slices.
      if (P.SI == SE) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = MaxSplitSliceEndOffset;
        return;
      }

      // If we have split slices and the next slice is after a gap and is
      // not splittable, immediately form an empty partition for the split
      // slices up until the next slice begins.
      if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
          !P.SI->isSplittable()) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = P.SI->beginOffset();
        return;
      }
    }

    // OK, we need to consume new slices. Set the end offset based on the
    // current slice, and step SJ past it. The beginning offset of the
    // partition is the beginning offset of the next slice unless we have
    // pre-existing split slices that are continuing, in which case we begin
    // at the prior end offset.
9020b57cec5SDimitry Andric P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; 9030b57cec5SDimitry Andric P.EndOffset = P.SI->endOffset(); 9040b57cec5SDimitry Andric ++P.SJ; 9050b57cec5SDimitry Andric 9060b57cec5SDimitry Andric // There are two strategies to form a partition based on whether the 9070b57cec5SDimitry Andric // partition starts with an unsplittable slice or a splittable slice. 9080b57cec5SDimitry Andric if (!P.SI->isSplittable()) { 9090b57cec5SDimitry Andric // When we're forming an unsplittable region, it must always start at 9100b57cec5SDimitry Andric // the first slice and will extend through its end. 9110b57cec5SDimitry Andric assert(P.BeginOffset == P.SI->beginOffset()); 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric // Form a partition including all of the overlapping slices with this 9140b57cec5SDimitry Andric // unsplittable slice. 9150b57cec5SDimitry Andric while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { 9160b57cec5SDimitry Andric if (!P.SJ->isSplittable()) 9170b57cec5SDimitry Andric P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); 9180b57cec5SDimitry Andric ++P.SJ; 9190b57cec5SDimitry Andric } 9200b57cec5SDimitry Andric 9210b57cec5SDimitry Andric // We have a partition across a set of overlapping unsplittable 9220b57cec5SDimitry Andric // partitions. 9230b57cec5SDimitry Andric return; 9240b57cec5SDimitry Andric } 9250b57cec5SDimitry Andric 9260b57cec5SDimitry Andric // If we're starting with a splittable slice, then we need to form 9270b57cec5SDimitry Andric // a synthetic partition spanning it and any other overlapping splittable 9280b57cec5SDimitry Andric // splices. 9290b57cec5SDimitry Andric assert(P.SI->isSplittable() && "Forming a splittable partition!"); 9300b57cec5SDimitry Andric 9310b57cec5SDimitry Andric // Collect all of the overlapping splittable slices. 9320b57cec5SDimitry Andric while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && 9330b57cec5SDimitry Andric P.SJ->isSplittable()) { 9340b57cec5SDimitry Andric P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); 9350b57cec5SDimitry Andric ++P.SJ; 9360b57cec5SDimitry Andric } 9370b57cec5SDimitry Andric 9380b57cec5SDimitry Andric // Back upiP.EndOffset if we ended the span early when encountering an 9390b57cec5SDimitry Andric // unsplittable slice. This synthesizes the early end offset of 9400b57cec5SDimitry Andric // a partition spanning only splittable slices. 9410b57cec5SDimitry Andric if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { 9420b57cec5SDimitry Andric assert(!P.SJ->isSplittable()); 9430b57cec5SDimitry Andric P.EndOffset = P.SJ->beginOffset(); 9440b57cec5SDimitry Andric } 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric public: 9480b57cec5SDimitry Andric bool operator==(const partition_iterator &RHS) const { 9490b57cec5SDimitry Andric assert(SE == RHS.SE && 9500b57cec5SDimitry Andric "End iterators don't match between compared partition iterators!"); 9510b57cec5SDimitry Andric 9520b57cec5SDimitry Andric // The observed positions of partitions is marked by the P.SI iterator and 9530b57cec5SDimitry Andric // the emptiness of the split slices. The latter is only relevant when 9540b57cec5SDimitry Andric // P.SI == SE, as the end iterator will additionally have an empty split 9550b57cec5SDimitry Andric // slices list, but the prior may have the same P.SI and a tail of split 9560b57cec5SDimitry Andric // slices. 
    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "
             "slice tails!");
      return true;
    }
    return false;
  }

  partition_iterator &operator++() {
    advance();
    return *this;
  }

  Partition &operator*() { return P; }
};

/// A forward range over the partitions of the alloca's slices.
///
/// This accesses an iterator range over the partitions of the alloca's
/// slices. It computes these partitions on the fly based on the overlapping
/// offsets of the slices and the ability to split them. It will visit "empty"
/// partitions to cover regions of the alloca only accessed via split
/// slices.
iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
  return make_range(partition_iterator(begin(), end()),
                    partition_iterator(end(), end()));
}
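
// As an illustration of how partitions are formed (example values only): given
// a splittable memcpy slice covering [0,16) and an unsplittable load slice
// covering [8,12), iterating this range visits three partitions: [0,8)
// containing the head of the memcpy, [8,12) containing the load plus the
// memcpy's split tail, and [12,16) covering the remainder of the split tail
// alone.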

static Value *foldSelectInst(SelectInst &SI) {
  // If the condition being selected on is a constant or the same value is
  // being selected between, fold the select. Yes, this does (rarely) happen
  // early on.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return nullptr;
}

/// A helper that folds a PHI node or a select.
static Value *foldPHINodeOrSelectInst(Instruction &I) {
  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
    // If PN merges together the same value, return that value.
    return PN->hasConstantValue();
  }
  return foldSelectInst(cast<SelectInst>(I));
}

/// Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
/// of an alloca and making a slice for each load and store at each offset.
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
  friend class PtrUseVisitor<SliceBuilder>;
  friend class InstVisitor<SliceBuilder>;

  using Base = PtrUseVisitor<SliceBuilder>;

  const uint64_t AllocSize;
  AllocaSlices &AS;

  SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
  SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;

  /// Set to de-duplicate dead instructions found in the use walk.
  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;

public:
  SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
      : PtrUseVisitor<SliceBuilder>(DL),
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
        AS(AS) {}

private:
  void markAsDead(Instruction &I) {
    if (VisitedDeadInsts.insert(&I).second)
      AS.DeadUsers.push_back(&I);
  }

  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
                 bool IsSplittable = false) {
    // Completely skip uses which have a zero size or start either before or
    // past the end of the allocation.
    if (Size == 0 || Offset.uge(AllocSize)) {
      LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
                        << Offset
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      return markAsDead(I);
    }

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    // Clamp the end offset to the end of the allocation. Note that this is
    // formulated to handle even the case where "BeginOffset + Size" overflows.
    // This may appear superficially to be something we could ignore entirely,
    // but that is not so! There may be widened loads or PHI-node uses where
    // some instructions are dead but not others. We can't completely ignore
    // them, and so have to record at least the information here.
    assert(AllocSize >= BeginOffset); // Established above.
    if (Size > AllocSize - BeginOffset) {
      LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      EndOffset = AllocSize;
    }

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
  }
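
  // For example (illustrative values): with a 16 byte alloca, an 8 byte use at
  // offset 12 is clamped to the slice [12,16), while an 8 byte use at offset
  // 16 or beyond (or any zero-size use) is marked as dead instead of becoming
  // a slice.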

  void visitBitCastInst(BitCastInst &BC) {
    if (BC.use_empty())
      return markAsDead(BC);

    return Base::visitBitCastInst(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    if (ASC.use_empty())
      return markAsDead(ASC);

    return Base::visitAddrSpaceCastInst(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty())
      return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);
  }

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {
    // We allow splitting of non-volatile loads and stores where the type is an
    // integer type. These may be used to implement 'memcpy' or other "transfer
    // of bits" patterns.
    bool IsSplittable =
        Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);

    insertUse(I, Offset, Size, IsSplittable);
  }
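
  // For example, a non-volatile `load i64` produces a splittable slice, while
  // a volatile load, a `load float`, or an i1 load (whose 1-bit type size
  // differs from its 1-byte store size) each produce an unsplittable slice.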

  void visitLoadInst(LoadInst &LI) {
    assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
           "All simple FCA loads should have been pre-split");

    if (!IsOffsetKnown)
      return PI.setAborted(&LI);

    TypeSize Size = DL.getTypeStoreSize(LI.getType());
    if (Size.isScalable())
      return PI.setAborted(&LI);

    return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
                             LI.isVolatile());
  }

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
    if (ValOp == *U)
      return PI.setEscapedAndAborted(&SI);
    if (!IsOffsetKnown)
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
    if (StoreSize.isScalable())
      return PI.setAborted(&SI);

    uint64_t Size = StoreSize.getFixedValue();

    // If this memory access can be shown to *statically* extend outside the
    // bounds of the allocation, its behavior is undefined, so simply
    // ignore it. Note that this is more strict than the generic clamping
    // behavior of insertUse. We also try to handle cases which might run the
    // risk of overflow.
    // FIXME: We should instead consider the pointer to have escaped if this
    // function is being instrumented for addressing bugs or race conditions.
    if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
      LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << SI << "\n");
      return markAsDead(SI);
    }

    assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
           "All simple FCA stores should have been pre-split");
    handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
  }
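
  // For example, an 8 byte store at offset 12 of a 16 byte alloca statically
  // extends past the end of the allocation, so it is discarded as dead and
  // produces no slice at all, unlike the clamping insertUse applies to loads.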

  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if ((Length && Length->getValue() == 0) ||
        (IsOffsetKnown && Offset.uge(AllocSize)))
      // Zero-length mem transfer intrinsics can be ignored entirely.
      return markAsDead(II);

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    insertUse(II, Offset,
              Length ? Length->getLimitedValue()
                     : AllocSize - Offset.getLimitedValue(),
              (bool)Length);
  }

  void visitMemTransferInst(MemTransferInst &II) {
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if (Length && Length->getValue() == 0)
      // Zero-length mem transfer intrinsics can be ignored entirely.
      return markAsDead(II);

    // Because we can visit these intrinsics twice, also check to see if the
    // first time marked this instruction as dead. If so, skip it.
    if (VisitedDeadInsts.count(&II))
      return;

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    // This side of the transfer is completely out-of-bounds, and so we can
    // nuke the entire transfer. However, we also need to nuke the other side
    // if already added to our partitions.
    // FIXME: Yet another place we really should bypass this when
    // instrumenting for ASan.
    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);
    }

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    // Check for the special case where the same exact value is used for both
    // source and dest.
    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      // For non-volatile transfers this is a no-op.
      if (!II.isVolatile())
        return markAsDead(II);

      return insertUse(II, Offset, Size, /*IsSplittable=*/false);
    }

    // If we have seen both source and destination for a mem transfer, then
    // they both point to the same alloca.
    bool Inserted;
    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;
    if (!Inserted) {
      Slice &PrevP = AS.Slices[PrevIdx];

      // Check if the begin offsets match and this is a non-volatile transfer.
      // In that case, we can completely elide the transfer.
      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
        PrevP.kill();
        return markAsDead(II);
      }

      // Otherwise we have an offset transfer within the same alloca. We can't
      // split those.
      PrevP.makeUnsplittable();
    }

    // Insert the use now that we've fixed up the splittable nature.
    insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);

    // Check that we ended up with a valid index in the map.
    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
  }
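
  // To illustrate the same-alloca handling above (example values): for a
  // non-volatile memcpy where source and destination are the exact same
  // pointer into the alloca, the transfer is a no-op and is marked dead. If
  // instead the source and destination are different offsets within the one
  // alloca, the second visit of the intrinsic finds the slice recorded by the
  // first visit and both slices are made unsplittable.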

  // Disable SROA for any intrinsics except for lifetime invariants and
  // invariant group.
  // FIXME: What about debug intrinsics? This matches old behavior, but
  // doesn't make sense.
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);
      return;
    }

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
      uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
                               Length->getLimitedValue());
      insertUse(II, Offset, Size, true);
      return;
    }

    if (II.isLaunderOrStripInvariantGroup()) {
      insertUse(II, Offset, AllocSize, true);
      enqueueUsers(II);
      return;
    }

    Base::visitIntrinsicInst(II);
  }

  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    // We consider any PHI or select that results in a direct load or store of
    // the same offset to be a viable use for slicing purposes. These uses
    // are considered unsplittable and the size is the maximum loaded or stored
    // size.
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
    Visited.insert(Root);
    Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
    const DataLayout &DL = Root->getDataLayout();
    // If there are no loads or stores, the access is dead. We mark that as
    // a size zero access.
    Size = 0;
    do {
      Instruction *I, *UsedI;
      std::tie(UsedI, I) = Uses.pop_back_val();

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
        if (LoadSize.isScalable()) {
          PI.setAborted(LI);
          return nullptr;
        }
        Size = std::max(Size, LoadSize.getFixedValue());
        continue;
      }
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        Value *Op = SI->getOperand(0);
        if (Op == UsedI)
          return SI;
        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
        if (StoreSize.isScalable()) {
          PI.setAborted(SI);
          return nullptr;
        }
        Size = std::max(Size, StoreSize.getFixedValue());
        continue;
      }

      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        if (!GEP->hasAllZeroIndices())
          return GEP;
      } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
                 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
        return I;
      }

      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
    } while (!Uses.empty());

    return nullptr;
  }

  void visitPHINodeOrSelectInst(Instruction &I) {
    assert(isa<PHINode>(I) || isa<SelectInst>(I));
    if (I.use_empty())
      return markAsDead(I);

    // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
    // instructions in this BB, which may be required during rewriting. Bail out
    // on these cases.
    if (isa<PHINode>(I) &&
        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

    // TODO: We could use simplifyInstruction here to fold PHINodes and
    // SelectInsts. However, doing so requires changing the current
    // dead-operand-tracking mechanism. For instance, suppose neither loading
    // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
    // trap either. However, if we simply replace %U with undef using the
    // current dead-operand-tracking mechanism, "load (select undef, undef,
    // %other)" may trap because the select may return the first operand
    // "undef".
    if (Value *Result = foldPHINodeOrSelectInst(I)) {
      if (Result == *U)
        // If the result of the constant fold will be the pointer, recurse
        // through the PHI/select as if we had RAUW'ed it.
        enqueueUsers(I);
      else
        // Otherwise the operand to the PHI/select is dead, and we can replace
        // it with poison.
        AS.DeadOperands.push_back(U);

      return;
    }

    if (!IsOffsetKnown)
      return PI.setAborted(&I);

    // See if we already have computed info on this node.
    uint64_t &Size = PHIOrSelectSizes[&I];
    if (!Size) {
      // This is a new PHI/Select, check for an unsafe use of it.
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);
    }

    // For PHI and select operands outside the alloca, we can't nuke the entire
    // phi or select -- the other side might still be relevant, so we special
    // case them here and use a separate structure to track the operands
    // themselves which should be replaced with poison.
    // FIXME: This should instead be escaped in the event we're instrumenting
    // for address sanitization.
    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);
      return;
    }

    insertUse(I, Offset, Size);
  }

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
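
  // For example, a PHI whose only transitive users are `load i32` instructions
  // is recorded as a single unsplittable 4 byte slice (the size computed by
  // hasUnsafePHIOrSelectUse), whereas a PHI reached through a GEP with a
  // nonzero index is an unsafe use and aborts the analysis of this alloca.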

  /// Disable SROA entirely if there are unhandled users of the alloca.
  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
};

AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
    :
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      AI(AI),
#endif
      PointerEscapingInstr(nullptr) {
  SliceBuilder PB(DL, AI, *this);
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    // FIXME: We should sink the escape vs. abort info into the caller nicely,
    // possibly by just storing the PtrInfo in the AllocaSlices.
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");
    return;
  }

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });

  // Sort the uses. This arranges for the offsets to be in ascending order,
  // and the sizes to be in descending order.
  llvm::stable_sort(Slices);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  OS << "\n";
  printUse(OS, I, Indent);
}

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");
}

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << "  used by: " << *I->getUse()->getUser() << "\n";
}

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << "  A pointer to this alloca escaped by:\n"
       << "  " << *PointerEscapingInstr << "\n";
    return;
  }

  OS << "Slices of alloca: " << AI << "\n";
  for (const_iterator I = begin(), E = end(); I != E; ++I)
    print(OS, I);
}

LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
  print(dbgs(), I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }

#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
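
// As a rough illustration (exact spacing may differ), the dump output produced
// by the routines above looks like:
//   Slices of alloca:   %a = alloca i64, align 8
//   [0,8) slice #0 (splittable)
//     used by:   %v = load i64, ptr %a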
" (splittable)" : ""); 14330b57cec5SDimitry Andric } 14340b57cec5SDimitry Andric 14350b57cec5SDimitry Andric void AllocaSlices::printUse(raw_ostream &OS, const_iterator I, 14360b57cec5SDimitry Andric StringRef Indent) const { 14370b57cec5SDimitry Andric OS << Indent << " used by: " << *I->getUse()->getUser() << "\n"; 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric void AllocaSlices::print(raw_ostream &OS) const { 14410b57cec5SDimitry Andric if (PointerEscapingInstr) { 14420b57cec5SDimitry Andric OS << "Can't analyze slices for alloca: " << AI << "\n" 14430b57cec5SDimitry Andric << " A pointer to this alloca escaped by:\n" 14440b57cec5SDimitry Andric << " " << *PointerEscapingInstr << "\n"; 14450b57cec5SDimitry Andric return; 14460b57cec5SDimitry Andric } 14470b57cec5SDimitry Andric 14480b57cec5SDimitry Andric OS << "Slices of alloca: " << AI << "\n"; 14490b57cec5SDimitry Andric for (const_iterator I = begin(), E = end(); I != E; ++I) 14500b57cec5SDimitry Andric print(OS, I); 14510b57cec5SDimitry Andric } 14520b57cec5SDimitry Andric 14530b57cec5SDimitry Andric LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const { 14540b57cec5SDimitry Andric print(dbgs(), I); 14550b57cec5SDimitry Andric } 14560b57cec5SDimitry Andric LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); } 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andric #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 14590b57cec5SDimitry Andric 14600b57cec5SDimitry Andric /// Walk the range of a partitioning looking for a common type to cover this 14610b57cec5SDimitry Andric /// sequence of slices. 1462e8d8bef9SDimitry Andric static std::pair<Type *, IntegerType *> 1463e8d8bef9SDimitry Andric findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, 14640b57cec5SDimitry Andric uint64_t EndOffset) { 14650b57cec5SDimitry Andric Type *Ty = nullptr; 14660b57cec5SDimitry Andric bool TyIsCommon = true; 14670b57cec5SDimitry Andric IntegerType *ITy = nullptr; 14680b57cec5SDimitry Andric 14690b57cec5SDimitry Andric // Note that we need to look at *every* alloca slice's Use to ensure we 14700b57cec5SDimitry Andric // always get consistent results regardless of the order of slices. 14710b57cec5SDimitry Andric for (AllocaSlices::const_iterator I = B; I != E; ++I) { 14720b57cec5SDimitry Andric Use *U = I->getUse(); 14730b57cec5SDimitry Andric if (isa<IntrinsicInst>(*U->getUser())) 14740b57cec5SDimitry Andric continue; 14750b57cec5SDimitry Andric if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset) 14760b57cec5SDimitry Andric continue; 14770b57cec5SDimitry Andric 14780b57cec5SDimitry Andric Type *UserTy = nullptr; 14790b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { 14800b57cec5SDimitry Andric UserTy = LI->getType(); 14810b57cec5SDimitry Andric } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { 14820b57cec5SDimitry Andric UserTy = SI->getValueOperand()->getType(); 14830b57cec5SDimitry Andric } 14840b57cec5SDimitry Andric 14850b57cec5SDimitry Andric if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) { 14860b57cec5SDimitry Andric // If the type is larger than the partition, skip it. We only encounter 14870b57cec5SDimitry Andric // this for split integer operations where we want to use the type of the 14880b57cec5SDimitry Andric // entity causing the split. Also skip if the type is not a byte width 14890b57cec5SDimitry Andric // multiple. 
14900b57cec5SDimitry Andric if (UserITy->getBitWidth() % 8 != 0 || 14910b57cec5SDimitry Andric UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset())) 14920b57cec5SDimitry Andric continue; 14930b57cec5SDimitry Andric 14940b57cec5SDimitry Andric // Track the largest bitwidth integer type used in this way in case there 14950b57cec5SDimitry Andric // is no common type. 14960b57cec5SDimitry Andric if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth()) 14970b57cec5SDimitry Andric ITy = UserITy; 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric // To avoid depending on the order of slices, Ty and TyIsCommon must not 15010b57cec5SDimitry Andric // depend on types skipped above. 15020b57cec5SDimitry Andric if (!UserTy || (Ty && Ty != UserTy)) 15030b57cec5SDimitry Andric TyIsCommon = false; // Give up on anything but an iN type. 15040b57cec5SDimitry Andric else 15050b57cec5SDimitry Andric Ty = UserTy; 15060b57cec5SDimitry Andric } 15070b57cec5SDimitry Andric 1508e8d8bef9SDimitry Andric return {TyIsCommon ? Ty : nullptr, ITy}; 15090b57cec5SDimitry Andric } 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric /// PHI instructions that use an alloca and are subsequently loaded can be 15120b57cec5SDimitry Andric /// rewritten to load both input pointers in the pred blocks and then PHI the 15130b57cec5SDimitry Andric /// results, allowing the load of the alloca to be promoted. 15140b57cec5SDimitry Andric /// From this: 15150b57cec5SDimitry Andric /// %P2 = phi [i32* %Alloca, i32* %Other] 15160b57cec5SDimitry Andric /// %V = load i32* %P2 15170b57cec5SDimitry Andric /// to: 15180b57cec5SDimitry Andric /// %V1 = load i32* %Alloca -> will be mem2reg'd 15190b57cec5SDimitry Andric /// ... 15200b57cec5SDimitry Andric /// %V2 = load i32* %Other 15210b57cec5SDimitry Andric /// ... 15220b57cec5SDimitry Andric /// %V = phi [i32 %V1, i32 %V2] 15230b57cec5SDimitry Andric /// 15240b57cec5SDimitry Andric /// We can do this to a select if its only uses are loads and if the operands 15250b57cec5SDimitry Andric /// to the select can be loaded unconditionally. 15260b57cec5SDimitry Andric /// 15270b57cec5SDimitry Andric /// FIXME: This should be hoisted into a generic utility, likely in 15280b57cec5SDimitry Andric /// Transforms/Util/Local.h 15290b57cec5SDimitry Andric static bool isSafePHIToSpeculate(PHINode &PN) { 1530*0fca6ea1SDimitry Andric const DataLayout &DL = PN.getDataLayout(); 15310b57cec5SDimitry Andric 15320b57cec5SDimitry Andric // For now, we can only do this promotion if the load is in the same block 15330b57cec5SDimitry Andric // as the PHI, and if there are no stores between the phi and load. 15340b57cec5SDimitry Andric // TODO: Allow recursive phi users. 15350b57cec5SDimitry Andric // TODO: Allow stores. 15360b57cec5SDimitry Andric BasicBlock *BB = PN.getParent(); 15375ffd83dbSDimitry Andric Align MaxAlign; 15380b57cec5SDimitry Andric uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType()); 1539f3fd488fSDimitry Andric Type *LoadType = nullptr; 15400b57cec5SDimitry Andric for (User *U : PN.users()) { 15410b57cec5SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(U); 15420b57cec5SDimitry Andric if (!LI || !LI->isSimple()) 15430b57cec5SDimitry Andric return false; 15440b57cec5SDimitry Andric 15450b57cec5SDimitry Andric // For now we only allow loads in the same block as the PHI. This is 15460b57cec5SDimitry Andric // a common case that happens when instcombine merges two loads through 15470b57cec5SDimitry Andric // a PHI. 
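
// For example, if every slice spanning the partition loads or stores i32, the
// common type is i32. If one such slice uses i32 and another uses float, no
// common type is found, but i32 is still returned as the integer fallback
// because it is a whole-byte width no larger than the partition.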

/// PHI instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers in the pred blocks and then PHI the
/// results, allowing the load of the alloca to be promoted.
/// From this:
///   %P2 = phi [i32* %Alloca, i32* %Other]
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   ...
///   %V2 = load i32* %Other
///   ...
///   %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a select if its only uses are loads and if the operands
/// to the select can be loaded unconditionally.
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN) {
  const DataLayout &DL = PN.getDataLayout();

  // For now, we can only do this promotion if the load is in the same block
  // as the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN.getParent();
  Align MaxAlign;
  uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
  Type *LoadType = nullptr;
  for (User *U : PN.users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple())
      return false;

    // For now we only allow loads in the same block as the PHI. This is
    // a common case that happens when instcombine merges two loads through
    // a PHI.
    if (LI->getParent() != BB)
      return false;

    if (LoadType) {
      if (LoadType != LI->getType())
        return false;
    } else {
      LoadType = LI->getType();
    }

    // Ensure that there are no instructions between the PHI and the load that
    // could store.
    for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    MaxAlign = std::max(MaxAlign, LI->getAlign());
  }

  if (!LoadType)
    return false;

  APInt LoadSize =
      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  // We can only transform this if it is safe to push the loads into the
  // predecessor blocks. The only thing to watch out for is that we can't put
  // a possibly trapping load in the predecessor if it is a critical edge.
  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
    Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
    Value *InVal = PN.getIncomingValue(Idx);

    // If the value is produced by the terminator of the predecessor (an
    // invoke) or it has side-effects, there is no valid place to put a load
    // in the predecessor.
    if (TI == InVal || TI->mayHaveSideEffects())
      return false;

    // If the predecessor has a single successor, then the edge isn't
    // critical.
    if (TI->getNumSuccessors() == 1)
      continue;

    // If this pointer is always safe to load, or if we can prove that there
    // is already a load in the block, then we can move the load to the pred
    // block.
    if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
      continue;

    return false;
  }

  return true;
}
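
// For example, with "%p = phi ptr [ %a, %pred1 ], [ %b, %pred2 ]" followed by
// "%v = load i32, ptr %p", speculation would hoist a load of %a into %pred1
// and a load of %b into %pred2. It is refused if, say, %pred2 ends in an
// invoke that defines %b, or if %pred2 has another successor (a critical
// edge) and %b is not known to be safe to load unconditionally.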

static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
  LLVM_DEBUG(dbgs() << "    original: " << PN << "\n");

  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
  Type *LoadTy = SomeLoad->getType();
  IRB.SetInsertPoint(&PN);
  PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                 PN.getName() + ".sroa.speculated");

  // Get the AA tags and alignment to use from one of the loads. It does not
  // matter which one we get and if any differ.
  AAMDNodes AATags = SomeLoad->getAAMetadata();
  Align Alignment = SomeLoad->getAlign();

  // Rewrite all loads of the PN to use the new PHI.
  while (!PN.use_empty()) {
    LoadInst *LI = cast<LoadInst>(PN.user_back());
    LI->replaceAllUsesWith(NewPN);
    LI->eraseFromParent();
  }

  // Inject loads into all of the pred blocks.
  DenseMap<BasicBlock *, Value *> InjectedLoads;
  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
    BasicBlock *Pred = PN.getIncomingBlock(Idx);
    Value *InVal = PN.getIncomingValue(Idx);

    // A PHI node is allowed to have multiple (duplicated) entries for the same
    // basic block, as long as the value is the same. So if we already injected
    // a load in the predecessor, then we should reuse the same load for all
    // duplicated entries.
    if (Value *V = InjectedLoads.lookup(Pred)) {
      NewPN->addIncoming(V, Pred);
      continue;
    }

    Instruction *TI = Pred->getTerminator();
    IRB.SetInsertPoint(TI);

    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
    if (AATags)
      Load->setAAMetadata(AATags);
    NewPN->addIncoming(Load, Pred);
    InjectedLoads[Pred] = Load;
  }

  LLVM_DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");
  PN.eraseFromParent();
}

SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
  if (isTrueVal)
    Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
  else
    Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
  return *this;
}

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
  return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
                   : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
}

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) &&
         isSpeculatable(/*isTrueVal=*/false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) ||
         isSpeculatable(/*isTrueVal=*/false);
}

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static SelectHandSpeculativity
isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
  assert(LI.isSimple() && "Only for simple loads");
  SelectHandSpeculativity Spec;

  const DataLayout &DL = SI.getDataLayout();
  for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
    if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
                                    &LI))
      Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
    else if (PreserveCFG)
      return Spec;

  return Spec;
}

std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
    if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
      U = *BC->user_begin();

    if (auto *Store = dyn_cast<StoreInst>(U)) {
      // Note that atomic stores can be transformed; atomic semantics do not
      // have any meaning for a local alloca. Stores are not speculatable,
      // however, so if we can't turn it into a predicated store, we are done.
      if (Store->isVolatile() || PreserveCFG)
        return {}; // Give up on this `select`.
      Ops.emplace_back(Store);
      continue;
    }

    auto *LI = dyn_cast<LoadInst>(U);

    // Note that atomic loads can be transformed;
    // atomic semantics do not have any meaning for a local alloca.
    if (!LI || LI->isVolatile())
      return {}; // Give up on this `select`.

    PossiblySpeculatableLoad Load(LI);
    if (!LI->isSimple()) {
      // If the `load` is not simple, we can't speculatively execute it,
      // but we could handle this via a CFG modification. But can we?
      if (PreserveCFG)
        return {}; // Give up on this `select`.
      Ops.emplace_back(Load);
      continue;
    }

    SelectHandSpeculativity Spec =
        isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
    if (PreserveCFG && !Spec.areAllSpeculatable())
      return {}; // Give up on this `select`.

    Load.setInt(Spec);
    Ops.emplace_back(Load);
  }

  return Ops;
}

static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
                                     IRBuilderTy &IRB) {
  LLVM_DEBUG(dbgs() << "    original load: " << SI << "\n");

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();
  // Replace the given load of the select with a select of two loads.

  assert(LI.isSimple() && "We only speculate simple loads");

  IRB.SetInsertPoint(&LI);

  LoadInst *TL =
      IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
                            LI.getName() + ".sroa.speculate.load.true");
  LoadInst *FL =
      IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
                            LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  // Transfer alignment and AA info if present.
  TL->setAlignment(LI.getAlign());
  FL->setAlignment(LI.getAlign());

  AAMDNodes Tags = LI.getAAMetadata();
  if (Tags) {
    TL->setAAMetadata(Tags);
    FL->setAAMetadata(Tags);
  }

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated");

  LLVM_DEBUG(dbgs() << "          speculated to: " << *V << "\n");
  LI.replaceAllUsesWith(V);
}
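
// For example (names abbreviated), "%v = load i32, ptr %sel" where
// "%sel = select i1 %c, ptr %a, ptr %b" becomes:
//   %v.true  = load i32, ptr %a
//   %v.false = load i32, ptr %b
//   %v       = select i1 %c, i32 %v.true, i32 %v.false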

template <typename T>
static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
                                 SelectHandSpeculativity Spec,
                                 DomTreeUpdater &DTU) {
  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
  LLVM_DEBUG(dbgs() << "    original mem op: " << I << "\n");
  BasicBlock *Head = I.getParent();
  Instruction *ThenTerm = nullptr;
  Instruction *ElseTerm = nullptr;
  if (Spec.areNoneSpeculatable())
    SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
                                  SI.getMetadata(LLVMContext::MD_prof), &DTU);
  else {
    SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
                              SI.getMetadata(LLVMContext::MD_prof), &DTU,
                              /*LI=*/nullptr, /*ThenBlock=*/nullptr);
    if (Spec.isSpeculatable(/*isTrueVal=*/true))
      cast<BranchInst>(Head->getTerminator())->swapSuccessors();
  }
  auto *HeadBI = cast<BranchInst>(Head->getTerminator());
  Spec = {}; // Do not use `Spec` beyond this point.
  BasicBlock *Tail = I.getParent();
  Tail->setName(Head->getName() + ".cont");
  PHINode *PN;
  if (isa<LoadInst>(I))
    PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
  for (BasicBlock *SuccBB : successors(Head)) {
    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
      if (isa<LoadInst>(I))
        ++NumLoadsPredicated;
      else
        ++NumStoresPredicated;
    } else {
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;
    }
    CondMemOp.insertBefore(NewMemOpBB->getTerminator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
    if (isa<LoadInst>(I)) {
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
      PN->addIncoming(&CondMemOp, NewMemOpBB);
    } else
      LLVM_DEBUG(dbgs() << "                 to: " << CondMemOp << "\n");
  }
  if (isa<LoadInst>(I)) {
    PN->takeName(&I);
    LLVM_DEBUG(dbgs() << "          to: " << *PN << "\n");
    I.replaceAllUsesWith(PN);
  }
}
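
// For a store where neither hand is speculatable, the rewrite above produces
// roughly this CFG (block names derive from the original block's name):
//   head:       br i1 %cond, label %head.then, label %head.else
//   head.then:  store i32 %v, ptr %a
//               br label %head.cont
//   head.else:  store i32 %v, ptr %b
//               br label %head.cont
//   head.cont:  ...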
".then" : ".else")); 1816bdd1243dSDimitry Andric if (isa<LoadInst>(I)) 1817bdd1243dSDimitry Andric ++NumLoadsPredicated; 1818bdd1243dSDimitry Andric else 1819bdd1243dSDimitry Andric ++NumStoresPredicated; 18209e7101a8SDimitry Andric } else { 182106c3fb27SDimitry Andric CondMemOp.dropUBImplyingAttrsAndMetadata(); 1822bdd1243dSDimitry Andric ++NumLoadsSpeculated; 18239e7101a8SDimitry Andric } 1824bdd1243dSDimitry Andric CondMemOp.insertBefore(NewMemOpBB->getTerminator()); 1825bdd1243dSDimitry Andric Value *Ptr = SI.getOperand(1 + SuccIdx); 1826bdd1243dSDimitry Andric CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr); 1827bdd1243dSDimitry Andric if (isa<LoadInst>(I)) { 1828bdd1243dSDimitry Andric CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val"); 1829bdd1243dSDimitry Andric PN->addIncoming(&CondMemOp, NewMemOpBB); 1830bdd1243dSDimitry Andric } else 1831bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n"); 1832bdd1243dSDimitry Andric } 1833bdd1243dSDimitry Andric if (isa<LoadInst>(I)) { 1834bdd1243dSDimitry Andric PN->takeName(&I); 1835bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *PN << "\n"); 1836bdd1243dSDimitry Andric I.replaceAllUsesWith(PN); 1837bdd1243dSDimitry Andric } 1838bdd1243dSDimitry Andric } 1839bdd1243dSDimitry Andric 1840bdd1243dSDimitry Andric static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I, 18415f757f3fSDimitry Andric SelectHandSpeculativity Spec, 1842bdd1243dSDimitry Andric DomTreeUpdater &DTU) { 1843bdd1243dSDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) 1844bdd1243dSDimitry Andric rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU); 1845bdd1243dSDimitry Andric else if (auto *SI = dyn_cast<StoreInst>(&I)) 1846bdd1243dSDimitry Andric rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU); 1847bdd1243dSDimitry Andric else 1848bdd1243dSDimitry Andric llvm_unreachable_internal("Only for load and store."); 1849bdd1243dSDimitry Andric } 1850bdd1243dSDimitry Andric 1851bdd1243dSDimitry Andric static bool rewriteSelectInstMemOps(SelectInst &SI, 18525f757f3fSDimitry Andric const RewriteableMemOps &Ops, 1853bdd1243dSDimitry Andric IRBuilderTy &IRB, DomTreeUpdater *DTU) { 1854bdd1243dSDimitry Andric bool CFGChanged = false; 1855bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " original select: " << SI << "\n"); 1856bdd1243dSDimitry Andric 1857bdd1243dSDimitry Andric for (const RewriteableMemOp &Op : Ops) { 18585f757f3fSDimitry Andric SelectHandSpeculativity Spec; 1859bdd1243dSDimitry Andric Instruction *I; 1860bdd1243dSDimitry Andric if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) { 1861bdd1243dSDimitry Andric I = *US; 1862bdd1243dSDimitry Andric } else { 1863bdd1243dSDimitry Andric auto PSL = std::get<PossiblySpeculatableLoad>(Op); 1864bdd1243dSDimitry Andric I = PSL.getPointer(); 1865bdd1243dSDimitry Andric Spec = PSL.getInt(); 1866bdd1243dSDimitry Andric } 1867bdd1243dSDimitry Andric if (Spec.areAllSpeculatable()) { 1868bdd1243dSDimitry Andric speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB); 1869bdd1243dSDimitry Andric } else { 1870bdd1243dSDimitry Andric assert(DTU && "Should not get here when not allowed to modify the CFG!"); 1871bdd1243dSDimitry Andric rewriteMemOpOfSelect(SI, *I, Spec, *DTU); 1872bdd1243dSDimitry Andric CFGChanged = true; 1873bdd1243dSDimitry Andric } 1874bdd1243dSDimitry Andric I->eraseFromParent(); 1875bdd1243dSDimitry Andric } 1876bdd1243dSDimitry Andric 1877bdd1243dSDimitry Andric for (User *U : make_early_inc_range(SI.users())) 1878bdd1243dSDimitry Andric 
cast<BitCastInst>(U)->eraseFromParent(); 18790b57cec5SDimitry Andric SI.eraseFromParent(); 1880bdd1243dSDimitry Andric return CFGChanged; 18810b57cec5SDimitry Andric } 18820b57cec5SDimitry Andric 18830b57cec5SDimitry Andric /// Compute an adjusted pointer from Ptr by Offset bytes where the 18840b57cec5SDimitry Andric /// resulting pointer has PointerTy. 18850b57cec5SDimitry Andric static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, 1886e8d8bef9SDimitry Andric APInt Offset, Type *PointerTy, 1887e8d8bef9SDimitry Andric const Twine &NamePrefix) { 1888349cc55cSDimitry Andric if (Offset != 0) 18897a6dacacSDimitry Andric Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset), 1890349cc55cSDimitry Andric NamePrefix + "sroa_idx"); 1891349cc55cSDimitry Andric return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy, 1892349cc55cSDimitry Andric NamePrefix + "sroa_cast"); 1893349cc55cSDimitry Andric } 1894349cc55cSDimitry Andric 18950b57cec5SDimitry Andric /// Compute the adjusted alignment for a load or store from an offset. 18965ffd83dbSDimitry Andric static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) { 18975ffd83dbSDimitry Andric return commonAlignment(getLoadStoreAlignment(I), Offset); 18980b57cec5SDimitry Andric } 18990b57cec5SDimitry Andric 19000b57cec5SDimitry Andric /// Test whether we can convert a value from the old to the new type. 19010b57cec5SDimitry Andric /// 19020b57cec5SDimitry Andric /// This predicate should be used to guard calls to convertValue in order to 19030b57cec5SDimitry Andric /// ensure that we only try to convert viable values. The strategy is that we 19040b57cec5SDimitry Andric /// will peel off single element struct and array wrappings to get to an 19050b57cec5SDimitry Andric /// underlying value, and convert that value. 19060b57cec5SDimitry Andric static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { 19070b57cec5SDimitry Andric if (OldTy == NewTy) 19080b57cec5SDimitry Andric return true; 19090b57cec5SDimitry Andric 19100b57cec5SDimitry Andric // For integer types, we can't handle any bit-width differences. This would 19110b57cec5SDimitry Andric // break both vector conversions with extension and introduce endianness 19120b57cec5SDimitry Andric // issues when in conjunction with loads and stores. 19130b57cec5SDimitry Andric if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) { 19140b57cec5SDimitry Andric assert(cast<IntegerType>(OldTy)->getBitWidth() != 19150b57cec5SDimitry Andric cast<IntegerType>(NewTy)->getBitWidth() && 19160b57cec5SDimitry Andric "We can't have the same bitwidth for different int types"); 19170b57cec5SDimitry Andric return false; 19180b57cec5SDimitry Andric } 19190b57cec5SDimitry Andric 1920bdd1243dSDimitry Andric if (DL.getTypeSizeInBits(NewTy).getFixedValue() != 1921bdd1243dSDimitry Andric DL.getTypeSizeInBits(OldTy).getFixedValue()) 19220b57cec5SDimitry Andric return false; 19230b57cec5SDimitry Andric if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) 19240b57cec5SDimitry Andric return false; 19250b57cec5SDimitry Andric 19260b57cec5SDimitry Andric // We can convert pointers to integers and vice-versa. Same for vectors 19270b57cec5SDimitry Andric // of pointers and integers. 
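  // For example, with 64-bit pointers in an integral address space, i64 and
  // ptr are inter-convertible here, as are <2 x i64> and <2 x ptr>.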
  OldTy = OldTy->getScalarType();
  NewTy = NewTy->getScalarType();
  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
    if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
      unsigned OldAS = OldTy->getPointerAddressSpace();
      unsigned NewAS = NewTy->getPointerAddressSpace();
      // Convert pointers if they are pointers from the same address space or
      // different integral (not non-integral) address spaces with the same
      // pointer size.
      return OldAS == NewAS ||
             (!DL.isNonIntegralAddressSpace(OldAS) &&
              !DL.isNonIntegralAddressSpace(NewAS) &&
              DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
    }

    // We can convert integers to integral pointers, but not to non-integral
    // pointers.
    if (OldTy->isIntegerTy())
      return !DL.isNonIntegralPointerType(NewTy);

    // We can convert integral pointers to integers, but non-integral pointers
    // need to remain pointers.
    if (!DL.isNonIntegralPointerType(OldTy))
      return NewTy->isIntegerTy();

    return false;
  }

  if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
    return false;

  return true;
}

/// Generic routine to convert an SSA value to a value of a different
/// type.
///
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                           Type *NewTy) {
  Type *OldTy = V->getType();
  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertible to type");

  if (OldTy == NewTy)
    return V;

  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
         "Integer types must be the exact same to convert.");

  // See if we need inttoptr for this type pair. May require additional bitcast.
  if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
    // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
    // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
    // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
    // Directly handle i64 to i8*
    return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
                              NewTy);
  }

  // See if we need ptrtoint for this type pair. May require additional bitcast.
  if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
    // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
    // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
    // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
    // Expand i8* to i64 --> i8* to i64 to i64
    return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                             NewTy);
  }

  if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
    unsigned OldAS = OldTy->getPointerAddressSpace();
    unsigned NewAS = NewTy->getPointerAddressSpace();
    // To convert pointers with different address spaces (which were already
    // checked to be convertible, i.e. they have the same pointer size), we
    // cannot use `bitcast` (which requires the same address space) or
    // `addrspacecast` (which is not always a no-op cast). Instead, use a pair
    // of no-op `ptrtoint`/`inttoptr` casts through an integer with the same
    // bit size.
    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                                NewTy);
    }
  }

  return IRB.CreateBitCast(V, NewTy);
}

/// Test whether the given slice use can be promoted to a vector.
///
/// This function is called to test each entry in a partition which is slated
/// for a single slice.
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
                                            VectorType *Ty,
                                            uint64_t ElementSize,
                                            const DataLayout &DL) {
  // First validate the slice offsets.
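  // The slice must start and end exactly on element boundaries of the
  // candidate vector type and must lie within the vector's bounds.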
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
    return false;
  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||
      EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
    return false;

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()
                      : FixedVectorType::get(Ty->getElementType(), NumElements);

  Type *SplitIntTy =
      Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);

  Use *U = S.getUse();

  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile())
      return false;
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
      return false;
  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (LI->isVolatile())
      return false;
    Type *LTy = LI->getType();
    // Disable vector promotion when there are loads or stores of an FCA.
    if (LTy->isStructTy())
      return false;
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());
      LTy = SplitIntTy;
    }
    if (!canConvertValue(DL, SliceTy, LTy))
      return false;
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    if (SI->isVolatile())
      return false;
    Type *STy = SI->getValueOperand()->getType();
    // Disable vector promotion when there are loads or stores of an FCA.
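    // (An FCA is a "first-class aggregate": a struct value loaded or stored
    // as a whole, e.g. a {i32, float}.)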
    if (STy->isStructTy())
      return false;
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(STy->isIntegerTy());
      STy = SplitIntTy;
    }
    if (!canConvertValue(DL, STy, SliceTy))
      return false;
  } else {
    return false;
  }

  return true;
}

/// Test whether a vector type is viable for promotion.
///
/// This implements the necessary checking for \c checkVectorTypesForPromotion
/// (and thus isVectorPromotionViable) over all slices of the alloca for the
/// given VectorType.
static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
                                        const DataLayout &DL) {
  uint64_t ElementSize =
      DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

  // While the definition of LLVM vectors is bitpacked, we don't support sizes
  // that aren't byte sized.
  if (ElementSize % 8)
    return false;
  assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
         "vector size not a multiple of element size?");
  ElementSize /= 8;

  for (const Slice &S : P)
    if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
      return false;

  for (const Slice *S : P.splitSliceTails())
    if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
      return false;

  return true;
}

/// Test whether any vector type in \p CandidateTys is viable for promotion.
///
/// This implements the necessary checking for \c isVectorPromotionViable over
/// all slices of the alloca for the given VectorType.
static VectorType *
checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
                             SmallVectorImpl<VectorType *> &CandidateTys,
                             bool HaveCommonEltTy, Type *CommonEltTy,
                             bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
                             VectorType *CommonVecPtrTy) {
  // If we didn't find a vector type, nothing to do here.
  if (CandidateTys.empty())
    return nullptr;

  // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
  // then we should choose it, not some other alternative.
  // But, we can't perform a no-op pointer address space change via bitcast,
  // so if we didn't have a common pointer element type, bail.
  if (HaveVecPtrTy && !HaveCommonVecPtrTy)
    return nullptr;

  // Try to pick the "best" element type out of the choices.
  if (!HaveCommonEltTy && HaveVecPtrTy) {
    // If there was a pointer element type, there's really only one choice.
    CandidateTys.clear();
    CandidateTys.push_back(CommonVecPtrTy);
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
    // Integer-ify vector types.
    for (VectorType *&VTy : CandidateTys) {
      if (!VTy->getElementType()->isIntegerTy())
        VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
            VTy->getContext(), VTy->getScalarSizeInBits())));
    }

    // Rank the remaining candidate vector types. This is easy because we know
    // they're all integer vectors. We sort by ascending number of elements.
    auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
      (void)DL;
      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      return cast<FixedVectorType>(RHSTy)->getNumElements() <
             cast<FixedVectorType>(LHSTy)->getNumElements();
    };
    auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
      (void)DL;
      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      return cast<FixedVectorType>(RHSTy)->getNumElements() ==
             cast<FixedVectorType>(LHSTy)->getNumElements();
    };
    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());
  } else {
    // The only way to have the same element type in every vector type is to
    // have the same vector type. Check that and remove all but one.
#ifndef NDEBUG
    for (VectorType *VTy : CandidateTys) {
      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    }
#endif
    CandidateTys.resize(1);
  }

  // FIXME: hack. Do we have a named constant for this?
  // SDAG SDNode can't have more than 65535 operands.
  llvm::erase_if(CandidateTys, [](VectorType *VTy) {
    return cast<FixedVectorType>(VTy)->getNumElements() >
           std::numeric_limits<unsigned short>::max();
  });

  for (VectorType *VTy : CandidateTys)
    if (checkVectorTypeForPromotion(P, VTy, DL))
      return VTy;

  return nullptr;
}

static VectorType *createAndCheckVectorTypesForPromotion(
    SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
    function_ref<void(Type *)> CheckCandidateType, Partition &P,
    const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
  [[maybe_unused]] VectorType *OriginalElt =
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
  // Consider additional vector types where the element type size is a
  // multiple of load/store element size.
  for (Type *Ty : OtherTys) {
    if (!VectorType::isValidElementType(Ty))
      continue;
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
    // Make a copy of CandidateTys and iterate through it, because we
    // might append to CandidateTys in the loop.
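    // For example, given an existing candidate <4 x i32> (VectorSize 128) and
    // an i64 load (TypeSize 64), 64 divides 128 evenly, so <2 x i64> is formed
    // and handed to CheckCandidateType as an additional candidate.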
    for (VectorType *const VTy : CandidateTysCopy) {
      // The elements in the copy should remain invariant throughout the loop.
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
      if (TypeSize != VectorSize && TypeSize != ElementSize &&
          VectorSize % TypeSize == 0) {
        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
        CheckCandidateType(NewVTy);
      }
    }
  }

  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
                                      CommonEltTy, HaveVecPtrTy,
                                      HaveCommonVecPtrTy, CommonVecPtrTy);
}

/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into a vector alloca with only
/// whole-vector loads and stores such that it could be promoted to a vector
/// SSA value. We can only ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
  // Collect the candidate types for vector-based promotion. Also track whether
  // we have different element types.
  SmallVector<VectorType *, 4> CandidateTys;
  SetVector<Type *> LoadStoreTys;
  SetVector<Type *> DeferredTys;
  Type *CommonEltTy = nullptr;
  VectorType *CommonVecPtrTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      // Bail if this candidate's total size in bits differs from the existing
      // candidates'; all candidates must be bitcast-compatible.
      if (!CandidateTys.empty()) {
        VectorType *V = CandidateTys[0];
        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();
          return;
        }
      }
      CandidateTys.push_back(VTy);
      Type *EltTy = VTy->getElementType();

      if (!CommonEltTy)
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

      if (EltTy->isPointerTy()) {
        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;
      }
    }
  };

  // Put load and store types into a set for de-duplication.
  for (const Slice &S : P) {
    Type *Ty;
    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
      Ty = LI->getType();
    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
      Ty = SI->getValueOperand()->getType();
    else
      continue;

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {
      DeferredTys.insert(Ty);
      continue;
    }

    LoadStoreTys.insert(Ty);
    // Consider any loads or stores that are the exact size of the slice.
    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);
  }

  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
  if (auto *VTy = createAndCheckVectorTypesForPromotion(
          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy))
    return VTy;

  CandidateTys.clear();
  return createAndCheckVectorTypesForPromotion(
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy);
}

/// Test whether a slice of an alloca is valid for integer widening.
///
/// This implements the necessary checking for the \c isIntegerWideningViable
/// test below on a single slice of the alloca.
static bool isIntegerWideningViableForSlice(const Slice &S,
                                            uint64_t AllocBeginOffset,
                                            Type *AllocaTy,
                                            const DataLayout &DL,
                                            bool &WholeAllocaOp) {
  uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();

  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

  // Lifetime intrinsics operate over the whole alloca, whose size is usually
  // larger than other load/store slices (RelEnd > Size). But lifetime
  // intrinsics are always promotable and should not impact the promotability
  // of the partition's other slices.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
    if (II->isLifetimeStartOrEnd() || II->isDroppable())
      return true;
  }

  // We can't reasonably handle cases where the load or store extends past
  // the end of the alloca's type and into its padding.
  if (RelEnd > Size)
    return false;

  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (LI->isVolatile())
      return false;
    // We can't handle loads that extend past the allocated memory.
    if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)
      return false;
    // So far, AllocaSliceRewriter does not support widening split slice tails
    // in rewriteIntegerLoad.
    if (S.beginOffset() < AllocBeginOffset)
      return false;
    // Note that we don't count vector loads or stores as whole-alloca
    // operations which enable integer widening because we would prefer to use
    // vector widening instead.
    if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
        return false;
    } else if (RelBegin != 0 || RelEnd != Size ||
               !canConvertValue(DL, AllocaTy, LI->getType())) {
      // Non-integer loads need to be convertible from the alloca type so that
      // they are promotable.
      return false;
    }
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())
      return false;
    // We can't handle stores that extend past the allocated memory.
    if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size)
      return false;
    // So far, AllocaSliceRewriter does not support widening split slice tails
    // in rewriteIntegerStore.
    if (S.beginOffset() < AllocBeginOffset)
      return false;
    // Note that we don't count vector loads or stores as whole-alloca
    // operations which enable integer widening because we would prefer to use
    // vector widening instead.
    if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
        return false;
    } else if (RelBegin != 0 || RelEnd != Size ||
               !canConvertValue(DL, ValueTy, AllocaTy)) {
      // Non-integer stores need to be convertible to the alloca type so that
      // they are promotable.
      return false;
    }
  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
      return false;
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
  } else {
    return false;
  }

  return true;
}

/// Test whether the given alloca partition's integer operations can be
/// widened to promotable ones.
///
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
                                    const DataLayout &DL) {
  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
  // Don't create integer types larger than the maximum bitwidth.
  if (SizeInBits > IntegerType::MAX_INT_BITS)
    return false;

  // Don't try to handle allocas with bit-padding.
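  // (That is, reject types whose store size exceeds their declared bit width;
  // an i17, for example, is 17 bits wide but occupies 24 bits of storage.)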
  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
    return false;

  // We need to ensure that an integer type with the appropriate bitwidth can
  // be converted to the alloca type, whatever that is. We don't want to force
  // the alloca itself to have an integer type if there is a more suitable one.
  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
  if (!canConvertValue(DL, AllocaTy, IntTy) ||
      !canConvertValue(DL, IntTy, AllocaTy))
    return false;

  // While examining uses, we ensure that the alloca has a covering load or
  // store. We don't want to widen the integer operations only to fail to
  // promote due to some other unsplittable entry (which we may make splittable
  // later). However, if there are only splittable uses, go ahead and assume
  // that we cover the alloca.
  // FIXME: We shouldn't consider split slices that happen to start in the
  // partition here...
  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)
    if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
                                         WholeAllocaOp))
      return false;

  for (const Slice *S : P.splitSliceTails())
    if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
                                         WholeAllocaOp))
      return false;

  return WholeAllocaOp;
}

static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                             IntegerType *Ty, uint64_t Offset,
                             const Twine &Name) {
  LLVM_DEBUG(dbgs() << "       start: " << *V << "\n");
  IntegerType *IntTy = cast<IntegerType>(V->getType());
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt) {
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
    LLVM_DEBUG(dbgs() << "     shifted: " << *V << "\n");
  }
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
  if (Ty != IntTy) {
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
    LLVM_DEBUG(dbgs() << "     trunced: " << *V << "\n");
  }
  return V;
}

static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
                            Value *V, uint64_t Offset, const Twine &Name) {
  IntegerType *IntTy = cast<IntegerType>(Old->getType());
  IntegerType *Ty = cast<IntegerType>(V->getType());
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
  LLVM_DEBUG(dbgs() << "       start: " << *V << "\n");
  if (Ty != IntTy) {
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
    LLVM_DEBUG(dbgs() << "    extended: " << *V << "\n");
  }
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt) {
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");
    LLVM_DEBUG(dbgs() << "     shifted: " << *V << "\n");
  }

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    LLVM_DEBUG(dbgs() << "      masked: " << *Old << "\n");
    V = IRB.CreateOr(Old, V, Name + ".insert");
    LLVM_DEBUG(dbgs() << "    inserted: " << *V << "\n");
  }
  return V;
}

static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
                            unsigned EndIndex, const Twine &Name) {
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;
  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

  if (NumElements == VecTy->getNumElements())
    return V;

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
                                 Name + ".extract");
    LLVM_DEBUG(dbgs() << "     extract: " << *V << "\n");
    return V;
  }

  auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
  LLVM_DEBUG(dbgs() << "     shuffle: " << *V << "\n");
  return V;
}

static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
                           unsigned BeginIndex, const Twine &Name) {
  VectorType *VecTy = cast<VectorType>(Old->getType());
  assert(VecTy && "Can only insert a vector into a vector");

  VectorType *Ty = dyn_cast<VectorType>(V->getType());
  if (!Ty) {
    // Single element to insert.
    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
                                Name + ".insert");
    LLVM_DEBUG(dbgs() << "     insert: " << *V << "\n");
    return V;
  }

  assert(cast<FixedVectorType>(Ty)->getNumElements() <=
             cast<FixedVectorType>(VecTy)->getNumElements() &&
         "Too many elements!");
  if (cast<FixedVectorType>(Ty)->getNumElements() ==
      cast<FixedVectorType>(VecTy)->getNumElements()) {
    assert(V->getType() == VecTy && "Vector type mismatch");
    return V;
  }
  unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();

  // When inserting a smaller vector into the larger one for the store, we
  // first use a shuffle vector to widen it with undef elements, and then
  // a select with a constant mask to choose between the widened incoming
  // vector and the loaded vector.
  SmallVector<int, 8> Mask;
  Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
    else
      Mask.push_back(-1);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
  LLVM_DEBUG(dbgs() << "    shuffle: " << *V << "\n");

  SmallVector<Constant *, 8> Mask2;
  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));

  V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");

  LLVM_DEBUG(dbgs() << "    blend: " << *V << "\n");
  return V;
}

namespace {

/// Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  // Befriend the base class so it can delegate to private visit methods.
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;
  AllocaSlices &AS;
  SROA &Pass;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
  Type *NewAllocaTy;

  // This is a convenience and flag variable that will be null unless the new
  // alloca's integer operations should be widened to this integer type due to
  // passing isIntegerWideningViable above. If it is non-null, the desired
  // integer type will be stored here for easy access during rewriting.
  IntegerType *IntTy;

  // If we are rewriting an alloca partition which can be written as pure
  // vector operations, we stash extra information here. When VecTy is
  // non-null, we have some strict guarantees about the rewritten alloca:
  //   - The new alloca is exactly the size of the vector type here.
  //   - The accesses all either map to the entire vector or to a single
  //     element.
  //   - The set of accessing instructions is only one of those handled above
  //     in isVectorPromotionViable. Generally these are the same access kinds
  //     which are promotable via mem2reg.
  VectorType *VecTy;
  Type *ElementTy;
  uint64_t ElementSize;

  // The original offset of the slice currently being rewritten relative to
  // the original alloca.
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  // The new offsets of the slice currently being rewritten relative to the
  // original alloca.
  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;
  Instruction *OldPtr = nullptr;

  // Track post-rewrite users which are PHI nodes and Selects.
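  // These are gathered while rewriting uses and revisited once the partition
  // has been rewritten, to decide whether they can themselves be speculated
  // or whether they block promotion.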
  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  // Utility IR builder, whose name prefix is setup for each visited use, and
  // the insertion point is set to point to the user.
  IRBuilderTy IRB;

  // Return the new alloca, addrspacecasted if required to avoid changing the
  // addrspace of a volatile access.
  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
      return &NewAI;

    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
  }

public:
  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        IntTy(
            IsIntegerPromotable
                ? Type::getIntNTy(NewAI.getContext(),
                                  DL.getTypeSizeInBits(NewAI.getAllocatedType())
                                      .getFixedValue())
                : nullptr),
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
                          : 0),
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
        IRB(NewAI.getContext(), ConstantFolder()) {
    if (VecTy) {
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
      ++NumVectorized;
    }
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
  }

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
    IsSplit =
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << "  rewriting " << (IsSplit ? "split " : ""));
    LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
    LLVM_DEBUG(dbgs() << "\n");

    // Compute the intersecting offset range.
    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << "   Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
                      << ")\n");
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();
    OldPtr = cast<Instruction>(OldUse->get());

    Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");

    CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
    if (VecTy || IntTy)
      assert(CanSROA);
    return CanSROA;
  }

private:
  // Make sure the other visit overloads are visible.
  using Base::visit;

  // Every instruction which can end up as a user must have a rewrite rule.
  bool visitInstruction(Instruction &I) {
    LLVM_DEBUG(dbgs() << "    !!!! Cannot rewrite: " << I << "\n");
    llvm_unreachable("No rewrite rule for this instruction!");
  }

  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
    // Note that the offset computation can use BeginOffset or NewBeginOffset
    // interchangeably for unsplit slices.
    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

#ifndef NDEBUG
    StringRef OldName = OldPtr->getName();
    // Skip through the last '.sroa.' component of the name.
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
    if (LastSROAPrefix != StringRef::npos) {
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
      // Look for an SROA slice index.
      size_t IndexEnd = OldName.find_first_not_of("0123456789");
      if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
        // Strip the index and look for the offset.
        OldName = OldName.substr(IndexEnd + 1);
        size_t OffsetEnd = OldName.find_first_not_of("0123456789");
        if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
          // Strip the offset.
          OldName = OldName.substr(OffsetEnd + 1);
      }
    }
    // Strip any SROA suffixes as well.
    OldName = OldName.substr(0, OldName.find(".sroa_"));
#endif

    return getAdjustedPtr(IRB, DL, &NewAI,
                          APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
                          PointerTy,
#ifndef NDEBUG
                          Twine(OldName) + "."
#else
                          Twine()
#endif
    );
  }

  /// Compute a suitable alignment to access this slice of the *new* alloca.
  ///
  /// This is the common alignment of the new alloca and the slice's byte
  /// offset into it.
27965ffd83dbSDimitry Andric Align getSliceAlign() { 27975ffd83dbSDimitry Andric return commonAlignment(NewAI.getAlign(), 27985ffd83dbSDimitry Andric NewBeginOffset - NewAllocaBeginOffset); 27990b57cec5SDimitry Andric } 28000b57cec5SDimitry Andric 28010b57cec5SDimitry Andric unsigned getIndex(uint64_t Offset) { 28020b57cec5SDimitry Andric assert(VecTy && "Can only call getIndex when rewriting a vector"); 28030b57cec5SDimitry Andric uint64_t RelOffset = Offset - NewAllocaBeginOffset; 28040b57cec5SDimitry Andric assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds"); 28050b57cec5SDimitry Andric uint32_t Index = RelOffset / ElementSize; 28060b57cec5SDimitry Andric assert(Index * ElementSize == RelOffset); 28070b57cec5SDimitry Andric return Index; 28080b57cec5SDimitry Andric } 28090b57cec5SDimitry Andric 28100b57cec5SDimitry Andric void deleteIfTriviallyDead(Value *V) { 28110b57cec5SDimitry Andric Instruction *I = cast<Instruction>(V); 28120b57cec5SDimitry Andric if (isInstructionTriviallyDead(I)) 2813e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(I); 28140b57cec5SDimitry Andric } 28150b57cec5SDimitry Andric 2816fe6060f1SDimitry Andric Value *rewriteVectorizedLoadInst(LoadInst &LI) { 28170b57cec5SDimitry Andric unsigned BeginIndex = getIndex(NewBeginOffset); 28180b57cec5SDimitry Andric unsigned EndIndex = getIndex(NewEndOffset); 28190b57cec5SDimitry Andric assert(EndIndex > BeginIndex && "Empty vector!"); 28200b57cec5SDimitry Andric 2821fe6060f1SDimitry Andric LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 28225ffd83dbSDimitry Andric NewAI.getAlign(), "load"); 2823fe6060f1SDimitry Andric 2824fe6060f1SDimitry Andric Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, 2825fe6060f1SDimitry Andric LLVMContext::MD_access_group}); 2826fe6060f1SDimitry Andric return extractVector(IRB, Load, BeginIndex, EndIndex, "vec"); 28270b57cec5SDimitry Andric } 28280b57cec5SDimitry Andric 28290b57cec5SDimitry Andric Value *rewriteIntegerLoad(LoadInst &LI) { 28300b57cec5SDimitry Andric assert(IntTy && "We cannot insert an integer to the alloca"); 28310b57cec5SDimitry Andric assert(!LI.isVolatile()); 28320b57cec5SDimitry Andric Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 28335ffd83dbSDimitry Andric NewAI.getAlign(), "load"); 28340b57cec5SDimitry Andric V = convertValue(DL, IRB, V, IntTy); 28350b57cec5SDimitry Andric assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 28360b57cec5SDimitry Andric uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; 28370b57cec5SDimitry Andric if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) { 28380b57cec5SDimitry Andric IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8); 28390b57cec5SDimitry Andric V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract"); 28400b57cec5SDimitry Andric } 28410b57cec5SDimitry Andric // It is possible that the extracted type is not the load type. This 28420b57cec5SDimitry Andric // happens if there is a load past the end of the alloca, and as 28430b57cec5SDimitry Andric // a consequence the slice is narrower but still a candidate for integer 28440b57cec5SDimitry Andric // lowering. To handle this case, we just zero extend the extracted 28450b57cec5SDimitry Andric // integer. 
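// Illustrative example (hypothetical values, not from the source): an i64
// load that overhangs a 4-byte slice reaches this point as an i32 extract,
// which the code below widens:
//   %v.ext = zext i32 %v to i64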
28460b57cec5SDimitry Andric assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 && 28470b57cec5SDimitry Andric "Can only handle an extract for an overly wide load"); 28480b57cec5SDimitry Andric if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8) 28490b57cec5SDimitry Andric V = IRB.CreateZExt(V, LI.getType()); 28500b57cec5SDimitry Andric return V; 28510b57cec5SDimitry Andric } 28520b57cec5SDimitry Andric 28530b57cec5SDimitry Andric bool visitLoadInst(LoadInst &LI) { 28540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); 28550b57cec5SDimitry Andric Value *OldOp = LI.getOperand(0); 28560b57cec5SDimitry Andric assert(OldOp == OldPtr); 28570b57cec5SDimitry Andric 2858349cc55cSDimitry Andric AAMDNodes AATags = LI.getAAMetadata(); 28590b57cec5SDimitry Andric 28600b57cec5SDimitry Andric unsigned AS = LI.getPointerAddressSpace(); 28610b57cec5SDimitry Andric 28620b57cec5SDimitry Andric Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8) 28630b57cec5SDimitry Andric : LI.getType(); 28645ffd83dbSDimitry Andric const bool IsLoadPastEnd = 2865bdd1243dSDimitry Andric DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize; 28660b57cec5SDimitry Andric bool IsPtrAdjusted = false; 28670b57cec5SDimitry Andric Value *V; 28680b57cec5SDimitry Andric if (VecTy) { 2869fe6060f1SDimitry Andric V = rewriteVectorizedLoadInst(LI); 28700b57cec5SDimitry Andric } else if (IntTy && LI.getType()->isIntegerTy()) { 28710b57cec5SDimitry Andric V = rewriteIntegerLoad(LI); 28720b57cec5SDimitry Andric } else if (NewBeginOffset == NewAllocaBeginOffset && 28730b57cec5SDimitry Andric NewEndOffset == NewAllocaEndOffset && 28740b57cec5SDimitry Andric (canConvertValue(DL, NewAllocaTy, TargetTy) || 28750b57cec5SDimitry Andric (IsLoadPastEnd && NewAllocaTy->isIntegerTy() && 28765f757f3fSDimitry Andric TargetTy->isIntegerTy() && !LI.isVolatile()))) { 2877bdd1243dSDimitry Andric Value *NewPtr = 2878bdd1243dSDimitry Andric getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile()); 2879bdd1243dSDimitry Andric LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr, 28805ffd83dbSDimitry Andric NewAI.getAlign(), LI.isVolatile(), 28815ffd83dbSDimitry Andric LI.getName()); 28820b57cec5SDimitry Andric if (LI.isVolatile()) 28830b57cec5SDimitry Andric NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); 28848c27c554SDimitry Andric if (NewLI->isAtomic()) 28858c27c554SDimitry Andric NewLI->setAlignment(LI.getAlign()); 28860b57cec5SDimitry Andric 2887bdd1243dSDimitry Andric // Copy any metadata that is valid for the new load. This may require 2888bdd1243dSDimitry Andric // conversion to a different kind of metadata, e.g. !nonnull might change 2889bdd1243dSDimitry Andric // to !range or vice versa. 2890bdd1243dSDimitry Andric copyMetadataForLoad(*NewLI, LI); 2891bdd1243dSDimitry Andric 2892bdd1243dSDimitry Andric // Do this after copyMetadataForLoad() to preserve the TBAA shift. 
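// (Sketch of the intent, not a contract: adjustForAccess rebases
// offset-sensitive metadata such as !tbaa.struct by the slice-relative
// offset so the metadata still describes the narrowed access.)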
2893bdd1243dSDimitry Andric if (AATags) 2894*0fca6ea1SDimitry Andric NewLI->setAAMetadata(AATags.adjustForAccess( 2895*0fca6ea1SDimitry Andric NewBeginOffset - BeginOffset, NewLI->getType(), DL)); 28960b57cec5SDimitry Andric 28970b57cec5SDimitry Andric // Try to preserve nonnull metadata 28980b57cec5SDimitry Andric V = NewLI; 28990b57cec5SDimitry Andric 29000b57cec5SDimitry Andric // If this is an integer load past the end of the slice (which means the 29010b57cec5SDimitry Andric // bytes outside the slice are undef or this load is dead) just forcibly 29020b57cec5SDimitry Andric // fix the integer size with correct handling of endianness. 29030b57cec5SDimitry Andric if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy)) 29040b57cec5SDimitry Andric if (auto *TITy = dyn_cast<IntegerType>(TargetTy)) 29050b57cec5SDimitry Andric if (AITy->getBitWidth() < TITy->getBitWidth()) { 29060b57cec5SDimitry Andric V = IRB.CreateZExt(V, TITy, "load.ext"); 29070b57cec5SDimitry Andric if (DL.isBigEndian()) 29080b57cec5SDimitry Andric V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(), 29090b57cec5SDimitry Andric "endian_shift"); 29100b57cec5SDimitry Andric } 29110b57cec5SDimitry Andric } else { 29125f757f3fSDimitry Andric Type *LTy = IRB.getPtrTy(AS); 29135ffd83dbSDimitry Andric LoadInst *NewLI = 29145ffd83dbSDimitry Andric IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), 29155ffd83dbSDimitry Andric getSliceAlign(), LI.isVolatile(), LI.getName()); 2916*0fca6ea1SDimitry Andric 29170b57cec5SDimitry Andric if (AATags) 2918*0fca6ea1SDimitry Andric NewLI->setAAMetadata(AATags.adjustForAccess( 2919*0fca6ea1SDimitry Andric NewBeginOffset - BeginOffset, NewLI->getType(), DL)); 2920*0fca6ea1SDimitry Andric 29210b57cec5SDimitry Andric if (LI.isVolatile()) 29220b57cec5SDimitry Andric NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); 2923fe6060f1SDimitry Andric NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, 2924fe6060f1SDimitry Andric LLVMContext::MD_access_group}); 29250b57cec5SDimitry Andric 29260b57cec5SDimitry Andric V = NewLI; 29270b57cec5SDimitry Andric IsPtrAdjusted = true; 29280b57cec5SDimitry Andric } 29290b57cec5SDimitry Andric V = convertValue(DL, IRB, V, TargetTy); 29300b57cec5SDimitry Andric 29310b57cec5SDimitry Andric if (IsSplit) { 29320b57cec5SDimitry Andric assert(!LI.isVolatile()); 29330b57cec5SDimitry Andric assert(LI.getType()->isIntegerTy() && 29340b57cec5SDimitry Andric "Only integer type loads and stores are split"); 2935bdd1243dSDimitry Andric assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() && 29360b57cec5SDimitry Andric "Split load isn't smaller than original load"); 29370b57cec5SDimitry Andric assert(DL.typeSizeEqualsStoreSize(LI.getType()) && 29380b57cec5SDimitry Andric "Non-byte-multiple bit width"); 29390b57cec5SDimitry Andric // Move the insertion point just past the load so that we can refer to it. 2940*0fca6ea1SDimitry Andric BasicBlock::iterator LIIt = std::next(LI.getIterator()); 2941*0fca6ea1SDimitry Andric // Ensure the insertion point comes before any debug-info immediately 2942*0fca6ea1SDimitry Andric // after the load, so that variable values referring to the load are 2943*0fca6ea1SDimitry Andric // dominated by it. 2944*0fca6ea1SDimitry Andric LIIt.setHeadBit(true); 2945*0fca6ea1SDimitry Andric IRB.SetInsertPoint(LI.getParent(), LIIt); 29460b57cec5SDimitry Andric // Create a placeholder value with the same type as LI to use as the 29470b57cec5SDimitry Andric // basis for the new value. 
This allows us to replace the uses of LI with 29480b57cec5SDimitry Andric // the computed value, and then replace the placeholder with LI, leaving 29490b57cec5SDimitry Andric // LI only used for this computation. 29505f757f3fSDimitry Andric Value *Placeholder = 29515f757f3fSDimitry Andric new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "", 29525ffd83dbSDimitry Andric false, Align(1)); 29530b57cec5SDimitry Andric V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset, 29540b57cec5SDimitry Andric "insert"); 29550b57cec5SDimitry Andric LI.replaceAllUsesWith(V); 29560b57cec5SDimitry Andric Placeholder->replaceAllUsesWith(&LI); 29570b57cec5SDimitry Andric Placeholder->deleteValue(); 29580b57cec5SDimitry Andric } else { 29590b57cec5SDimitry Andric LI.replaceAllUsesWith(V); 29600b57cec5SDimitry Andric } 29610b57cec5SDimitry Andric 2962e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&LI); 29630b57cec5SDimitry Andric deleteIfTriviallyDead(OldOp); 29640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *V << "\n"); 29650b57cec5SDimitry Andric return !LI.isVolatile() && !IsPtrAdjusted; 29660b57cec5SDimitry Andric } 29670b57cec5SDimitry Andric 29680b57cec5SDimitry Andric bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp, 29690b57cec5SDimitry Andric AAMDNodes AATags) { 2970bdd1243dSDimitry Andric // Capture V for the purpose of debug-info accounting once it's converted 2971bdd1243dSDimitry Andric // to a vector store. 2972bdd1243dSDimitry Andric Value *OrigV = V; 29730b57cec5SDimitry Andric if (V->getType() != VecTy) { 29740b57cec5SDimitry Andric unsigned BeginIndex = getIndex(NewBeginOffset); 29750b57cec5SDimitry Andric unsigned EndIndex = getIndex(NewEndOffset); 29760b57cec5SDimitry Andric assert(EndIndex > BeginIndex && "Empty vector!"); 29770b57cec5SDimitry Andric unsigned NumElements = EndIndex - BeginIndex; 29785ffd83dbSDimitry Andric assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() && 29795ffd83dbSDimitry Andric "Too many elements!"); 29800b57cec5SDimitry Andric Type *SliceTy = (NumElements == 1) 29810b57cec5SDimitry Andric ? ElementTy 29825ffd83dbSDimitry Andric : FixedVectorType::get(ElementTy, NumElements); 29830b57cec5SDimitry Andric if (V->getType() != SliceTy) 29840b57cec5SDimitry Andric V = convertValue(DL, IRB, V, SliceTy); 29850b57cec5SDimitry Andric 29860b57cec5SDimitry Andric // Mix in the existing elements. 29870b57cec5SDimitry Andric Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 29885ffd83dbSDimitry Andric NewAI.getAlign(), "load"); 29890b57cec5SDimitry Andric V = insertVector(IRB, Old, V, BeginIndex, "vec"); 29900b57cec5SDimitry Andric } 29915ffd83dbSDimitry Andric StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); 2992fe6060f1SDimitry Andric Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, 2993fe6060f1SDimitry Andric LLVMContext::MD_access_group}); 29940b57cec5SDimitry Andric if (AATags) 2995*0fca6ea1SDimitry Andric Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, 2996*0fca6ea1SDimitry Andric V->getType(), DL)); 2997e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&SI); 29980b57cec5SDimitry Andric 2999bdd1243dSDimitry Andric // NOTE: Careful to use OrigV rather than V. 
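// (By this point V may have been rewritten above into a load/insertVector
// sequence; the debug-info update below wants the value the original store
// wrote, hence OrigV.)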
300006c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, 300106c3fb27SDimitry Andric Store, Store->getPointerOperand(), OrigV, DL); 30020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); 30030b57cec5SDimitry Andric return true; 30040b57cec5SDimitry Andric } 30050b57cec5SDimitry Andric 30060b57cec5SDimitry Andric bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) { 30070b57cec5SDimitry Andric assert(IntTy && "We cannot extract an integer from the alloca"); 30080b57cec5SDimitry Andric assert(!SI.isVolatile()); 3009bdd1243dSDimitry Andric if (DL.getTypeSizeInBits(V->getType()).getFixedValue() != 30105ffd83dbSDimitry Andric IntTy->getBitWidth()) { 30110b57cec5SDimitry Andric Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 30125ffd83dbSDimitry Andric NewAI.getAlign(), "oldload"); 30130b57cec5SDimitry Andric Old = convertValue(DL, IRB, Old, IntTy); 30140b57cec5SDimitry Andric assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); 30150b57cec5SDimitry Andric uint64_t Offset = BeginOffset - NewAllocaBeginOffset; 30160b57cec5SDimitry Andric V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert"); 30170b57cec5SDimitry Andric } 30180b57cec5SDimitry Andric V = convertValue(DL, IRB, V, NewAllocaTy); 30195ffd83dbSDimitry Andric StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); 30200b57cec5SDimitry Andric Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, 30210b57cec5SDimitry Andric LLVMContext::MD_access_group}); 30220b57cec5SDimitry Andric if (AATags) 3023*0fca6ea1SDimitry Andric Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, 3024*0fca6ea1SDimitry Andric V->getType(), DL)); 3025bdd1243dSDimitry Andric 302606c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, 302706c3fb27SDimitry Andric Store, Store->getPointerOperand(), 302806c3fb27SDimitry Andric Store->getValueOperand(), DL); 3029bdd1243dSDimitry Andric 3030e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&SI); 30310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); 30320b57cec5SDimitry Andric return true; 30330b57cec5SDimitry Andric } 30340b57cec5SDimitry Andric 30350b57cec5SDimitry Andric bool visitStoreInst(StoreInst &SI) { 30360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); 30370b57cec5SDimitry Andric Value *OldOp = SI.getOperand(1); 30380b57cec5SDimitry Andric assert(OldOp == OldPtr); 30390b57cec5SDimitry Andric 3040349cc55cSDimitry Andric AAMDNodes AATags = SI.getAAMetadata(); 30410b57cec5SDimitry Andric Value *V = SI.getValueOperand(); 30420b57cec5SDimitry Andric 30430b57cec5SDimitry Andric // Strip all inbounds GEPs and pointer casts to try to dig out any root 30440b57cec5SDimitry Andric // alloca that should be re-examined after promoting this alloca. 
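// Illustrative case (hypothetical IR): the stored value is `ptr %other`,
// reached from another alloca through inbounds GEPs; once this store is
// rewritten, that alloca may become promotable, so it is queued below.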
30450b57cec5SDimitry Andric if (V->getType()->isPointerTy()) 30460b57cec5SDimitry Andric if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) 30470b57cec5SDimitry Andric Pass.PostPromotionWorklist.insert(AI); 30480b57cec5SDimitry Andric 3049bdd1243dSDimitry Andric if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) { 30500b57cec5SDimitry Andric assert(!SI.isVolatile()); 30510b57cec5SDimitry Andric assert(V->getType()->isIntegerTy() && 30520b57cec5SDimitry Andric "Only integer type loads and stores are split"); 30530b57cec5SDimitry Andric assert(DL.typeSizeEqualsStoreSize(V->getType()) && 30540b57cec5SDimitry Andric "Non-byte-multiple bit width"); 30550b57cec5SDimitry Andric IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8); 30560b57cec5SDimitry Andric V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset, 30570b57cec5SDimitry Andric "extract"); 30580b57cec5SDimitry Andric } 30590b57cec5SDimitry Andric 30600b57cec5SDimitry Andric if (VecTy) 30610b57cec5SDimitry Andric return rewriteVectorizedStoreInst(V, SI, OldOp, AATags); 30620b57cec5SDimitry Andric if (IntTy && V->getType()->isIntegerTy()) 30630b57cec5SDimitry Andric return rewriteIntegerStore(V, SI, AATags); 30640b57cec5SDimitry Andric 30650b57cec5SDimitry Andric StoreInst *NewSI; 30660b57cec5SDimitry Andric if (NewBeginOffset == NewAllocaBeginOffset && 30670b57cec5SDimitry Andric NewEndOffset == NewAllocaEndOffset && 30685f757f3fSDimitry Andric canConvertValue(DL, V->getType(), NewAllocaTy)) { 30690b57cec5SDimitry Andric V = convertValue(DL, IRB, V, NewAllocaTy); 3070bdd1243dSDimitry Andric Value *NewPtr = 3071bdd1243dSDimitry Andric getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile()); 3072bdd1243dSDimitry Andric 30735ffd83dbSDimitry Andric NewSI = 3074bdd1243dSDimitry Andric IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile()); 30750b57cec5SDimitry Andric } else { 30760b57cec5SDimitry Andric unsigned AS = SI.getPointerAddressSpace(); 30775f757f3fSDimitry Andric Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS)); 30785ffd83dbSDimitry Andric NewSI = 30795ffd83dbSDimitry Andric IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile()); 30800b57cec5SDimitry Andric } 30810b57cec5SDimitry Andric NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, 30820b57cec5SDimitry Andric LLVMContext::MD_access_group}); 30830b57cec5SDimitry Andric if (AATags) 3084*0fca6ea1SDimitry Andric NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, 3085*0fca6ea1SDimitry Andric V->getType(), DL)); 30860b57cec5SDimitry Andric if (SI.isVolatile()) 30870b57cec5SDimitry Andric NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); 30888c27c554SDimitry Andric if (NewSI->isAtomic()) 30898c27c554SDimitry Andric NewSI->setAlignment(SI.getAlign()); 3090bdd1243dSDimitry Andric 309106c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, 309206c3fb27SDimitry Andric NewSI, NewSI->getPointerOperand(), 309306c3fb27SDimitry Andric NewSI->getValueOperand(), DL); 3094bdd1243dSDimitry Andric 3095e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&SI); 30960b57cec5SDimitry Andric deleteIfTriviallyDead(OldOp); 30970b57cec5SDimitry Andric 30980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n"); 3099349cc55cSDimitry Andric return NewSI->getPointerOperand() == &NewAI && 3100349cc55cSDimitry Andric NewSI->getValueOperand()->getType() == NewAllocaTy && 3101349cc55cSDimitry Andric 
!SI.isVolatile(); 31020b57cec5SDimitry Andric } 31030b57cec5SDimitry Andric 31040b57cec5SDimitry Andric /// Compute an integer value from splatting an i8 across the given 31050b57cec5SDimitry Andric /// number of bytes. 31060b57cec5SDimitry Andric /// 31070b57cec5SDimitry Andric /// Note that this routine assumes an i8 is a byte. If that isn't true, don't 31080b57cec5SDimitry Andric /// call this routine. 31090b57cec5SDimitry Andric /// FIXME: Heed the advice above. 31100b57cec5SDimitry Andric /// 31110b57cec5SDimitry Andric /// \param V The i8 value to splat. 31120b57cec5SDimitry Andric /// \param Size The number of bytes in the output (assuming i8 is one byte) 31130b57cec5SDimitry Andric Value *getIntegerSplat(Value *V, unsigned Size) { 31140b57cec5SDimitry Andric assert(Size > 0 && "Expected a positive number of bytes."); 31150b57cec5SDimitry Andric IntegerType *VTy = cast<IntegerType>(V->getType()); 31160b57cec5SDimitry Andric assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); 31170b57cec5SDimitry Andric if (Size == 1) 31180b57cec5SDimitry Andric return V; 31190b57cec5SDimitry Andric 31200b57cec5SDimitry Andric Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8); 31210b57cec5SDimitry Andric V = IRB.CreateMul( 31220b57cec5SDimitry Andric IRB.CreateZExt(V, SplatIntTy, "zext"), 312381ad6265SDimitry Andric IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy), 312481ad6265SDimitry Andric IRB.CreateZExt(Constant::getAllOnesValue(V->getType()), 31250b57cec5SDimitry Andric SplatIntTy)), 31260b57cec5SDimitry Andric "isplat"); 31270b57cec5SDimitry Andric return V; 31280b57cec5SDimitry Andric } 31290b57cec5SDimitry Andric 31300b57cec5SDimitry Andric /// Compute a vector splat for a given element value. 31310b57cec5SDimitry Andric Value *getVectorSplat(Value *V, unsigned NumElements) { 31320b57cec5SDimitry Andric V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); 31330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " splat: " << *V << "\n"); 31340b57cec5SDimitry Andric return V; 31350b57cec5SDimitry Andric } 31360b57cec5SDimitry Andric 31370b57cec5SDimitry Andric bool visitMemSetInst(MemSetInst &II) { 31380b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << II << "\n"); 31390b57cec5SDimitry Andric assert(II.getRawDest() == OldPtr); 31400b57cec5SDimitry Andric 3141349cc55cSDimitry Andric AAMDNodes AATags = II.getAAMetadata(); 31420b57cec5SDimitry Andric 31430b57cec5SDimitry Andric // If the memset has a variable size, it cannot be split, just adjust the 31440b57cec5SDimitry Andric // pointer to the new alloca. 3145fe6060f1SDimitry Andric if (!isa<ConstantInt>(II.getLength())) { 31460b57cec5SDimitry Andric assert(!IsSplit); 31470b57cec5SDimitry Andric assert(NewBeginOffset == BeginOffset); 31480b57cec5SDimitry Andric II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); 31490b57cec5SDimitry Andric II.setDestAlignment(getSliceAlign()); 3150bdd1243dSDimitry Andric // In theory we should call migrateDebugInfo here. However, we do not 3151bdd1243dSDimitry Andric // emit dbg.assign intrinsics for mem intrinsics storing through non- 3152bdd1243dSDimitry Andric // constant geps, or storing a variable number of bytes. 
3153bdd1243dSDimitry Andric assert(at::getAssignmentMarkers(&II).empty() && 3154*0fca6ea1SDimitry Andric at::getDVRAssignmentMarkers(&II).empty() && 3155bdd1243dSDimitry Andric "AT: Unexpected link to non-const GEP"); 31560b57cec5SDimitry Andric deleteIfTriviallyDead(OldPtr); 31570b57cec5SDimitry Andric return false; 31580b57cec5SDimitry Andric } 31590b57cec5SDimitry Andric 31600b57cec5SDimitry Andric // Record this instruction for deletion. 3161e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&II); 31620b57cec5SDimitry Andric 31630b57cec5SDimitry Andric Type *AllocaTy = NewAI.getAllocatedType(); 31640b57cec5SDimitry Andric Type *ScalarTy = AllocaTy->getScalarType(); 31650b57cec5SDimitry Andric 31660b57cec5SDimitry Andric const bool CanContinue = [&]() { 31670b57cec5SDimitry Andric if (VecTy || IntTy) 31680b57cec5SDimitry Andric return true; 3169*0fca6ea1SDimitry Andric if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) 31700b57cec5SDimitry Andric return false; 31716e75b2fbSDimitry Andric // Length must be in range for FixedVectorType. 31720b57cec5SDimitry Andric auto *C = cast<ConstantInt>(II.getLength()); 31736e75b2fbSDimitry Andric const uint64_t Len = C->getLimitedValue(); 31746e75b2fbSDimitry Andric if (Len > std::numeric_limits<unsigned>::max()) 31750b57cec5SDimitry Andric return false; 31760b57cec5SDimitry Andric auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext()); 31775ffd83dbSDimitry Andric auto *SrcTy = FixedVectorType::get(Int8Ty, Len); 31780b57cec5SDimitry Andric return canConvertValue(DL, SrcTy, AllocaTy) && 3179bdd1243dSDimitry Andric DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue()); 31800b57cec5SDimitry Andric }(); 31810b57cec5SDimitry Andric 31820b57cec5SDimitry Andric // If this doesn't map cleanly onto the alloca type, and that type isn't 31830b57cec5SDimitry Andric // a single value type, just emit a memset. 31840b57cec5SDimitry Andric if (!CanContinue) { 31850b57cec5SDimitry Andric Type *SizeTy = II.getLength()->getType(); 3186*0fca6ea1SDimitry Andric unsigned Sz = NewEndOffset - NewBeginOffset; 3187*0fca6ea1SDimitry Andric Constant *Size = ConstantInt::get(SizeTy, Sz); 3188bdd1243dSDimitry Andric MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet( 31890b57cec5SDimitry Andric getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, 3190bdd1243dSDimitry Andric MaybeAlign(getSliceAlign()), II.isVolatile())); 31910b57cec5SDimitry Andric if (AATags) 3192*0fca6ea1SDimitry Andric New->setAAMetadata( 3193*0fca6ea1SDimitry Andric AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz)); 3194bdd1243dSDimitry Andric 319506c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, 319606c3fb27SDimitry Andric New, New->getRawDest(), nullptr, DL); 3197bdd1243dSDimitry Andric 31980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); 31990b57cec5SDimitry Andric return false; 32000b57cec5SDimitry Andric } 32010b57cec5SDimitry Andric 32020b57cec5SDimitry Andric // If we can represent this as a simple value, we have to build the actual 32030b57cec5SDimitry Andric // value to store, which requires expanding the byte present in memset to 32040b57cec5SDimitry Andric // a sensible representation for the alloca type. This is essentially 32050b57cec5SDimitry Andric // splatting the byte to a sufficiently wide integer, splatting it across 32060b57cec5SDimitry Andric // any desired vector width, and bitcasting to the final type. 
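// Illustrative sketch of the splat arithmetic performed by getIntegerSplat
// above, for a hypothetical 4-byte splat of %b:
//   %z = zext i8 %b to i32
//   %s = mul i32 %z, 16843009 ; 16843009 == 0x01010101 == 0xFFFFFFFF udiv 0xFF
// which replicates the byte into every byte of the wider integer.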
32070b57cec5SDimitry Andric Value *V; 32080b57cec5SDimitry Andric 32090b57cec5SDimitry Andric if (VecTy) { 32100b57cec5SDimitry Andric // If this is a memset of a vectorized alloca, insert it. 32110b57cec5SDimitry Andric assert(ElementTy == ScalarTy); 32120b57cec5SDimitry Andric 32130b57cec5SDimitry Andric unsigned BeginIndex = getIndex(NewBeginOffset); 32140b57cec5SDimitry Andric unsigned EndIndex = getIndex(NewEndOffset); 32150b57cec5SDimitry Andric assert(EndIndex > BeginIndex && "Empty vector!"); 32160b57cec5SDimitry Andric unsigned NumElements = EndIndex - BeginIndex; 32175ffd83dbSDimitry Andric assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() && 32185ffd83dbSDimitry Andric "Too many elements!"); 32190b57cec5SDimitry Andric 32205ffd83dbSDimitry Andric Value *Splat = getIntegerSplat( 3221bdd1243dSDimitry Andric II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8); 32220b57cec5SDimitry Andric Splat = convertValue(DL, IRB, Splat, ElementTy); 32230b57cec5SDimitry Andric if (NumElements > 1) 32240b57cec5SDimitry Andric Splat = getVectorSplat(Splat, NumElements); 32250b57cec5SDimitry Andric 32260b57cec5SDimitry Andric Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 32275ffd83dbSDimitry Andric NewAI.getAlign(), "oldload"); 32280b57cec5SDimitry Andric V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); 32290b57cec5SDimitry Andric } else if (IntTy) { 32300b57cec5SDimitry Andric // If this is a memset on an alloca where we can widen stores, insert the 32310b57cec5SDimitry Andric // set integer. 32320b57cec5SDimitry Andric assert(!II.isVolatile()); 32330b57cec5SDimitry Andric 32340b57cec5SDimitry Andric uint64_t Size = NewEndOffset - NewBeginOffset; 32350b57cec5SDimitry Andric V = getIntegerSplat(II.getValue(), Size); 32360b57cec5SDimitry Andric 32370b57cec5SDimitry Andric if (IntTy && (BeginOffset != NewAllocaBeginOffset || 32380b57cec5SDimitry Andric EndOffset != NewAllocaBeginOffset)) { 32390b57cec5SDimitry Andric Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 32405ffd83dbSDimitry Andric NewAI.getAlign(), "oldload"); 32410b57cec5SDimitry Andric Old = convertValue(DL, IRB, Old, IntTy); 32420b57cec5SDimitry Andric uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; 32430b57cec5SDimitry Andric V = insertInteger(DL, IRB, Old, V, Offset, "insert"); 32440b57cec5SDimitry Andric } else { 32450b57cec5SDimitry Andric assert(V->getType() == IntTy && 32460b57cec5SDimitry Andric "Wrong type for an alloca wide integer!"); 32470b57cec5SDimitry Andric } 32480b57cec5SDimitry Andric V = convertValue(DL, IRB, V, AllocaTy); 32490b57cec5SDimitry Andric } else { 32500b57cec5SDimitry Andric // Established these invariants above. 
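// (Namely: with neither VecTy nor IntTy, CanContinue only held when the
// memset covers the entire new alloca, so both offsets must line up exactly
// here.)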
32510b57cec5SDimitry Andric assert(NewBeginOffset == NewAllocaBeginOffset); 32520b57cec5SDimitry Andric assert(NewEndOffset == NewAllocaEndOffset); 32530b57cec5SDimitry Andric 32545ffd83dbSDimitry Andric V = getIntegerSplat(II.getValue(), 3255bdd1243dSDimitry Andric DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8); 32560b57cec5SDimitry Andric if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) 32575ffd83dbSDimitry Andric V = getVectorSplat( 32585ffd83dbSDimitry Andric V, cast<FixedVectorType>(AllocaVecTy)->getNumElements()); 32590b57cec5SDimitry Andric 32600b57cec5SDimitry Andric V = convertValue(DL, IRB, V, AllocaTy); 32610b57cec5SDimitry Andric } 32620b57cec5SDimitry Andric 3263bdd1243dSDimitry Andric Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile()); 32645ffd83dbSDimitry Andric StoreInst *New = 3265bdd1243dSDimitry Andric IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); 3266fe6060f1SDimitry Andric New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, 3267fe6060f1SDimitry Andric LLVMContext::MD_access_group}); 32680b57cec5SDimitry Andric if (AATags) 3269*0fca6ea1SDimitry Andric New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, 3270*0fca6ea1SDimitry Andric V->getType(), DL)); 3271bdd1243dSDimitry Andric 327206c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, 327306c3fb27SDimitry Andric New, New->getPointerOperand(), V, DL); 3274bdd1243dSDimitry Andric 32750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); 32760b57cec5SDimitry Andric return !II.isVolatile(); 32770b57cec5SDimitry Andric } 32780b57cec5SDimitry Andric 32790b57cec5SDimitry Andric bool visitMemTransferInst(MemTransferInst &II) { 32800b57cec5SDimitry Andric // Rewriting of memory transfer instructions can be a bit tricky. We break 32810b57cec5SDimitry Andric // them into two categories: split intrinsics and unsplit intrinsics. 32820b57cec5SDimitry Andric 32830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << II << "\n"); 32840b57cec5SDimitry Andric 3285349cc55cSDimitry Andric AAMDNodes AATags = II.getAAMetadata(); 32860b57cec5SDimitry Andric 32870b57cec5SDimitry Andric bool IsDest = &II.getRawDestUse() == OldUse; 32880b57cec5SDimitry Andric assert((IsDest && II.getRawDest() == OldPtr) || 32890b57cec5SDimitry Andric (!IsDest && II.getRawSource() == OldPtr)); 32900b57cec5SDimitry Andric 329181ad6265SDimitry Andric Align SliceAlign = getSliceAlign(); 32920b57cec5SDimitry Andric // For unsplit intrinsics, we simply modify the source and destination 32930b57cec5SDimitry Andric // pointers in place. This isn't just an optimization; it is a matter of 32940b57cec5SDimitry Andric // correctness. With unsplit intrinsics we may be dealing with transfers 32950b57cec5SDimitry Andric // within a single alloca before SROA ran, or with transfers that have 32960b57cec5SDimitry Andric // a variable length. We may also be dealing with memmove instead of 32970b57cec5SDimitry Andric // memcpy, and so simply updating the pointers is necessary for us to 32980b57cec5SDimitry Andric // update both source and dest of a single call. 32990b57cec5SDimitry Andric if (!IsSplittable) { 33000b57cec5SDimitry Andric Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); 33010b57cec5SDimitry Andric if (IsDest) { 3302bdd1243dSDimitry Andric // Update the address component of linked dbg.assigns.
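// (The generic lambda below covers both forms of assignment tracking:
// dbg.assign intrinsic users and DbgVariableRecord markers.)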
33037a6dacacSDimitry Andric auto UpdateAssignAddress = [&](auto *DbgAssign) { 33047a6dacacSDimitry Andric if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) || 33057a6dacacSDimitry Andric DbgAssign->getAddress() == II.getDest()) 33067a6dacacSDimitry Andric DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr); 33077a6dacacSDimitry Andric }; 33087a6dacacSDimitry Andric for_each(at::getAssignmentMarkers(&II), UpdateAssignAddress); 3309*0fca6ea1SDimitry Andric for_each(at::getDVRAssignmentMarkers(&II), UpdateAssignAddress); 33100b57cec5SDimitry Andric II.setDest(AdjustedPtr); 33110b57cec5SDimitry Andric II.setDestAlignment(SliceAlign); 3312bdd1243dSDimitry Andric } else { 33130b57cec5SDimitry Andric II.setSource(AdjustedPtr); 33140b57cec5SDimitry Andric II.setSourceAlignment(SliceAlign); 33150b57cec5SDimitry Andric } 33160b57cec5SDimitry Andric 33170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << II << "\n"); 33180b57cec5SDimitry Andric deleteIfTriviallyDead(OldPtr); 33190b57cec5SDimitry Andric return false; 33200b57cec5SDimitry Andric } 33210b57cec5SDimitry Andric // For split transfer intrinsics we have an incredibly useful assurance: 33220b57cec5SDimitry Andric // the source and destination do not reside within the same alloca, and at 33230b57cec5SDimitry Andric // least one of them does not escape. This means that we can replace 33240b57cec5SDimitry Andric // memmove with memcpy, and we don't need to worry about all manner of 33250b57cec5SDimitry Andric // downsides to splitting and transforming the operations. 33260b57cec5SDimitry Andric 33270b57cec5SDimitry Andric // If this doesn't map cleanly onto the alloca type, and that type isn't 33280b57cec5SDimitry Andric // a single value type, just emit a memcpy. 33290b57cec5SDimitry Andric bool EmitMemCpy = 33300b57cec5SDimitry Andric !VecTy && !IntTy && 33310b57cec5SDimitry Andric (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset || 33325ffd83dbSDimitry Andric SliceSize != 3333bdd1243dSDimitry Andric DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedValue() || 3334cb14a3feSDimitry Andric !DL.typeSizeEqualsStoreSize(NewAI.getAllocatedType()) || 33350b57cec5SDimitry Andric !NewAI.getAllocatedType()->isSingleValueType()); 33360b57cec5SDimitry Andric 33370b57cec5SDimitry Andric // If we're just going to emit a memcpy, the alloca hasn't changed, and the 33380b57cec5SDimitry Andric // size hasn't been shrunk based on analysis of the viable range, this is 33390b57cec5SDimitry Andric // a no-op. 33400b57cec5SDimitry Andric if (EmitMemCpy && &OldAI == &NewAI) { 33410b57cec5SDimitry Andric // Ensure the start lines up. 33420b57cec5SDimitry Andric assert(NewBeginOffset == BeginOffset); 33430b57cec5SDimitry Andric 33440b57cec5SDimitry Andric // Rewrite the size as needed. 33450b57cec5SDimitry Andric if (NewEndOffset != EndOffset) 33460b57cec5SDimitry Andric II.setLength(ConstantInt::get(II.getLength()->getType(), 33470b57cec5SDimitry Andric NewEndOffset - NewBeginOffset)); 33480b57cec5SDimitry Andric return false; 33490b57cec5SDimitry Andric } 33500b57cec5SDimitry Andric // Record this instruction for deletion. 3351e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&II); 33520b57cec5SDimitry Andric 33530b57cec5SDimitry Andric // Strip all inbounds GEPs and pointer casts to try to dig out any root 33540b57cec5SDimitry Andric // alloca that should be re-examined after rewriting this instruction. 33550b57cec5SDimitry Andric Value *OtherPtr = IsDest ? 
II.getRawSource() : II.getRawDest(); 33560b57cec5SDimitry Andric if (AllocaInst *AI = 33570b57cec5SDimitry Andric dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) { 33580b57cec5SDimitry Andric assert(AI != &OldAI && AI != &NewAI && 33590b57cec5SDimitry Andric "Splittable transfers cannot reach the same alloca on both ends."); 33600b57cec5SDimitry Andric Pass.Worklist.insert(AI); 33610b57cec5SDimitry Andric } 33620b57cec5SDimitry Andric 33630b57cec5SDimitry Andric Type *OtherPtrTy = OtherPtr->getType(); 33640b57cec5SDimitry Andric unsigned OtherAS = OtherPtrTy->getPointerAddressSpace(); 33650b57cec5SDimitry Andric 33660b57cec5SDimitry Andric // Compute the relative offset for the other pointer within the transfer. 33670b57cec5SDimitry Andric unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS); 33680b57cec5SDimitry Andric APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset); 3369480093f4SDimitry Andric Align OtherAlign = 33705ffd83dbSDimitry Andric (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne(); 3371480093f4SDimitry Andric OtherAlign = 3372480093f4SDimitry Andric commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue()); 33730b57cec5SDimitry Andric 33740b57cec5SDimitry Andric if (EmitMemCpy) { 33750b57cec5SDimitry Andric // Compute the other pointer, folding as much as possible to produce 33760b57cec5SDimitry Andric // a single, simple GEP in most cases. 33770b57cec5SDimitry Andric OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, 33780b57cec5SDimitry Andric OtherPtr->getName() + "."); 33790b57cec5SDimitry Andric 33800b57cec5SDimitry Andric Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); 33810b57cec5SDimitry Andric Type *SizeTy = II.getLength()->getType(); 33820b57cec5SDimitry Andric Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); 33830b57cec5SDimitry Andric 33840b57cec5SDimitry Andric Value *DestPtr, *SrcPtr; 3385480093f4SDimitry Andric MaybeAlign DestAlign, SrcAlign; 33860b57cec5SDimitry Andric // Note: IsDest is true iff we're copying into the new alloca slice 33870b57cec5SDimitry Andric if (IsDest) { 33880b57cec5SDimitry Andric DestPtr = OurPtr; 33890b57cec5SDimitry Andric DestAlign = SliceAlign; 33900b57cec5SDimitry Andric SrcPtr = OtherPtr; 33910b57cec5SDimitry Andric SrcAlign = OtherAlign; 33920b57cec5SDimitry Andric } else { 33930b57cec5SDimitry Andric DestPtr = OtherPtr; 33940b57cec5SDimitry Andric DestAlign = OtherAlign; 33950b57cec5SDimitry Andric SrcPtr = OurPtr; 33960b57cec5SDimitry Andric SrcAlign = SliceAlign; 33970b57cec5SDimitry Andric } 33980b57cec5SDimitry Andric CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, 33990b57cec5SDimitry Andric Size, II.isVolatile()); 34000b57cec5SDimitry Andric if (AATags) 3401d409305fSDimitry Andric New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); 3402bdd1243dSDimitry Andric 340306c3fb27SDimitry Andric APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0); 340406c3fb27SDimitry Andric if (IsDest) { 340506c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, 340606c3fb27SDimitry Andric &II, New, DestPtr, nullptr, DL); 340706c3fb27SDimitry Andric } else if (AllocaInst *Base = dyn_cast<AllocaInst>( 340806c3fb27SDimitry Andric DestPtr->stripAndAccumulateConstantOffsets( 340906c3fb27SDimitry Andric DL, Offset, /*AllowNonInbounds*/ true))) { 341006c3fb27SDimitry Andric migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, 341106c3fb27SDimitry Andric 
SliceSize * 8, &II, New, DestPtr, nullptr, DL); 341206c3fb27SDimitry Andric } 34130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); 34140b57cec5SDimitry Andric return false; 34150b57cec5SDimitry Andric } 34160b57cec5SDimitry Andric 34170b57cec5SDimitry Andric bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset && 34180b57cec5SDimitry Andric NewEndOffset == NewAllocaEndOffset; 34190b57cec5SDimitry Andric uint64_t Size = NewEndOffset - NewBeginOffset; 34200b57cec5SDimitry Andric unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0; 34210b57cec5SDimitry Andric unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0; 34220b57cec5SDimitry Andric unsigned NumElements = EndIndex - BeginIndex; 34230b57cec5SDimitry Andric IntegerType *SubIntTy = 34240b57cec5SDimitry Andric IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr; 34250b57cec5SDimitry Andric 34260b57cec5SDimitry Andric // Reset the other pointer type to match the register type we're going to 34270b57cec5SDimitry Andric // use, but using the address space of the original other pointer. 34280b57cec5SDimitry Andric Type *OtherTy; 34290b57cec5SDimitry Andric if (VecTy && !IsWholeAlloca) { 34300b57cec5SDimitry Andric if (NumElements == 1) 34310b57cec5SDimitry Andric OtherTy = VecTy->getElementType(); 34320b57cec5SDimitry Andric else 34335ffd83dbSDimitry Andric OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements); 34340b57cec5SDimitry Andric } else if (IntTy && !IsWholeAlloca) { 34350b57cec5SDimitry Andric OtherTy = SubIntTy; 34360b57cec5SDimitry Andric } else { 34370b57cec5SDimitry Andric OtherTy = NewAllocaTy; 34380b57cec5SDimitry Andric } 34390b57cec5SDimitry Andric 3440bdd1243dSDimitry Andric Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, 34410b57cec5SDimitry Andric OtherPtr->getName() + "."); 3442480093f4SDimitry Andric MaybeAlign SrcAlign = OtherAlign; 3443480093f4SDimitry Andric MaybeAlign DstAlign = SliceAlign; 3444bdd1243dSDimitry Andric if (!IsDest) 34450b57cec5SDimitry Andric std::swap(SrcAlign, DstAlign); 3446bdd1243dSDimitry Andric 3447bdd1243dSDimitry Andric Value *SrcPtr; 3448bdd1243dSDimitry Andric Value *DstPtr; 3449bdd1243dSDimitry Andric 3450bdd1243dSDimitry Andric if (IsDest) { 3451bdd1243dSDimitry Andric DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile()); 3452bdd1243dSDimitry Andric SrcPtr = AdjPtr; 3453bdd1243dSDimitry Andric } else { 3454bdd1243dSDimitry Andric DstPtr = AdjPtr; 3455bdd1243dSDimitry Andric SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile()); 34560b57cec5SDimitry Andric } 34570b57cec5SDimitry Andric 34580b57cec5SDimitry Andric Value *Src; 34590b57cec5SDimitry Andric if (VecTy && !IsWholeAlloca && !IsDest) { 34600b57cec5SDimitry Andric Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 34615ffd83dbSDimitry Andric NewAI.getAlign(), "load"); 34620b57cec5SDimitry Andric Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); 34630b57cec5SDimitry Andric } else if (IntTy && !IsWholeAlloca && !IsDest) { 34640b57cec5SDimitry Andric Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 34655ffd83dbSDimitry Andric NewAI.getAlign(), "load"); 34660b57cec5SDimitry Andric Src = convertValue(DL, IRB, Src, IntTy); 34670b57cec5SDimitry Andric uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; 34680b57cec5SDimitry Andric Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); 34690b57cec5SDimitry Andric } else { 34700b57cec5SDimitry Andric LoadInst *Load = 
IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign, 34710b57cec5SDimitry Andric II.isVolatile(), "copyload"); 3472fe6060f1SDimitry Andric Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, 3473fe6060f1SDimitry Andric LLVMContext::MD_access_group}); 34740b57cec5SDimitry Andric if (AATags) 3475*0fca6ea1SDimitry Andric Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, 3476*0fca6ea1SDimitry Andric Load->getType(), DL)); 34770b57cec5SDimitry Andric Src = Load; 34780b57cec5SDimitry Andric } 34790b57cec5SDimitry Andric 34800b57cec5SDimitry Andric if (VecTy && !IsWholeAlloca && IsDest) { 34810b57cec5SDimitry Andric Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 34825ffd83dbSDimitry Andric NewAI.getAlign(), "oldload"); 34830b57cec5SDimitry Andric Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); 34840b57cec5SDimitry Andric } else if (IntTy && !IsWholeAlloca && IsDest) { 34850b57cec5SDimitry Andric Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, 34865ffd83dbSDimitry Andric NewAI.getAlign(), "oldload"); 34870b57cec5SDimitry Andric Old = convertValue(DL, IRB, Old, IntTy); 34880b57cec5SDimitry Andric uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; 34890b57cec5SDimitry Andric Src = insertInteger(DL, IRB, Old, Src, Offset, "insert"); 34900b57cec5SDimitry Andric Src = convertValue(DL, IRB, Src, NewAllocaTy); 34910b57cec5SDimitry Andric } 34920b57cec5SDimitry Andric 34930b57cec5SDimitry Andric StoreInst *Store = cast<StoreInst>( 34940b57cec5SDimitry Andric IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); 3495fe6060f1SDimitry Andric Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, 3496fe6060f1SDimitry Andric LLVMContext::MD_access_group}); 34970b57cec5SDimitry Andric if (AATags) 3498*0fca6ea1SDimitry Andric Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, 3499*0fca6ea1SDimitry Andric Src->getType(), DL)); 3500bdd1243dSDimitry Andric 350106c3fb27SDimitry Andric APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); 350206c3fb27SDimitry Andric if (IsDest) { 350306c3fb27SDimitry Andric 350406c3fb27SDimitry Andric migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, 350506c3fb27SDimitry Andric Store, DstPtr, Src, DL); 350606c3fb27SDimitry Andric } else if (AllocaInst *Base = dyn_cast<AllocaInst>( 350706c3fb27SDimitry Andric DstPtr->stripAndAccumulateConstantOffsets( 350806c3fb27SDimitry Andric DL, Offset, /*AllowNonInbounds*/ true))) { 350906c3fb27SDimitry Andric migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8, 351006c3fb27SDimitry Andric &II, Store, DstPtr, Src, DL); 351106c3fb27SDimitry Andric } 351206c3fb27SDimitry Andric 35130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); 35140b57cec5SDimitry Andric return !II.isVolatile(); 35150b57cec5SDimitry Andric } 35160b57cec5SDimitry Andric 35170b57cec5SDimitry Andric bool visitIntrinsicInst(IntrinsicInst &II) { 35185f757f3fSDimitry Andric assert((II.isLifetimeStartOrEnd() || II.isLaunderOrStripInvariantGroup() || 35195f757f3fSDimitry Andric II.isDroppable()) && 3520e8d8bef9SDimitry Andric "Unexpected intrinsic!"); 35210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << II << "\n"); 35220b57cec5SDimitry Andric 35230b57cec5SDimitry Andric // Record this instruction for deletion. 
3524e8d8bef9SDimitry Andric Pass.DeadInsts.push_back(&II); 35250b57cec5SDimitry Andric 3526e8d8bef9SDimitry Andric if (II.isDroppable()) { 3527e8d8bef9SDimitry Andric assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume"); 3528e8d8bef9SDimitry Andric // TODO For now we forget assumed information, this can be improved. 3529e8d8bef9SDimitry Andric OldPtr->dropDroppableUsesIn(II); 3530e8d8bef9SDimitry Andric return true; 3531e8d8bef9SDimitry Andric } 3532e8d8bef9SDimitry Andric 35335f757f3fSDimitry Andric if (II.isLaunderOrStripInvariantGroup()) 35345f757f3fSDimitry Andric return true; 35355f757f3fSDimitry Andric 3536e8d8bef9SDimitry Andric assert(II.getArgOperand(1) == OldPtr); 35370b57cec5SDimitry Andric // Lifetime intrinsics are only promotable if they cover the whole alloca. 35380b57cec5SDimitry Andric // Therefore, we drop lifetime intrinsics which don't cover the whole 35390b57cec5SDimitry Andric // alloca. 35400b57cec5SDimitry Andric // (In theory, intrinsics which partially cover an alloca could be 35410b57cec5SDimitry Andric // promoted, but PromoteMemToReg doesn't handle that case.) 35420b57cec5SDimitry Andric // FIXME: Check whether the alloca is promotable before dropping the 35430b57cec5SDimitry Andric // lifetime intrinsics? 35440b57cec5SDimitry Andric if (NewBeginOffset != NewAllocaBeginOffset || 35450b57cec5SDimitry Andric NewEndOffset != NewAllocaEndOffset) 35460b57cec5SDimitry Andric return true; 35470b57cec5SDimitry Andric 35480b57cec5SDimitry Andric ConstantInt *Size = 35490b57cec5SDimitry Andric ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), 35500b57cec5SDimitry Andric NewEndOffset - NewBeginOffset); 35510b57cec5SDimitry Andric // Lifetime intrinsics always expect an i8* so directly get such a pointer 35520b57cec5SDimitry Andric // for the new alloca slice. 35535f757f3fSDimitry Andric Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace()); 35540b57cec5SDimitry Andric Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy); 35550b57cec5SDimitry Andric Value *New; 35560b57cec5SDimitry Andric if (II.getIntrinsicID() == Intrinsic::lifetime_start) 35570b57cec5SDimitry Andric New = IRB.CreateLifetimeStart(Ptr, Size); 35580b57cec5SDimitry Andric else 35590b57cec5SDimitry Andric New = IRB.CreateLifetimeEnd(Ptr, Size); 35600b57cec5SDimitry Andric 35610b57cec5SDimitry Andric (void)New; 35620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); 35630b57cec5SDimitry Andric 35640b57cec5SDimitry Andric return true; 35650b57cec5SDimitry Andric } 35660b57cec5SDimitry Andric 35670b57cec5SDimitry Andric void fixLoadStoreAlign(Instruction &Root) { 35680b57cec5SDimitry Andric // This algorithm implements the same visitor loop as 35690b57cec5SDimitry Andric // hasUnsafePHIOrSelectUse, and fixes the alignment of each load 35700b57cec5SDimitry Andric // or store found. 
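// (Each load or store reached through casts, GEPs, PHIs, or selects has its
// alignment clamped to the slice's alignment; e.g. an align-16 load over a
// slice that is now only 8-byte aligned becomes align 8.)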
35710b57cec5SDimitry Andric SmallPtrSet<Instruction *, 4> Visited; 35720b57cec5SDimitry Andric SmallVector<Instruction *, 4> Uses; 35730b57cec5SDimitry Andric Visited.insert(&Root); 35740b57cec5SDimitry Andric Uses.push_back(&Root); 35750b57cec5SDimitry Andric do { 35760b57cec5SDimitry Andric Instruction *I = Uses.pop_back_val(); 35770b57cec5SDimitry Andric 35780b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 35795ffd83dbSDimitry Andric LI->setAlignment(std::min(LI->getAlign(), getSliceAlign())); 35800b57cec5SDimitry Andric continue; 35810b57cec5SDimitry Andric } 35820b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 35835ffd83dbSDimitry Andric SI->setAlignment(std::min(SI->getAlign(), getSliceAlign())); 35840b57cec5SDimitry Andric continue; 35850b57cec5SDimitry Andric } 35860b57cec5SDimitry Andric 35870b57cec5SDimitry Andric assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) || 35880b57cec5SDimitry Andric isa<PHINode>(I) || isa<SelectInst>(I) || 35890b57cec5SDimitry Andric isa<GetElementPtrInst>(I)); 35900b57cec5SDimitry Andric for (User *U : I->users()) 35910b57cec5SDimitry Andric if (Visited.insert(cast<Instruction>(U)).second) 35920b57cec5SDimitry Andric Uses.push_back(cast<Instruction>(U)); 35930b57cec5SDimitry Andric } while (!Uses.empty()); 35940b57cec5SDimitry Andric } 35950b57cec5SDimitry Andric 35960b57cec5SDimitry Andric bool visitPHINode(PHINode &PN) { 35970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); 35980b57cec5SDimitry Andric assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable"); 35990b57cec5SDimitry Andric assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable"); 36000b57cec5SDimitry Andric 36010b57cec5SDimitry Andric // We would like to compute a new pointer in only one place, but have it be 36020b57cec5SDimitry Andric // as local as possible to the PHI. To do that, we re-use the location of 36030b57cec5SDimitry Andric // the old pointer, which necessarily must be in the right position to 36040b57cec5SDimitry Andric // dominate the PHI. 36055ffd83dbSDimitry Andric IRBuilderBase::InsertPointGuard Guard(IRB); 36060b57cec5SDimitry Andric if (isa<PHINode>(OldPtr)) 36075f757f3fSDimitry Andric IRB.SetInsertPoint(OldPtr->getParent(), 36085f757f3fSDimitry Andric OldPtr->getParent()->getFirstInsertionPt()); 36090b57cec5SDimitry Andric else 36105ffd83dbSDimitry Andric IRB.SetInsertPoint(OldPtr); 36115ffd83dbSDimitry Andric IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc()); 36120b57cec5SDimitry Andric 36135ffd83dbSDimitry Andric Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); 36140b57cec5SDimitry Andric // Replace the operands which were using the old pointer. 36150b57cec5SDimitry Andric std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr); 36160b57cec5SDimitry Andric 36170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << PN << "\n"); 36180b57cec5SDimitry Andric deleteIfTriviallyDead(OldPtr); 36190b57cec5SDimitry Andric 36200b57cec5SDimitry Andric // Fix the alignment of any loads or stores using this PHI node. 36210b57cec5SDimitry Andric fixLoadStoreAlign(PN); 36220b57cec5SDimitry Andric 36230b57cec5SDimitry Andric // PHIs can't be promoted on their own, but often can be speculated. We 36240b57cec5SDimitry Andric // check the speculation outside of the rewriter so that we see the 36250b57cec5SDimitry Andric // fully-rewritten alloca. 
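// Illustrative pattern (hypothetical IR): a load of
//   %p = phi ptr [ %a, %then ], [ %b, %else ]
// can often be speculated by loading %a and %b in the predecessors and
// forming a PHI over the loaded values instead.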
36260b57cec5SDimitry Andric PHIUsers.insert(&PN); 36270b57cec5SDimitry Andric return true; 36280b57cec5SDimitry Andric } 36290b57cec5SDimitry Andric 36300b57cec5SDimitry Andric bool visitSelectInst(SelectInst &SI) { 36310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); 36320b57cec5SDimitry Andric assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && 36330b57cec5SDimitry Andric "Pointer isn't an operand!"); 36340b57cec5SDimitry Andric assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable"); 36350b57cec5SDimitry Andric assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable"); 36360b57cec5SDimitry Andric 36370b57cec5SDimitry Andric Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); 36380b57cec5SDimitry Andric // Replace the operands which were using the old pointer. 36390b57cec5SDimitry Andric if (SI.getOperand(1) == OldPtr) 36400b57cec5SDimitry Andric SI.setOperand(1, NewPtr); 36410b57cec5SDimitry Andric if (SI.getOperand(2) == OldPtr) 36420b57cec5SDimitry Andric SI.setOperand(2, NewPtr); 36430b57cec5SDimitry Andric 36440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << SI << "\n"); 36450b57cec5SDimitry Andric deleteIfTriviallyDead(OldPtr); 36460b57cec5SDimitry Andric 36470b57cec5SDimitry Andric // Fix the alignment of any loads or stores using this select. 36480b57cec5SDimitry Andric fixLoadStoreAlign(SI); 36490b57cec5SDimitry Andric 36500b57cec5SDimitry Andric // Selects can't be promoted on their own, but often can be speculated. We 36510b57cec5SDimitry Andric // check the speculation outside of the rewriter so that we see the 36520b57cec5SDimitry Andric // fully-rewritten alloca. 36530b57cec5SDimitry Andric SelectUsers.insert(&SI); 36540b57cec5SDimitry Andric return true; 36550b57cec5SDimitry Andric } 36560b57cec5SDimitry Andric }; 36570b57cec5SDimitry Andric 36580b57cec5SDimitry Andric /// Visitor to rewrite aggregate loads and stores as scalar. 36590b57cec5SDimitry Andric /// 36600b57cec5SDimitry Andric /// This pass aggressively rewrites all aggregate loads and stores on 36610b57cec5SDimitry Andric /// a particular pointer (or any pointer derived from it which we can identify) 36620b57cec5SDimitry Andric /// with scalar loads and stores. 36630b57cec5SDimitry Andric class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { 36640b57cec5SDimitry Andric // Befriend the base class so it can delegate to private visit methods. 36650b57cec5SDimitry Andric friend class InstVisitor<AggLoadStoreRewriter, bool>; 36660b57cec5SDimitry Andric 36670b57cec5SDimitry Andric /// Queue of pointer uses to analyze and potentially rewrite. 36680b57cec5SDimitry Andric SmallVector<Use *, 8> Queue; 36690b57cec5SDimitry Andric 36700b57cec5SDimitry Andric /// Set to prevent us from cycling with phi nodes and loops. 36710b57cec5SDimitry Andric SmallPtrSet<User *, 8> Visited; 36720b57cec5SDimitry Andric 36730b57cec5SDimitry Andric /// The current pointer use being rewritten. This is used to dig up the used 36740b57cec5SDimitry Andric /// value (as opposed to the user). 3675480093f4SDimitry Andric Use *U = nullptr; 36760b57cec5SDimitry Andric 36770b57cec5SDimitry Andric /// Used to calculate offsets, and hence alignment, of subobjects. 
36780b57cec5SDimitry Andric const DataLayout &DL; 36790b57cec5SDimitry Andric 368004eeddc0SDimitry Andric IRBuilderTy &IRB; 368104eeddc0SDimitry Andric 36820b57cec5SDimitry Andric public: 368304eeddc0SDimitry Andric AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB) 368404eeddc0SDimitry Andric : DL(DL), IRB(IRB) {} 36850b57cec5SDimitry Andric 36860b57cec5SDimitry Andric /// Rewrite loads and stores through a pointer and all pointers derived from 36870b57cec5SDimitry Andric /// it. 36880b57cec5SDimitry Andric bool rewrite(Instruction &I) { 36890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n"); 36900b57cec5SDimitry Andric enqueueUsers(I); 36910b57cec5SDimitry Andric bool Changed = false; 36920b57cec5SDimitry Andric while (!Queue.empty()) { 36930b57cec5SDimitry Andric U = Queue.pop_back_val(); 36940b57cec5SDimitry Andric Changed |= visit(cast<Instruction>(U->getUser())); 36950b57cec5SDimitry Andric } 36960b57cec5SDimitry Andric return Changed; 36970b57cec5SDimitry Andric } 36980b57cec5SDimitry Andric 36990b57cec5SDimitry Andric private: 37000b57cec5SDimitry Andric /// Enqueue all the users of the given instruction for further processing. 37010b57cec5SDimitry Andric /// This uses a set to de-duplicate users. 37020b57cec5SDimitry Andric void enqueueUsers(Instruction &I) { 37030b57cec5SDimitry Andric for (Use &U : I.uses()) 37040b57cec5SDimitry Andric if (Visited.insert(U.getUser()).second) 37050b57cec5SDimitry Andric Queue.push_back(&U); 37060b57cec5SDimitry Andric } 37070b57cec5SDimitry Andric 37080b57cec5SDimitry Andric // Conservative default is to not rewrite anything. 37090b57cec5SDimitry Andric bool visitInstruction(Instruction &I) { return false; } 37100b57cec5SDimitry Andric 37110b57cec5SDimitry Andric /// Generic recursive split emission class. 37120b57cec5SDimitry Andric template <typename Derived> class OpSplitter { 37130b57cec5SDimitry Andric protected: 37140b57cec5SDimitry Andric /// The builder used to form new instructions. 371504eeddc0SDimitry Andric IRBuilderTy &IRB; 37160b57cec5SDimitry Andric 37170b57cec5SDimitry Andric /// The indices which to be used with insert- or extractvalue to select the 37180b57cec5SDimitry Andric /// appropriate value within the aggregate. 37190b57cec5SDimitry Andric SmallVector<unsigned, 4> Indices; 37200b57cec5SDimitry Andric 37210b57cec5SDimitry Andric /// The indices to a GEP instruction which will move Ptr to the correct slot 37220b57cec5SDimitry Andric /// within the aggregate. 37230b57cec5SDimitry Andric SmallVector<Value *, 4> GEPIndices; 37240b57cec5SDimitry Andric 37250b57cec5SDimitry Andric /// The base pointer of the original op, used as a base for GEPing the 37260b57cec5SDimitry Andric /// split operations. 37270b57cec5SDimitry Andric Value *Ptr; 37280b57cec5SDimitry Andric 37290b57cec5SDimitry Andric /// The base pointee type being GEPed into. 37300b57cec5SDimitry Andric Type *BaseTy; 37310b57cec5SDimitry Andric 37320b57cec5SDimitry Andric /// Known alignment of the base pointer. 3733480093f4SDimitry Andric Align BaseAlign; 37340b57cec5SDimitry Andric 37350b57cec5SDimitry Andric /// To calculate offset of each component so we can correctly deduce 37360b57cec5SDimitry Andric /// alignments. 37370b57cec5SDimitry Andric const DataLayout &DL; 37380b57cec5SDimitry Andric 37390b57cec5SDimitry Andric /// Initialize the splitter with an insertion point, Ptr and start with a 37400b57cec5SDimitry Andric /// single zero GEP index. 
    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);
    }

  public:
    /// Generic recursive split emission routine.
    ///
    /// This method recursively splits an aggregate op (load or store) into
    /// scalar or vector ops. It splits recursively until it hits a single
    /// value and emits that single value operation via the template argument.
    ///
    /// The logic of this routine relies on GEPs and insertvalue and
    /// extractvalue all operating with the same fundamental index list, merely
    /// formatted differently (GEPs need actual values).
    ///
    /// \param Ty  The type being split recursively into smaller ops.
    /// \param Agg The aggregate value being built up or stored, depending on
    ///            whether this is splitting a load or a store respectively.
    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
      if (Ty->isSingleValueType()) {
        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(
            Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
      }

      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();
        (void)OldSize;
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size");
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
          Indices.pop_back();
        }
        return;
      }

      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();
        (void)OldSize;
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size");
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
          Indices.pop_back();
        }
        return;
      }

      llvm_unreachable("Only arrays and structs are aggregate loadable types");
    }
  };

  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    AAMDNodes AATags;

    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
                   IRBuilderTy &IRB)
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
                                     IRB),
          AATags(AATags) {}

    /// Emit a leaf load of a single value. This is called at the leaves of the
    /// recursive emission to actually load values.
    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      assert(Ty->isSingleValueType());
      // Load the single value and insert it using the indices.
      Value *GEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      LoadInst *Load =
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

      APInt Offset(
          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
      if (AATags &&
          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
        Load->setAAMetadata(
            AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));

      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
      LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
    }
  };

  bool visitLoadInst(LoadInst &LI) {
    assert(LI.getPointerOperand() == *U);
    if (!LI.isSimple() || LI.getType()->isSingleValueType())
      return false;

    // We have an aggregate being loaded, split it apart.
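    // For example (illustrative only, names follow the ".fca"/".gep"/".load"
    // suffixes used below), a load %x of {i32, float} from %p becomes:
    //   %x.fca.0.gep    = getelementptr inbounds {i32, float}, ptr %p, i32 0, i32 0
    //   %x.fca.0.load   = load i32, ptr %x.fca.0.gep
    //   %x.fca.0.insert = insertvalue {i32, float} poison, i32 %x.fca.0.load, 0
    // and likewise for index 1, rebuilding the aggregate with insertvalue.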
    LLVM_DEBUG(dbgs() << "    original: " << LI << "\n");
    LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
                            getAdjustedAlignment(&LI, 0), DL, IRB);
    Value *V = PoisonValue::get(LI.getType());
    Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
    Visited.erase(&LI);
    LI.replaceAllUsesWith(V);
    LI.eraseFromParent();
    return true;
  }

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
                    const DataLayout &DL, IRBuilderTy &IRB)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
                                      DL, IRB),
          AATags(AATags), AggStore(AggStore) {}
    AAMDNodes AATags;
    StoreInst *AggStore;
    /// Emit a leaf store of a single value. This is called at the leaves of
    /// the recursive emission to actually produce stores.
    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      assert(Ty->isSingleValueType());
      // Extract the single value and store it using the indices.
      //
      // The gep and extractvalue values are factored out of the CreateStore
      // call to make the output independent of the argument evaluation order.
      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      StoreInst *Store =
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

      APInt Offset(
          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
      GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
      if (AATags) {
        Store->setAAMetadata(AATags.adjustForAccess(
            Offset.getZExtValue(), ExtractValue->getType(), DL));
      }

      // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
      // If we cannot (because there's an intervening non-const or unbounded
      // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
      // this instruction.
      Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
      if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
        uint64_t SizeInBits =
            DL.getTypeSizeInBits(Store->getValueOperand()->getType());
        migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
                         SizeInBits, AggStore, Store,
                         Store->getPointerOperand(), Store->getValueOperand(),
                         DL);
      } else {
        assert(at::getAssignmentMarkers(Store).empty() &&
               at::getDVRAssignmentMarkers(Store).empty() &&
               "AT: unexpected debug.assign linked to store through "
               "unbounded GEP");
      }
      LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
    }
  };

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
      return false;
    Value *V = SI.getValueOperand();
    if (V->getType()->isSingleValueType())
      return false;

    // We have an aggregate being stored, split it apart.
    LLVM_DEBUG(dbgs() << "    original: " << SI << "\n");
    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
                             getAdjustedAlignment(&SI, 0), DL, IRB);
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
    Visited.erase(&SI);
    // The stores replacing SI each have markers describing fragments of the
    // assignment, so delete the assignment markers linked to SI.
    at::deleteAssignmentMarkers(&SI);
    SI.eraseFromParent();
    return true;
  }

  bool visitBitCastInst(BitCastInst &BC) {
    enqueueUsers(BC);
    return false;
  }

  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    enqueueUsers(ASC);
    return false;
  }

  // Unfold gep (select cond, ptr1, ptr2), idx
  //   => select cond, gep(ptr1, idx), gep(ptr2, idx)
  // and  gep ptr, (select cond, idx1, idx2)
  //   => select cond, gep(ptr, idx1), gep(ptr, idx2)
  bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
    // Check whether the GEP has exactly one select operand and all indices
    // will become constant after the transform.
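    // For example (illustrative, using the ".sroa.gep"/".sroa.sel" names
    // created below):
    //   %sel = select i1 %c, ptr %a, ptr %b
    //   %gep = getelementptr i32, ptr %sel, i64 4
    // becomes
    //   %a.sroa.gep   = getelementptr i32, ptr %a, i64 4
    //   %b.sroa.gep   = getelementptr i32, ptr %b, i64 4
    //   %sel.sroa.sel = select i1 %c, ptr %a.sroa.gep, ptr %b.sroa.gep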
    SelectInst *Sel = dyn_cast<SelectInst>(GEPI.getPointerOperand());
    for (Value *Op : GEPI.indices()) {
      if (auto *SI = dyn_cast<SelectInst>(Op)) {
        if (Sel)
          return false;

        Sel = SI;
        if (!isa<ConstantInt>(Sel->getTrueValue()) ||
            !isa<ConstantInt>(Sel->getFalseValue()))
          return false;
        continue;
      }

      if (!isa<ConstantInt>(Op))
        return false;
    }

    if (!Sel)
      return false;

    LLVM_DEBUG(dbgs() << "  Rewriting gep(select) -> select(gep):\n";
               dbgs() << "    original: " << *Sel << "\n";
               dbgs() << "              " << GEPI << "\n";);

    auto GetNewOps = [&](Value *SelOp) {
      SmallVector<Value *> NewOps;
      for (Value *Op : GEPI.operands())
        if (Op == Sel)
          NewOps.push_back(SelOp);
        else
          NewOps.push_back(Op);
      return NewOps;
    };

    Value *True = Sel->getTrueValue();
    Value *False = Sel->getFalseValue();
    SmallVector<Value *> TrueOps = GetNewOps(True);
    SmallVector<Value *> FalseOps = GetNewOps(False);

    IRB.SetInsertPoint(&GEPI);
    GEPNoWrapFlags NW = GEPI.getNoWrapFlags();

    Type *Ty = GEPI.getSourceElementType();
    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
                                 True->getName() + ".sroa.gep", NW);

    Value *NFalse =
        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                      False->getName() + ".sroa.gep", NW);

    Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
                                   Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);
    GEPI.replaceAllUsesWith(NSel);
    GEPI.eraseFromParent();
    Instruction *NSelI = cast<Instruction>(NSel);
    Visited.insert(NSelI);
    enqueueUsers(*NSelI);

    LLVM_DEBUG(dbgs() << "          to: " << *NTrue << "\n";
               dbgs() << "              " << *NFalse << "\n";
               dbgs() << "              " << *NSel << "\n";);

    return true;
  }

  // Unfold gep (phi ptr1, ptr2), idx
  //   => phi ((gep ptr1, idx), (gep ptr2, idx))
  // and  gep ptr, (phi idx1, idx2)
  //   => phi ((gep ptr, idx1), (gep ptr, idx2))
  bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
    // To prevent infinitely expanding recursive phis, bail if the GEP pointer
    // operand (looking through the phi if it is the phi we want to unfold) is
    // an instruction besides a static alloca.
    PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
    auto IsInvalidPointerOperand = [](Value *V) {
      if (!isa<Instruction>(V))
        return false;
      if (auto *AI = dyn_cast<AllocaInst>(V))
        return !AI->isStaticAlloca();
      return true;
    };
    if (Phi) {
      if (any_of(Phi->operands(), IsInvalidPointerOperand))
        return false;
    } else {
      if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
        return false;
    }
    // Check whether the GEP has exactly one phi operand (including the pointer
    // operand) and all indices will become constant after the transform.
    for (Value *Op : GEPI.indices()) {
      if (auto *SI = dyn_cast<PHINode>(Op)) {
        if (Phi)
          return false;

        Phi = SI;
        if (!all_of(Phi->incoming_values(),
                    [](Value *V) { return isa<ConstantInt>(V); }))
          return false;
        continue;
      }

      if (!isa<ConstantInt>(Op))
        return false;
    }

    if (!Phi)
      return false;

    LLVM_DEBUG(dbgs() << "  Rewriting gep(phi) -> phi(gep):\n";
               dbgs() << "    original: " << *Phi << "\n";
               dbgs() << "              " << GEPI << "\n";);

    auto GetNewOps = [&](Value *PhiOp) {
      SmallVector<Value *> NewOps;
      for (Value *Op : GEPI.operands())
        if (Op == Phi)
          NewOps.push_back(PhiOp);
        else
          NewOps.push_back(Op);
      return NewOps;
    };

    IRB.SetInsertPoint(Phi);
    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
                                    Phi->getName() + ".sroa.phi");

    Type *SourceTy = GEPI.getSourceElementType();
    // We only handle arguments, constants, and static allocas here, so we can
    // insert GEPs at the end of the entry block.
    IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      Value *Op = Phi->getIncomingValue(I);
      BasicBlock *BB = Phi->getIncomingBlock(I);
      Value *NewGEP;
      if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
        NewGEP = NewPhi->getIncomingValue(NI);
      } else {
        SmallVector<Value *> NewOps = GetNewOps(Op);
        NewGEP =
            IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
                          Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
      }
      NewPhi->addIncoming(NewGEP, BB);
    }

    Visited.erase(&GEPI);
    GEPI.replaceAllUsesWith(NewPhi);
    GEPI.eraseFromParent();
    Visited.insert(NewPhi);
    enqueueUsers(*NewPhi);

    LLVM_DEBUG(dbgs() << "          to: ";
               for (Value *In : NewPhi->incoming_values())
                 dbgs() << "\n              " << *In;
               dbgs() << "\n              " << *NewPhi << '\n');

    return true;
  }

  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (unfoldGEPSelect(GEPI))
      return true;

    if (unfoldGEPPhi(GEPI))
      return true;

    enqueueUsers(GEPI);
    return false;
  }

  bool visitPHINode(PHINode &PN) {
    enqueueUsers(PN);
    return false;
  }

  bool visitSelectInst(SelectInst &SI) {
    enqueueUsers(SI);
    return false;
  }
};

} // end anonymous namespace

/// Strip aggregate type wrapping.
///
/// This removes no-op aggregate types wrapping an underlying type. It will
/// strip as many layers of types as it can without changing either the type
/// size or the allocated size.
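///
/// For example (illustrative), { i32 } and [1 x i32] both wrap a single i32
/// without changing its size, so both strip down to i32; { i32, i32 } does
/// not strip, since dropping the struct would change the allocated size.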
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
  if (Ty->isSingleValueType())
    return Ty;

  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
  uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

  Type *InnerTy;
  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = DL.getStructLayout(STy);
    unsigned Index = SL->getElementContainingOffset(0);
    InnerTy = STy->getElementType(Index);
  } else {
    return Ty;
  }

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
    return Ty;

  return stripAggregateTypeWrapping(DL, InnerTy);
}

/// Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
/// This recurses through the aggregate type and tries to compute a subtype
/// based on the offset and size. When the offset and size span a sub-section
/// of an array, it will even compute a new array type for that sub-section,
/// and the same for structs.
///
/// Note that this routine is very strict and tries to find a partition of the
/// type which produces the *exact* right offset and size. It is not forgiving
/// when the size or offset causes either end of the type-based partition to
/// be off. Also, this is a best-effort routine. It is reasonable to give up
/// and not return a type if necessary.
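///
/// For example (illustrative), for { i64, [4 x float] } with Offset = 8 and
/// Size = 8 this computes the sub-array type [2 x float], while Offset = 4
/// with Size = 8 returns nullptr because the range straddles two elements.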
static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
                              uint64_t Size) {
  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
    return stripAggregateTypeWrapping(DL, Ty);
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
    return nullptr;

  if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
    Type *ElementTy;
    uint64_t TyNumElements;
    if (auto *AT = dyn_cast<ArrayType>(Ty)) {
      ElementTy = AT->getElementType();
      TyNumElements = AT->getNumElements();
    } else {
      // FIXME: This isn't right for vectors with non-byte-sized or
      // non-power-of-two sized elements.
      auto *VT = cast<FixedVectorType>(Ty);
      ElementTy = VT->getElementType();
      TyNumElements = VT->getNumElements();
    }
    uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
    uint64_t NumSkippedElements = Offset / ElementSize;
    if (NumSkippedElements >= TyNumElements)
      return nullptr;
    Offset -= NumSkippedElements * ElementSize;

    // First check if we need to recurse.
    if (Offset > 0 || Size < ElementSize) {
      // Bail if the partition ends in a different array element.
      if ((Offset + Size) > ElementSize)
        return nullptr;
      // Recurse through the element type trying to peel off offset bytes.
      return getTypePartition(DL, ElementTy, Offset, Size);
    }
    assert(Offset == 0);

    if (Size == ElementSize)
      return stripAggregateTypeWrapping(DL, ElementTy);
    assert(Size > ElementSize);
    uint64_t NumElements = Size / ElementSize;
    if (NumElements * ElementSize != Size)
      return nullptr;
    return ArrayType::get(ElementTy, NumElements);
  }

  StructType *STy = dyn_cast<StructType>(Ty);
  if (!STy)
    return nullptr;

  const StructLayout *SL = DL.getStructLayout(STy);

  if (SL->getSizeInBits().isScalable())
    return nullptr;

  if (Offset >= SL->getSizeInBytes())
    return nullptr;
  uint64_t EndOffset = Offset + Size;
  if (EndOffset > SL->getSizeInBytes())
    return nullptr;

  unsigned Index = SL->getElementContainingOffset(Offset);
  Offset -= SL->getElementOffset(Index);

  Type *ElementTy = STy->getElementType(Index);
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)
    return nullptr; // The offset points into alignment padding.

  // See if any partition must be contained by the element.
  if (Offset > 0 || Size < ElementSize) {
    if ((Offset + Size) > ElementSize)
      return nullptr;
    return getTypePartition(DL, ElementTy, Offset, Size);
  }
  assert(Offset == 0);

  if (Size == ElementSize)
    return stripAggregateTypeWrapping(DL, ElementTy);

  StructType::element_iterator EI = STy->element_begin() + Index,
                               EE = STy->element_end();
  if (EndOffset < SL->getSizeInBytes()) {
    unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
    if (Index == EndIndex)
      return nullptr; // Within a single element and its padding.

    // Don't try to form "natural" types if the elements don't line up with the
    // expected size.
    // FIXME: We could potentially recurse down through the last element in the
    // sub-struct to find a natural end point.
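    // For example (illustrative), an 8-byte range over the first two elements
    // of { i32, i32, i64 } ends exactly where the i64 begins, so the check
    // below passes and a { i32, i32 } sub-struct can be formed.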
    if (SL->getElementOffset(EndIndex) != EndOffset)
      return nullptr;

    assert(Index < EndIndex);
    EE = STy->element_begin() + EndIndex;
  }

  // Try to build up a sub-structure.
  StructType *SubTy =
      StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
  const StructLayout *SubSL = DL.getStructLayout(SubTy);
  if (Size != SubSL->getSizeInBytes())
    return nullptr; // The sub-struct doesn't have quite the size needed.

  return SubTy;
}

/// Pre-split loads and stores to simplify rewriting.
///
/// We want to break up the splittable load+store pairs as much as
/// possible. This is important to do as a preprocessing step, as once we
/// start rewriting the accesses to partitions of the alloca we lose the
/// necessary information to correctly split apart paired loads and stores
/// which both point into this alloca. The case to consider is something like
/// the following:
///
///   %a = alloca [12 x i8]
///   %gep1 = getelementptr i8, ptr %a, i32 0
///   %gep2 = getelementptr i8, ptr %a, i32 4
///   %gep3 = getelementptr i8, ptr %a, i32 8
///   store float 0.0, ptr %gep1
///   store float 1.0, ptr %gep2
///   %v = load i64, ptr %gep1
///   store i64 %v, ptr %gep2
///   %f1 = load float, ptr %gep2
///   %f2 = load float, ptr %gep3
///
/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
/// promote everything so we recover the 2 SSA values that should have been
/// there all along.
///
/// \returns true if any changes are made.
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");

  // Track the loads and stores which are candidates for pre-splitting here, in
  // the order they first appear during the partition scan. These give stable
  // iteration order and a basis for tracking which loads and stores we
  // actually split.
  SmallVector<LoadInst *, 4> Loads;
  SmallVector<StoreInst *, 4> Stores;

  // We need to accumulate the splits required of each load or store where we
  // can find them via a direct lookup. This is important to cross-check loads
  // and stores against each other. We also track the slice so that we can kill
  // all the slices that end up split.
  struct SplitOffsets {
    Slice *S;
    std::vector<uint64_t> Splits;
  };
  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;

  // Track loads out of this alloca which cannot, for any reason, be pre-split.
  // This is important as we also cannot pre-split stores of those loads!
  // FIXME: This is all pretty gross. It means that we can be more aggressive
  // in pre-splitting when the load feeding the store happens to come from
  // a separate alloca. Put another way, the effectiveness of SROA would be
  // decreased by a frontend which just concatenated all of its local allocas
  // into one big flat alloca. But defeating such patterns is exactly the job
  // SROA is tasked with! Sadly, to not have this discrepancy we would have to
  // change store pre-splitting to actually force pre-splitting of the load
  // that feeds it *and all stores*. That makes pre-splitting much harder, but
  // maybe it would make it more principled?
  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

  LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
  for (auto &P : AS.partitions()) {
    for (Slice &S : P) {
      Instruction *I = cast<Instruction>(S.getUse()->getUser());
      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
        // If this is a load we have to track that it can't participate in any
        // pre-splitting. If this is a store of a load we have to track that
        // that load also can't participate in any pre-splitting.
        if (auto *LI = dyn_cast<LoadInst>(I))
          UnsplittableLoads.insert(LI);
        else if (auto *SI = dyn_cast<StoreInst>(I))
          if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
            UnsplittableLoads.insert(LI);
        continue;
      }
      assert(P.endOffset() > S.beginOffset() &&
             "Empty or backwards partition!");

      // Determine if this is a pre-splittable slice.
      if (auto *LI = dyn_cast<LoadInst>(I)) {
        assert(!LI->isVolatile() && "Cannot split volatile loads!");

        // The load must be used exclusively to store into other pointers for
        // us to be able to arbitrarily pre-split it. The stores must also be
        // simple to avoid changing semantics.
        auto IsLoadSimplyStored = [](LoadInst *LI) {
          for (User *LU : LI->users()) {
            auto *SI = dyn_cast<StoreInst>(LU);
            if (!SI || !SI->isSimple())
              return false;
          }
          return true;
        };
        if (!IsLoadSimplyStored(LI)) {
          UnsplittableLoads.insert(LI);
          continue;
        }

        Loads.push_back(LI);
      } else if (auto *SI = dyn_cast<StoreInst>(I)) {
        if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
          // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
          continue;
        auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
        if (!StoredLoad || !StoredLoad->isSimple())
          continue;
        assert(!SI->isVolatile() && "Cannot split volatile stores!");

        Stores.push_back(SI);
      } else {
        // Other uses cannot be pre-split.
        continue;
      }

      // Record the initial split.
      LLVM_DEBUG(dbgs() << "    Candidate: " << *I << "\n");
      auto &Offsets = SplitOffsetsMap[I];
      assert(Offsets.Splits.empty() &&
             "Should not have splits the first time we see an instruction!");
      Offsets.S = &S;
      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
    }

    // Now scan the already split slices, and add a split for any of them which
    // we're going to pre-split.
    for (Slice *S : P.splitSliceTails()) {
      auto SplitOffsetsMapI =
          SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
      if (SplitOffsetsMapI == SplitOffsetsMap.end())
        continue;
      auto &Offsets = SplitOffsetsMapI->second;

      assert(Offsets.S == S && "Found a mismatched slice!");
      assert(!Offsets.Splits.empty() &&
             "Cannot have an empty set of splits on the second partition!");
      assert(Offsets.Splits.back() ==
                 P.beginOffset() - Offsets.S->beginOffset() &&
             "Previous split does not end where this one begins!");

      // Record each split. The last partition's end isn't needed as the size
      // of the slice dictates that.
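      // For example (illustrative), a 12-byte slice crossing partition
      // boundaries at offsets 4 and 8 ends up with Splits == {4, 8}; its end
      // at 12 is implied by the slice's own size.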
      if (S->endOffset() > P.endOffset())
        Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
    }
  }

  // We may have split loads where some of their stores are split stores. For
  // such loads and stores, we can only pre-split them if their splits exactly
  // match relative to their starting offset. We have to verify this prior to
  // any rewriting.
  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
    // Look up the load we are storing in our map of split
    // offsets.
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    // If it was completely unsplittable, then we're done,
    // and this store can't be pre-split.
    if (UnsplittableLoads.count(LI))
      return true;

    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
    if (LoadOffsetsI == SplitOffsetsMap.end())
      return false; // Unrelated loads are definitely safe.
    auto &LoadOffsets = LoadOffsetsI->second;

    // Now look up the store's offsets.
    auto &StoreOffsets = SplitOffsetsMap[SI];

    // If the relative offsets of each split in the load and
    // store match exactly, then we can split them and we
    // don't need to remove them here.
    if (LoadOffsets.Splits == StoreOffsets.Splits)
      return false;

    LLVM_DEBUG(dbgs() << "  Mismatched splits for load and store:\n"
                      << "    " << *LI << "\n"
                      << "    " << *SI << "\n");

    // We've found a store and load that we need to split
    // with mismatched relative splits. Just give up on them
    // and remove both instructions from our list of
    // candidates.
    UnsplittableLoads.insert(LI);
    return true;
  });
  // Now we have to go *back* through all the stores, because a later store may
  // have caused an earlier store's load to become unsplittable and if it is
  // unsplittable for the later store, then we can't rely on it being split in
  // the earlier store either.
  llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    return UnsplittableLoads.count(LI);
  });
  // Once we've established all the loads that can't be split for some reason,
  // filter any that made it into our list out.
  llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
    return UnsplittableLoads.count(LI);
  });

  // If no loads or stores are left, there is no pre-splitting to be done for
  // this alloca.
  if (Loads.empty() && Stores.empty())
    return false;

  // From here on, we can't fail and will be building new accesses, so rig up
  // an IR builder.
  IRBuilderTy IRB(&AI);

  // Collect the new slices which we will merge into the alloca slices.
  SmallVector<Slice, 4> NewSlices;

  // Track any allocas we end up splitting loads and stores for so we iterate
  // on them.
  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;

  // At this point, we have collected all of the loads and stores we can
  // pre-split, and the specific splits needed for them. We actually do the
  // splitting in a specific order in order to handle when one of the loads
  // is the value operand to one of the stores.
  //
  // First, we rewrite all of the split loads, and just accumulate each split
  // load in a parallel structure. We also build the slices for them and append
  // them to the alloca slices.
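  // In the running example from the function comment, the splittable
  // "load i64, ptr %gep1" spans the partition boundary at offset 4 and would
  // (illustratively) become two i32 loads, each recorded below as a new
  // unsplittable slice.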
  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
  std::vector<LoadInst *> SplitLoads;
  const DataLayout &DL = AI.getDataLayout();
  for (LoadInst *LI : Loads) {
    SplitLoads.clear();

    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
    assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
           "Load must have type size equal to store size");
    assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
           "Load must be >= slice size");

    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
    IRB.SetInsertPoint(LI);

    LLVM_DEBUG(dbgs() << "  Splitting load: " << *LI << "\n");

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    int Idx = 0, Size = Offsets.Splits.size();
    for (;;) {
      auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
      auto AS = LI->getPointerAddressSpace();
      auto *PartPtrTy = LI->getPointerOperandType();
      LoadInst *PLoad = IRB.CreateAlignedLoad(
          PartTy,
          getAdjustedPtr(IRB, DL, BasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         PartPtrTy, BasePtr->getName() + "."),
          getAdjustedAlignment(LI, PartOffset),
          /*IsVolatile*/ false, LI->getName());
      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});

      // Append this load onto the list of split loads so we can find it later
      // to rewrite the stores.
      SplitLoads.push_back(PLoad);

      // Now build a new slice for the alloca.
      NewSlices.push_back(
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
                /*IsSplittable*/ false));
      LLVM_DEBUG(dbgs() << "    new slice [" << NewSlices.back().beginOffset()
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PLoad << "\n");

      // See if we've handled all the splits.
      if (Idx >= Size)
        break;

      // Set up the next partition.
      PartOffset = Offsets.Splits[Idx];
      ++Idx;
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
    }

    // Now that we have the split loads, do the slow walk over all uses of the
    // load and rewrite them as split stores, or save the split loads to use
    // below if the store is going to be split there anyway.
    bool DeferredStores = false;
    for (User *LU : LI->users()) {
      StoreInst *SI = cast<StoreInst>(LU);
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;
        LLVM_DEBUG(dbgs() << "    Deferred splitting of store: " << *SI
                          << "\n");
        continue;
      }

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);
      AAMDNodes AATags = SI->getAAMetadata();

      LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        LoadInst *PLoad = SplitLoads[Idx];
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
        auto *PartPtrTy = SI->getPointerOperandType();

        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
            PLoad,
            getAdjustedPtr(IRB, DL, StoreBasePtr,
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           PartPtrTy, StoreBasePtr->getName() + "."),
            getAdjustedAlignment(SI, PartOffset),
            /*IsVolatile*/ false);
        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});

        if (AATags)
          PStore->setAAMetadata(
              AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
        LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
      }

      // We want to immediately iterate on any allocas impacted by splitting
      // this store, and we have to track any promotable alloca (indicated by
      // a direct store) as needing to be resplit because it is no longer
      // promotable.
      if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
                     StoreBasePtr->stripInBoundsOffsets())) {
        Worklist.insert(OtherAI);
      }

      // Mark the original store as dead.
      DeadInsts.push_back(SI);
    }

    // Save the split loads if there are deferred stores among the users.
    if (DeferredStores)
      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

    // Mark the original load as dead and kill the original slice.
    DeadInsts.push_back(LI);
    Offsets.S->kill();
  }

  // Second, we rewrite all of the split stores. At this point, we know that
  // all loads from this alloca have been split already. For stores of such
  // loads, we can simply look up the pre-existing split loads. For stores of
  // other loads, we split those loads first and then write split stores of
  // them.
  for (StoreInst *SI : Stores) {
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    IntegerType *Ty = cast<IntegerType>(LI->getType());
    assert(Ty->getBitWidth() % 8 == 0);
    uint64_t StoreSize = Ty->getBitWidth() / 8;
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");

    auto &Offsets = SplitOffsetsMap[SI];
    assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Value *LoadBasePtr = LI->getPointerOperand();
    Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());

    LLVM_DEBUG(dbgs() << "  Splitting store: " << *SI << "\n");

    // Check whether we have an already split load.
    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
      assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
             "Too few split loads for the number of splits in the store!");
    } else {
      LLVM_DEBUG(dbgs() << "          of load: " << *LI << "\n");
    }

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    int Idx = 0, Size = Offsets.Splits.size();
    for (;;) {
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      auto *LoadPartPtrTy = LI->getPointerOperandType();
      auto *StorePartPtrTy = SI->getPointerOperandType();

      // Either look up a split load or create one.
      LoadInst *PLoad;
      if (SplitLoads) {
        PLoad = (*SplitLoads)[Idx];
      } else {
        IRB.SetInsertPoint(LI);
        auto AS = LI->getPointerAddressSpace();
        PLoad = IRB.CreateAlignedLoad(
            PartTy,
            getAdjustedPtr(IRB, DL, LoadBasePtr,
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           LoadPartPtrTy, LoadBasePtr->getName() + "."),
            getAdjustedAlignment(LI, PartOffset),
            /*IsVolatile*/ false, LI->getName());
        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
      }

      // And store this partition.
      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
          PLoad,
          getAdjustedPtr(IRB, DL, StoreBasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         StorePartPtrTy, StoreBasePtr->getName() + "."),
          getAdjustedAlignment(SI, PartOffset),
          /*IsVolatile*/ false);
      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});

      // Now build a new slice for the alloca.
47020b57cec5SDimitry Andric NewSlices.push_back(
47030b57cec5SDimitry Andric Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
47040b57cec5SDimitry Andric &PStore->getOperandUse(PStore->getPointerOperandIndex()),
47050b57cec5SDimitry Andric /*IsSplittable*/ false));
47060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
47070b57cec5SDimitry Andric << ", " << NewSlices.back().endOffset()
47080b57cec5SDimitry Andric << "): " << *PStore << "\n");
47090b57cec5SDimitry Andric if (!SplitLoads) {
47100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
47110b57cec5SDimitry Andric }
47120b57cec5SDimitry Andric 
47130b57cec5SDimitry Andric // See if we've finished all the splits.
47140b57cec5SDimitry Andric if (Idx >= Size)
47150b57cec5SDimitry Andric break;
47160b57cec5SDimitry Andric 
47170b57cec5SDimitry Andric // Set up the next partition.
47180b57cec5SDimitry Andric PartOffset = Offsets.Splits[Idx];
47190b57cec5SDimitry Andric ++Idx;
47200b57cec5SDimitry Andric PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
47210b57cec5SDimitry Andric }
47220b57cec5SDimitry Andric 
47230b57cec5SDimitry Andric // We want to immediately iterate on any allocas impacted by splitting
47240b57cec5SDimitry Andric // this load, which is only relevant if it isn't a load of this alloca and
47250b57cec5SDimitry Andric // thus we didn't already split the loads above. We also have to keep track
47260b57cec5SDimitry Andric // of any promotable allocas we split loads on as they can no longer be
47270b57cec5SDimitry Andric // promoted.
47280b57cec5SDimitry Andric if (!SplitLoads) {
47290b57cec5SDimitry Andric if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
47300b57cec5SDimitry Andric assert(OtherAI != &AI && "We can't re-split our own alloca!");
47310b57cec5SDimitry Andric ResplitPromotableAllocas.insert(OtherAI);
47320b57cec5SDimitry Andric Worklist.insert(OtherAI);
47330b57cec5SDimitry Andric } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
47340b57cec5SDimitry Andric LoadBasePtr->stripInBoundsOffsets())) {
47350b57cec5SDimitry Andric assert(OtherAI != &AI && "We can't re-split our own alloca!");
47360b57cec5SDimitry Andric Worklist.insert(OtherAI);
47370b57cec5SDimitry Andric }
47380b57cec5SDimitry Andric }
47390b57cec5SDimitry Andric 
47400b57cec5SDimitry Andric // Mark the original store as dead now that we've split it up and kill its
47410b57cec5SDimitry Andric // slice. Note that we leave the original load in place unless this store
47420b57cec5SDimitry Andric // was its only use. It may in turn be split up if it is an alloca load
47430b57cec5SDimitry Andric // for some other alloca, but it may be a normal load. This may introduce
47440b57cec5SDimitry Andric // redundant loads, but where those can be merged the rest of the optimizer
47450b57cec5SDimitry Andric // should handle the merging, and this uncovers SSA splits, which is more
47460b57cec5SDimitry Andric // important. In practice, the original loads will almost always be fully
47470b57cec5SDimitry Andric // split and removed eventually, and the splits will be merged by any
47480b57cec5SDimitry Andric // trivial CSE, including instcombine.
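// As a simplified, hypothetical IR sketch of the rewrite performed above,
//   %v = load i64, ptr %a
//   store i64 %v, ptr %b
// split at byte 4 becomes roughly:
//   %v.0 = load i32, ptr %a
//   %a.4 = getelementptr i8, ptr %a, i64 4
//   %v.4 = load i32, ptr %a.4
//   store i32 %v.0, ptr %b
//   %b.4 = getelementptr i8, ptr %b, i64 4
//   store i32 %v.4, ptr %b.4
// with the original %v left in place until it becomes trivially dead.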
47490b57cec5SDimitry Andric if (LI->hasOneUse()) {
47500b57cec5SDimitry Andric assert(*LI->user_begin() == SI && "Single use isn't this store!");
4751e8d8bef9SDimitry Andric DeadInsts.push_back(LI);
47520b57cec5SDimitry Andric }
4753e8d8bef9SDimitry Andric DeadInsts.push_back(SI);
47540b57cec5SDimitry Andric Offsets.S->kill();
47550b57cec5SDimitry Andric }
47560b57cec5SDimitry Andric 
47570b57cec5SDimitry Andric // Remove the killed slices that have been pre-split.
4758e8d8bef9SDimitry Andric llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
47590b57cec5SDimitry Andric 
47600b57cec5SDimitry Andric // Insert our new slices. This will sort and merge them into the sorted
47610b57cec5SDimitry Andric // sequence.
47620b57cec5SDimitry Andric AS.insert(NewSlices);
47630b57cec5SDimitry Andric 
47640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
47650b57cec5SDimitry Andric #ifndef NDEBUG
47660b57cec5SDimitry Andric for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
47670b57cec5SDimitry Andric LLVM_DEBUG(AS.print(dbgs(), I, " "));
47680b57cec5SDimitry Andric #endif
47690b57cec5SDimitry Andric 
47700b57cec5SDimitry Andric // Finally, don't try to promote any allocas that now require re-splitting.
47710b57cec5SDimitry Andric // They have already been added to the worklist above.
4772e8d8bef9SDimitry Andric llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
4773e8d8bef9SDimitry Andric return ResplitPromotableAllocas.count(AI);
4774e8d8bef9SDimitry Andric });
47750b57cec5SDimitry Andric 
47760b57cec5SDimitry Andric return true;
47770b57cec5SDimitry Andric }
47780b57cec5SDimitry Andric 
47790b57cec5SDimitry Andric /// Rewrite an alloca partition's users.
47800b57cec5SDimitry Andric ///
47810b57cec5SDimitry Andric /// This routine drives both of the rewriting goals of the SROA pass. It tries
47820b57cec5SDimitry Andric /// to rewrite uses of an alloca partition to be conducive to SSA value
47830b57cec5SDimitry Andric /// promotion. If the partition needs a new, more refined alloca, this will
47840b57cec5SDimitry Andric /// build that new alloca, preserving as much type information as possible, and
47850b57cec5SDimitry Andric /// rewrite the uses of the old alloca to point at the new one and have the
47860b57cec5SDimitry Andric /// appropriate new offsets. It also evaluates how successful the rewrite was
47870b57cec5SDimitry Andric /// at enabling promotion and, if it was successful, queues the alloca to be
47880b57cec5SDimitry Andric /// promoted.
47895f757f3fSDimitry Andric AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
47900b57cec5SDimitry Andric Partition &P) {
47910b57cec5SDimitry Andric // Try to compute a friendly type for this partition of the alloca. This
47920b57cec5SDimitry Andric // won't always succeed, in which case we fall back to a legal integer type
47930b57cec5SDimitry Andric // or an i8 array of an appropriate size.
47940b57cec5SDimitry Andric Type *SliceTy = nullptr;
4795bdd1243dSDimitry Andric VectorType *SliceVecTy = nullptr;
4796*0fca6ea1SDimitry Andric const DataLayout &DL = AI.getDataLayout();
4797e8d8bef9SDimitry Andric std::pair<Type *, IntegerType *> CommonUseTy =
4798e8d8bef9SDimitry Andric findCommonType(P.begin(), P.end(), P.endOffset());
4799e8d8bef9SDimitry Andric // Do all uses operate on the same type?
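// (Illustrative: if every slice in this partition loads or stores i32,
// findCommonType returns i32 and it is adopted as SliceTy below whenever
// its alloc size covers the partition.)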
4800e8d8bef9SDimitry Andric if (CommonUseTy.first) 4801bdd1243dSDimitry Andric if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) { 4802e8d8bef9SDimitry Andric SliceTy = CommonUseTy.first; 4803bdd1243dSDimitry Andric SliceVecTy = dyn_cast<VectorType>(SliceTy); 4804bdd1243dSDimitry Andric } 4805e8d8bef9SDimitry Andric // If not, can we find an appropriate subtype in the original allocated type? 48060b57cec5SDimitry Andric if (!SliceTy) 48070b57cec5SDimitry Andric if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), 48080b57cec5SDimitry Andric P.beginOffset(), P.size())) 48090b57cec5SDimitry Andric SliceTy = TypePartitionTy; 4810bdd1243dSDimitry Andric 4811e8d8bef9SDimitry Andric // If still not, can we use the largest bitwidth integer type used? 4812e8d8bef9SDimitry Andric if (!SliceTy && CommonUseTy.second) 4813bdd1243dSDimitry Andric if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) { 4814e8d8bef9SDimitry Andric SliceTy = CommonUseTy.second; 4815bdd1243dSDimitry Andric SliceVecTy = dyn_cast<VectorType>(SliceTy); 4816bdd1243dSDimitry Andric } 48170b57cec5SDimitry Andric if ((!SliceTy || (SliceTy->isArrayTy() && 48180b57cec5SDimitry Andric SliceTy->getArrayElementType()->isIntegerTy())) && 4819bdd1243dSDimitry Andric DL.isLegalInteger(P.size() * 8)) { 48200b57cec5SDimitry Andric SliceTy = Type::getIntNTy(*C, P.size() * 8); 4821bdd1243dSDimitry Andric } 4822bdd1243dSDimitry Andric 4823bdd1243dSDimitry Andric // If the common use types are not viable for promotion then attempt to find 4824bdd1243dSDimitry Andric // another type that is viable. 4825bdd1243dSDimitry Andric if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL)) 4826bdd1243dSDimitry Andric if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), 4827bdd1243dSDimitry Andric P.beginOffset(), P.size())) { 4828bdd1243dSDimitry Andric VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy); 4829bdd1243dSDimitry Andric if (TypePartitionVecTy && 4830bdd1243dSDimitry Andric checkVectorTypeForPromotion(P, TypePartitionVecTy, DL)) 4831bdd1243dSDimitry Andric SliceTy = TypePartitionTy; 4832bdd1243dSDimitry Andric } 4833bdd1243dSDimitry Andric 48340b57cec5SDimitry Andric if (!SliceTy) 48350b57cec5SDimitry Andric SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size()); 4836bdd1243dSDimitry Andric assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size()); 48370b57cec5SDimitry Andric 48380b57cec5SDimitry Andric bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); 48390b57cec5SDimitry Andric 48400b57cec5SDimitry Andric VectorType *VecTy = 48410b57cec5SDimitry Andric IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL); 48420b57cec5SDimitry Andric if (VecTy) 48430b57cec5SDimitry Andric SliceTy = VecTy; 48440b57cec5SDimitry Andric 48450b57cec5SDimitry Andric // Check for the case where we're going to rewrite to a new alloca of the 48460b57cec5SDimitry Andric // exact same type as the original, and with the same access offsets. In that 48470b57cec5SDimitry Andric // case, re-use the existing alloca, but still run through the rewriter to 48480b57cec5SDimitry Andric // perform phi and select speculation. 48490b57cec5SDimitry Andric // P.beginOffset() can be non-zero even with the same type in a case with 48500b57cec5SDimitry Andric // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll). 
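// (Illustrative: for "%a = alloca i32" whose single partition is [0, 4)
// with common use type i32, SliceTy equals AI.getAllocatedType() and the
// branch below reuses the original alloca instead of creating a new one.)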
48510b57cec5SDimitry Andric AllocaInst *NewAI; 48520b57cec5SDimitry Andric if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) { 48530b57cec5SDimitry Andric NewAI = &AI; 48540b57cec5SDimitry Andric // FIXME: We should be able to bail at this point with "nothing changed". 48550b57cec5SDimitry Andric // FIXME: We might want to defer PHI speculation until after here. 48560b57cec5SDimitry Andric // FIXME: return nullptr; 48570b57cec5SDimitry Andric } else { 48585ffd83dbSDimitry Andric // Make sure the alignment is compatible with P.beginOffset(). 48595ffd83dbSDimitry Andric const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset()); 48600b57cec5SDimitry Andric // If we will get at least this much alignment from the type alone, leave 48610b57cec5SDimitry Andric // the alloca's alignment unconstrained. 48625ffd83dbSDimitry Andric const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy); 48630b57cec5SDimitry Andric NewAI = new AllocaInst( 4864bdd1243dSDimitry Andric SliceTy, AI.getAddressSpace(), nullptr, 48655ffd83dbSDimitry Andric IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment, 4866*0fca6ea1SDimitry Andric AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), 4867*0fca6ea1SDimitry Andric AI.getIterator()); 48680b57cec5SDimitry Andric // Copy the old AI debug location over to the new one. 48690b57cec5SDimitry Andric NewAI->setDebugLoc(AI.getDebugLoc()); 48700b57cec5SDimitry Andric ++NumNewAllocas; 48710b57cec5SDimitry Andric } 48720b57cec5SDimitry Andric 4873*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset() 4874*0fca6ea1SDimitry Andric << "," << P.endOffset() << ") to: " << *NewAI << "\n"); 48750b57cec5SDimitry Andric 48760b57cec5SDimitry Andric // Track the high watermark on the worklist as it is only relevant for 48770b57cec5SDimitry Andric // promoted allocas. We will reset it to this point if the alloca is not in 48780b57cec5SDimitry Andric // fact scheduled for promotion. 48790b57cec5SDimitry Andric unsigned PPWOldSize = PostPromotionWorklist.size(); 48800b57cec5SDimitry Andric unsigned NumUses = 0; 48810b57cec5SDimitry Andric SmallSetVector<PHINode *, 8> PHIUsers; 48820b57cec5SDimitry Andric SmallSetVector<SelectInst *, 8> SelectUsers; 48830b57cec5SDimitry Andric 48840b57cec5SDimitry Andric AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), 48850b57cec5SDimitry Andric P.endOffset(), IsIntegerPromotable, VecTy, 48860b57cec5SDimitry Andric PHIUsers, SelectUsers); 48870b57cec5SDimitry Andric bool Promotable = true; 48880b57cec5SDimitry Andric for (Slice *S : P.splitSliceTails()) { 48890b57cec5SDimitry Andric Promotable &= Rewriter.visit(S); 48900b57cec5SDimitry Andric ++NumUses; 48910b57cec5SDimitry Andric } 48920b57cec5SDimitry Andric for (Slice &S : P) { 48930b57cec5SDimitry Andric Promotable &= Rewriter.visit(&S); 48940b57cec5SDimitry Andric ++NumUses; 48950b57cec5SDimitry Andric } 48960b57cec5SDimitry Andric 48970b57cec5SDimitry Andric NumAllocaPartitionUses += NumUses; 48980b57cec5SDimitry Andric MaxUsesPerAllocaPartition.updateMax(NumUses); 48990b57cec5SDimitry Andric 49000b57cec5SDimitry Andric // Now that we've processed all the slices in the new partition, check if any 49010b57cec5SDimitry Andric // PHIs or Selects would block promotion. 
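// (Illustrative IR for a blocking PHI, hypothetical and simplified:
//   %p = phi ptr [ %alloca, %then ], [ %global, %else ]
//   %v = load i32, ptr %p
// promotion requires speculating the load into both predecessors, so if
// that is unsafe the whole partition stays unpromoted.)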
49020b57cec5SDimitry Andric for (PHINode *PHI : PHIUsers)
49030b57cec5SDimitry Andric if (!isSafePHIToSpeculate(*PHI)) {
49040b57cec5SDimitry Andric Promotable = false;
49050b57cec5SDimitry Andric PHIUsers.clear();
49060b57cec5SDimitry Andric SelectUsers.clear();
49070b57cec5SDimitry Andric break;
49080b57cec5SDimitry Andric }
49090b57cec5SDimitry Andric 
4910bdd1243dSDimitry Andric SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
4911bdd1243dSDimitry Andric NewSelectsToRewrite;
4912bdd1243dSDimitry Andric NewSelectsToRewrite.reserve(SelectUsers.size());
4913bdd1243dSDimitry Andric for (SelectInst *Sel : SelectUsers) {
4914bdd1243dSDimitry Andric std::optional<RewriteableMemOps> Ops =
4915bdd1243dSDimitry Andric isSafeSelectToSpeculate(*Sel, PreserveCFG);
4916bdd1243dSDimitry Andric if (!Ops) {
49170b57cec5SDimitry Andric Promotable = false;
49180b57cec5SDimitry Andric PHIUsers.clear();
49190b57cec5SDimitry Andric SelectUsers.clear();
4920bdd1243dSDimitry Andric NewSelectsToRewrite.clear();
49210b57cec5SDimitry Andric break;
49220b57cec5SDimitry Andric }
4923bdd1243dSDimitry Andric NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
4924bdd1243dSDimitry Andric }
49250b57cec5SDimitry Andric 
49260b57cec5SDimitry Andric if (Promotable) {
4927e8d8bef9SDimitry Andric for (Use *U : AS.getDeadUsesIfPromotable()) {
4928e8d8bef9SDimitry Andric auto *OldInst = dyn_cast<Instruction>(U->get());
4929e8d8bef9SDimitry Andric Value::dropDroppableUse(*U);
4930e8d8bef9SDimitry Andric if (OldInst)
4931e8d8bef9SDimitry Andric if (isInstructionTriviallyDead(OldInst))
4932e8d8bef9SDimitry Andric DeadInsts.push_back(OldInst);
4933e8d8bef9SDimitry Andric }
49340b57cec5SDimitry Andric if (PHIUsers.empty() && SelectUsers.empty()) {
49350b57cec5SDimitry Andric // Promote the alloca.
49360b57cec5SDimitry Andric PromotableAllocas.push_back(NewAI);
49370b57cec5SDimitry Andric } else {
49380b57cec5SDimitry Andric // If we have either PHIs or Selects to speculate, add them to those
49390b57cec5SDimitry Andric // worklists and re-queue the new alloca so that we promote it on the
49400b57cec5SDimitry Andric // next iteration.
49410b57cec5SDimitry Andric for (PHINode *PHIUser : PHIUsers)
49420b57cec5SDimitry Andric SpeculatablePHIs.insert(PHIUser);
4943bdd1243dSDimitry Andric SelectsToRewrite.reserve(SelectsToRewrite.size() +
4944bdd1243dSDimitry Andric NewSelectsToRewrite.size());
4945bdd1243dSDimitry Andric for (auto &&KV : llvm::make_range(
4946bdd1243dSDimitry Andric std::make_move_iterator(NewSelectsToRewrite.begin()),
4947bdd1243dSDimitry Andric std::make_move_iterator(NewSelectsToRewrite.end())))
4948bdd1243dSDimitry Andric SelectsToRewrite.insert(std::move(KV));
49490b57cec5SDimitry Andric Worklist.insert(NewAI);
49500b57cec5SDimitry Andric }
49510b57cec5SDimitry Andric } else {
49520b57cec5SDimitry Andric // Drop any post-promotion work items if promotion didn't happen.
49530b57cec5SDimitry Andric while (PostPromotionWorklist.size() > PPWOldSize)
49540b57cec5SDimitry Andric PostPromotionWorklist.pop_back();
49550b57cec5SDimitry Andric 
49560b57cec5SDimitry Andric // We couldn't promote and we didn't create a new partition; nothing
49570b57cec5SDimitry Andric // happened.
49580b57cec5SDimitry Andric if (NewAI == &AI)
49590b57cec5SDimitry Andric return nullptr;
49600b57cec5SDimitry Andric 
49610b57cec5SDimitry Andric // If we can't promote the alloca, iterate on it to check for new
49620b57cec5SDimitry Andric // refinements exposed by splitting the current alloca.
Don't iterate on an 49630b57cec5SDimitry Andric // alloca which didn't actually change and didn't get promoted. 49640b57cec5SDimitry Andric Worklist.insert(NewAI); 49650b57cec5SDimitry Andric } 49660b57cec5SDimitry Andric 49670b57cec5SDimitry Andric return NewAI; 49680b57cec5SDimitry Andric } 49690b57cec5SDimitry Andric 4970*0fca6ea1SDimitry Andric // There isn't a shared interface to get the "address" parts out of a 4971*0fca6ea1SDimitry Andric // dbg.declare and dbg.assign, so provide some wrappers now for 4972*0fca6ea1SDimitry Andric // both debug intrinsics and records. 4973*0fca6ea1SDimitry Andric const Value *getAddress(const DbgVariableIntrinsic *DVI) { 4974*0fca6ea1SDimitry Andric if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI)) 4975*0fca6ea1SDimitry Andric return DAI->getAddress(); 4976*0fca6ea1SDimitry Andric return cast<DbgDeclareInst>(DVI)->getAddress(); 4977*0fca6ea1SDimitry Andric } 4978*0fca6ea1SDimitry Andric 4979*0fca6ea1SDimitry Andric const Value *getAddress(const DbgVariableRecord *DVR) { 4980*0fca6ea1SDimitry Andric assert(DVR->getType() == DbgVariableRecord::LocationType::Declare || 4981*0fca6ea1SDimitry Andric DVR->getType() == DbgVariableRecord::LocationType::Assign); 4982*0fca6ea1SDimitry Andric return DVR->getAddress(); 4983*0fca6ea1SDimitry Andric } 4984*0fca6ea1SDimitry Andric 4985*0fca6ea1SDimitry Andric bool isKillAddress(const DbgVariableIntrinsic *DVI) { 4986*0fca6ea1SDimitry Andric if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI)) 4987*0fca6ea1SDimitry Andric return DAI->isKillAddress(); 4988*0fca6ea1SDimitry Andric return cast<DbgDeclareInst>(DVI)->isKillLocation(); 4989*0fca6ea1SDimitry Andric } 4990*0fca6ea1SDimitry Andric 4991*0fca6ea1SDimitry Andric bool isKillAddress(const DbgVariableRecord *DVR) { 4992*0fca6ea1SDimitry Andric assert(DVR->getType() == DbgVariableRecord::LocationType::Declare || 4993*0fca6ea1SDimitry Andric DVR->getType() == DbgVariableRecord::LocationType::Assign); 4994*0fca6ea1SDimitry Andric if (DVR->getType() == DbgVariableRecord::LocationType::Assign) 4995*0fca6ea1SDimitry Andric return DVR->isKillAddress(); 4996*0fca6ea1SDimitry Andric return DVR->isKillLocation(); 4997*0fca6ea1SDimitry Andric } 4998*0fca6ea1SDimitry Andric 4999*0fca6ea1SDimitry Andric const DIExpression *getAddressExpression(const DbgVariableIntrinsic *DVI) { 5000*0fca6ea1SDimitry Andric if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI)) 5001*0fca6ea1SDimitry Andric return DAI->getAddressExpression(); 5002*0fca6ea1SDimitry Andric return cast<DbgDeclareInst>(DVI)->getExpression(); 5003*0fca6ea1SDimitry Andric } 5004*0fca6ea1SDimitry Andric 5005*0fca6ea1SDimitry Andric const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) { 5006*0fca6ea1SDimitry Andric assert(DVR->getType() == DbgVariableRecord::LocationType::Declare || 5007*0fca6ea1SDimitry Andric DVR->getType() == DbgVariableRecord::LocationType::Assign); 5008*0fca6ea1SDimitry Andric if (DVR->getType() == DbgVariableRecord::LocationType::Assign) 5009*0fca6ea1SDimitry Andric return DVR->getAddressExpression(); 5010*0fca6ea1SDimitry Andric return DVR->getExpression(); 5011*0fca6ea1SDimitry Andric } 5012*0fca6ea1SDimitry Andric 5013*0fca6ea1SDimitry Andric /// Create or replace an existing fragment in a DIExpression with \p Frag. 5014*0fca6ea1SDimitry Andric /// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext 5015*0fca6ea1SDimitry Andric /// operation, add \p BitExtractOffset to the offset part. 
5016*0fca6ea1SDimitry Andric ///
5019*0fca6ea1SDimitry Andric /// This function is similar to DIExpression::createFragmentExpression except
5020*0fca6ea1SDimitry Andric /// for 3 important distinctions:
5021*0fca6ea1SDimitry Andric /// 1. The new fragment isn't relative to an existing fragment.
5022*0fca6ea1SDimitry Andric /// 2. It assumes the computed location is a memory location. This means we
5023*0fca6ea1SDimitry Andric /// don't need to perform checks that creating the fragment preserves the
5024*0fca6ea1SDimitry Andric /// expression semantics.
5025*0fca6ea1SDimitry Andric /// 3. Existing extract_bits are modified independently of fragment changes
5026*0fca6ea1SDimitry Andric /// using \p BitExtractOffset. A change to the fragment offset or size
5027*0fca6ea1SDimitry Andric /// may affect a bit extract. But a bit extract offset can change
5028*0fca6ea1SDimitry Andric /// independently of the fragment dimensions.
5029*0fca6ea1SDimitry Andric ///
5030*0fca6ea1SDimitry Andric /// Returns the new expression, or nullptr if one couldn't be created.
5031*0fca6ea1SDimitry Andric /// Ideally this is only used to signal that a bit-extract has become
5032*0fca6ea1SDimitry Andric /// zero-sized (and thus the new debug record has no size and can be
5033*0fca6ea1SDimitry Andric /// dropped); however, it fails for other reasons too - see the FIXME below.
5034*0fca6ea1SDimitry Andric ///
5035*0fca6ea1SDimitry Andric /// FIXME: To keep the change that introduces this function NFC, it bails
5036*0fca6ea1SDimitry Andric /// in some situations unnecessarily, e.g. when fragment and bit extract
5037*0fca6ea1SDimitry Andric /// sizes differ.
5038*0fca6ea1SDimitry Andric static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
5039*0fca6ea1SDimitry Andric DIExpression::FragmentInfo Frag,
5040*0fca6ea1SDimitry Andric int64_t BitExtractOffset) {
5041*0fca6ea1SDimitry Andric SmallVector<uint64_t, 8> Ops;
5042*0fca6ea1SDimitry Andric bool HasFragment = false;
5043*0fca6ea1SDimitry Andric bool HasBitExtract = false;
5044*0fca6ea1SDimitry Andric 
5045*0fca6ea1SDimitry Andric for (auto &Op : Expr->expr_ops()) {
5046*0fca6ea1SDimitry Andric if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5047*0fca6ea1SDimitry Andric HasFragment = true;
5048*0fca6ea1SDimitry Andric continue;
5049*0fca6ea1SDimitry Andric }
5050*0fca6ea1SDimitry Andric if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5051*0fca6ea1SDimitry Andric Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5052*0fca6ea1SDimitry Andric HasBitExtract = true;
5053*0fca6ea1SDimitry Andric int64_t ExtractOffsetInBits = Op.getArg(0);
5054*0fca6ea1SDimitry Andric int64_t ExtractSizeInBits = Op.getArg(1);
5055*0fca6ea1SDimitry Andric 
5056*0fca6ea1SDimitry Andric // DIExpression::createFragmentExpression doesn't know how to handle
5057*0fca6ea1SDimitry Andric // a fragment that is smaller than the extract. Copy the behaviour
5058*0fca6ea1SDimitry Andric // (bail) to avoid non-NFC changes.
5059*0fca6ea1SDimitry Andric // FIXME: Don't do this.
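// (Illustrative: a 16-bit fragment cannot carry a 32-bit extract_bits
// operation without reading past the fragment, so we bail here just as
// createFragmentExpression does.)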
5060*0fca6ea1SDimitry Andric if (Frag.SizeInBits < uint64_t(ExtractSizeInBits)) 5061*0fca6ea1SDimitry Andric return nullptr; 5062*0fca6ea1SDimitry Andric 5063*0fca6ea1SDimitry Andric assert(BitExtractOffset <= 0); 5064*0fca6ea1SDimitry Andric int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset; 5065*0fca6ea1SDimitry Andric 5066*0fca6ea1SDimitry Andric // DIExpression::createFragmentExpression doesn't know what to do 5067*0fca6ea1SDimitry Andric // if the new extract starts "outside" the existing one. Copy the 5068*0fca6ea1SDimitry Andric // behaviour (bail) to avoid non-NFC changes. 5069*0fca6ea1SDimitry Andric // FIXME: Don't do this. 5070*0fca6ea1SDimitry Andric if (AdjustedOffset < 0) 5071*0fca6ea1SDimitry Andric return nullptr; 5072*0fca6ea1SDimitry Andric 5073*0fca6ea1SDimitry Andric Ops.push_back(Op.getOp()); 5074*0fca6ea1SDimitry Andric Ops.push_back(std::max<int64_t>(0, AdjustedOffset)); 5075*0fca6ea1SDimitry Andric Ops.push_back(ExtractSizeInBits); 5076*0fca6ea1SDimitry Andric continue; 5077*0fca6ea1SDimitry Andric } 5078*0fca6ea1SDimitry Andric Op.appendToVector(Ops); 5079*0fca6ea1SDimitry Andric } 5080*0fca6ea1SDimitry Andric 5081*0fca6ea1SDimitry Andric // Unsupported by createFragmentExpression, so don't support it here yet to 5082*0fca6ea1SDimitry Andric // preserve NFC-ness. 5083*0fca6ea1SDimitry Andric if (HasFragment && HasBitExtract) 5084*0fca6ea1SDimitry Andric return nullptr; 5085*0fca6ea1SDimitry Andric 5086*0fca6ea1SDimitry Andric if (!HasBitExtract) { 5087*0fca6ea1SDimitry Andric Ops.push_back(dwarf::DW_OP_LLVM_fragment); 5088*0fca6ea1SDimitry Andric Ops.push_back(Frag.OffsetInBits); 5089*0fca6ea1SDimitry Andric Ops.push_back(Frag.SizeInBits); 5090*0fca6ea1SDimitry Andric } 5091*0fca6ea1SDimitry Andric return DIExpression::get(Expr->getContext(), Ops); 5092*0fca6ea1SDimitry Andric } 5093*0fca6ea1SDimitry Andric 5094*0fca6ea1SDimitry Andric /// Insert a new dbg.declare. 5095*0fca6ea1SDimitry Andric /// \p Orig Original to copy debug loc and variable from. 5096*0fca6ea1SDimitry Andric /// \p NewAddr Location's new base address. 5097*0fca6ea1SDimitry Andric /// \p NewAddrExpr New expression to apply to address. 5098*0fca6ea1SDimitry Andric /// \p BeforeInst Insert position. 5099*0fca6ea1SDimitry Andric /// \p NewFragment New fragment (absolute, non-relative). 5100*0fca6ea1SDimitry Andric /// \p BitExtractAdjustment Offset to apply to any extract_bits op. 5101*0fca6ea1SDimitry Andric static void 5102*0fca6ea1SDimitry Andric insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig, AllocaInst *NewAddr, 5103*0fca6ea1SDimitry Andric DIExpression *NewAddrExpr, Instruction *BeforeInst, 5104*0fca6ea1SDimitry Andric std::optional<DIExpression::FragmentInfo> NewFragment, 5105*0fca6ea1SDimitry Andric int64_t BitExtractAdjustment) { 5106*0fca6ea1SDimitry Andric if (NewFragment) 5107*0fca6ea1SDimitry Andric NewAddrExpr = createOrReplaceFragment(NewAddrExpr, *NewFragment, 5108*0fca6ea1SDimitry Andric BitExtractAdjustment); 5109*0fca6ea1SDimitry Andric if (!NewAddrExpr) 5110*0fca6ea1SDimitry Andric return; 5111*0fca6ea1SDimitry Andric 5112*0fca6ea1SDimitry Andric DIB.insertDeclare(NewAddr, Orig->getVariable(), NewAddrExpr, 51135f757f3fSDimitry Andric Orig->getDebugLoc(), BeforeInst); 51145f757f3fSDimitry Andric } 5115*0fca6ea1SDimitry Andric 5116*0fca6ea1SDimitry Andric /// Insert a new dbg.assign. 5117*0fca6ea1SDimitry Andric /// \p Orig Original to copy debug loc, variable, value and value expression 5118*0fca6ea1SDimitry Andric /// from. 
5119*0fca6ea1SDimitry Andric /// \p NewAddr Location's new base address. 5120*0fca6ea1SDimitry Andric /// \p NewAddrExpr New expression to apply to address. 5121*0fca6ea1SDimitry Andric /// \p BeforeInst Insert position. 5122*0fca6ea1SDimitry Andric /// \p NewFragment New fragment (absolute, non-relative). 5123*0fca6ea1SDimitry Andric /// \p BitExtractAdjustment Offset to apply to any extract_bits op. 5124*0fca6ea1SDimitry Andric static void 5125*0fca6ea1SDimitry Andric insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig, AllocaInst *NewAddr, 5126*0fca6ea1SDimitry Andric DIExpression *NewAddrExpr, Instruction *BeforeInst, 5127*0fca6ea1SDimitry Andric std::optional<DIExpression::FragmentInfo> NewFragment, 5128*0fca6ea1SDimitry Andric int64_t BitExtractAdjustment) { 5129*0fca6ea1SDimitry Andric // DIBuilder::insertDbgAssign will insert the #dbg_assign after NewAddr. 51305f757f3fSDimitry Andric (void)BeforeInst; 5131*0fca6ea1SDimitry Andric 5132*0fca6ea1SDimitry Andric // A dbg.assign puts fragment info in the value expression only. The address 5133*0fca6ea1SDimitry Andric // expression has already been built: NewAddrExpr. 5134*0fca6ea1SDimitry Andric DIExpression *NewFragmentExpr = Orig->getExpression(); 5135*0fca6ea1SDimitry Andric if (NewFragment) 5136*0fca6ea1SDimitry Andric NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment, 5137*0fca6ea1SDimitry Andric BitExtractAdjustment); 5138*0fca6ea1SDimitry Andric if (!NewFragmentExpr) 5139*0fca6ea1SDimitry Andric return; 5140*0fca6ea1SDimitry Andric 5141*0fca6ea1SDimitry Andric // Apply a DIAssignID to the store if it doesn't already have it. 51425f757f3fSDimitry Andric if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) { 51435f757f3fSDimitry Andric NewAddr->setMetadata(LLVMContext::MD_DIAssignID, 51445f757f3fSDimitry Andric DIAssignID::getDistinct(NewAddr->getContext())); 51455f757f3fSDimitry Andric } 5146*0fca6ea1SDimitry Andric 5147*0fca6ea1SDimitry Andric Instruction *NewAssign = 5148*0fca6ea1SDimitry Andric DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(), 5149*0fca6ea1SDimitry Andric NewFragmentExpr, NewAddr, NewAddrExpr, 5150*0fca6ea1SDimitry Andric Orig->getDebugLoc()) 5151*0fca6ea1SDimitry Andric .get<Instruction *>(); 51525f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n"); 51535f757f3fSDimitry Andric (void)NewAssign; 51545f757f3fSDimitry Andric } 5155*0fca6ea1SDimitry Andric 5156*0fca6ea1SDimitry Andric /// Insert a new DbgRecord. 5157*0fca6ea1SDimitry Andric /// \p Orig Original to copy record type, debug loc and variable from, and 5158*0fca6ea1SDimitry Andric /// additionally value and value expression for dbg_assign records. 5159*0fca6ea1SDimitry Andric /// \p NewAddr Location's new base address. 5160*0fca6ea1SDimitry Andric /// \p NewAddrExpr New expression to apply to address. 5161*0fca6ea1SDimitry Andric /// \p BeforeInst Insert position. 5162*0fca6ea1SDimitry Andric /// \p NewFragment New fragment (absolute, non-relative). 5163*0fca6ea1SDimitry Andric /// \p BitExtractAdjustment Offset to apply to any extract_bits op. 
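/// (Illustrative: for a dbg_assign describing the second i32 of a split
/// { i32, i32 } alloca, NewFragment would be offset 32, size 32, applied
/// to the value expression rather than to NewAddrExpr.)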
5164*0fca6ea1SDimitry Andric static void
5165*0fca6ea1SDimitry Andric insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
5166*0fca6ea1SDimitry Andric DIExpression *NewAddrExpr, Instruction *BeforeInst,
5167*0fca6ea1SDimitry Andric std::optional<DIExpression::FragmentInfo> NewFragment,
5168*0fca6ea1SDimitry Andric int64_t BitExtractAdjustment) {
51695f757f3fSDimitry Andric (void)DIB;
5170*0fca6ea1SDimitry Andric 
5171*0fca6ea1SDimitry Andric // A dbg_assign puts fragment info in the value expression only. The address
5172*0fca6ea1SDimitry Andric // expression has already been built: NewAddrExpr. A dbg_declare puts the
5173*0fca6ea1SDimitry Andric // new fragment info into NewAddrExpr (as it only has one expression).
5174*0fca6ea1SDimitry Andric DIExpression *NewFragmentExpr =
5175*0fca6ea1SDimitry Andric Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5176*0fca6ea1SDimitry Andric if (NewFragment)
5177*0fca6ea1SDimitry Andric NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5178*0fca6ea1SDimitry Andric BitExtractAdjustment);
5179*0fca6ea1SDimitry Andric if (!NewFragmentExpr)
5180*0fca6ea1SDimitry Andric return;
5181*0fca6ea1SDimitry Andric 
51827a6dacacSDimitry Andric if (Orig->isDbgDeclare()) {
5183*0fca6ea1SDimitry Andric DbgVariableRecord *DVR = DbgVariableRecord::createDVRDeclare(
51847a6dacacSDimitry Andric NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5185*0fca6ea1SDimitry Andric BeforeInst->getParent()->insertDbgRecordBefore(DVR,
51867a6dacacSDimitry Andric BeforeInst->getIterator());
51877a6dacacSDimitry Andric return;
51887a6dacacSDimitry Andric }
5189*0fca6ea1SDimitry Andric 
5190*0fca6ea1SDimitry Andric // Apply a DIAssignID to the store if it doesn't already have it.
51917a6dacacSDimitry Andric if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
51927a6dacacSDimitry Andric NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
51937a6dacacSDimitry Andric DIAssignID::getDistinct(NewAddr->getContext()));
51947a6dacacSDimitry Andric }
5195*0fca6ea1SDimitry Andric 
5196*0fca6ea1SDimitry Andric DbgVariableRecord *NewAssign = DbgVariableRecord::createLinkedDVRAssign(
51977a6dacacSDimitry Andric NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5198*0fca6ea1SDimitry Andric NewAddrExpr, Orig->getDebugLoc());
5199*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
52007a6dacacSDimitry Andric (void)NewAssign;
52015f757f3fSDimitry Andric }
52025f757f3fSDimitry Andric 
52030b57cec5SDimitry Andric /// Walks the slices of an alloca and forms partitions based on them,
52040b57cec5SDimitry Andric /// rewriting each of their uses.
52055f757f3fSDimitry Andric bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
52060b57cec5SDimitry Andric if (AS.begin() == AS.end())
52070b57cec5SDimitry Andric return false;
52080b57cec5SDimitry Andric 
52090b57cec5SDimitry Andric unsigned NumPartitions = 0;
52100b57cec5SDimitry Andric bool Changed = false;
52110b57cec5SDimitry Andric const DataLayout &DL = AI.getModule()->getDataLayout();
52120b57cec5SDimitry Andric 
52130b57cec5SDimitry Andric // First try to pre-split loads and stores.
52140b57cec5SDimitry Andric Changed |= presplitLoadsAndStores(AI, AS);
52150b57cec5SDimitry Andric 
52160b57cec5SDimitry Andric // Now that we have identified any pre-splitting opportunities,
52170b57cec5SDimitry Andric // mark loads and stores unsplittable except for the following case.
52180b57cec5SDimitry Andric // We leave a slice splittable if all other slices are disjoint or fully
52190b57cec5SDimitry Andric // included in the slice, such as whole-alloca loads and stores.
52200b57cec5SDimitry Andric // If we fail to split these during pre-splitting, we want to force them
52210b57cec5SDimitry Andric // to be rewritten into a partition.
52220b57cec5SDimitry Andric bool IsSorted = true;
52230b57cec5SDimitry Andric 
52245ffd83dbSDimitry Andric uint64_t AllocaSize =
5225bdd1243dSDimitry Andric DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
52260b57cec5SDimitry Andric const uint64_t MaxBitVectorSize = 1024;
52270b57cec5SDimitry Andric if (AllocaSize <= MaxBitVectorSize) {
52280b57cec5SDimitry Andric // If a byte boundary is included in any load or store, a slice starting or
52290b57cec5SDimitry Andric // ending at the boundary is not splittable.
52300b57cec5SDimitry Andric SmallBitVector SplittableOffset(AllocaSize + 1, true);
52310b57cec5SDimitry Andric for (Slice &S : AS)
52320b57cec5SDimitry Andric for (unsigned O = S.beginOffset() + 1;
52330b57cec5SDimitry Andric O < S.endOffset() && O < AllocaSize; O++)
52340b57cec5SDimitry Andric SplittableOffset.reset(O);
52350b57cec5SDimitry Andric 
52360b57cec5SDimitry Andric for (Slice &S : AS) {
52370b57cec5SDimitry Andric if (!S.isSplittable())
52380b57cec5SDimitry Andric continue;
52390b57cec5SDimitry Andric 
52400b57cec5SDimitry Andric if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
52410b57cec5SDimitry Andric (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
52420b57cec5SDimitry Andric continue;
52430b57cec5SDimitry Andric 
52440b57cec5SDimitry Andric if (isa<LoadInst>(S.getUse()->getUser()) ||
52450b57cec5SDimitry Andric isa<StoreInst>(S.getUse()->getUser())) {
52460b57cec5SDimitry Andric S.makeUnsplittable();
52470b57cec5SDimitry Andric IsSorted = false;
52480b57cec5SDimitry Andric }
52490b57cec5SDimitry Andric }
5250*0fca6ea1SDimitry Andric } else {
52510b57cec5SDimitry Andric // We only allow whole-alloca splittable loads and stores
52520b57cec5SDimitry Andric // for a large alloca to avoid creating too large a BitVector.
52530b57cec5SDimitry Andric for (Slice &S : AS) {
52540b57cec5SDimitry Andric if (!S.isSplittable())
52550b57cec5SDimitry Andric continue;
52560b57cec5SDimitry Andric 
52570b57cec5SDimitry Andric if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
52580b57cec5SDimitry Andric continue;
52590b57cec5SDimitry Andric 
52600b57cec5SDimitry Andric if (isa<LoadInst>(S.getUse()->getUser()) ||
52610b57cec5SDimitry Andric isa<StoreInst>(S.getUse()->getUser())) {
52620b57cec5SDimitry Andric S.makeUnsplittable();
52630b57cec5SDimitry Andric IsSorted = false;
52640b57cec5SDimitry Andric }
52650b57cec5SDimitry Andric }
52660b57cec5SDimitry Andric }
52670b57cec5SDimitry Andric 
52680b57cec5SDimitry Andric if (!IsSorted)
5269*0fca6ea1SDimitry Andric llvm::stable_sort(AS);
52700b57cec5SDimitry Andric 
52710b57cec5SDimitry Andric /// Describes the allocas introduced by rewritePartition in order to migrate
52720b57cec5SDimitry Andric /// the debug info.
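/// (Illustrative: splitting a { i32, i32 } alloca into two partitions
/// records two Fragments, one per new alloca, with bit offsets 0 and 32
/// and size 32 each.)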
52730b57cec5SDimitry Andric struct Fragment { 52740b57cec5SDimitry Andric AllocaInst *Alloca; 52750b57cec5SDimitry Andric uint64_t Offset; 52760b57cec5SDimitry Andric uint64_t Size; 52770b57cec5SDimitry Andric Fragment(AllocaInst *AI, uint64_t O, uint64_t S) 52780b57cec5SDimitry Andric : Alloca(AI), Offset(O), Size(S) {} 52790b57cec5SDimitry Andric }; 52800b57cec5SDimitry Andric SmallVector<Fragment, 4> Fragments; 52810b57cec5SDimitry Andric 52820b57cec5SDimitry Andric // Rewrite each partition. 52830b57cec5SDimitry Andric for (auto &P : AS.partitions()) { 52840b57cec5SDimitry Andric if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) { 52850b57cec5SDimitry Andric Changed = true; 52860b57cec5SDimitry Andric if (NewAI != &AI) { 52870b57cec5SDimitry Andric uint64_t SizeOfByte = 8; 52885ffd83dbSDimitry Andric uint64_t AllocaSize = 5289bdd1243dSDimitry Andric DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue(); 52900b57cec5SDimitry Andric // Don't include any padding. 52910b57cec5SDimitry Andric uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte); 5292*0fca6ea1SDimitry Andric Fragments.push_back( 5293*0fca6ea1SDimitry Andric Fragment(NewAI, P.beginOffset() * SizeOfByte, Size)); 52940b57cec5SDimitry Andric } 52950b57cec5SDimitry Andric } 52960b57cec5SDimitry Andric ++NumPartitions; 52970b57cec5SDimitry Andric } 52980b57cec5SDimitry Andric 52990b57cec5SDimitry Andric NumAllocaPartitions += NumPartitions; 53000b57cec5SDimitry Andric MaxPartitionsPerAlloca.updateMax(NumPartitions); 53010b57cec5SDimitry Andric 53020b57cec5SDimitry Andric // Migrate debug information from the old alloca to the new alloca(s) 53030b57cec5SDimitry Andric // and the individual partitions. 53045f757f3fSDimitry Andric auto MigrateOne = [&](auto *DbgVariable) { 5305*0fca6ea1SDimitry Andric // Can't overlap with undef memory. 5306*0fca6ea1SDimitry Andric if (isKillAddress(DbgVariable)) 5307*0fca6ea1SDimitry Andric return; 5308*0fca6ea1SDimitry Andric 5309*0fca6ea1SDimitry Andric const Value *DbgPtr = getAddress(DbgVariable); 5310*0fca6ea1SDimitry Andric DIExpression::FragmentInfo VarFrag = 5311*0fca6ea1SDimitry Andric DbgVariable->getFragmentOrEntireVariable(); 5312*0fca6ea1SDimitry Andric // Get the address expression constant offset if one exists and the ops 5313*0fca6ea1SDimitry Andric // that come after it. 5314*0fca6ea1SDimitry Andric int64_t CurrentExprOffsetInBytes = 0; 5315*0fca6ea1SDimitry Andric SmallVector<uint64_t> PostOffsetOps; 5316*0fca6ea1SDimitry Andric if (!getAddressExpression(DbgVariable) 5317*0fca6ea1SDimitry Andric ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps)) 5318*0fca6ea1SDimitry Andric return; // Couldn't interpret this DIExpression - drop the var. 5319*0fca6ea1SDimitry Andric 5320*0fca6ea1SDimitry Andric // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext. 
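// (Illustrative: in !DIExpression(DW_OP_LLVM_extract_bits_zext, 8, 16),
// the first argument, 8, is the bit offset picked up by this loop.)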
5321*0fca6ea1SDimitry Andric int64_t ExtractOffsetInBits = 0;
5322*0fca6ea1SDimitry Andric for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5323*0fca6ea1SDimitry Andric if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5324*0fca6ea1SDimitry Andric Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5325*0fca6ea1SDimitry Andric ExtractOffsetInBits = Op.getArg(0);
5326*0fca6ea1SDimitry Andric break;
5327*0fca6ea1SDimitry Andric }
5328*0fca6ea1SDimitry Andric }
5329*0fca6ea1SDimitry Andric 
53300b57cec5SDimitry Andric DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
53310b57cec5SDimitry Andric for (auto Fragment : Fragments) {
5332*0fca6ea1SDimitry Andric int64_t OffsetFromLocationInBits;
5333*0fca6ea1SDimitry Andric std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5334*0fca6ea1SDimitry Andric // Find the variable fragment that the new alloca slice covers.
5335*0fca6ea1SDimitry Andric // Drop debug info for this variable fragment if we can't compute an
5336*0fca6ea1SDimitry Andric // intersection between it and the alloca slice.
5337*0fca6ea1SDimitry Andric if (!DIExpression::calculateFragmentIntersect(
5338*0fca6ea1SDimitry Andric DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5339*0fca6ea1SDimitry Andric CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5340*0fca6ea1SDimitry Andric NewDbgFragment, OffsetFromLocationInBits))
5341*0fca6ea1SDimitry Andric continue; // Do not migrate this fragment to this slice.
53420b57cec5SDimitry Andric 
5343*0fca6ea1SDimitry Andric // A zero-sized fragment indicates there's no intersection between the
5344*0fca6ea1SDimitry Andric // variable fragment and the alloca slice. Skip this slice for this
5345*0fca6ea1SDimitry Andric // variable fragment.
5346*0fca6ea1SDimitry Andric if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5347*0fca6ea1SDimitry Andric continue; // Do not migrate this fragment to this slice.
53480b57cec5SDimitry Andric 
5349*0fca6ea1SDimitry Andric // No fragment indicates DbgVariable's variable or fragment exactly
5350*0fca6ea1SDimitry Andric // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5351*0fca6ea1SDimitry Andric if (!NewDbgFragment)
5352*0fca6ea1SDimitry Andric NewDbgFragment = DbgVariable->getFragment();
5353*0fca6ea1SDimitry Andric 
5354*0fca6ea1SDimitry Andric // Reduce the new expression offset by the bit-extract offset since
5355*0fca6ea1SDimitry Andric // we'll be keeping that.
5356*0fca6ea1SDimitry Andric int64_t OffsetFromNewAllocaInBits =
5357*0fca6ea1SDimitry Andric OffsetFromLocationInBits - ExtractOffsetInBits;
5358*0fca6ea1SDimitry Andric // We need to adjust an existing bit extract if the offset expression
5359*0fca6ea1SDimitry Andric // can't eat the slack (i.e., if the new offset would be negative).
5360*0fca6ea1SDimitry Andric int64_t BitExtractOffset =
5361*0fca6ea1SDimitry Andric std::min<int64_t>(0, OffsetFromNewAllocaInBits);
5362*0fca6ea1SDimitry Andric // The magnitude of a negative value indicates the number of bits into
5363*0fca6ea1SDimitry Andric // the existing variable fragment that the memory region begins. The new
5364*0fca6ea1SDimitry Andric // variable fragment already excludes those bits - the new DbgPtr offset
5365*0fca6ea1SDimitry Andric // only needs to be applied if it's positive.
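// (Worked example, illustrative: if the slice begins exactly at the
// described location, OffsetFromLocationInBits is 0; with an existing
// extract at bit 8, OffsetFromNewAllocaInBits is -8, BitExtractOffset
// becomes -8 to rebase the extract to bit 0, and no positive address
// offset is prepended below.)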
5366*0fca6ea1SDimitry Andric OffsetFromNewAllocaInBits =
5367*0fca6ea1SDimitry Andric std::max(int64_t(0), OffsetFromNewAllocaInBits);
5368*0fca6ea1SDimitry Andric 
5369*0fca6ea1SDimitry Andric // Rebuild the expression:
5370*0fca6ea1SDimitry Andric // {Offset(OffsetFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5371*0fca6ea1SDimitry Andric // Add NewDbgFragment later, because dbg.assigns don't want it in the
5372*0fca6ea1SDimitry Andric // address expression but the value expression instead.
5373*0fca6ea1SDimitry Andric DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5374*0fca6ea1SDimitry Andric if (OffsetFromNewAllocaInBits > 0) {
5375*0fca6ea1SDimitry Andric int64_t OffsetInBytes = (OffsetFromNewAllocaInBits + 7) / 8;
5376*0fca6ea1SDimitry Andric NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
53770b57cec5SDimitry Andric }
53780b57cec5SDimitry Andric 
5379e8d8bef9SDimitry Andric // Remove any existing intrinsics on the new alloca describing
5380e8d8bef9SDimitry Andric // the variable fragment.
53815f757f3fSDimitry Andric auto RemoveOne = [DbgVariable](auto *OldDII) {
53825f757f3fSDimitry Andric auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5383e8d8bef9SDimitry Andric return LHS->getVariable() == RHS->getVariable() &&
5384e8d8bef9SDimitry Andric LHS->getDebugLoc()->getInlinedAt() ==
5385e8d8bef9SDimitry Andric RHS->getDebugLoc()->getInlinedAt();
5386e8d8bef9SDimitry Andric };
538706c3fb27SDimitry Andric if (SameVariableFragment(OldDII, DbgVariable))
53880b57cec5SDimitry Andric OldDII->eraseFromParent();
53895f757f3fSDimitry Andric };
53907a6dacacSDimitry Andric for_each(findDbgDeclares(Fragment.Alloca), RemoveOne);
5391*0fca6ea1SDimitry Andric for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
53920b57cec5SDimitry Andric 
5393*0fca6ea1SDimitry Andric insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5394*0fca6ea1SDimitry Andric NewDbgFragment, BitExtractOffset);
5395bdd1243dSDimitry Andric }
53965f757f3fSDimitry Andric };
53975f757f3fSDimitry Andric 
53985f757f3fSDimitry Andric // Migrate debug information from the old alloca to the new alloca(s)
53995f757f3fSDimitry Andric // and the individual partitions.
54007a6dacacSDimitry Andric for_each(findDbgDeclares(&AI), MigrateOne);
5401*0fca6ea1SDimitry Andric for_each(findDVRDeclares(&AI), MigrateOne);
54025f757f3fSDimitry Andric for_each(at::getAssignmentMarkers(&AI), MigrateOne);
5403*0fca6ea1SDimitry Andric for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
54045f757f3fSDimitry Andric 
54050b57cec5SDimitry Andric return Changed;
54060b57cec5SDimitry Andric }
54070b57cec5SDimitry Andric 
540804eeddc0SDimitry Andric /// Clobber a use with poison, deleting the used value if it becomes dead.
54095f757f3fSDimitry Andric void SROA::clobberUse(Use &U) {
54100b57cec5SDimitry Andric Value *OldV = U;
541104eeddc0SDimitry Andric // Replace the use with a poison value.
541204eeddc0SDimitry Andric U = PoisonValue::get(OldV->getType());
54130b57cec5SDimitry Andric 
54140b57cec5SDimitry Andric // Check for this making an instruction dead. We have to garbage collect
54150b57cec5SDimitry Andric // all the dead instructions to ensure the uses of any alloca end up being
54160b57cec5SDimitry Andric // minimal.
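// (Illustrative: if the poisoned use was the last use of a GEP into the
// alloca, that GEP becomes trivially dead too, and queueing it here lets
// deleteDeadInstructions remove the whole chain.)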
54170b57cec5SDimitry Andric if (Instruction *OldI = dyn_cast<Instruction>(OldV)) 54180b57cec5SDimitry Andric if (isInstructionTriviallyDead(OldI)) { 5419e8d8bef9SDimitry Andric DeadInsts.push_back(OldI); 54200b57cec5SDimitry Andric } 54210b57cec5SDimitry Andric } 54220b57cec5SDimitry Andric 54230b57cec5SDimitry Andric /// Analyze an alloca for SROA. 54240b57cec5SDimitry Andric /// 54250b57cec5SDimitry Andric /// This analyzes the alloca to ensure we can reason about it, builds 54260b57cec5SDimitry Andric /// the slices of the alloca, and then hands it off to be split and 54270b57cec5SDimitry Andric /// rewritten as needed. 5428bdd1243dSDimitry Andric std::pair<bool /*Changed*/, bool /*CFGChanged*/> 54295f757f3fSDimitry Andric SROA::runOnAlloca(AllocaInst &AI) { 5430bdd1243dSDimitry Andric bool Changed = false; 5431bdd1243dSDimitry Andric bool CFGChanged = false; 5432bdd1243dSDimitry Andric 54330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); 54340b57cec5SDimitry Andric ++NumAllocasAnalyzed; 54350b57cec5SDimitry Andric 54360b57cec5SDimitry Andric // Special case dead allocas, as they're trivial. 54370b57cec5SDimitry Andric if (AI.use_empty()) { 54380b57cec5SDimitry Andric AI.eraseFromParent(); 5439bdd1243dSDimitry Andric Changed = true; 5440bdd1243dSDimitry Andric return {Changed, CFGChanged}; 54410b57cec5SDimitry Andric } 5442*0fca6ea1SDimitry Andric const DataLayout &DL = AI.getDataLayout(); 54430b57cec5SDimitry Andric 54440b57cec5SDimitry Andric // Skip alloca forms that this analysis can't handle. 54455ffd83dbSDimitry Andric auto *AT = AI.getAllocatedType(); 544606c3fb27SDimitry Andric TypeSize Size = DL.getTypeAllocSize(AT); 544706c3fb27SDimitry Andric if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() || 544806c3fb27SDimitry Andric Size.getFixedValue() == 0) 5449bdd1243dSDimitry Andric return {Changed, CFGChanged}; 54500b57cec5SDimitry Andric 54510b57cec5SDimitry Andric // First, split any FCA loads and stores touching this alloca to promote 54520b57cec5SDimitry Andric // better splitting and promotion opportunities. 545304eeddc0SDimitry Andric IRBuilderTy IRB(&AI); 545404eeddc0SDimitry Andric AggLoadStoreRewriter AggRewriter(DL, IRB); 54550b57cec5SDimitry Andric Changed |= AggRewriter.rewrite(AI); 54560b57cec5SDimitry Andric 54570b57cec5SDimitry Andric // Build the slices using a recursive instruction-visiting builder. 54580b57cec5SDimitry Andric AllocaSlices AS(DL, AI); 54590b57cec5SDimitry Andric LLVM_DEBUG(AS.print(dbgs())); 54600b57cec5SDimitry Andric if (AS.isEscaped()) 5461bdd1243dSDimitry Andric return {Changed, CFGChanged}; 54620b57cec5SDimitry Andric 54630b57cec5SDimitry Andric // Delete all the dead users of this alloca before splitting and rewriting it. 54640b57cec5SDimitry Andric for (Instruction *DeadUser : AS.getDeadUsers()) { 54650b57cec5SDimitry Andric // Free up everything used by this instruction. 54660b57cec5SDimitry Andric for (Use &DeadOp : DeadUser->operands()) 54670b57cec5SDimitry Andric clobberUse(DeadOp); 54680b57cec5SDimitry Andric 54690b57cec5SDimitry Andric // Now replace the uses of this instruction. 547004eeddc0SDimitry Andric DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType())); 54710b57cec5SDimitry Andric 54720b57cec5SDimitry Andric // And mark it for deletion. 
5473e8d8bef9SDimitry Andric DeadInsts.push_back(DeadUser); 54740b57cec5SDimitry Andric Changed = true; 54750b57cec5SDimitry Andric } 54760b57cec5SDimitry Andric for (Use *DeadOp : AS.getDeadOperands()) { 54770b57cec5SDimitry Andric clobberUse(*DeadOp); 54780b57cec5SDimitry Andric Changed = true; 54790b57cec5SDimitry Andric } 54800b57cec5SDimitry Andric 54810b57cec5SDimitry Andric // No slices to split. Leave the dead alloca for a later pass to clean up. 54820b57cec5SDimitry Andric if (AS.begin() == AS.end()) 5483bdd1243dSDimitry Andric return {Changed, CFGChanged}; 54840b57cec5SDimitry Andric 54850b57cec5SDimitry Andric Changed |= splitAlloca(AI, AS); 54860b57cec5SDimitry Andric 54870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Speculating PHIs\n"); 54880b57cec5SDimitry Andric while (!SpeculatablePHIs.empty()) 548904eeddc0SDimitry Andric speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val()); 54900b57cec5SDimitry Andric 5491bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " Rewriting Selects\n"); 5492bdd1243dSDimitry Andric auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector(); 5493bdd1243dSDimitry Andric while (!RemainingSelectsToRewrite.empty()) { 5494bdd1243dSDimitry Andric const auto [K, V] = RemainingSelectsToRewrite.pop_back_val(); 5495bdd1243dSDimitry Andric CFGChanged |= 5496bdd1243dSDimitry Andric rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU); 5497bdd1243dSDimitry Andric } 54980b57cec5SDimitry Andric 5499bdd1243dSDimitry Andric return {Changed, CFGChanged}; 55000b57cec5SDimitry Andric } 55010b57cec5SDimitry Andric 55020b57cec5SDimitry Andric /// Delete the dead instructions accumulated in this run. 55030b57cec5SDimitry Andric /// 55040b57cec5SDimitry Andric /// Recursively deletes the dead instructions we've accumulated. This is done 55050b57cec5SDimitry Andric /// at the very end to maximize locality of the recursive delete and to 55060b57cec5SDimitry Andric /// minimize the problems of invalidated instruction pointers as such pointers 55070b57cec5SDimitry Andric /// are used heavily in the intermediate stages of the algorithm. 55080b57cec5SDimitry Andric /// 55090b57cec5SDimitry Andric /// We also record the alloca instructions deleted here so that they aren't 55100b57cec5SDimitry Andric /// subsequently handed to mem2reg to promote. 55115f757f3fSDimitry Andric bool SROA::deleteDeadInstructions( 55120b57cec5SDimitry Andric SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) { 55130b57cec5SDimitry Andric bool Changed = false; 55140b57cec5SDimitry Andric while (!DeadInsts.empty()) { 5515e8d8bef9SDimitry Andric Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); 5516bdd1243dSDimitry Andric if (!I) 5517bdd1243dSDimitry Andric continue; 55180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); 55190b57cec5SDimitry Andric 55200b57cec5SDimitry Andric // If the instruction is an alloca, find the possible dbg.declare connected 55210b57cec5SDimitry Andric // to it, and remove it too. We must do this before calling RAUW or we will 55220b57cec5SDimitry Andric // not be able to find it. 
55230b57cec5SDimitry Andric if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { 55240b57cec5SDimitry Andric DeletedAllocas.insert(AI); 55257a6dacacSDimitry Andric for (DbgDeclareInst *OldDII : findDbgDeclares(AI)) 55265f757f3fSDimitry Andric OldDII->eraseFromParent(); 5527*0fca6ea1SDimitry Andric for (DbgVariableRecord *OldDII : findDVRDeclares(AI)) 55280b57cec5SDimitry Andric OldDII->eraseFromParent(); 55290b57cec5SDimitry Andric } 55300b57cec5SDimitry Andric 5531bdd1243dSDimitry Andric at::deleteAssignmentMarkers(I); 55320b57cec5SDimitry Andric I->replaceAllUsesWith(UndefValue::get(I->getType())); 55330b57cec5SDimitry Andric 55340b57cec5SDimitry Andric for (Use &Operand : I->operands()) 55350b57cec5SDimitry Andric if (Instruction *U = dyn_cast<Instruction>(Operand)) { 55360b57cec5SDimitry Andric // Zero out the operand and see if it becomes trivially dead. 55370b57cec5SDimitry Andric Operand = nullptr; 55380b57cec5SDimitry Andric if (isInstructionTriviallyDead(U)) 5539e8d8bef9SDimitry Andric DeadInsts.push_back(U); 55400b57cec5SDimitry Andric } 55410b57cec5SDimitry Andric 55420b57cec5SDimitry Andric ++NumDeleted; 55430b57cec5SDimitry Andric I->eraseFromParent(); 55440b57cec5SDimitry Andric Changed = true; 55450b57cec5SDimitry Andric } 55460b57cec5SDimitry Andric return Changed; 55470b57cec5SDimitry Andric } 55480b57cec5SDimitry Andric 55490b57cec5SDimitry Andric /// Promote the allocas, using the best available technique. 55500b57cec5SDimitry Andric /// 55510b57cec5SDimitry Andric /// This attempts to promote whatever allocas have been identified as viable in 55520b57cec5SDimitry Andric /// the PromotableAllocas list. If that list is empty, there is nothing to do. 55530b57cec5SDimitry Andric /// This function returns whether any promotion occurred. 55545f757f3fSDimitry Andric bool SROA::promoteAllocas(Function &F) { 55550b57cec5SDimitry Andric if (PromotableAllocas.empty()) 55560b57cec5SDimitry Andric return false; 55570b57cec5SDimitry Andric 55580b57cec5SDimitry Andric NumPromoted += PromotableAllocas.size(); 55590b57cec5SDimitry Andric 556006c3fb27SDimitry Andric if (SROASkipMem2Reg) { 556106c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n"); 556206c3fb27SDimitry Andric } else { 55630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); 5564bdd1243dSDimitry Andric PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC); 556506c3fb27SDimitry Andric } 556606c3fb27SDimitry Andric 55670b57cec5SDimitry Andric PromotableAllocas.clear(); 55680b57cec5SDimitry Andric return true; 55690b57cec5SDimitry Andric } 55700b57cec5SDimitry Andric 55715f757f3fSDimitry Andric std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) { 55720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); 55730b57cec5SDimitry Andric 5574*0fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout(); 55750b57cec5SDimitry Andric BasicBlock &EntryBB = F.getEntryBlock(); 55760b57cec5SDimitry Andric for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); 55770b57cec5SDimitry Andric I != E; ++I) { 55785ffd83dbSDimitry Andric if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { 557906c3fb27SDimitry Andric if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() && 558006c3fb27SDimitry Andric isAllocaPromotable(AI)) 55815ffd83dbSDimitry Andric PromotableAllocas.push_back(AI); 558206c3fb27SDimitry Andric else 55830b57cec5SDimitry Andric Worklist.insert(AI); 55840b57cec5SDimitry Andric } 
55855ffd83dbSDimitry Andric } 55860b57cec5SDimitry Andric 55870b57cec5SDimitry Andric bool Changed = false; 5588bdd1243dSDimitry Andric bool CFGChanged = false; 55890b57cec5SDimitry Andric // A set of deleted alloca instruction pointers which should be removed from 55900b57cec5SDimitry Andric // the list of promotable allocas. 55910b57cec5SDimitry Andric SmallPtrSet<AllocaInst *, 4> DeletedAllocas; 55920b57cec5SDimitry Andric 55930b57cec5SDimitry Andric do { 55940b57cec5SDimitry Andric while (!Worklist.empty()) { 5595bdd1243dSDimitry Andric auto [IterationChanged, IterationCFGChanged] = 5596bdd1243dSDimitry Andric runOnAlloca(*Worklist.pop_back_val()); 5597bdd1243dSDimitry Andric Changed |= IterationChanged; 5598bdd1243dSDimitry Andric CFGChanged |= IterationCFGChanged; 5599bdd1243dSDimitry Andric 56000b57cec5SDimitry Andric Changed |= deleteDeadInstructions(DeletedAllocas); 56010b57cec5SDimitry Andric 56020b57cec5SDimitry Andric // Remove the deleted allocas from various lists so that we don't try to 56030b57cec5SDimitry Andric // continue processing them. 56040b57cec5SDimitry Andric if (!DeletedAllocas.empty()) { 56050b57cec5SDimitry Andric auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); }; 56060b57cec5SDimitry Andric Worklist.remove_if(IsInSet); 56070b57cec5SDimitry Andric PostPromotionWorklist.remove_if(IsInSet); 5608e8d8bef9SDimitry Andric llvm::erase_if(PromotableAllocas, IsInSet); 56090b57cec5SDimitry Andric DeletedAllocas.clear(); 56100b57cec5SDimitry Andric } 56110b57cec5SDimitry Andric } 56120b57cec5SDimitry Andric 56130b57cec5SDimitry Andric Changed |= promoteAllocas(F); 56140b57cec5SDimitry Andric 56150b57cec5SDimitry Andric Worklist = PostPromotionWorklist; 56160b57cec5SDimitry Andric PostPromotionWorklist.clear(); 56170b57cec5SDimitry Andric } while (!Worklist.empty()); 56180b57cec5SDimitry Andric 5619bdd1243dSDimitry Andric assert((!CFGChanged || Changed) && "Can not only modify the CFG."); 5620bdd1243dSDimitry Andric assert((!CFGChanged || !PreserveCFG) && 5621bdd1243dSDimitry Andric "Should not have modified the CFG when told to preserve it."); 5622bdd1243dSDimitry Andric 56235f757f3fSDimitry Andric if (Changed && isAssignmentTrackingEnabled(*F.getParent())) { 56247a6dacacSDimitry Andric for (auto &BB : F) { 562506c3fb27SDimitry Andric RemoveRedundantDbgInstrs(&BB); 562606c3fb27SDimitry Andric } 56277a6dacacSDimitry Andric } 562806c3fb27SDimitry Andric 56295f757f3fSDimitry Andric return {Changed, CFGChanged}; 56305f757f3fSDimitry Andric } 56315f757f3fSDimitry Andric 56325f757f3fSDimitry Andric PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) { 56335f757f3fSDimitry Andric DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 56345f757f3fSDimitry Andric AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 56355f757f3fSDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 56365f757f3fSDimitry Andric auto [Changed, CFGChanged] = 56375f757f3fSDimitry Andric SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F); 56385f757f3fSDimitry Andric if (!Changed) 56395f757f3fSDimitry Andric return PreservedAnalyses::all(); 56400b57cec5SDimitry Andric PreservedAnalyses PA; 5641bdd1243dSDimitry Andric if (!CFGChanged) 56420b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>(); 5643bdd1243dSDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 56440b57cec5SDimitry Andric return PA; 56450b57cec5SDimitry Andric } 56460b57cec5SDimitry Andric 5647bdd1243dSDimitry Andric void SROAPass::printPipeline( 
void SROAPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
                                                 : "<modify-cfg>");
}

SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}

namespace {

/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
class SROALegacyPass : public FunctionPass {
  SROAOptions PreserveCFG;

public:
  static char ID;

  SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
      : FunctionPass(ID), PreserveCFG(PreserveCFG) {
    initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    auto [Changed, _] =
        SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
    return Changed;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
  }

  StringRef getPassName() const override { return "SROA"; }
};

} // end anonymous namespace

char SROALegacyPass::ID = 0;
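// Minimal usage sketch (assumes a build where the legacy pass manager is
// still available for this pass): createSROAPass maps the boolean onto
// SROAOptions and hands back a FunctionPass the legacy manager can own.
//
//   legacy::PassManager PM;
//   PM.add(createSROAPass(/*PreserveCFG=*/true));
//   PM.run(M); // M is an llvm::Module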
FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
  return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
                                        : SROAOptions::ModifyCFG);
}

INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
                      "Scalar Replacement Of Aggregates", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
                    false, false)