//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
//   gep %a, 0, %x, %y; load
//   gep %a, 0, %x, %y + 1; load
//   gep %a, 0, %x + 1, %y; load
//   gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// significant slowdown in targets with limited addressing modes. For instance,
// because the PTX target does not support the reg+reg addressing mode, the
// NVPTX backend emits PTX code that literally computes the pointer address of
// each GEP, wasting tons of registers. It emits the following PTX for the
// first load and similar PTX for other loads.
//
//   mov.u32         %r1, %x;
//   mov.u32         %r2, %y;
//   mul.wide.u32    %rl2, %r1, 128;
//   mov.u64         %rl3, a;
//   add.s64         %rl4, %rl3, %rl2;
//   mul.wide.u32    %rl5, %r2, 4;
//   add.s64         %rl6, %rl4, %rl5;
//   ld.global.f32   %f1, [%rl6];
//
// To reduce the register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
//   base = gep a, 0, x, y
//   load base
//   load base + 1  * sizeof(float)
//   load base + 32 * sizeof(float)
//   load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetic into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetic into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
//   mov.u32         %r1, %tid.x;
//   mov.u32         %r2, %tid.y;
//   mul.wide.u32    %rl2, %r1, 128;
//   mov.u64         %rl3, a;
//   add.s64         %rl4, %rl3, %rl2;
//   mul.wide.u32    %rl5, %r2, 4;
//   add.s64         %rl6, %rl4, %rl5;
//   ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
//   ld.global.f32   %f2, [%rl6+4];   // much better
//   ld.global.f32   %f3, [%rl6+128]; // much better
//   ld.global.f32   %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such a transformation can have the following benefits:
// (1) It can always extract constants in the indices of structure type.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g. The following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// We cannot CSE the common part related to index "i64 %i" while the GEPs keep
// their multiple indices. Lowering the GEPs makes such CSE possible.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if one of the indices is variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink those invariant parts.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes.
// A GEP with multiple indices may not match and will not be sunk. If we lower
// such a GEP into smaller parts, CGP may sink some of them, and we end up with
// a better addressing mode.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
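// (Illustrative usage, not required by the pass itself: in a lit test one
// might run
//   opt -passes=separate-const-offset-from-gep \
//       -reassociate-geps-verify-no-dead-code -S < input.ll
// to enable the verification.)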
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);

namespace {

/// A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index into (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), though in this case
/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx The given GEP index
  /// \p GEP The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  ///                  garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail);

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meanings of the arguments are the same as in Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP);

private:
  ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
      : IP(InsertionPt), DL(InsertionPt->getDataLayout()) {}

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is
  /// successful, returns C and updates UserChain as a def-use chain from C to
  /// V; otherwise, UserChain is empty.
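  /// For example (an illustrative case, not tied to a particular test): if V
  /// is "a + 4", find returns 4 and leaves UserChain as [4, a + 4].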
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression tree
  ///    that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  ///   5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  ///   zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///     zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an inbounds
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)", then after running function find, UserChain[0]
  /// will be the constant 5, UserChain[1] will be the subexpression "c + 5",
  /// and UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  BasicBlock::iterator IP;

  const DataLayout &DL;
};

/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
      : FunctionPass(ID), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  bool LowerGEP;
};

/// A helper class that tries to split every GEP in the function into a
/// variadic base and a constant offset. It works at function scope because
/// searching for the constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP {
public:
  SeparateConstOffsetFromGEP(
      DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
      function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
      : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}

  bool run(Function &F);

private:
  /// Track the operands of an add or sub.
  using ExprKey = std::pair<Value *, Value *>;

  /// Create a pair for use as a map key for a commutable operation.
  static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
    if (A < B)
      return {A, B};
    return {B, A};
  }

  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Tries to reorder the given GEP with the GEP that produces the base if
  /// doing so results in producing a constant offset as the outermost
  /// index.
  bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic               The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic               The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it finds in indices of both sequential and structure
  /// types, otherwise it only finds in sequential indices. The output
  /// NeedsExtraction indicates whether we successfully find a non-zero constant
  /// offset.
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset.
  /// After extraction, it becomes desirable to reunite the distributed sexts.
  /// For example,
  ///
  ///   &a[sext(i +nsw (j +nsw 5))]
  ///     => distribute          &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///     => constant extraction &a[sext(i) + sext(j)] + 5
  ///     => reunion             &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(
      ExprKey Key, Instruction *Dominatee,
      DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);

  // Swap the index operands of two GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);

  // Check if it is safe to swap the operands of two GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI;
  TargetLibraryInfo *TLI;
  // Retrieved lazily since not always used.
  function_ref<TargetTransformInfo &(Function &)> GetTTI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations or
  /// multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
  DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEPLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
  return new SeparateConstOffsetFromGEPLegacyPass(LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add".
  // This is the case if the or's disjoint flag is set.
  if (BO->getOpcode() == Instruction::Or &&
      !cast<PossiblyDisjointInst>(BO)->isDisjoint())
    return false;

  // FIXME: We don't currently support constants from the RHS of subs,
  // when we are zero-extended, because we need a way to zero-extend
  // them before they are negated.
  if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbound GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // Save off the current height of the chain, in case we need to restore it.
  size_t ChainLength = UserChain.size();

  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;

  // Reset the chain back to where it was when we started exploring this node,
  // since visiting the LHS didn't pan out.
  UserChain.resize(ChainLength);

  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;

  // If RHS wasn't a suitable candidate either, reset the chain again.
  if (ConstantOffset == 0)
    UserChain.resize(ChainLength);

  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<TruncInst>(V)) {
    ConstantOffset =
        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
            .trunc(BitWidth);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (CastInst *I : llvm::reverse(ExtInsts)) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // Try to constant fold the cast.
      Current = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), DL);
      if (Current)
        continue;
    }

    Instruction *Ext = I->clone();
    Ext->setOperand(0, Current);
    Ext->insertBefore(*IP->getParent(), IP);
    Current = Ext;
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert(
        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
        "Only following instructions can be traced: sext, zext & trunc");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert((BO->use_empty() || BO->hasOneUse()) &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail) {
  ConstantOffsetExtractor Extractor(GEP->getIterator());
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separates the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
  return ConstantOffsetExtractor(GEP->getIterator())
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != PtrIdxTy) {
        *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",
                                         GEP->getIterator());
        Changed = true;
      }
    }
  }
  return Changed;
}

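// Illustrative example (not drawn from a test): for
//   %p = getelementptr inbounds [4 x i32], ptr %a, i64 %i, i64 %j
// where %j is "add nsw i64 %k, 3", accumulateByteOffset finds the constant 3
// in the second index, sets NeedsExtraction, and returns 3 * sizeof(i32) = 12
// as the accumulative byte offset.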
int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Constant offsets of scalable types are not really constant.
      if (GTI.getIndexedType()->isScalableTy())
        continue;

      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * GTI.getSequentialElementStride(*DL);
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *PtrIndexTy = DL->getIndexType(Variadic->getType());

  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a swap candidate only if it is loop invariant and not used
  // more than once inside the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
                                GTI.getSequentialElementStride(*DL));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
        } else {
          Idx =
              Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
        }
      }
      // Create an ugly GEP with a single index for each index.
      ResultPtr = Builder.CreatePtrAdd(ResultPtr, Idx, "uglygep");
      if (FirstResult == nullptr)
        FirstResult = ResultPtr;
    }
  }

  // Create a GEP with the constant offset index.
  if (AccumulativeByteOffset != 0) {
    Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
    ResultPtr = Builder.CreatePtrAdd(ResultPtr, Offset, "uglygep");
  } else
    isSwapCandidate = false;

  // If we created a GEP with constant index, and the base is loop invariant,
  // then we swap the first one with it, so LICM can move constant GEP out
  // later.
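  // Illustratively, "gep (gep p, i), 4" becomes "gep (gep p, 4), i" when p is
  // loop invariant, so LICM can later hoist the constant-offset GEP out of the
  // loop.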
  auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
  auto *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
    swapGEPOperand(FirstGEP, SecondGEP);

  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
  assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
         "Pointer type must match index type for arithmetic-based lowering of "
         "split GEPs");

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetic for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                GTI.getSequentialElementStride(*DL));
      // Scale the index by the element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

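  // Together with the final constant ADD and the inttoptr below, the lowering
  // of a hypothetical split GEP
  //   getelementptr [16 x i32], ptr %p, i64 %i, i64 %j
  // with an accumulated constant offset of 8 bytes looks roughly like
  //   %0 = ptrtoint ptr %p to i64
  //   %1 = shl i64 %i, 6            ; %i * 64
  //   %2 = add i64 %0, %1
  //   %3 = shl i64 %j, 2            ; %j * 4
  //   %4 = add i64 %2, %3
  //   %5 = add i64 %4, 8            ; the accumulated constant offset
  //   %result = inttoptr i64 %5 to ptr
  // (an illustrative sketch, not output copied from a test).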
  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
                                            TargetTransformInfo &TTI) {
  auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
  if (!PtrGEP)
    return false;

  bool NestedNeedsExtraction;
  int64_t NestedByteOffset =
      accumulateByteOffset(PtrGEP, NestedNeedsExtraction);
  if (!NestedNeedsExtraction)
    return false;

  unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
  if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                 /*BaseGV=*/nullptr, NestedByteOffset,
                                 /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
    return false;

  bool GEPInBounds = GEP->isInBounds();
  bool PtrGEPInBounds = PtrGEP->isInBounds();
  bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
  if (IsChainInBounds) {
    auto IsKnownNonNegative = [this](Value *V) {
      return isKnownNonNegative(V, *DL);
    };
    IsChainInBounds &= all_of(GEP->indices(), IsKnownNonNegative);
    if (IsChainInBounds)
      IsChainInBounds &= all_of(PtrGEP->indices(), IsKnownNonNegative);
  }

  IRBuilder<> Builder(GEP);
  // For trivial GEP chains, we can swap the indices.
  Value *NewSrc = Builder.CreateGEP(
      GEP->getSourceElementType(), PtrGEP->getPointerOperand(),
      SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);
  Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,
                                    SmallVector<Value *, 4>(PtrGEP->indices()),
                                    "", IsChainInBounds);
  GEP->replaceAllUsesWith(NewGEP);
  RecursivelyDeleteTriviallyDeadInstructions(GEP);
  return true;
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());

  if (!NeedsExtraction) {
    Changed |= reorderGEP(GEP, TTI);
    return Changed;
  }

  // If LowerGEP is disabled, check whether the backend supports the
  // addressing mode we are about to produce before really splitting the GEP.
  // If not, this splitting probably won't be beneficial.
  // If LowerGEP is enabled, then even if the extracted constant offset cannot
  // match the addressing mode, we can still optimize the other lowered parts
  // of the variable indices. Therefore, we don't check the addressing mode in
  // that case.
  if (!LowerGEP) {
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. lowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Constant offsets of scalable types are not really constant.
      if (GTI.getIndexedType()->isScalableTy())
        continue;

      // Split this GEP index into a variadic part and a constant offset, and
      // use the variadic part as the new index.
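      // For example (a hypothetical index, not from a test case): if the
      // index is `add i64 %i, 5`, ConstantOffsetExtractor::Extract rebuilds
      // the index as just `%i`; the constant 5 was already folded into
      // AccumulativeByteOffset by accumulateByteOffset above.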
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b    = add i64 a, 5
  //   addr = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  // TODO(gep_nowrap): Preserve nuw at least.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setNoWrapFlags(GEPNoWrapFlags::none());

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    // Additionally, pointers that aren't integral (and so can't be safely
    // converted to integers), and pointers whose index size differs from
    // their pointer size (meaning the pointer carries extra data that plain
    // integer arithmetic would not preserve), can't be lowered in this way.
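    // For instance (a hypothetical target, purely for illustration): with
    // 128-bit fat pointers but a 64-bit index type, a ptrtoint/inttoptr
    // round trip through i64 would drop the upper pointer bits, so we must
    // fall back to single-index GEPs instead of integer arithmetic.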
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
                               DL->getIndexSizeInBits(AddrSpace);
    if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
        PointerHasExtraData)
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep                              ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2                             ; clone of %gep
  //   %new.gep = gep i8, %gep2, %offset
  //   %gep                              ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2                             ; clone of %gep
  //   %new.gep = gep i8, %gep2, %offset
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  IRBuilder<> Builder(GEP);
  NewGEP = cast<Instruction>(Builder.CreatePtrAdd(
      NewGEP, ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true),
      GEP->getName(), GEPWasInBounds));
  NewGEP->copyMetadata(*GEP);

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
    return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  };
  SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
  return Impl.run(F);
}

bool SeparateConstOffsetFromGEP::run(Function &F) {
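  // Shared driver for both pass managers: walk every reachable block and
  // split each GEP, then try to reunite sign extensions, and optionally
  // verify that no trivially dead code was left behind.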
  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DL = &F.getDataLayout();
  bool Changed = false;
  for (BasicBlock &B : F) {
    if (!DT->isReachableFromEntry(&B))
      continue;

    for (Instruction &I : llvm::make_early_inc_range(B))
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // already constant.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    ExprKey Key, Instruction *Dominatee,
    DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!I->getType()->isIntOrIntVectorTy())
    return false;

  // Dom: LHS+RHS
  // I: sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
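  // A sketch of the rewrite (hypothetical IR, names are illustrative only):
  //   %dom   = add nsw i32 %a, %b   ; recorded below; poison here would be UB
  //   ...
  //   %a.ext = sext i32 %a to i64
  //   %b.ext = sext i32 %b to i64
  //   %i     = add i64 %a.ext, %b.ext
  // becomes
  //   %i     = sext i32 %dom to i64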
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
        Instruction *NewSExt =
            new SExtInst(Dom, I->getType(), "", I->getIterator());
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        NewSExt->setDebugLoc(I->getDebugLoc());
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      if (auto *Dom =
              findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
        Instruction *NewSExt =
            new SExtInst(Dom, I->getType(), "", I->getIterator());
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        NewSExt->setDebugLoc(I->getDebugLoc());
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
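  // Note: the nsw flag alone only makes an overflowing add/sub produce
  // poison; requiring programUndefinedIfPoison(I) as well ensures that, in
  // any well-defined execution, the operation really cannot sign overflow.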
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfPoison(I)) {
      ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
      DominatingAdds[Key].push_back(I);
    }
  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfPoison(I))
      DominatingSubs[{LHS, RHS}].push_back(I);
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingAdds.clear();
  DominatingSubs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (Instruction &I : llvm::make_early_inc_range(*BB))
      Changed |= reuniteExts(&I);
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check whether the second operand of the first GEP has a constant
  // coefficient. For example, in the following code we would gain nothing by
  // hoisting the second GEP out, because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip a constant shift instruction, which may have been generated by
  // splitting GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an add or sub with a constant operand,
  // because the swap may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o; p+c may no longer be in bounds.
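  // For instance (hypothetical values): with a 16-byte object, c == -4 and
  // o == 8, p+c points before the object even though p+c+o == p+4 is fine.
  // So inbounds is kept only when the accumulated constant offset provably
  // stays within the underlying object, as checked below.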
  const DataLayout &DAL = First->getDataLayout();
  APInt Offset(DAL.getIndexSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    // TODO(gep_nowrap): Make flag preservation more precise.
    First->setNoWrapFlags(GEPNoWrapFlags::none());
    Second->setNoWrapFlags(GEPNoWrapFlags::none());
  } else
    First->setIsInBounds(true);
}

void SeparateConstOffsetFromGEPPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
      ->printPipeline(OS, MapClassName2PassName);
  OS << '<';
  if (LowerGEP)
    OS << "lower-gep";
  OS << '>';
}

PreservedAnalyses
SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *LI = &AM.getResult<LoopAnalysis>(F);
  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
    return AM.getResult<TargetIRAnalysis>(F);
  };
  SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
  if (!Impl.run(F))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
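// Usage sketch (the pass name below is an assumption about how the pass is
// registered with the new pass manager, not verified here):
//   opt -passes='separate-const-offset-from-gep<lower-gep>' -S input.ll
// where the optional 'lower-gep' parameter corresponds to the LowerGEP flag
// printed by printPipeline() above.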