10b57cec5SDimitry Andric //===- GlobalSplit.cpp - global variable splitter -------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass uses inrange annotations on GEP indices to split globals where 100b57cec5SDimitry Andric // beneficial. Clang currently attaches these annotations to references to 110b57cec5SDimitry Andric // virtual table globals under the Itanium ABI for the benefit of the 120b57cec5SDimitry Andric // whole-program virtual call optimization and control flow integrity passes. 130b57cec5SDimitry Andric // 140b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "llvm/Transforms/IPO/GlobalSplit.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 180b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 190b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 200b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 210b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 220b57cec5SDimitry Andric #include "llvm/IR/Function.h" 230b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 240b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 250b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 260b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 270b57cec5SDimitry Andric #include "llvm/IR/Metadata.h" 280b57cec5SDimitry Andric #include "llvm/IR/Module.h" 290b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 300b57cec5SDimitry Andric #include "llvm/IR/Type.h" 310b57cec5SDimitry Andric #include "llvm/IR/User.h" 320b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 330b57cec5SDimitry Andric #include "llvm/Transforms/IPO.h" 340b57cec5SDimitry Andric #include <cstdint> 350b57cec5SDimitry Andric #include <vector> 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric using namespace llvm; 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric static bool splitGlobal(GlobalVariable &GV) { 400b57cec5SDimitry Andric // If the address of the global is taken outside of the module, we cannot 410b57cec5SDimitry Andric // apply this transformation. 420b57cec5SDimitry Andric if (!GV.hasLocalLinkage()) 430b57cec5SDimitry Andric return false; 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric // We currently only know how to split ConstantStructs. 460b57cec5SDimitry Andric auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer()); 470b57cec5SDimitry Andric if (!Init) 480b57cec5SDimitry Andric return false; 490b57cec5SDimitry Andric 50*0fca6ea1SDimitry Andric const DataLayout &DL = GV.getDataLayout(); 51*0fca6ea1SDimitry Andric const StructLayout *SL = DL.getStructLayout(Init->getType()); 52*0fca6ea1SDimitry Andric ArrayRef<TypeSize> MemberOffsets = SL->getMemberOffsets(); 53*0fca6ea1SDimitry Andric unsigned IndexWidth = DL.getIndexTypeSizeInBits(GV.getType()); 54*0fca6ea1SDimitry Andric 55*0fca6ea1SDimitry Andric // Verify that each user of the global is an inrange getelementptr constant, 56*0fca6ea1SDimitry Andric // and collect information on how it relates to the global. 57*0fca6ea1SDimitry Andric struct GEPInfo { 58*0fca6ea1SDimitry Andric GEPOperator *GEP; 59*0fca6ea1SDimitry Andric unsigned MemberIndex; 60*0fca6ea1SDimitry Andric APInt MemberRelativeOffset; 61*0fca6ea1SDimitry Andric 62*0fca6ea1SDimitry Andric GEPInfo(GEPOperator *GEP, unsigned MemberIndex, APInt MemberRelativeOffset) 63*0fca6ea1SDimitry Andric : GEP(GEP), MemberIndex(MemberIndex), 64*0fca6ea1SDimitry Andric MemberRelativeOffset(std::move(MemberRelativeOffset)) {} 65*0fca6ea1SDimitry Andric }; 66*0fca6ea1SDimitry Andric SmallVector<GEPInfo> Infos; 670b57cec5SDimitry Andric for (User *U : GV.users()) { 68*0fca6ea1SDimitry Andric auto *GEP = dyn_cast<GEPOperator>(U); 69*0fca6ea1SDimitry Andric if (!GEP) 700b57cec5SDimitry Andric return false; 710b57cec5SDimitry Andric 72*0fca6ea1SDimitry Andric std::optional<ConstantRange> InRange = GEP->getInRange(); 73*0fca6ea1SDimitry Andric if (!InRange) 740b57cec5SDimitry Andric return false; 75*0fca6ea1SDimitry Andric 76*0fca6ea1SDimitry Andric APInt Offset(IndexWidth, 0); 77*0fca6ea1SDimitry Andric if (!GEP->accumulateConstantOffset(DL, Offset)) 78*0fca6ea1SDimitry Andric return false; 79*0fca6ea1SDimitry Andric 80*0fca6ea1SDimitry Andric // Determine source-relative inrange. 81*0fca6ea1SDimitry Andric ConstantRange SrcInRange = InRange->sextOrTrunc(IndexWidth).add(Offset); 82*0fca6ea1SDimitry Andric 83*0fca6ea1SDimitry Andric // Check that the GEP offset is in the range (treating upper bound as 84*0fca6ea1SDimitry Andric // inclusive here). 85*0fca6ea1SDimitry Andric if (!SrcInRange.contains(Offset) && SrcInRange.getUpper() != Offset) 86*0fca6ea1SDimitry Andric return false; 87*0fca6ea1SDimitry Andric 88*0fca6ea1SDimitry Andric // Find which struct member the range corresponds to. 89*0fca6ea1SDimitry Andric if (SrcInRange.getLower().uge(SL->getSizeInBytes())) 90*0fca6ea1SDimitry Andric return false; 91*0fca6ea1SDimitry Andric 92*0fca6ea1SDimitry Andric unsigned MemberIndex = 93*0fca6ea1SDimitry Andric SL->getElementContainingOffset(SrcInRange.getLower().getZExtValue()); 94*0fca6ea1SDimitry Andric TypeSize MemberStart = MemberOffsets[MemberIndex]; 95*0fca6ea1SDimitry Andric TypeSize MemberEnd = MemberIndex == MemberOffsets.size() - 1 96*0fca6ea1SDimitry Andric ? SL->getSizeInBytes() 97*0fca6ea1SDimitry Andric : MemberOffsets[MemberIndex + 1]; 98*0fca6ea1SDimitry Andric 99*0fca6ea1SDimitry Andric // Verify that the range matches that struct member. 100*0fca6ea1SDimitry Andric if (SrcInRange.getLower() != MemberStart || 101*0fca6ea1SDimitry Andric SrcInRange.getUpper() != MemberEnd) 102*0fca6ea1SDimitry Andric return false; 103*0fca6ea1SDimitry Andric 104*0fca6ea1SDimitry Andric Infos.emplace_back(GEP, MemberIndex, Offset - MemberStart); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric SmallVector<MDNode *, 2> Types; 1080b57cec5SDimitry Andric GV.getMetadata(LLVMContext::MD_type, Types); 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext()); 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands()); 1130b57cec5SDimitry Andric for (unsigned I = 0; I != Init->getNumOperands(); ++I) { 1140b57cec5SDimitry Andric // Build a global representing this split piece. 1150b57cec5SDimitry Andric auto *SplitGV = 1160b57cec5SDimitry Andric new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(), 1170b57cec5SDimitry Andric GV.isConstant(), GlobalValue::PrivateLinkage, 1180b57cec5SDimitry Andric Init->getOperand(I), GV.getName() + "." + utostr(I)); 1190b57cec5SDimitry Andric SplitGlobals[I] = SplitGV; 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric unsigned SplitBegin = SL->getElementOffset(I); 1220b57cec5SDimitry Andric unsigned SplitEnd = (I == Init->getNumOperands() - 1) 1230b57cec5SDimitry Andric ? SL->getSizeInBytes() 1240b57cec5SDimitry Andric : SL->getElementOffset(I + 1); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric // Rebuild type metadata, adjusting by the split offset. 1270b57cec5SDimitry Andric // FIXME: See if we can use DW_OP_piece to preserve debug metadata here. 1280b57cec5SDimitry Andric for (MDNode *Type : Types) { 1290b57cec5SDimitry Andric uint64_t ByteOffset = cast<ConstantInt>( 1300b57cec5SDimitry Andric cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 1310b57cec5SDimitry Andric ->getZExtValue(); 1320b57cec5SDimitry Andric // Type metadata may be attached one byte after the end of the vtable, for 1330b57cec5SDimitry Andric // classes without virtual methods in Itanium ABI. AFAIK, it is never 1340b57cec5SDimitry Andric // attached to the first byte of a vtable. Subtract one to get the right 1350b57cec5SDimitry Andric // slice. 1360b57cec5SDimitry Andric // This is making an assumption that vtable groups are the only kinds of 1370b57cec5SDimitry Andric // global variables that !type metadata can be attached to, and that they 1380b57cec5SDimitry Andric // are either Itanium ABI vtable groups or contain a single vtable (i.e. 1390b57cec5SDimitry Andric // Microsoft ABI vtables). 1400b57cec5SDimitry Andric uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1; 1410b57cec5SDimitry Andric if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd) 1420b57cec5SDimitry Andric continue; 1430b57cec5SDimitry Andric SplitGV->addMetadata( 1440b57cec5SDimitry Andric LLVMContext::MD_type, 1450b57cec5SDimitry Andric *MDNode::get(GV.getContext(), 1460b57cec5SDimitry Andric {ConstantAsMetadata::get( 1470b57cec5SDimitry Andric ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), 1480b57cec5SDimitry Andric Type->getOperand(1)})); 1490b57cec5SDimitry Andric } 1505ffd83dbSDimitry Andric 1515ffd83dbSDimitry Andric if (GV.hasMetadata(LLVMContext::MD_vcall_visibility)) 1525ffd83dbSDimitry Andric SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility()); 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric 155*0fca6ea1SDimitry Andric for (const GEPInfo &Info : Infos) { 156*0fca6ea1SDimitry Andric assert(Info.MemberIndex < SplitGlobals.size() && "Invalid member"); 1570b57cec5SDimitry Andric auto *NewGEP = ConstantExpr::getGetElementPtr( 158*0fca6ea1SDimitry Andric Type::getInt8Ty(GV.getContext()), SplitGlobals[Info.MemberIndex], 159*0fca6ea1SDimitry Andric ConstantInt::get(GV.getContext(), Info.MemberRelativeOffset), 160*0fca6ea1SDimitry Andric Info.GEP->isInBounds()); 161*0fca6ea1SDimitry Andric Info.GEP->replaceAllUsesWith(NewGEP); 1620b57cec5SDimitry Andric } 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric // Finally, remove the original global. Any remaining uses refer to invalid 16581ad6265SDimitry Andric // elements of the global, so replace with poison. 1660b57cec5SDimitry Andric if (!GV.use_empty()) 16781ad6265SDimitry Andric GV.replaceAllUsesWith(PoisonValue::get(GV.getType())); 1680b57cec5SDimitry Andric GV.eraseFromParent(); 1690b57cec5SDimitry Andric return true; 1700b57cec5SDimitry Andric } 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric static bool splitGlobals(Module &M) { 1730b57cec5SDimitry Andric // First, see if the module uses either of the llvm.type.test or 1740b57cec5SDimitry Andric // llvm.type.checked.load intrinsics, which indicates that splitting globals 1750b57cec5SDimitry Andric // may be beneficial. 1760b57cec5SDimitry Andric Function *TypeTestFunc = 1770b57cec5SDimitry Andric M.getFunction(Intrinsic::getName(Intrinsic::type_test)); 1780b57cec5SDimitry Andric Function *TypeCheckedLoadFunc = 1790b57cec5SDimitry Andric M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); 18006c3fb27SDimitry Andric Function *TypeCheckedLoadRelativeFunc = 18106c3fb27SDimitry Andric M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative)); 1820b57cec5SDimitry Andric if ((!TypeTestFunc || TypeTestFunc->use_empty()) && 18306c3fb27SDimitry Andric (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && 18406c3fb27SDimitry Andric (!TypeCheckedLoadRelativeFunc || 18506c3fb27SDimitry Andric TypeCheckedLoadRelativeFunc->use_empty())) 1860b57cec5SDimitry Andric return false; 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric bool Changed = false; 189349cc55cSDimitry Andric for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) 1900b57cec5SDimitry Andric Changed |= splitGlobal(GV); 1910b57cec5SDimitry Andric return Changed; 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) { 1950b57cec5SDimitry Andric if (!splitGlobals(M)) 1960b57cec5SDimitry Andric return PreservedAnalyses::all(); 1970b57cec5SDimitry Andric return PreservedAnalyses::none(); 1980b57cec5SDimitry Andric } 199