xref: /llvm-project/llvm/lib/Transforms/IPO/GlobalSplit.cpp (revision 98ea1a81a28a6dd36941456c8ab4ce46f665f57a)
1 //===- GlobalSplit.cpp - global variable splitter -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass uses inrange annotations on GEP indices to split globals where
10 // beneficial. Clang currently attaches these annotations to references to
11 // virtual table globals under the Itanium ABI for the benefit of the
12 // whole-program virtual call optimization and control flow integrity passes.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/IPO/GlobalSplit.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/IR/Constant.h"
20 #include "llvm/IR/Constants.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/GlobalVariable.h"
25 #include "llvm/IR/Intrinsics.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Metadata.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/IR/Operator.h"
30 #include "llvm/IR/Type.h"
31 #include "llvm/IR/User.h"
32 #include "llvm/Support/Casting.h"
33 #include <cstdint>
34 #include <vector>
35 
36 using namespace llvm;
37 
38 static bool splitGlobal(GlobalVariable &GV) {
39   // If the address of the global is taken outside of the module, we cannot
40   // apply this transformation.
41   if (!GV.hasLocalLinkage())
42     return false;
43 
44   // We currently only know how to split ConstantStructs.
45   auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer());
46   if (!Init)
47     return false;
48 
49   const DataLayout &DL = GV.getDataLayout();
50   const StructLayout *SL = DL.getStructLayout(Init->getType());
51   ArrayRef<TypeSize> MemberOffsets = SL->getMemberOffsets();
52   unsigned IndexWidth = DL.getIndexTypeSizeInBits(GV.getType());
53 
54   // Verify that each user of the global is an inrange getelementptr constant,
55   // and collect information on how it relates to the global.
56   struct GEPInfo {
57     GEPOperator *GEP;
58     unsigned MemberIndex;
59     APInt MemberRelativeOffset;
60 
61     GEPInfo(GEPOperator *GEP, unsigned MemberIndex, APInt MemberRelativeOffset)
62         : GEP(GEP), MemberIndex(MemberIndex),
63           MemberRelativeOffset(std::move(MemberRelativeOffset)) {}
64   };
65   SmallVector<GEPInfo> Infos;
66   for (User *U : GV.users()) {
67     auto *GEP = dyn_cast<GEPOperator>(U);
68     if (!GEP)
69       return false;
70 
71     std::optional<ConstantRange> InRange = GEP->getInRange();
72     if (!InRange)
73       return false;
74 
75     APInt Offset(IndexWidth, 0);
76     if (!GEP->accumulateConstantOffset(DL, Offset))
77       return false;
78 
79     // Determine source-relative inrange.
80     ConstantRange SrcInRange = InRange->sextOrTrunc(IndexWidth).add(Offset);
81 
82     // Check that the GEP offset is in the range (treating upper bound as
83     // inclusive here).
84     if (!SrcInRange.contains(Offset) && SrcInRange.getUpper() != Offset)
85       return false;
86 
87     // Find which struct member the range corresponds to.
88     if (SrcInRange.getLower().uge(SL->getSizeInBytes()))
89       return false;
90 
91     unsigned MemberIndex =
92         SL->getElementContainingOffset(SrcInRange.getLower().getZExtValue());
93     TypeSize MemberStart = MemberOffsets[MemberIndex];
94     TypeSize MemberEnd = MemberIndex == MemberOffsets.size() - 1
95                              ? SL->getSizeInBytes()
96                              : MemberOffsets[MemberIndex + 1];
97 
98     // Verify that the range matches that struct member.
99     if (SrcInRange.getLower() != MemberStart ||
100         SrcInRange.getUpper() != MemberEnd)
101       return false;
102 
103     Infos.emplace_back(GEP, MemberIndex, Offset - MemberStart);
104   }
105 
106   SmallVector<MDNode *, 2> Types;
107   GV.getMetadata(LLVMContext::MD_type, Types);
108 
109   IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext());
110 
111   std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands());
112   for (unsigned I = 0; I != Init->getNumOperands(); ++I) {
113     // Build a global representing this split piece.
114     auto *SplitGV =
115         new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(),
116                            GV.isConstant(), GlobalValue::PrivateLinkage,
117                            Init->getOperand(I), GV.getName() + "." + utostr(I));
118     SplitGlobals[I] = SplitGV;
119 
120     unsigned SplitBegin = SL->getElementOffset(I);
121     unsigned SplitEnd = (I == Init->getNumOperands() - 1)
122                             ? SL->getSizeInBytes()
123                             : SL->getElementOffset(I + 1);
124 
125     // Rebuild type metadata, adjusting by the split offset.
126     // FIXME: See if we can use DW_OP_piece to preserve debug metadata here.
127     for (MDNode *Type : Types) {
128       uint64_t ByteOffset = cast<ConstantInt>(
129               cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
130               ->getZExtValue();
131       // Type metadata may be attached one byte after the end of the vtable, for
132       // classes without virtual methods in Itanium ABI. AFAIK, it is never
133       // attached to the first byte of a vtable. Subtract one to get the right
134       // slice.
135       // This is making an assumption that vtable groups are the only kinds of
136       // global variables that !type metadata can be attached to, and that they
137       // are either Itanium ABI vtable groups or contain a single vtable (i.e.
138       // Microsoft ABI vtables).
139       uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
140       if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
141         continue;
142       SplitGV->addMetadata(
143           LLVMContext::MD_type,
144           *MDNode::get(GV.getContext(),
145                        {ConstantAsMetadata::get(
146                             ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
147                         Type->getOperand(1)}));
148     }
149 
150     if (GV.hasMetadata(LLVMContext::MD_vcall_visibility))
151       SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility());
152   }
153 
154   for (const GEPInfo &Info : Infos) {
155     assert(Info.MemberIndex < SplitGlobals.size() && "Invalid member");
156     auto *NewGEP = ConstantExpr::getGetElementPtr(
157         Type::getInt8Ty(GV.getContext()), SplitGlobals[Info.MemberIndex],
158         ConstantInt::get(GV.getContext(), Info.MemberRelativeOffset),
159         Info.GEP->isInBounds());
160     Info.GEP->replaceAllUsesWith(NewGEP);
161   }
162 
163   // Finally, remove the original global. Any remaining uses refer to invalid
164   // elements of the global, so replace with poison.
165   if (!GV.use_empty())
166     GV.replaceAllUsesWith(PoisonValue::get(GV.getType()));
167   GV.eraseFromParent();
168   return true;
169 }
170 
171 static bool splitGlobals(Module &M) {
172   // First, see if the module uses either of the llvm.type.test or
173   // llvm.type.checked.load intrinsics, which indicates that splitting globals
174   // may be beneficial.
175   Function *TypeTestFunc =
176       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
177   Function *TypeCheckedLoadFunc =
178       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
179   Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
180       &M, Intrinsic::type_checked_load_relative);
181   if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
182       (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
183       (!TypeCheckedLoadRelativeFunc ||
184        TypeCheckedLoadRelativeFunc->use_empty()))
185     return false;
186 
187   bool Changed = false;
188   for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
189     Changed |= splitGlobal(GV);
190   return Changed;
191 }
192 
193 PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) {
194   if (!splitGlobals(M))
195     return PreservedAnalyses::all();
196   return PreservedAnalyses::none();
197 }
198