//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");

static cl::opt<bool>
    EnableColdCCStressTest("enable-coldcc-stress-test",
                           cl::desc("Enable stress test of coldcc by adding "
                                    "calling conv to all internal functions."),
                           cl::init(false), cl::Hidden);

static cl::opt<int> ColdCCRelFreq(
    "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc(
        "Maximum block frequency, expressed as a percentage of caller's "
        "entry frequency, for a call site to be considered cold for enabling "
        "coldcc"));

/// Is this global variable possibly used by a leak checker as a root?  If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.
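  //
  // For example (illustrative only): a C union { void *p; long i; } is
  // commonly lowered to a plain i64 (or an [8 x i8]) global, which can still
  // hold a pointer at run time even though its LLVM type mentions no pointer.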

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(GV->getValueType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
    default: break;
    case Type::PointerTyID:
      return true;
    case Type::FixedVectorTyID:
    case Type::ScalableVectorTyID:
      if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
        return true;
      break;
    case Type::ArrayTyID:
      Types.push_back(cast<ArrayType>(Ty)->getElementType());
      break;
    case Type::StructTyID: {
      StructType *STy = cast<StructType>(Ty);
      if (STy->isOpaque()) return true;
      for (StructType::element_iterator I = STy->element_begin(),
             E = STy->element_end(); I != E; ++I) {
        Type *InnerTy = *I;
        if (isa<PointerType>(InnerTy)) return true;
        if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
            isa<VectorType>(InnerTy))
          Types.push_back(InnerTy);
      }
      break;
    }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
static bool IsSafeComputationToRemove(
    Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, GetTLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (true);
}

/// This GV is a pointer root.  Loop over all users of the global and clean up
/// any stores that obviously do not assign it a dynamically allocated value.
static bool
CleanupPointerRootUsers(GlobalVariable *GV,
                        function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to explicitly
  // allow the memory pointed to by globals at exit.  This is popular because
  // it also solves another problem where the main thread of a C++ program may
  // shut down before other threads that are still expecting to use those
  // globals.  To handle that case, we expect the program may create a
  // singleton and never destroy it.
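  //
  // Illustrative sketch (names made up): given a never-read global @g,
  //   store i8* null, i8** @g
  // is deleted outright (a constant can never point at heap memory), while
  //   %p = call i8* @malloc(i64 16)
  //   store i8* %p, i8** @g
  // is only deleted together with the malloc itself (when %p has no other
  // uses), so the leak checker is never left with an allocation that used to
  // be reachable through @g.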

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  // Constants can't be pointers to dynamically allocated memory.
  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
       UI != E;) {
    User *U = *UI++;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        Dead.clear();
        CleanupPointerRootUsers(GV, GetTLI);
        return true;
      }
    }
  }

  for (int i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, GetTLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (true);
      I->eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}

/// We just marked GV constant.  Loop over all users of the global, cleaning up
/// the obvious ones.  This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft.  This returns true if it made a change.
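/// For example (sketch): once @g is known constant, a load through a constant
/// GEP such as
///   %x = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @g, i32 0, i32 2)
/// folds to the corresponding element of the initializer, and any remaining
/// store to @g must be unreachable or redundant and can simply be erased.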
static bool CleanupConstantGlobalUsers(
    Value *V, Constant *Init, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;
  // Note that we need to use a weak value handle for the worklist items. When
  // we delete a constant array, we may also be holding pointer to one of its
  // elements (or an element of one of its elements if we're dealing with an
  // array of arrays) in the worklist.
  SmallVector<WeakTrackingVH, 8> WorkList(V->users());
  while (!WorkList.empty()) {
    Value *UV = WorkList.pop_back_val();
    if (!UV)
      continue;

    User *U = cast<User>(UV);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        if (auto *Casted =
                ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) {
          // Replace the load with the initializer.
          LI->replaceAllUsesWith(Casted);
          LI->eraseFromParent();
          Changed = true;
        }
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = nullptr;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(
              Init, CE, V->getType()->getPointerElementType(), DL);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
      } else if ((CE->getOpcode() == Instruction::BitCast &&
                  CE->getType()->isPointerTy()) ||
                 CE->getOpcode() == Instruction::AddrSpaceCast) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = nullptr;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
            ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(
              Init, CE, V->getType()->getPointerElementType(), DL);

        // If the initializer is an all-null value and we have an inbounds GEP,
        // we already know what the result of any load from that GEP is.
        // TODO: Handle splats.
        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
          SubInit = Constant::getNullValue(GEP->getResultElementType());
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
        return true;
      }
    }
  }
  return Changed;
}

static bool isSafeSROAElementUse(Value *V);

/// Return true if the specified GEP is a safe user of a derived
/// expression from a global that we want to SROA.
static bool isSafeSROAGEP(User *U) {
  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
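  //
  // For instance, a use like
  //   getelementptr ([4 x i32], [4 x i32]* @GV, i32 0, i32 2)
  // is acceptable, whereas a GEP with a variable index, or one that does not
  // start with a zero index into @GV itself, is rejected.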
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue())
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI; // Skip over the pointer index.

  // For all other levels we require that the indices are constant and in
  // range.  In particular, consider: A[0][i].  We cannot know that the user
  // isn't doing invalid things like allowing i to index an out-of-range
  // subscript that accesses A[1].  This can also happen between different
  // members of a struct in llvm IR.
  for (; GEPI != E; ++GEPI) {
    if (GEPI.isStruct())
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
    if (!IdxVal || (GEPI.isBoundedSequential() &&
                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
      return false;
  }

  return llvm::all_of(U->users(),
                      [](User *UU) { return isSafeSROAElementUse(UU); });
}

/// Return true if the specified instruction is a safe user of a derived
/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return isSafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.  Check it and its users are safe to SRA.
  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
}

/// Look at all uses of the global and decide whether it is safe for us to
/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (User *U : GV->users()) {
    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
    if (!isa<GetElementPtrInst>(U) &&
        (!isa<ConstantExpr>(U) ||
         cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
      return false;

    // Check the gep and its users are safe to SRA.
    if (!isSafeSROAGEP(U))
      return false;
  }

  return true;
}

static bool IsSRASequential(Type *T) {
  return isa<ArrayType>(T) || isa<VectorType>(T);
}
static uint64_t GetSRASequentialNumElements(Type *T) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return AT->getNumElements();
  return cast<FixedVectorType>(T)->getNumElements();
}
static Type *GetSRASequentialElementType(Type *T) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return AT->getElementType();
  return cast<VectorType>(T)->getElementType();
}
static bool CanDoGlobalSRA(GlobalVariable *GV) {
  Constant *Init = GV->getInitializer();

  if (isa<StructType>(Init->getType())) {
    // nothing to check
  } else if (IsSRASequential(Init->getType())) {
    if (GetSRASequentialNumElements(Init->getType()) > 16 &&
        GV->hasNUsesOrMore(16))
      return false; // It's not worth it.
  } else
    return false;

  return GlobalUsersSafeToSRA(GV);
}

/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                 uint64_t FragmentOffsetInBits,
                                 uint64_t FragmentSizeInBits,
                                 uint64_t VarSize) {
  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);
  for (auto *GVE : GVs) {
    DIVariable *Var = GVE->getVariable();
    DIExpression *Expr = GVE->getExpression();
    // If the FragmentSize is smaller than the variable,
    // emit a fragment expression.
    if (FragmentSizeInBits < VarSize) {
      if (auto E = DIExpression::createFragmentExpression(
              Expr, FragmentOffsetInBits, FragmentSizeInBits))
        Expr = *E;
      else
        return;
    }
    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
    NGV->addDebugInfo(NGVE);
  }
}

/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way.  We have determined that this
/// transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
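/// For example (sketch), a struct global such as
///   @g = internal global { i32, double } { i32 1, double 2.0 }
/// whose uses are all GEPs of the form 'gep @g, 0, C, ...' is split into
///   @g.0 = internal global i32 1
///   @g.1 = internal global double 2.0
/// and each GEP is rewritten to address the matching replacement directly.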
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  // Make sure this global only has simple uses that we can SRA.
  if (!CanDoGlobalSRA(GV))
    return nullptr;

  assert(GV->hasLocalLinkage());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();
  uint64_t VarSize = DL.getTypeSizeInBits(Ty);

  std::map<unsigned, GlobalVariable *> NewGlobals;

  // Get the alignment of the global, either explicit or target-specific.
  Align StartAlignment =
      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType());

  // Loop over all users and create replacement variables for used aggregate
  // elements.
  for (User *GEP : GV->users()) {
    assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() ==
                                           Instruction::GetElementPtr) ||
            isa<GetElementPtrInst>(GEP)) &&
           "NonGEP CE's are not SRAable!");

    // Ignore operand 1, which has to be zero or else the program is quite
    // broken (undefined).  Get operand 2, which is the structure or array
    // index.
    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (NewGlobals.count(ElementIdx) == 1)
      continue; // we've already created a replacement variable
    assert(NewGlobals.count(ElementIdx) == 0);

    Type *ElTy = nullptr;
    if (StructType *STy = dyn_cast<StructType>(Ty))
      ElTy = STy->getElementType(ElementIdx);
    else
      ElTy = GetSRASequentialElementType(Ty);
    assert(ElTy);

    Constant *In = Init->getAggregateElement(ElementIdx);
    assert(In && "Couldn't get element of initializer?");

    GlobalVariable *NGV = new GlobalVariable(
        ElTy, false, GlobalVariable::InternalLinkage, In,
        GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(),
        GV->getType()->getAddressSpace());
    NGV->setExternallyInitialized(GV->isExternallyInitialized());
    NGV->copyAttributesFrom(GV);
    NewGlobals.insert(std::make_pair(ElementIdx, NGV));

    if (StructType *STy = dyn_cast<StructType>(Ty)) {
      const StructLayout &Layout = *DL.getStructLayout(STy);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
      Align NewAlign = commonAlignment(StartAlignment, FieldOffset);
      if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx)))
        NGV->setAlignment(NewAlign);

      // Copy over the debug info for the variable.
      uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
      uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
    } else {
      uint64_t EltSize = DL.getTypeAllocSize(ElTy);
      Align EltAlign = DL.getABITypeAlign(ElTy);
      uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
      transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
                           FragmentSizeInBits, VarSize);
    }
  }

  if (NewGlobals.empty())
    return nullptr;

  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
  for (auto NewGlobalVar : NewGlobals)
    Globals.push_back(NewGlobalVar.second);

  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");

  Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->user_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode() == Instruction::GetElementPtr) ||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore operand 1, which has to be zero or else the program is quite
    // broken (undefined).  Get operand 2, which is the structure or array
    // index.
    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    assert(NewGlobals.count(ElementIdx) == 1);

    Value *NewPtr = NewGlobals[ElementIdx];
    Type *NewTy = NewGlobals[ElementIdx]->getValueType();

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant *, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr =
            ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value *, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(
            NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx),
            GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    // We changed the pointer of any memory access user. Recalculate alignments.
    for (User *U : NewPtr->users()) {
      if (auto *Load = dyn_cast<LoadInst>(U)) {
        Align PrefAlign = DL.getPrefTypeAlign(Load->getType());
        Align NewAlign = getOrEnforceKnownAlignment(Load->getPointerOperand(),
                                                    PrefAlign, DL, Load);
        Load->setAlignment(NewAlign);
      }
      if (auto *Store = dyn_cast<StoreInst>(U)) {
        Align PrefAlign =
            DL.getPrefTypeAlign(Store->getValueOperand()->getType());
        Align NewAlign = getOrEnforceKnownAlignment(Store->getPointerOperand(),
                                                    PrefAlign, DL, Store);
        Store->setAlignment(NewAlign);
      }
    }

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  assert(NewGlobals.size() > 0);
  return NewGlobals.begin()->second;
}

/// Return true if all users of the specified value will trap if the value is
/// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSetImpl<const PHINode*> &PHIs) {
  for (const User *U : V->users()) {
    if (const Instruction *I = dyn_cast<Instruction>(U)) {
      // If null pointer is considered valid, then all uses are non-trapping.
      // Non address-space 0 globals have already been pruned by the caller.
      if (NullPointerIsDefined(I->getFunction()))
        return false;
    }
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledOperand() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledOperand() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }
  }
  return true;
}

/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null.  Note that this also permits comparisons of the loaded value
/// against null, as a special case.
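/// For example (sketch): with @g = internal global i32* null, a pattern like
///   %p = load i32*, i32** @g
///   store i32 7, i32* %p
/// traps whenever %p is null, so it can only execute after a non-null value
/// has been stored to @g.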
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  return true;
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    // Uses are non-trapping if null pointer is considered valid.
    // Non address-space 0 globals are already pruned by the caller.
    if (NullPointerIsDefined(I->getFunction()))
      return false;
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallBase *CB = cast<CallBase>(I);
      if (CB->getCalledOperand() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CB->setCalledOperand(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
          if (CB->getArgOperand(i) == V) {
            PassedAsArg = true;
            CB->setArgOperand(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                     ConstantExpr::getCast(CI->getOpcode(),
                                           NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
                                                 NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// The specified global has only one non-null value stored into it.  If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// pointer, and we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(
    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end();
       GUI != E;) {
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser)) &&
             "Only expect load and stores!");
    }
  }

  if (Changed) {
    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
                      << "\n");
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, GetTLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
    }
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        if (isInstructionTriviallyDead(I, TLI))
          I->eraseFromParent();
      }
}

/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc.  Instead, turn the malloc into a global, and
/// rewrite any loads of GV as uses of the new global.
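/// For example (sketch): given
///   @g = internal global i32* null
///   %m = call i8* @malloc(i64 4)
///   %p = bitcast i8* %m to i32*
///   store i32* %p, i32** @g
/// the malloc is replaced by a new global @g.body of the malloc'd type, users
/// of the loaded pointer are rewritten to use @g.body directly, and, if the
/// loaded value is ever compared against null, a boolean @g.init is introduced
/// to track whether the store has happened yet.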
static GlobalVariable *
OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
                              ConstantInt *NElements, const DataLayout &DL,
                              TargetLibraryInfo *TLI) {
  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
                    << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable is of an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory are
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // If there are bitcast users of the malloc (which is typical, usually we
  // have a malloc + bitcast) then replace them with uses of the new global.
  // Update other users to use the global as well.
  BitCastInst *TheBC = nullptr;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->user_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (!TheBC)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getValueType())
    RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
      new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                         GlobalValue::InternalLinkage,
                         ConstantInt::getFalse(GV->getContext()),
                         GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
                    Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->user_back());
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        LoadUse = RepValue;
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
                               InitBool->getName() + ".val", false, Align(1),
                               LI->getOrdering(), LI->getSyncScopeID(),
                               LI->isUnordered() ? (Instruction *)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, DL, TLI);

  return NewGV;
}

/// Scan the use-list of V checking to make sure that there are no complex uses
/// of V.  We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                          const GlobalVariable *GV) {
  for (const User *U : V->users()) {
    const Instruction *Inst = cast<Instruction>(U);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// This function is called when we see a pointer global variable with a single
/// value stored into it that is a malloc or cast of malloc.
static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
                                               Type *AllocTy,
                                               AtomicOrdering Ordering,
                                               const DataLayout &DL,
                                               TargetLibraryInfo *TLI) {
  // If this is a malloc of an abstract type, don't touch it.
  if (!AllocTy->isSized())
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This only allows the
  // malloc to be stored into the specified global, loaded, and icmp'd.
  // These are all things we could transform to using the global for.
  if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
    return false;

  // If we have a global that is only initialized with a fixed size malloc,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  // We cannot optimize the malloc if we cannot determine the malloc array size.
  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
  if (!NElems)
    return false;

  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
    // Restrict this transformation to only working on small allocations
    // (2048 bytes currently), as we don't want to introduce a 16M global or
    // something.
    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
      return true;
    }

  return false;
}

// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                         AtomicOrdering Ordering, const DataLayout &DL,
                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue() &&
      !NullPointerIsDefined(
          nullptr /* F */,
          GV->getInitializer()->getType()->getPointerAddressSpace())) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      if (GV->getInitializer()->getType() != SOVC->getType())
        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());

      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
        return true;
    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) {
      auto *TLI = &GetTLI(*CI->getFunction());
      Type *MallocType = getMallocAllocatedType(CI, TLI);
      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                           Ordering, DL, TLI))
        return true;
    }
  }

  return false;
}

/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal.  See if we can shrink the global into a
/// boolean and select between the two values whenever it is used.  This exposes
/// the values to other scalar optimizations.
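/// For example (sketch): if the only values ever stored to
///   @g = internal global i32 0
/// are 0 (its initializer) and 42, then @g is replaced by
///   @g.b = internal global i1 false
/// with stores rewritten to store an i1, and loads rewritten to
///   %b = load i1, i1* @g.b
///   %v = select i1 %b, i32 42, i32 0
/// (or a simple zext when the two values are 0 and 1).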
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  Type *GVElType = GV->getValueType();

  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
      GVElType->isFloatingPointTy() ||
      GVElType->isPointerTy() || GVElType->isVectorTy())
    return false;

  // Walk the use list of the global seeing if all the uses are load or store.
  // If there is anything else, bail out.
  for (User *U : GV->users())
    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
      return false;

  LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                             false,
                                             GlobalValue::InternalLinkage,
                                             ConstantInt::getFalse(GV->getContext()),
                                             GV->getName()+".b",
                                             GV->getThreadLocalMode(),
                                             GV->getType()->getAddressSpace());
  NewGV->copyAttributesFrom(GV);
  GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
         "No reason to shrink to bool!");

  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
  bool IsOneZero = false;
  bool EmitOneOrZero = true;
  auto *CI = dyn_cast<ConstantInt>(OtherVal);
  if (CI && CI->getValue().getActiveBits() <= 64) {
    IsOneZero = InitVal->isNullValue() && CI->isOne();

    auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
    if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
      uint64_t ValInit = CIInit->getZExtValue();
      uint64_t ValOther = CI->getZExtValue();
      uint64_t ValMinus = ValOther - ValInit;

      for (auto *GVe : GVs) {
        DIGlobalVariable *DGV = GVe->getVariable();
        DIExpression *E = GVe->getExpression();
        const DataLayout &DL = GV->getParent()->getDataLayout();
        unsigned SizeInOctets =
            DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;

        // It is expected that the address of the optimized global variable is
        // on top of the DWARF expression stack.  After the optimization, the
        // value of that variable will be either 0 (for the initial value) or
        // 1 (for the other value).  The following expression should return a
        // constant integer value depending on the value at the global object
        // address:
        //   val * (ValOther - ValInit) + ValInit:
        //   DW_OP_deref DW_OP_constu <ValMinus>
        //   DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
        SmallVector<uint64_t, 12> Ops = {
            dwarf::DW_OP_deref_size, SizeInOctets,
            dwarf::DW_OP_constu, ValMinus,
            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
            dwarf::DW_OP_plus};
        bool WithStackValue = true;
        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
        DIGlobalVariableExpression *DGVE =
            DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
        NewGV->addDebugInfo(DGVE);
      }
      EmitOneOrZero = false;
    }
  }

  if (EmitOneOrZero) {
    // FIXME: This will only emit the address of the global for the debugger;
    // the value written at that address will only ever be 0 or 1.
    for (auto *GV : GVs)
      NewGV->addDebugInfo(GV);
  }

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->user_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal) {
        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                    StoringOther);
      } else {
        // Otherwise, we are storing a previously loaded copy.  To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction.  If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
                                  LI->getName() + ".b", false, Align(1),
                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      StoreInst *NSI =
          new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
                        SI->getSyncScopeID(), SI);
      NSI->setDebugLoc(SI->getDebugLoc());
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
                                   LI->getName() + ".b", false, Align(1),
                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
      Instruction *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
      NSI->takeName(LI);
      // Since LI is split into two instructions, NLI and NSI both inherit the
      // same DebugLoc
      NLI->setDebugLoc(LI->getDebugLoc());
      NSI->setDebugLoc(LI->getDebugLoc());
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  // Retain the name of the old global variable. People who are debugging their
  // programs may expect these variables to be named the same.
  NewGV->takeName(GV);
  GV->eraseFromParent();
  return true;
}

static bool deleteIfDead(
    GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
  GV.removeDeadConstantUsers();

  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
    return false;

  if (const Comdat *C = GV.getComdat())
    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
      return false;

  bool Dead;
  if (auto *F = dyn_cast<Function>(&GV))
    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
  else
    Dead = GV.use_empty();
  if (!Dead)
    return false;

  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
  GV.eraseFromParent();
  ++NumDeleted;
  return true;
}

static bool isPointerValueDeadOnEntryToFunction(
    const Function *F, GlobalValue *GV,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  // Find all uses of GV. We expect them all to be in F, and if we can't
  // identify any of the uses we bail out.
  //
  // On each of these uses, identify if the memory that GV points to is
  // used/required/live at the start of the function. If it is not, for example
  // if the first thing the function does is store to the GV, the GV can
  // possibly be demoted.
  //
  // We don't do an exhaustive search for memory operations - simply look
  // through bitcasts as they're quite common and benign.
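  //
  // For example (sketch): if every load of GV in F is dominated by a store to
  // GV that writes at least as many bits, nothing can observe GV's value on
  // entry to F, so the memory is effectively dead at that point.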
  const DataLayout &DL = GV->getParent()->getDataLayout();
  SmallVector<LoadInst *, 4> Loads;
  SmallVector<StoreInst *, 4> Stores;
  for (auto *U : GV->users()) {
    if (Operator::getOpcode(U) == Instruction::BitCast) {
      for (auto *UU : U->users()) {
        if (auto *LI = dyn_cast<LoadInst>(UU))
          Loads.push_back(LI);
        else if (auto *SI = dyn_cast<StoreInst>(UU))
          Stores.push_back(SI);
        else
          return false;
      }
      continue;
    }

    Instruction *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    assert(I->getParent()->getParent() == F);

    if (auto *LI = dyn_cast<LoadInst>(I))
      Loads.push_back(LI);
    else if (auto *SI = dyn_cast<StoreInst>(I))
      Stores.push_back(SI);
    else
      return false;
  }

  // We have identified all uses of GV into loads and stores. Now check if all
  // of them are known not to depend on the value of the global at the function
  // entry point. We do this by ensuring that every load is dominated by at
  // least one store.
  auto &DT = LookupDomTree(*const_cast<Function *>(F));

  // The below check is quadratic. Check we're not going to do too many tests.
  // FIXME: Even though this will always have worst-case quadratic time, we
  // could put effort into minimizing the average time by putting stores that
  // have been shown to dominate at least one load at the beginning of the
  // Stores array, making subsequent dominance checks more likely to succeed
  // early.
  //
  // The threshold here is fairly large because global->local demotion is a
  // very powerful optimization should it fire.
  const unsigned Threshold = 100;
  if (Loads.size() * Stores.size() > Threshold)
    return false;

  for (auto *L : Loads) {
    auto *LTy = L->getType();
    if (none_of(Stores, [&](const StoreInst *S) {
          auto *STy = S->getValueOperand()->getType();
          // The load is only dominated by the store if DomTree says so
          // and the number of bits loaded in L is less than or equal to
          // the number of bits stored in S.
          return DT.dominates(S, L) &&
                 DL.getTypeStoreSize(LTy).getFixedSize() <=
                     DL.getTypeStoreSize(STy).getFixedSize();
        }))
      return false;
  }
  // All loads have known dependences inside F, so the global can be localized.
  return true;
}

/// C may have non-instruction users. Can all of those users be turned into
/// instructions?
static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
  // We don't do this exhaustively. The most common pattern that we really need
  // to care about is a constant GEP or constant bitcast - so just looking
  // through one single ConstantExpr.
  //
  // The set of constants that this function returns true for must be able to
  // be handled by makeAllConstantUsesInstructions.
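  //
  // For example (illustrative): a user like
  //   getelementptr (i32, i32* @G, i64 1)
  // that appears only as an operand of instructions can be rewritten as a
  // GetElementPtrInst, whereas the same expression appearing inside another
  // constant (say, another global's initializer) cannot.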
  for (auto *U : C->users()) {
    if (isa<Instruction>(U))
      continue;
    if (!isa<ConstantExpr>(U))
      // Non instruction, non-constantexpr user; cannot convert this.
      return false;
    for (auto *UU : U->users())
      if (!isa<Instruction>(UU))
        // A constantexpr used by another constant. We don't try and recurse any
        // further but just bail out at this point.
        return false;
  }

  return true;
}
1412
1413 /// C may have non-instruction users, and
1414 /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1415 /// non-instruction users to instructions.
1416 static void makeAllConstantUsesInstructions(Constant *C) {
1417 SmallVector<ConstantExpr*,4> Users;
1418 for (auto *U : C->users()) {
1419 if (isa<ConstantExpr>(U))
1420 Users.push_back(cast<ConstantExpr>(U));
1421 else
1422 // We should never get here; allNonInstructionUsersCanBeMadeInstructions
1423 // should not have returned true for C.
1424 assert(
1425 isa<Instruction>(U) &&
1426 "Can't transform non-constantexpr non-instruction to instruction!");
1427 }
1428
1429 SmallVector<Value*,4> UUsers;
1430 for (auto *U : Users) {
1431 UUsers.clear();
1432 append_range(UUsers, U->users());
1433 for (auto *UU : UUsers) {
1434 Instruction *UI = cast<Instruction>(UU);
1435 Instruction *NewU = U->getAsInstruction();
1436 NewU->insertBefore(UI);
1437 UI->replaceUsesOfWith(U, NewU);
1438 }
1439 // We've replaced all the uses, so destroy the constant. (destroyConstant
1440 // will update value handles and metadata.)
1441 U->destroyConstant();
1442 }
1443 }
1444
1445 /// Analyze the specified global variable and optimize
1446 /// it if possible. If we make a change, return true.
1447 static bool
1448 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1449 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1450 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1451 auto &DL = GV->getParent()->getDataLayout();
1452 // If this is a first class global and has only one accessing function and
1453 // this function is non-recursive, we replace the global with a local alloca
1454 // in this function.
1455 //
1456   // NOTE: It doesn't make sense to promote non-single-value types since we
1457   // are just replacing static memory with stack memory.
1458   //
1459   // If the global is in a different address space, don't bring it onto the stack.
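  // For illustration only (hypothetical IR, not from this file): an internal
  // global such as
  //   @counter = internal global i32 0
  // that is accessed solely from one non-recursive function is rewritten
  // below into, roughly,
  //   %counter = alloca i32
  //   store i32 0, i32* %counter
  // at the start of that function's entry block, after which the global
  // itself is erased.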
1460 if (!GS.HasMultipleAccessingFunctions &&
1461 GS.AccessingFunction &&
1462 GV->getValueType()->isSingleValueType() &&
1463 GV->getType()->getAddressSpace() == 0 &&
1464 !GV->isExternallyInitialized() &&
1465 allNonInstructionUsersCanBeMadeInstructions(GV) &&
1466 GS.AccessingFunction->doesNotRecurse() &&
1467 isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1468 LookupDomTree)) {
1469 const DataLayout &DL = GV->getParent()->getDataLayout();
1470
1471 LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1472 Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1473 ->getEntryBlock().begin());
1474 Type *ElemTy = GV->getValueType();
1475 // FIXME: Pass Global's alignment when globals have alignment
1476 AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1477 GV->getName(), &FirstI);
1478 if (!isa<UndefValue>(GV->getInitializer()))
1479 new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1480
1481 makeAllConstantUsesInstructions(GV);
1482
1483 GV->replaceAllUsesWith(Alloca);
1484 GV->eraseFromParent();
1485 ++NumLocalized;
1486 return true;
1487 }
1488
1489 bool Changed = false;
1490
1491 // If the global is never loaded (but may be stored to), it is dead.
1492 // Delete it now.
1493 if (!GS.IsLoaded) {
1494 LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1495
1496 if (isLeakCheckerRoot(GV)) {
1497 // Delete any constant stores to the global.
1498 Changed = CleanupPointerRootUsers(GV, GetTLI);
1499 } else {
1500 // Delete any stores we can find to the global. We may not be able to
1501 // make it completely dead though.
1502 Changed =
1503 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
1504 }
1505
1506 // If the global is dead now, delete it.
1507 if (GV->use_empty()) {
1508 GV->eraseFromParent();
1509 ++NumDeleted;
1510 Changed = true;
1511 }
1512 return Changed;
1513
1514 }
1515 if (GS.StoredType <= GlobalStatus::InitializerStored) {
1516 LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1517
1518 // Don't actually mark a global constant if it's atomic because atomic loads
1519 // are implemented by a trivial cmpxchg in some edge-cases and that usually
1520 // requires write access to the variable even if it's not actually changed.
1521 if (GS.Ordering == AtomicOrdering::NotAtomic) {
1522 assert(!GV->isConstant() && "Expected a non-constant global");
1523 GV->setConstant(true);
1524 Changed = true;
1525 }
1526
1527 // Clean up any obviously simplifiable users now.
1528 Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
1529
1530 // If the global is dead now, just nuke it.
1531 if (GV->use_empty()) {
1532 LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
1533 << "all users and delete global!\n");
1534 GV->eraseFromParent();
1535 ++NumDeleted;
1536 return true;
1537 }
1538
1539 // Fall through to the next check; see if we can optimize further.
1540 ++NumMarked;
1541 }
1542 if (!GV->getInitializer()->getType()->isSingleValueType()) {
1543 const DataLayout &DL = GV->getParent()->getDataLayout();
1544 if (SRAGlobal(GV, DL))
1545 return true;
1546 }
1547 if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
1548 // If the initial value for the global was an undef value, and if only
1549 // one other value was stored into it, we can just change the
1550 // initializer to be the stored value, then delete all stores to the
1551 // global. This allows us to mark it constant.
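    // For illustration only (hypothetical IR, not from this file): given
    //   @g = internal global i32 undef
    // whose only store is "store i32 7, i32* @g", the initializer can be
    // rewritten to i32 7; cleaning up the now-redundant users may then let us
    // delete @g entirely.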
1552 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
1553 if (isa<UndefValue>(GV->getInitializer())) {
1554 // Change the initial value here.
1555 GV->setInitializer(SOVConstant);
1556
1557 // Clean up any obviously simplifiable users now.
1558 CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
1559
1560 if (GV->use_empty()) {
1561 LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
1562 << "simplify all users and delete global!\n");
1563 GV->eraseFromParent();
1564 ++NumDeleted;
1565 }
1566 ++NumSubstitute;
1567 return true;
1568 }
1569
1570 // Try to optimize globals based on the knowledge that only one value
1571 // (besides its initializer) is ever stored to the global.
1572 if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
1573 GetTLI))
1574 return true;
1575
1576 // Otherwise, if the global was not a boolean, we can shrink it to be a
1577 // boolean.
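    // For illustration only (hypothetical transformation, not from this
    // file): a global that only ever holds its initializer or one other known
    // constant, e.g.
    //   @state = internal global i32 0   ; only 0 or 42 is ever stored
    // can be shrunk to
    //   @state = internal global i1 false
    // with each load rewritten to select between the two known i32 values.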
1578 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
1579 if (GS.Ordering == AtomicOrdering::NotAtomic) {
1580 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1581 ++NumShrunkToBool;
1582 return true;
1583 }
1584 }
1585 }
1586 }
1587
1588 return Changed;
1589 }
1590
1591 /// Analyze the specified global variable and optimize it if possible. If we
1592 /// make a change, return true.
1593 static bool
1594 processGlobal(GlobalValue &GV,
1595 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1596 function_ref<DominatorTree &(Function &)> LookupDomTree) {
1597 if (GV.getName().startswith("llvm."))
1598 return false;
1599
1600 GlobalStatus GS;
1601
1602 if (GlobalStatus::analyzeGlobal(&GV, GS))
1603 return false;
1604
1605 bool Changed = false;
1606 if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
1607 auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
1608 : GlobalValue::UnnamedAddr::Local;
1609 if (NewUnnamedAddr != GV.getUnnamedAddr()) {
1610 GV.setUnnamedAddr(NewUnnamedAddr);
1611 NumUnnamed++;
1612 Changed = true;
1613 }
1614 }
1615
1616 // Do more involved optimizations if the global is internal.
1617 if (!GV.hasLocalLinkage())
1618 return Changed;
1619
1620 auto *GVar = dyn_cast<GlobalVariable>(&GV);
1621 if (!GVar)
1622 return Changed;
1623
1624 if (GVar->isConstant() || !GVar->hasInitializer())
1625 return Changed;
1626
1627 return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
1628 }
1629
1630 /// Walk all of the direct calls of the specified function, changing them to
1631 /// FastCC.
1632 static void ChangeCalleesToFastCall(Function *F) {
1633 for (User *U : F->users()) {
1634 if (isa<BlockAddress>(U))
1635 continue;
1636 cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
1637 }
1638 }
1639
1640 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
1641 Attribute::AttrKind A) {
1642 unsigned AttrIndex;
1643 if (Attrs.hasAttrSomewhere(A, &AttrIndex))
1644 return Attrs.removeAttribute(C, AttrIndex, A);
1645 return Attrs;
1646 }
1647
1648 static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
1649 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
1650 for (User *U : F->users()) {
1651 if (isa<BlockAddress>(U))
1652 continue;
1653 CallBase *CB = cast<CallBase>(U);
1654 CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
1655 }
1656 }
1657
1658 /// Return true if this is a calling convention that we'd like to change. The
1659 /// idea here is that we don't want to mess with the convention if the user
1660 /// explicitly requested something with performance implications like coldcc,
1661 /// GHC, or anyregcc.
1662 static bool hasChangeableCC(Function *F) {
1663 CallingConv::ID CC = F->getCallingConv();
1664
1665 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1666 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
1667 return false;
1668
1669 // FIXME: Change CC for the whole chain of musttail calls when possible.
1670 //
1671   // Can't change the CC of a function that either has musttail calls or is a
1672   // musttail callee itself.
1673 for (User *U : F->users()) {
1674 if (isa<BlockAddress>(U))
1675 continue;
1676 CallInst* CI = dyn_cast<CallInst>(U);
1677 if (!CI)
1678 continue;
1679
1680 if (CI->isMustTailCall())
1681 return false;
1682 }
1683
1684 for (BasicBlock &BB : *F)
1685 if (BB.getTerminatingMustTailCall())
1686 return false;
1687
1688 return true;
1689 }
1690
1691 /// Return true if the block containing the call site has a BlockFrequency of
1692 /// less than ColdCCRelFreq% of the entry block.
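/// For example (assuming a ColdCCRelFreq of 2): if the caller's entry block
/// has frequency 100 and the call site's block has frequency 1, then
/// 1 < 100 * 2/100 = 2, so the call site is treated as cold.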
1693 static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
1694 const BranchProbability ColdProb(ColdCCRelFreq, 100);
1695 auto *CallSiteBB = CB.getParent();
1696 auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
1697 auto CallerEntryFreq =
1698 CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
1699 return CallSiteFreq < CallerEntryFreq * ColdProb;
1700 }
1701
1702 // This function checks if the input function F is cold at all call sites. It
1703 // also looks at each call site's containing function, returning false if the
1704 // caller function contains other non-cold calls. The input vector AllCallsCold
1705 // contains a list of functions that only have call sites in cold blocks.
1706 static bool
1707 isValidCandidateForColdCC(Function &F,
1708 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1709 const std::vector<Function *> &AllCallsCold) {
1710
1711 if (F.user_empty())
1712 return false;
1713
1714 for (User *U : F.users()) {
1715 if (isa<BlockAddress>(U))
1716 continue;
1717
1718 CallBase &CB = cast<CallBase>(*U);
1719 Function *CallerFunc = CB.getParent()->getParent();
1720 BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1721 if (!isColdCallSite(CB, CallerBFI))
1722 return false;
1723 if (!llvm::is_contained(AllCallsCold, CallerFunc))
1724 return false;
1725 }
1726 return true;
1727 }
1728
1729 static void changeCallSitesToColdCC(Function *F) {
1730 for (User *U : F->users()) {
1731 if (isa<BlockAddress>(U))
1732 continue;
1733 cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
1734 }
1735 }
1736
1737 // This function iterates over all the call instructions in the input Function
1738 // and checks that all call sites are in cold blocks and are allowed to use the
1739 // coldcc calling convention.
1740 static bool
1741 hasOnlyColdCalls(Function &F,
1742 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
1743 for (BasicBlock &BB : F) {
1744 for (Instruction &I : BB) {
1745 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1746         // Skip over inline asm instructions since they aren't function calls.
1747 if (CI->isInlineAsm())
1748 continue;
1749 Function *CalledFn = CI->getCalledFunction();
1750 if (!CalledFn)
1751 return false;
1752 if (!CalledFn->hasLocalLinkage())
1753 return false;
1754         // Skip over intrinsics since they won't remain as function calls.
1755 if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
1756 continue;
1757 // Check if it's valid to use coldcc calling convention.
1758 if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
1759 CalledFn->hasAddressTaken())
1760 return false;
1761 BlockFrequencyInfo &CallerBFI = GetBFI(F);
1762 if (!isColdCallSite(*CI, CallerBFI))
1763 return false;
1764 }
1765 }
1766 }
1767 return true;
1768 }
1769
1770 static bool hasMustTailCallers(Function *F) {
1771 for (User *U : F->users()) {
1772 CallBase *CB = dyn_cast<CallBase>(U);
1773 if (!CB) {
1774 assert(isa<BlockAddress>(U) &&
1775 "Expected either CallBase or BlockAddress");
1776 continue;
1777 }
1778 if (CB->isMustTailCall())
1779 return true;
1780 }
1781 return false;
1782 }
1783
1784 static bool hasInvokeCallers(Function *F) {
1785 for (User *U : F->users())
1786 if (isa<InvokeInst>(U))
1787 return true;
1788 return false;
1789 }
1790
1791 static void RemovePreallocated(Function *F) {
1792 RemoveAttribute(F, Attribute::Preallocated);
1793
1794 auto *M = F->getParent();
1795
1796 IRBuilder<> Builder(M->getContext());
1797
1798 // Cannot modify users() while iterating over it, so make a copy.
1799 SmallVector<User *, 4> PreallocatedCalls(F->users());
1800 for (User *U : PreallocatedCalls) {
1801 CallBase *CB = dyn_cast<CallBase>(U);
1802 if (!CB)
1803 continue;
1804
1805 assert(
1806 !CB->isMustTailCall() &&
1807         "Shouldn't call RemovePreallocated() on a musttail preallocated call");
1808 // Create copy of call without "preallocated" operand bundle.
1809 SmallVector<OperandBundleDef, 1> OpBundles;
1810 CB->getOperandBundlesAsDefs(OpBundles);
1811 CallBase *PreallocatedSetup = nullptr;
1812 for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
1813 if (It->getTag() == "preallocated") {
1814 PreallocatedSetup = cast<CallBase>(*It->input_begin());
1815 OpBundles.erase(It);
1816 break;
1817 }
1818 }
1819 assert(PreallocatedSetup && "Did not find preallocated bundle");
1820 uint64_t ArgCount =
1821 cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
1822
1823 assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
1824 "Unknown indirect call type");
1825 CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
1826 CB->replaceAllUsesWith(NewCB);
1827 NewCB->takeName(CB);
1828 CB->eraseFromParent();
1829
1830 Builder.SetInsertPoint(PreallocatedSetup);
1831 auto *StackSave =
1832 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1833
1834 Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
1835 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1836 StackSave);
1837
1838 // Replace @llvm.call.preallocated.arg() with alloca.
1839 // Cannot modify users() while iterating over it, so make a copy.
1840 // @llvm.call.preallocated.arg() can be called with the same index multiple
1841 // times. So for each @llvm.call.preallocated.arg(), we see if we have
1842 // already created a Value* for the index, and if not, create an alloca and
1843 // bitcast right after the @llvm.call.preallocated.setup() so that it
1844 // dominates all uses.
1845 SmallVector<Value *, 2> ArgAllocas(ArgCount);
1846 SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
1847 for (auto *User : PreallocatedArgs) {
1848 auto *UseCall = cast<CallBase>(User);
1849 assert(UseCall->getCalledFunction()->getIntrinsicID() ==
1850 Intrinsic::call_preallocated_arg &&
1851 "preallocated token use was not a llvm.call.preallocated.arg");
1852 uint64_t AllocArgIndex =
1853 cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
1854 Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
1855 if (!AllocaReplacement) {
1856 auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
1857 auto *ArgType = UseCall
1858 ->getAttribute(AttributeList::FunctionIndex,
1859 Attribute::Preallocated)
1860 .getValueAsType();
1861 auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
1862 Builder.SetInsertPoint(InsertBefore);
1863 auto *Alloca =
1864 Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
1865 auto *BitCast = Builder.CreateBitCast(
1866 Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
1867 ArgAllocas[AllocArgIndex] = BitCast;
1868 AllocaReplacement = BitCast;
1869 }
1870
1871 UseCall->replaceAllUsesWith(AllocaReplacement);
1872 UseCall->eraseFromParent();
1873 }
1874 // Remove @llvm.call.preallocated.setup().
1875 cast<Instruction>(PreallocatedSetup)->eraseFromParent();
1876 }
1877 }
1878
1879 static bool
1880 OptimizeFunctions(Module &M,
1881 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1882 function_ref<TargetTransformInfo &(Function &)> GetTTI,
1883 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1884 function_ref<DominatorTree &(Function &)> LookupDomTree,
1885 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
1886
1887 bool Changed = false;
1888
1889 std::vector<Function *> AllCallsCold;
1890 for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
1891 Function *F = &*FI++;
1892 if (hasOnlyColdCalls(*F, GetBFI))
1893 AllCallsCold.push_back(F);
1894 }
1895
1896 // Optimize functions.
1897 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
1898 Function *F = &*FI++;
1899
1900 // Don't perform global opt pass on naked functions; we don't want fast
1901 // calling conventions for naked functions.
1902 if (F->hasFnAttribute(Attribute::Naked))
1903 continue;
1904
1905 // Functions without names cannot be referenced outside this module.
1906 if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
1907 F->setLinkage(GlobalValue::InternalLinkage);
1908
1909 if (deleteIfDead(*F, NotDiscardableComdats)) {
1910 Changed = true;
1911 continue;
1912 }
1913
1914 // LLVM's definition of dominance allows instructions that are cyclic
1915 // in unreachable blocks, e.g.:
1916 // %pat = select i1 %condition, @global, i16* %pat
1917 // because any instruction dominates an instruction in a block that's
1918 // not reachable from entry.
1919 // So, remove unreachable blocks from the function, because a) there's
1920 // no point in analyzing them and b) GlobalOpt should otherwise grow
1921 // some more complicated logic to break these cycles.
1922 // Removing unreachable blocks might invalidate the dominator so we
1923 // recalculate it.
1924 if (!F->isDeclaration()) {
1925 if (removeUnreachableBlocks(*F)) {
1926 auto &DT = LookupDomTree(*F);
1927 DT.recalculate(*F);
1928 Changed = true;
1929 }
1930 }
1931
1932 Changed |= processGlobal(*F, GetTLI, LookupDomTree);
1933
1934 if (!F->hasLocalLinkage())
1935 continue;
1936
1937 // If we have an inalloca parameter that we can safely remove the
1938 // inalloca attribute from, do so. This unlocks optimizations that
1939 // wouldn't be safe in the presence of inalloca.
1940 // FIXME: We should also hoist alloca affected by this to the entry
1941 // block if possible.
1942 if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
1943 !F->hasAddressTaken() && !hasMustTailCallers(F)) {
1944 RemoveAttribute(F, Attribute::InAlloca);
1945 Changed = true;
1946 }
1947
1948 // FIXME: handle invokes
1949 // FIXME: handle musttail
1950 if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
1951 if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
1952 !hasInvokeCallers(F)) {
1953 RemovePreallocated(F);
1954 Changed = true;
1955 }
1956 continue;
1957 }
1958
1959 if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
1960 NumInternalFunc++;
1961 TargetTransformInfo &TTI = GetTTI(*F);
1962 // Change the calling convention to coldcc if either stress testing is
1963 // enabled or the target would like to use coldcc on functions which are
1964 // cold at all call sites and the callers contain no other non coldcc
1965 // calls.
1966 if (EnableColdCCStressTest ||
1967 (TTI.useColdCCForColdCall(*F) &&
1968 isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
1969 F->setCallingConv(CallingConv::Cold);
1970 changeCallSitesToColdCC(F);
1971 Changed = true;
1972 NumColdCC++;
1973 }
1974 }
1975
1976 if (hasChangeableCC(F) && !F->isVarArg() &&
1977 !F->hasAddressTaken()) {
1978 // If this function has a calling convention worth changing, is not a
1979 // varargs function, and is only called directly, promote it to use the
1980 // Fast calling convention.
1981 F->setCallingConv(CallingConv::Fast);
1982 ChangeCalleesToFastCall(F);
1983 ++NumFastCallFns;
1984 Changed = true;
1985 }
1986
1987 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
1988 !F->hasAddressTaken()) {
1989 // The function is not used by a trampoline intrinsic, so it is safe
1990 // to remove the 'nest' attribute.
1991 RemoveAttribute(F, Attribute::Nest);
1992 ++NumNestRemoved;
1993 Changed = true;
1994 }
1995 }
1996 return Changed;
1997 }
1998
1999 static bool
2000 OptimizeGlobalVars(Module &M,
2001 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2002 function_ref<DominatorTree &(Function &)> LookupDomTree,
2003 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2004 bool Changed = false;
2005
2006 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
2007 GVI != E; ) {
2008 GlobalVariable *GV = &*GVI++;
2009 // Global variables without names cannot be referenced outside this module.
2010 if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
2011 GV->setLinkage(GlobalValue::InternalLinkage);
2012 // Simplify the initializer.
2013 if (GV->hasInitializer())
2014 if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
2015 auto &DL = M.getDataLayout();
2016 // TLI is not used in the case of a Constant, so use default nullptr
2017 // for that optional parameter, since we don't have a Function to
2018 // provide GetTLI anyway.
2019 Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2020 if (New != C)
2021 GV->setInitializer(New);
2022 }
2023
2024 if (deleteIfDead(*GV, NotDiscardableComdats)) {
2025 Changed = true;
2026 continue;
2027 }
2028
2029 Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
2030 }
2031 return Changed;
2032 }
2033
2034 /// Evaluate a piece of a constantexpr store into a global initializer. This
2035 /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
2036 /// GEP operands of Addr [0, OpNo) have been stepped into.
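/// For illustration only (hypothetical constants, not from this file): storing
/// i32 7 through the address
///   getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i32 2)
/// rebuilds @a's initializer with element 2 replaced, e.g.
///   [4 x i32] [i32 0, i32 0, i32 0, i32 0] -> [4 x i32] [i32 0, i32 0, i32 7, i32 0]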
2037 static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2038 ConstantExpr *Addr, unsigned OpNo) {
2039 // Base case of the recursion.
2040 if (OpNo == Addr->getNumOperands()) {
2041 assert(Val->getType() == Init->getType() && "Type mismatch!");
2042 return Val;
2043 }
2044
2045 SmallVector<Constant*, 32> Elts;
2046 if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2047 // Break up the constant into its elements.
2048 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2049 Elts.push_back(Init->getAggregateElement(i));
2050
2051 // Replace the element that we are supposed to.
2052 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2053 unsigned Idx = CU->getZExtValue();
2054 assert(Idx < STy->getNumElements() && "Struct index out of range!");
2055 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2056
2057 // Return the modified struct.
2058 return ConstantStruct::get(STy, Elts);
2059 }
2060
2061 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2062 uint64_t NumElts;
2063 if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType()))
2064 NumElts = ATy->getNumElements();
2065 else
2066 NumElts = cast<FixedVectorType>(Init->getType())->getNumElements();
2067
2068 // Break up the array into elements.
2069 for (uint64_t i = 0, e = NumElts; i != e; ++i)
2070 Elts.push_back(Init->getAggregateElement(i));
2071
2072 assert(CI->getZExtValue() < NumElts);
2073 Elts[CI->getZExtValue()] =
2074 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2075
2076 if (Init->getType()->isArrayTy())
2077 return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts);
2078 return ConstantVector::get(Elts);
2079 }
2080
2081 /// We have decided that Addr (which satisfies the predicate
2082 /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
2083 static void CommitValueTo(Constant *Val, Constant *Addr) {
2084 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2085 assert(GV->hasInitializer());
2086 GV->setInitializer(Val);
2087 return;
2088 }
2089
2090 ConstantExpr *CE = cast<ConstantExpr>(Addr);
2091 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2092 GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2093 }
2094
2095 /// Given a map of address -> value, where addresses are expected to be some form
2096 /// of either a global or a constant GEP, set the initializer for the address to
2097 /// be the value. This performs mostly the same function as CommitValueTo()
2098 /// and EvaluateStoreInto() but is optimized to be more efficient for the common
2099 /// case where the set of addresses are GEPs sharing the same underlying global,
2100 /// processing the GEPs in batches rather than individually.
2101 ///
2102 /// To give an example, consider the following C++ code adapted from the clang
2103 /// regression tests:
2104 /// struct S {
2105 /// int n = 10;
2106 /// int m = 2 * n;
2107 /// S(int a) : n(a) {}
2108 /// };
2109 ///
2110 /// template<typename T>
2111 /// struct U {
2112 /// T *r = &q;
2113 /// T q = 42;
2114 /// U *p = this;
2115 /// };
2116 ///
2117 /// U<S> e;
2118 ///
2119 /// The global static constructor for 'e' will need to initialize 'r' and 'p' of
2120 /// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm'
2121 /// members. This batch algorithm will simply use the general CommitValueTo()
2122 /// method to handle the complex nested S struct initialization of 'q', before
2123 /// processing the outermost members in a single batch. Using CommitValueTo() to
2124 /// handle members in the outer struct is inefficient when the struct/array is
2125 /// very large, as we end up creating and destroying constant arrays for each
2126 /// initialization.
2127 /// For the above case, we expect the following IR to be generated:
2128 ///
2129 /// %struct.U = type { %struct.S*, %struct.S, %struct.U* }
2130 /// %struct.S = type { i32, i32 }
2131 /// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e,
2132 /// i64 0, i32 1),
2133 /// %struct.S { i32 42, i32 84 }, %struct.U* @e }
2134 /// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex
2135 /// constant expression, while the other two elements of @e are "simple".
2136 static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
2137 SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs;
2138 SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs;
2139 SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs;
2140 SimpleCEs.reserve(Mem.size());
2141
2142 for (const auto &I : Mem) {
2143 if (auto *GV = dyn_cast<GlobalVariable>(I.first)) {
2144 GVs.push_back(std::make_pair(GV, I.second));
2145 } else {
2146 ConstantExpr *GEP = cast<ConstantExpr>(I.first);
2147 // We don't handle the deeply recursive case using the batch method.
2148 if (GEP->getNumOperands() > 3)
2149 ComplexCEs.push_back(std::make_pair(GEP, I.second));
2150 else
2151 SimpleCEs.push_back(std::make_pair(GEP, I.second));
2152 }
2153 }
2154
2155 // The algorithm below doesn't handle cases like nested structs, so use the
2156 // slower fully general method if we have to.
2157 for (auto ComplexCE : ComplexCEs)
2158 CommitValueTo(ComplexCE.second, ComplexCE.first);
2159
2160 for (auto GVPair : GVs) {
2161 assert(GVPair.first->hasInitializer());
2162 GVPair.first->setInitializer(GVPair.second);
2163 }
2164
2165 if (SimpleCEs.empty())
2166 return;
2167
2168 // We cache a single global's initializer elements in the case where the
2169 // subsequent address/val pair uses the same one. This avoids throwing away and
2170 // rebuilding the constant struct/vector/array just because one element is
2171 // modified at a time.
2172 SmallVector<Constant *, 32> Elts;
2173 Elts.reserve(SimpleCEs.size());
2174 GlobalVariable *CurrentGV = nullptr;
2175
2176 auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) {
2177 Constant *Init = GV->getInitializer();
2178 Type *Ty = Init->getType();
2179 if (Update) {
2180 if (CurrentGV) {
2181 assert(CurrentGV && "Expected a GV to commit to!");
2182 Type *CurrentInitTy = CurrentGV->getInitializer()->getType();
2183 // We have a valid cache that needs to be committed.
2184 if (StructType *STy = dyn_cast<StructType>(CurrentInitTy))
2185 CurrentGV->setInitializer(ConstantStruct::get(STy, Elts));
2186 else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy))
2187 CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts));
2188 else
2189 CurrentGV->setInitializer(ConstantVector::get(Elts));
2190 }
2191 if (CurrentGV == GV)
2192 return;
2193 // Need to clear and set up cache for new initializer.
2194 CurrentGV = GV;
2195 Elts.clear();
2196 unsigned NumElts;
2197 if (auto *STy = dyn_cast<StructType>(Ty))
2198 NumElts = STy->getNumElements();
2199 else if (auto *ATy = dyn_cast<ArrayType>(Ty))
2200 NumElts = ATy->getNumElements();
2201 else
2202 NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2203 for (unsigned i = 0, e = NumElts; i != e; ++i)
2204 Elts.push_back(Init->getAggregateElement(i));
2205 }
2206 };
2207
2208 for (auto CEPair : SimpleCEs) {
2209 ConstantExpr *GEP = CEPair.first;
2210 Constant *Val = CEPair.second;
2211
2212 GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0));
2213 commitAndSetupCache(GV, GV != CurrentGV);
2214 ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2));
2215 Elts[CI->getZExtValue()] = Val;
2216 }
2217   // The last initializer in the list needs to be committed explicitly; the
2218   // others are committed when a new initializer is processed.
2219 commitAndSetupCache(CurrentGV, true);
2220 }
2221
2222 /// Evaluate static constructors in the function, if we can. Return true if we
2223 /// can, false otherwise.
2224 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2225 TargetLibraryInfo *TLI) {
2226 // Call the function.
2227 Evaluator Eval(DL, TLI);
2228 Constant *RetValDummy;
2229 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2230 SmallVector<Constant*, 0>());
2231
2232 if (EvalSuccess) {
2233 ++NumCtorsEvaluated;
2234
2235 // We succeeded at evaluation: commit the result.
2236 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2237 << F->getName() << "' to "
2238 << Eval.getMutatedMemory().size() << " stores.\n");
2239 BatchCommitValueTo(Eval.getMutatedMemory());
2240 for (GlobalVariable *GV : Eval.getInvariants())
2241 GV->setConstant(true);
2242 }
2243
2244 return EvalSuccess;
2245 }
2246
2247 static int compareNames(Constant *const *A, Constant *const *B) {
2248 Value *AStripped = (*A)->stripPointerCasts();
2249 Value *BStripped = (*B)->stripPointerCasts();
2250 return AStripped->getName().compare(BStripped->getName());
2251 }
2252
2253 static void setUsedInitializer(GlobalVariable &V,
2254 const SmallPtrSetImpl<GlobalValue *> &Init) {
2255 if (Init.empty()) {
2256 V.eraseFromParent();
2257 return;
2258 }
2259
2260 // Type of pointer to the array of pointers.
2261 PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
2262
2263 SmallVector<Constant *, 8> UsedArray;
2264 for (GlobalValue *GV : Init) {
2265 Constant *Cast
2266 = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
2267 UsedArray.push_back(Cast);
2268 }
2269 // Sort to get deterministic order.
2270 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2271 ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2272
2273 Module *M = V.getParent();
2274 V.removeFromParent();
2275 GlobalVariable *NV =
2276 new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2277 ConstantArray::get(ATy, UsedArray), "");
2278 NV->takeName(&V);
2279 NV->setSection("llvm.metadata");
2280 delete &V;
2281 }
2282
2283 namespace {
2284
2285 /// An easy to access representation of llvm.used and llvm.compiler.used.
2286 class LLVMUsed {
2287 SmallPtrSet<GlobalValue *, 4> Used;
2288 SmallPtrSet<GlobalValue *, 4> CompilerUsed;
2289 GlobalVariable *UsedV;
2290 GlobalVariable *CompilerUsedV;
2291
2292 public:
2293   LLVMUsed(Module &M) {
2294 SmallVector<GlobalValue *, 4> Vec;
2295 UsedV = collectUsedGlobalVariables(M, Vec, false);
2296 Used = {Vec.begin(), Vec.end()};
2297 Vec.clear();
2298 CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
2299 CompilerUsed = {Vec.begin(), Vec.end()};
2300 }
2301
2302 using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
2303 using used_iterator_range = iterator_range<iterator>;
2304
2305   iterator usedBegin() { return Used.begin(); }
2306   iterator usedEnd() { return Used.end(); }
2307
2308   used_iterator_range used() {
2309 return used_iterator_range(usedBegin(), usedEnd());
2310 }
2311
2312   iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2313   iterator compilerUsedEnd() { return CompilerUsed.end(); }
2314
2315   used_iterator_range compilerUsed() {
2316 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2317 }
2318
2319   bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2320
2321   bool compilerUsedCount(GlobalValue *GV) const {
2322 return CompilerUsed.count(GV);
2323 }
2324
2325   bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2326   bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2327   bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2328
2329   bool compilerUsedInsert(GlobalValue *GV) {
2330 return CompilerUsed.insert(GV).second;
2331 }
2332
2333   void syncVariablesAndSets() {
2334 if (UsedV)
2335 setUsedInitializer(*UsedV, Used);
2336 if (CompilerUsedV)
2337 setUsedInitializer(*CompilerUsedV, CompilerUsed);
2338 }
2339 };
2340
2341 } // end anonymous namespace
2342
2343 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2344 if (GA.use_empty()) // No use at all.
2345 return false;
2346
2347 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2348 "We should have removed the duplicated "
2349 "element from llvm.compiler.used");
2350 if (!GA.hasOneUse())
2351 // Strictly more than one use. So at least one is not in llvm.used and
2352 // llvm.compiler.used.
2353 return true;
2354
2355 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2356 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2357 }
2358
2359 static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
2360 const LLVMUsed &U) {
2361 unsigned N = 2;
2362 assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2363 "We should have removed the duplicated "
2364 "element from llvm.compiler.used");
2365 if (U.usedCount(&V) || U.compilerUsedCount(&V))
2366 ++N;
2367 return V.hasNUsesOrMore(N);
2368 }
2369
2370 static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2371 if (!GA.hasLocalLinkage())
2372 return true;
2373
2374 return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2375 }
2376
2377 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2378 bool &RenameTarget) {
2379 RenameTarget = false;
2380 bool Ret = false;
2381 if (hasUseOtherThanLLVMUsed(GA, U))
2382 Ret = true;
2383
2384 // If the alias is externally visible, we may still be able to simplify it.
2385 if (!mayHaveOtherReferences(GA, U))
2386 return Ret;
2387
2388 // If the aliasee has internal linkage, give it the name and linkage
2389 // of the alias, and delete the alias. This turns:
2390 // define internal ... @f(...)
2391 // @a = alias ... @f
2392 // into:
2393 // define ... @a(...)
2394 Constant *Aliasee = GA.getAliasee();
2395 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2396 if (!Target->hasLocalLinkage())
2397 return Ret;
2398
2399 // Do not perform the transform if multiple aliases potentially target the
2400 // aliasee. This check also ensures that it is safe to replace the section
2401 // and other attributes of the aliasee with those of the alias.
2402 if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2403 return Ret;
2404
2405 RenameTarget = true;
2406 return true;
2407 }
2408
2409 static bool
2410 OptimizeGlobalAliases(Module &M,
2411 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2412 bool Changed = false;
2413 LLVMUsed Used(M);
2414
2415 for (GlobalValue *GV : Used.used())
2416 Used.compilerUsedErase(GV);
2417
2418 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2419 I != E;) {
2420 GlobalAlias *J = &*I++;
2421
2422 // Aliases without names cannot be referenced outside this module.
2423 if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
2424 J->setLinkage(GlobalValue::InternalLinkage);
2425
2426 if (deleteIfDead(*J, NotDiscardableComdats)) {
2427 Changed = true;
2428 continue;
2429 }
2430
2431 // If the alias can change at link time, nothing can be done - bail out.
2432 if (J->isInterposable())
2433 continue;
2434
2435 Constant *Aliasee = J->getAliasee();
2436 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2437 // We can't trivially replace the alias with the aliasee if the aliasee is
2438 // non-trivial in some way. We also can't replace the alias with the aliasee
2439 // if the aliasee is interposable because aliases point to the local
2440 // definition.
2441 // TODO: Try to handle non-zero GEPs of local aliasees.
2442 if (!Target || Target->isInterposable())
2443 continue;
2444 Target->removeDeadConstantUsers();
2445
2446 // Make all users of the alias use the aliasee instead.
2447 bool RenameTarget;
2448 if (!hasUsesToReplace(*J, Used, RenameTarget))
2449 continue;
2450
2451 J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
2452 ++NumAliasesResolved;
2453 Changed = true;
2454
2455 if (RenameTarget) {
2456 // Give the aliasee the name, linkage and other attributes of the alias.
2457 Target->takeName(&*J);
2458 Target->setLinkage(J->getLinkage());
2459 Target->setDSOLocal(J->isDSOLocal());
2460 Target->setVisibility(J->getVisibility());
2461 Target->setDLLStorageClass(J->getDLLStorageClass());
2462
2463 if (Used.usedErase(&*J))
2464 Used.usedInsert(Target);
2465
2466 if (Used.compilerUsedErase(&*J))
2467 Used.compilerUsedInsert(Target);
2468 } else if (mayHaveOtherReferences(*J, Used))
2469 continue;
2470
2471 // Delete the alias.
2472 M.getAliasList().erase(J);
2473 ++NumAliasesRemoved;
2474 Changed = true;
2475 }
2476
2477 Used.syncVariablesAndSets();
2478
2479 return Changed;
2480 }
2481
2482 static Function *
2483 FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
2484   // Hack to get a default TLI before we have an actual Function.
2485 auto FuncIter = M.begin();
2486 if (FuncIter == M.end())
2487 return nullptr;
2488 auto *TLI = &GetTLI(*FuncIter);
2489
2490 LibFunc F = LibFunc_cxa_atexit;
2491 if (!TLI->has(F))
2492 return nullptr;
2493
2494 Function *Fn = M.getFunction(TLI->getName(F));
2495 if (!Fn)
2496 return nullptr;
2497
2498 // Now get the actual TLI for Fn.
2499 TLI = &GetTLI(*Fn);
2500
2501 // Make sure that the function has the correct prototype.
2502 if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2503 return nullptr;
2504
2505 return Fn;
2506 }
2507
2508 /// Returns whether the given function is an empty C++ destructor and can
2509 /// therefore be eliminated.
2510 /// Note that we assume that other optimization passes have already simplified
2511 /// the code so we simply check for 'ret'.
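/// For illustration only (hypothetical IR, not from this file): a destructor
/// whose entry block is just
///   entry:
///     ret void
/// (possibly preceded by debug intrinsics) is considered empty here.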
2512 static bool cxxDtorIsEmpty(const Function &Fn) {
2513 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2514 // nounwind, but that doesn't seem worth doing.
2515 if (Fn.isDeclaration())
2516 return false;
2517
2518 for (auto &I : Fn.getEntryBlock()) {
2519 if (isa<DbgInfoIntrinsic>(I))
2520 continue;
2521 if (isa<ReturnInst>(I))
2522 return true;
2523 break;
2524 }
2525 return false;
2526 }
2527
2528 static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2529 /// Itanium C++ ABI p3.3.5:
2530 ///
2531 /// After constructing a global (or local static) object, that will require
2532 /// destruction on exit, a termination function is registered as follows:
2533 ///
2534 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2535 ///
2536 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2537 /// call f(p) when DSO d is unloaded, before all such termination calls
2538 /// registered before this one. It returns zero if registration is
2539 /// successful, nonzero on failure.
2540
2541 // This pass will look for calls to __cxa_atexit where the function is trivial
2542 // and remove them.
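  // For illustration only (hypothetical IR and names, not from this file): a
  // registration such as
  //   call i32 @__cxa_atexit(void (i8*)* @dtor, i8* @obj, i8* @__dso_handle)
  // is deleted below when @dtor is empty in the sense checked above.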
2543 bool Changed = false;
2544
2545 for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
2546 I != E;) {
2547 // We're only interested in calls. Theoretically, we could handle invoke
2548     // instructions as well, but neither llvm-gcc nor clang generates invokes
2549 // to __cxa_atexit.
2550 CallInst *CI = dyn_cast<CallInst>(*I++);
2551 if (!CI)
2552 continue;
2553
2554 Function *DtorFn =
2555 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2556 if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
2557 continue;
2558
2559 // Just remove the call.
2560 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2561 CI->eraseFromParent();
2562
2563 ++NumCXXDtorsRemoved;
2564
2565 Changed |= true;
2566 }
2567
2568 return Changed;
2569 }
2570
2571 static bool optimizeGlobalsInModule(
2572 Module &M, const DataLayout &DL,
2573 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2574 function_ref<TargetTransformInfo &(Function &)> GetTTI,
2575 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2576 function_ref<DominatorTree &(Function &)> LookupDomTree) {
2577 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2578 bool Changed = false;
2579 bool LocalChange = true;
2580 while (LocalChange) {
2581 LocalChange = false;
2582
2583 NotDiscardableComdats.clear();
2584 for (const GlobalVariable &GV : M.globals())
2585 if (const Comdat *C = GV.getComdat())
2586 if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2587 NotDiscardableComdats.insert(C);
2588 for (Function &F : M)
2589 if (const Comdat *C = F.getComdat())
2590 if (!F.isDefTriviallyDead())
2591 NotDiscardableComdats.insert(C);
2592 for (GlobalAlias &GA : M.aliases())
2593 if (const Comdat *C = GA.getComdat())
2594 if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2595 NotDiscardableComdats.insert(C);
2596
2597 // Delete functions that are trivially dead, ccc -> fastcc
2598 LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
2599 NotDiscardableComdats);
2600
2601 // Optimize global_ctors list.
2602 LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
2603 return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
2604 });
2605
2606 // Optimize non-address-taken globals.
2607 LocalChange |=
2608 OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
2609
2610 // Resolve aliases, when possible.
2611 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2612
2613 // Try to remove trivial global destructors if they are not removed
2614 // already.
2615 Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
2616 if (CXAAtExitFn)
2617 LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2618
2619 Changed |= LocalChange;
2620 }
2621
2622 // TODO: Move all global ctors functions to the end of the module for code
2623 // layout.
2624
2625 return Changed;
2626 }
2627
2628 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2629 auto &DL = M.getDataLayout();
2630 auto &FAM =
2631 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2632 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2633 return FAM.getResult<DominatorTreeAnalysis>(F);
2634 };
2635 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2636 return FAM.getResult<TargetLibraryAnalysis>(F);
2637 };
2638 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2639 return FAM.getResult<TargetIRAnalysis>(F);
2640 };
2641
2642 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2643 return FAM.getResult<BlockFrequencyAnalysis>(F);
2644 };
2645
2646 if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
2647 return PreservedAnalyses::all();
2648 return PreservedAnalyses::none();
2649 }
2650
2651 namespace {
2652
2653 struct GlobalOptLegacyPass : public ModulePass {
2654 static char ID; // Pass identification, replacement for typeid
2655
2656   GlobalOptLegacyPass() : ModulePass(ID) {
2657 initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
2658 }
2659
2660   bool runOnModule(Module &M) override {
2661 if (skipModule(M))
2662 return false;
2663
2664 auto &DL = M.getDataLayout();
2665 auto LookupDomTree = [this](Function &F) -> DominatorTree & {
2666 return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2667 };
2668 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
2669 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2670 };
2671 auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
2672 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2673 };
2674
2675 auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
2676 return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2677 };
2678
2679 return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
2680 LookupDomTree);
2681 }
2682
2683   void getAnalysisUsage(AnalysisUsage &AU) const override {
2684 AU.addRequired<TargetLibraryInfoWrapperPass>();
2685 AU.addRequired<TargetTransformInfoWrapperPass>();
2686 AU.addRequired<DominatorTreeWrapperPass>();
2687 AU.addRequired<BlockFrequencyInfoWrapperPass>();
2688 }
2689 };
2690
2691 } // end anonymous namespace
2692
2693 char GlobalOptLegacyPass::ID = 0;
2694
2695 INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
2696 "Global Variable Optimizer", false, false)
2697 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2698 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2699 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
2700 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2701 INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
2702 "Global Variable Optimizer", false, false)
2703
2704 ModulePass *llvm::createGlobalOptimizerPass() {
2705 return new GlobalOptLegacyPass();
2706 }
2707