xref: /llvm-project/bolt/lib/Passes/ShrinkWrapping.cpp (revision fd38366e4525c5507bbb2a2fc1f7d113a964224e)
1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ShrinkWrapping class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Passes/DataflowInfoManager.h"
15 #include "bolt/Passes/MCF.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include <numeric>
18 #include <optional>
19 #include <stack>
20 
21 #define DEBUG_TYPE "shrinkwrapping"
22 
23 using namespace llvm;
24 
namespace opts {

// Declared in other BOLT translation units; shared across optimization passes.
extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

// A block is a profitable target for a relocated spill when its execution
// count is below this percentage of the prologue's execution count.
static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts
37 
38 namespace llvm {
39 namespace bolt {
40 
analyzeSaves()41 void CalleeSavedAnalysis::analyzeSaves() {
42   ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
43   StackReachingUses &SRU = Info.getStackReachingUses();
44   auto &InsnToBB = Info.getInsnToBBMap();
45   BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);
46 
47   LLVM_DEBUG(dbgs() << "Checking spill locations\n");
48   for (BinaryBasicBlock &BB : BF) {
49     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
50     const MCInst *Prev = nullptr;
51     for (MCInst &Inst : BB) {
52       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
53         // Blacklist weird stores we don't understand
54         if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
55             FIE->IsStoreFromReg) {
56           BlacklistedRegs.set(FIE->RegOrImm);
57           CalleeSaved.reset(FIE->RegOrImm);
58           Prev = &Inst;
59           continue;
60         }
61 
62         if (!FIE->IsStore || !FIE->IsStoreFromReg ||
63             BlacklistedRegs[FIE->RegOrImm]) {
64           Prev = &Inst;
65           continue;
66         }
67 
68         // If this reg is defined locally, it is not a callee-saved reg
69         if (RD.isReachedBy(FIE->RegOrImm,
70                            Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
71           BlacklistedRegs.set(FIE->RegOrImm);
72           CalleeSaved.reset(FIE->RegOrImm);
73           Prev = &Inst;
74           continue;
75         }
76 
77         // If this stack position is accessed in another function, we are
78         // probably dealing with a parameter passed in a stack -- do not mess
79         // with it
80         if (SRU.isStoreUsed(*FIE,
81                             Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)),
82             /*IncludeLocalAccesses=*/false) {
83           BlacklistedRegs.set(FIE->RegOrImm);
84           CalleeSaved.reset(FIE->RegOrImm);
85           Prev = &Inst;
86           continue;
87         }
88 
89         // If this stack position is loaded elsewhere in another reg, we can't
90         // update it, so blacklist it.
91         if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
92                                                   : SRU.expr_begin(BB))) {
93           BlacklistedRegs.set(FIE->RegOrImm);
94           CalleeSaved.reset(FIE->RegOrImm);
95           Prev = &Inst;
96           continue;
97         }
98 
99         // Ignore regs with multiple saves
100         if (CalleeSaved[FIE->RegOrImm]) {
101           BlacklistedRegs.set(FIE->RegOrImm);
102           CalleeSaved.reset(FIE->RegOrImm);
103           Prev = &Inst;
104           continue;
105         }
106 
107         CalleeSaved.set(FIE->RegOrImm);
108         SaveFIEByReg[FIE->RegOrImm] = &*FIE;
109         SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
110         BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
111         OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
112         LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
113                           << FIE->RegOrImm << "\n");
114       }
115       Prev = &Inst;
116     }
117   }
118 }
119 
/// Identify the restore (reload) instructions matching the callee-saved
/// register saves found by analyzeSaves(). Regs whose restores look wrong
/// (used locally afterwards, non-simple load, positive offset, or mismatched
/// stack offset) are dropped from the CalleeSaved set.
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    // Walking the block in reverse, Prev is the instruction that *follows*
    // Inst in program order, so reaching-uses queries start right after Inst.
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Only loads of regs previously classified as callee-saved matter.
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between saves/store don't agree with each other,
        // we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        // Keep only the first restore FIE seen per register.
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}
168 
getSavesByReg(uint16_t Reg)169 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
170   std::vector<MCInst *> Results;
171   for (BinaryBasicBlock &BB : BF)
172     for (MCInst &Inst : BB)
173       if (getSavedReg(Inst) == Reg)
174         Results.push_back(&Inst);
175   return Results;
176 }
177 
getRestoresByReg(uint16_t Reg)178 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
179   std::vector<MCInst *> Results;
180   for (BinaryBasicBlock &BB : BF)
181     for (MCInst &Inst : BB)
182       if (getRestoredReg(Inst) == Reg)
183         Results.push_back(&Inst);
184   return Results;
185 }
186 
~CalleeSavedAnalysis()187 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
188   for (BinaryBasicBlock &BB : BF) {
189     for (MCInst &Inst : BB) {
190       BC.MIB->removeAnnotation(Inst, getSaveTag());
191       BC.MIB->removeAnnotation(Inst, getRestoreTag());
192     }
193   }
194 }
195 
blacklistRegion(int64_t Offset,int64_t Size)196 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
197   if (BlacklistedRegions[Offset] < Size)
198     BlacklistedRegions[Offset] = Size;
199 }
200 
isRegionBlacklisted(int64_t Offset,int64_t Size)201 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
202   for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
203     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
204       return true;
205   return false;
206 }
207 
blacklistAllInConflictWith(int64_t Offset,int64_t Size)208 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
209                                                      int64_t Size) {
210   bool HasConflict = false;
211   for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
212     std::pair<const int64_t, int64_t> &Elem = *Iter;
213     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
214         (Offset != Elem.first || Size != Elem.second)) {
215       Iter = AvailableRegions.erase(Iter);
216       HasConflict = true;
217       continue;
218     }
219     ++Iter;
220   }
221   if (HasConflict) {
222     blacklistRegion(Offset, Size);
223     return true;
224   }
225   return false;
226 }
227 
checkFramePointerInitialization(MCInst & Point)228 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
229   StackPointerTracking &SPT = Info.getStackPointerTracking();
230   if (!BC.MII->get(Point.getOpcode())
231            .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
232     return;
233 
234   int SPVal, FPVal;
235   std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
236   std::pair<MCPhysReg, int64_t> FP;
237 
238   if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
239     FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
240   else
241     FP = std::make_pair(0, 0);
242   std::pair<MCPhysReg, int64_t> SP;
243 
244   if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
245     SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
246   else
247     SP = std::make_pair(0, 0);
248 
249   int64_t Output;
250   if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
251     return;
252 
253   // Not your regular frame pointer initialization... bail
254   if (Output != SPVal)
255     blacklistRegion(0, 0);
256 }
257 
checkStackPointerRestore(MCInst & Point)258 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
259   StackPointerTracking &SPT = Info.getStackPointerTracking();
260   if (!BC.MII->get(Point.getOpcode())
261            .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
262     return;
263   // Check if the definition of SP comes from FP -- in this case, this
264   // value may need to be updated depending on our stack layout changes
265   bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
266     return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
267   });
268   if (!UsesFP)
269     return;
270 
271   // Setting up evaluation
272   int SPVal, FPVal;
273   std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
274   std::pair<MCPhysReg, int64_t> FP;
275 
276   if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
277     FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
278   else
279     FP = std::make_pair(0, 0);
280   std::pair<MCPhysReg, int64_t> SP;
281 
282   if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
283     SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
284   else
285     SP = std::make_pair(0, 0);
286 
287   int64_t Output;
288   if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
289     return;
290 
291   // If the value is the same of FP, no need to adjust it
292   if (Output == FPVal)
293     return;
294 
295   // If an allocation happened through FP, bail
296   if (Output <= SPVal) {
297     blacklistRegion(0, 0);
298     return;
299   }
300 
301   // We are restoring SP to an old value based on FP. Mark it as a stack
302   // access to be fixed later.
303   BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
304 }
305 
/// Scan every instruction, classifying each stack access as either available
/// for relocation (recorded in AvailableRegions and annotated with the slot
/// tag) or blacklisted (non-simple, non-local, or conflicting accesses).
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    // Reverse iteration: Prev is the instruction *after* Inst in program
    // order, so reaching-uses queries start just past Inst.
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      // FP initializations and SP restores are classified as a side effect.
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      // Accesses we cannot model precisely poison their region.
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed in a stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if its blacklisted or if
      // it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as available stack slot which we know how
      // to move it.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}
353 
/// Walk CFIs in layout order, tracking the current CFA (register, offset)
/// state, and annotate SP/FP-relative CFI offsets with the slot tag so they
/// can be adjusted when the stack layout changes.
void StackLayoutModifier::classifyCFIs() {
  // Saves/restores the (CfaOffset, CfaReg) state across
  // OpRememberState/OpRestoreState pairs.
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  // Entry-state defaults. NOTE(review): offset -8 and DWARF reg 7 match the
  // x86-64 entry CFA (RSP + 8); confirm before enabling on other targets.
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  // Tag CFIs whose CFA register is SP or FP; any other base register makes
  // the function too complex for this pass (IsSimple = false).
  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        // DefCfa sets both offset and register; fall through to update reg.
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        // Also remember which register the OpOffset describes.
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}
421 
scheduleChange(MCInst & Inst,StackLayoutModifier::WorklistItem Item)422 void StackLayoutModifier::scheduleChange(
423     MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
424   auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
425       Inst, getTodoTag(), AllocatorId);
426   WList.push_back(Item);
427 }
428 
canCollapseRegion(MCInst * DeletedPush)429 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
430   if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
431     return false;
432 
433   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
434   if (!FIE)
435     return false;
436 
437   return canCollapseRegion(FIE->StackOffset);
438 }
439 
canCollapseRegion(int64_t RegionAddr)440 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
441   if (!IsInitialized)
442     initialize();
443   if (!IsSimple)
444     return false;
445 
446   if (CollapsedRegions.count(RegionAddr))
447     return true;
448 
449   // Check if it is possible to readjust all accesses below RegionAddr
450   if (!BlacklistedRegions.empty())
451     return false;
452 
453   return true;
454 }
455 
collapseRegion(MCInst * DeletedPush)456 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
457   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
458   if (!FIE)
459     return false;
460   int64_t RegionAddr = FIE->StackOffset;
461   int64_t RegionSz = FIE->Size;
462   return collapseRegion(DeletedPush, RegionAddr, RegionSz);
463 }
464 
/// Collapse the stack region [RegionAddr, RegionAddr+RegionSz) freed by
/// deleting the allocation \p Alloc: schedule offset adjustments for every
/// stack access and CFI affected by the deletion.
bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        // CFIs above the deleted region are unaffected.
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      // Accesses to the deleted slot itself get a marker; their final offset
      // is decided later by setOffsetForCollapsedAccesses().
      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      // Pushes/pops compute their address implicitly; nothing to rewrite.
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      // SP-based accesses below the deleted region keep their SP distance.
      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      // FP-based accesses above the deleted region keep their FP distance.
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}
523 
setOffsetForCollapsedAccesses(int64_t NewOffset)524 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
525   for (BinaryBasicBlock &BB : BF) {
526     for (MCInst &Inst : BB) {
527       if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
528         continue;
529       BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
530       scheduleChange(
531           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
532     }
533   }
534 }
535 
canInsertRegion(ProgramPoint P)536 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
537   if (!IsInitialized)
538     initialize();
539   if (!IsSimple)
540     return false;
541 
542   StackPointerTracking &SPT = Info.getStackPointerTracking();
543   int64_t RegionAddr = SPT.getStateBefore(P)->first;
544   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
545     return false;
546 
547   if (InsertedRegions.count(RegionAddr))
548     return true;
549 
550   // Check if we are going to screw up stack accesses at call sites that
551   // pass parameters via stack
552   if (!BlacklistedRegions.empty())
553     return false;
554 
555   return true;
556 }
557 
/// Insert a new stack region of \p RegionSz bytes at program point \p P:
/// schedule negative offset adjustments for every stack access and CFI
/// dominated by \p P that the new allocation displaces.
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      // Only accesses reached after the insertion point are affected.
      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        // CFIs at or above the insertion address are unaffected.
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        // SP update based on frame pointer.
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      // SP-based accesses below the new region keep their SP distance.
      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      // FP-based accesses at or above the new region keep their FP distance.
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      // Pushes/pops compute their address implicitly; nothing to rewrite.
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}
614 
/// Apply all adjustments queued by scheduleChange(): rewrite CFI offsets and
/// recreate stack loads/stores with their updated offsets.
void StackLayoutModifier::performChanges() {
  // One CFI entry may be referenced by several instructions; mutate each
  // underlying entry only once.
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    // Reverse iteration so rewriting an instruction does not disturb the
    // ones still to be visited.
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      // Fold all scheduled adjustments into a single delta; mixing CFI and
      // load/store adjustments at one program point is a logic error.
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        // DefCfa/DefCfaOffset encode the offset with the opposite sign (see
        // classifyCFIs, which negates getOffset() for these ops).
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      // Decode the memory access so we can recreate it with a new offset.
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      // SP-relative and FP-relative offsets move in opposite directions.
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
                                       StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
                                  Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
    }
  }
}
694 
/// Run the one-time classification passes. Invoked lazily by the
/// canCollapseRegion/canInsertRegion entry points.
void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}
700 
// Pass-wide statistics accumulated across all processed functions; atomic
// because they may be updated concurrently.
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

// Shorthand for an instruction iterator within a basic block.
using BBIterTy = BinaryBasicBlock::iterator;
709 
/// Build UsesByReg: for each callee-saved register R, a bitvector (indexed
/// by the dominator analysis' instruction numbering) of the instructions
/// that use R, excluding R's own save/restore instructions.
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        // Reg 0 is the invalid/no-register sentinel.
        if (I == 0)
          continue;
        // The save/restore instructions themselves don't count as uses.
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      // When the function maintains a frame pointer, treat every call as an
      // implicit use of FP and its aliases. NOTE(review): presumably because
      // FP must stay valid across the call -- confirm.
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}
739 
pruneUnwantedCSRs()740 void ShrinkWrapping::pruneUnwantedCSRs() {
741   BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
742   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
743     if (!CSA.CalleeSaved[I])
744       continue;
745     if (ParamRegs[I]) {
746       CSA.CalleeSaved.reset(I);
747       continue;
748     }
749     if (UsesByReg[I].empty()) {
750       LLVM_DEBUG(
751           dbgs()
752           << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
753           << "\n");
754       CSA.CalleeSaved.reset(I);
755       continue;
756     }
757     if (!CSA.HasRestores[I]) {
758       LLVM_DEBUG(
759           dbgs() << "Dismissing Callee-Saved Reg because it does not have "
760                     "restores:"
761                  << I << "\n");
762       CSA.CalleeSaved.reset(I);
763     }
764   }
765 }
766 
/// For each callee-saved register, compute candidate basic blocks where its
/// save could be relocated: blocks outside loops, with known stack state,
/// whose first instruction dominates every use of the register. Candidates
/// are then sorted hottest-first and their execution counts recorded in
/// BestSaveCount.
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    // A save would be placed at the block's first instruction.
    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      // The block is a valid save position only if its entry dominates
      // every recorded use of reg I.
      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    // Hottest candidate first; stable sort keeps the original discovery
    // order among equally-hot blocks.
    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    // BestSaveCount[I][K] is the execution count of BestSavePos[I][K].
    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}
844 
computeDomOrder()845 void ShrinkWrapping::computeDomOrder() {
846   DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
847   std::vector<MCPhysReg> Order;
848   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
849     Order.push_back(I);
850   }
851 
852   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
853   auto &InsnToBB = Info.getInsnToBBMap();
854   llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
855     BinaryBasicBlock *BBA =
856         BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
857     BinaryBasicBlock *BBB =
858         BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
859     if (BBA == BBB)
860       return A < B;
861     if (!BBA && BBB)
862       return false;
863     if (BBA && !BBB)
864       return true;
865     if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
866       return true;
867     if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
868       return false;
869     return A < B;
870   });
871 
872   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
873     DomOrder[Order[I]] = I;
874 }
875 
isBestSavePosCold(unsigned CSR,MCInst * & BestPosSave,uint64_t & TotalEstimatedWin)876 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
877                                        uint64_t &TotalEstimatedWin) {
878   const uint64_t CurSavingCost = CSA.SavingCost[CSR];
879   if (!CSA.CalleeSaved[CSR])
880     return false;
881 
882   assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
883          "save position vectors out of sync");
884   if (BestSaveCount[CSR].empty())
885     return false;
886 
887   const uint64_t BestCount = BestSaveCount[CSR].back();
888   BestPosSave = BestSavePos[CSR].back();
889   if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
890     return false;
891 
892   LLVM_DEBUG({
893     auto &InsnToBB = Info.getInsnToBBMap();
894     dbgs() << "Better position for saves found in func " << BF.getPrintName()
895            << " count << " << BF.getKnownExecutionCount() << "\n";
896     dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
897            << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
898   });
899 
900   TotalEstimatedWin = CurSavingCost - BestCount;
901   return true;
902 }
903 
/// Auxiliary function used to create basic blocks for critical edges and update
/// the dominance frontier with these new locations.
///
/// \p Frontier is updated in place: the first split destination of entry I
/// overwrites Frontier[I]; additional destinations append new frontier points.
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating with blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates instructions
      // and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        // The nop is only a placeholder; schedule its removal for the
        // rewriting phase.
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier inplace
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}
956 
/// Compute the restore points for a save placed at \p BestPosSave: the
/// dominance frontier of the save, with critical edges split so restores can
/// live on the edge. Returns an empty vector when placement is not possible
/// (frontier terminator uses the CSR, or an invoke sits at the frontier).
/// Note: \p TotalEstimatedWin is not consulted here.
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull these
  // restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    // A frontier terminator that itself uses the CSR leaves no room for a
    // restore before it -- abort (empty result signals failure).
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which indicates
    // the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    // Record successors not dominated by the save as split destinations;
    // a dominated successor marks this frontier point as having a critical
    // edge that must be split.
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}
1044 
validatePushPopsMode(unsigned CSR,MCInst * BestPosSave,int64_t SaveOffset)1045 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
1046                                           int64_t SaveOffset) {
1047   if (FA.requiresAlignment(BF)) {
1048     LLVM_DEBUG({
1049       dbgs() << "Reg " << CSR
1050              << " is not using push/pops due to function "
1051                 "alignment requirements.\n";
1052     });
1053     return false;
1054   }
1055   if (FA.hasStackArithmetic(BF)) {
1056     LLVM_DEBUG({
1057       dbgs() << "Reg " << CSR
1058              << " is not using push/pops due to function "
1059                 "taking the address of a stack position.\n";
1060     });
1061     return false;
1062   }
1063   for (MCInst *Save : CSA.getSavesByReg(CSR)) {
1064     if (!SLM.canCollapseRegion(Save)) {
1065       LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
1066       return false;
1067     }
1068   }
1069   // Abort if one of the restores for this CSR is not a POP.
1070   for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
1071     if (!BC.MIB->isPop(*Load)) {
1072       LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
1073       return false;
1074     }
1075   }
1076 
1077   StackPointerTracking &SPT = Info.getStackPointerTracking();
1078   // Abort if we are inserting a push into an entry BB (offset -8) and this
1079   // func sets up a frame pointer.
1080   if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
1081       SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
1082     LLVM_DEBUG({
1083       dbgs() << "Reg " << CSR
1084              << " cannot insert region or we are "
1085                 "trying to insert a push into entry bb.\n";
1086     });
1087     return false;
1088   }
1089   return true;
1090 }
1091 
/// Adjust each restore point so a POP can be emitted there: the stack pointer
/// must sit at \p SaveOffset and no use of \p CSR may be reachable from the
/// chosen point. Returns the adjusted points, or an empty vector if any point
/// cannot be fixed (caller then reverts to load/store mode).
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    // First try the very end of the block: SP offset must match and no use
    // of CSR may be reachable from there (checked via backward reaching
    // instructions intersected with the CSR's use set).
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    // Otherwise scan the block backwards for the latest instruction before
    // which the SP offset matches and no CSR use is reachable.
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    // One unfixable point invalidates push/pop mode for this CSR entirely.
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}
1134 
/// Schedule removal (push/pop mode) or conversion to SP adjustments
/// (load/store mode) of all existing saves/restores of \p CSR, together with
/// their associated CFI instructions. Immediates of deleted push/pop CFIs are
/// recorded in DeletedPushCFIs/DeletedPopCFIs so they can be replayed later.
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    // CFIs seen (walking backwards) since the last non-CFI instruction; they
    // belong to the next save/restore encountered below.
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      // Not a save/restore of this CSR: the buffered CFIs belong to some
      // other instruction, drop them.
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      // Push/pop mode erases the old save/restore outright; load/store mode
      // turns it into a plain stack-pointer adjustment.
      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}
1193 
doesInstUsesCSR(const MCInst & Inst,uint16_t CSR)1194 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
1195   if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
1196       CSA.getRestoredReg(Inst) == CSR)
1197     return false;
1198   BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
1199   BC.MIB->getTouchedRegs(Inst, BV);
1200   return BV[CSR];
1201 }
1202 
/// Schedule insertion of the new save at \p BestPosSave and of restores at
/// each point in \p RestorePoints, either as push/pop or as load/store
/// depending on \p UsePushPops. Restore points are nudged before terminators
/// and away from prefix-preceded or CSR-using instructions.
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    // The restore must execute before any terminator; hoist the insertion
    // point to the first terminator preceding PP, if there is one.
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    // An instruction preceded by a prefix cannot be separated from it, so we
    // must not insert between the prefix and the instruction.
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    // If inserting right before PP is unsafe (PP uses the CSR, or sits after
    // a prefix), insert at the end of the containing block instead.
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}
1258 
/// Driver for moving callee-saved register spills: for every CSR with a
/// profitable colder save position, schedule removal of its old
/// saves/restores and insertion of new ones. Falls back from push/pop to
/// load/store mode function-wide if any CSR cannot use push/pops, and
/// finally updates the stack layout and statistics.
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    // Try candidate save positions from coldest upwards until restore
    // placement succeeds; doRestorePlacement returns empty on failure.
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        // Discard the failed candidate and recompute the dominance order,
        // which depends on each register's current best position.
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    // Downgrade every scheduled push/pop insertion to load/store, and turn
    // scheduled erasures of non-CFI instructions into SP adjustments, both
    // in the per-BB worklists and in the per-instruction annotations.
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}
1379 
1380 namespace {
1381 /// Helper function to identify whether two basic blocks created by splitting
1382 /// a critical edge have the same contents.
isIdenticalSplitEdgeBB(const BinaryContext & BC,const BinaryBasicBlock & A,const BinaryBasicBlock & B)1383 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
1384                             const BinaryBasicBlock &B) {
1385   if (A.succ_size() != B.succ_size())
1386     return false;
1387   if (A.succ_size() != 1)
1388     return false;
1389 
1390   if (*A.succ_begin() != *B.succ_begin())
1391     return false;
1392 
1393   if (A.size() != B.size())
1394     return false;
1395 
1396   // Compare instructions
1397   auto I = A.begin(), E = A.end();
1398   auto OtherI = B.begin(), OtherE = B.end();
1399   while (I != E && OtherI != OtherE) {
1400     if (I->getOpcode() != OtherI->getOpcode())
1401       return false;
1402     if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
1403           return true;
1404         }))
1405       return false;
1406     ++I;
1407     ++OtherI;
1408   }
1409   return true;
1410 }
1411 } // namespace
1412 
foldIdenticalSplitEdges()1413 bool ShrinkWrapping::foldIdenticalSplitEdges() {
1414   bool Changed = false;
1415   for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
1416     BinaryBasicBlock &BB = *Iter;
1417     if (!BB.getName().starts_with(".LSplitEdge"))
1418       continue;
1419     for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
1420       if (&RBB == &BB)
1421         break;
1422       if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() ||
1423           !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
1424         continue;
1425       assert(RBB.pred_size() == 1 && "Invalid split edge BB");
1426       BinaryBasicBlock *Pred = *RBB.pred_begin();
1427       uint64_t OrigCount = Pred->branch_info_begin()->Count;
1428       uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
1429       BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
1430       Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
1431       Changed = true;
1432       // Remove the block from CFG
1433       RBB.markValid(false);
1434     }
1435   }
1436 
1437   return Changed;
1438 }
1439 
1440 namespace {
1441 
// A special StackPointerTracking that compensates for our future plans
// in removing/adding insn.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  // Per-BB worklists of scheduled changes (ShrinkWrapping::Todo).
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  /// Adjust the SP-offset component of \p Res as if the scheduled changes in
  /// \p TodoItems had already been applied at \p Point.
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      // Erasing a push/pop cancels the SP displacement the base analysis
      // already accounted for.
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      // A push (store) to be inserted moves SP down; a pop (load) moves it
      // back up.
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  /// Dataflow transfer function: the base SPT result plus compensation for
  /// changes annotated on \p Point and, when \p Point is the last instruction
  /// of its block, for the block-level worklist.
  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    // Unknown stack state: nothing to compensate.
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    // Block-level items only apply at the block's last instruction.
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};
1521 
1522 } // end anonymous namespace
1523 
/// Insert CFI instructions for \p CSR around the region affected by the moved
/// save: find the (new) store of CSR acting as the save point, then walk the
/// layout and emit save/restore CFIs wherever execution crosses into or out of
/// the area dominated by that save point. \p SPValPush / \p SPValPop are the
/// SP values to encode at the push and pop sites, respectively.
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  // Locate the (simple) store of CSR to the stack -- that is the new save.
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  // Walk blocks in layout order, tracking whether each point is dominated by
  // the save ("affected zone") and emitting CFIs at every transition.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      // Zone changed within the block: emit CFIs right after the transition
      // instruction (insertCFIsForPushOrPop returns a fresh iterator since
      // insertion invalidates the old one).
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      // Zone changed across a layout edge: emit the CFIs at the end of the
      // previous block.
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}
1595 
/// Rebuild all SP-based CFA-offset CFIs for the function after shrink
/// wrapping changed where the stack pointer moves. Works in three passes:
/// mark all existing OpDefCfaOffset CFIs for deletion, emit fresh ones
/// wherever stack-pointer tracking shows the SP value changing, then erase
/// the marked ones.
void ShrinkWrapping::rebuildCFIForSP() {
  // Pass 1: tag every existing CFA-offset CFI with a "DeleteMe" annotation.
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  // Initial SP value at function entry. NOTE(review): -8 assumes the return
  // address has been pushed on entry (x86-64-style) — confirm for other
  // targets.
  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Pass 2: walk layout order, emitting a cfiDefCfaOffset after every
  // instruction that changes the tracked SP value, plus fix-ups at block
  // boundaries and at the hot-cold split where fall-through state does not
  // carry over.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        // Insert after the SP-changing instruction; addCFIInstruction
        // returns the iterator to resume from.
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      // New fragment: state must be re-established at its first instruction.
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      // NOTE(review): PrevBB is dereferenced here; this relies on the first
      // block's SPValAtBegin always equaling the initial -8 — confirm.
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  // Pass 3: erase every CFI marked in pass 1.
  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}
1646 
createStackAccess(int SPVal,int FPVal,const FrameIndexEntry & FIE,bool CreatePushOrPop)1647 Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
1648                                                    const FrameIndexEntry &FIE,
1649                                                    bool CreatePushOrPop) {
1650   MCInst NewInst;
1651   if (SPVal != StackPointerTracking::SUPERPOSITION &&
1652       SPVal != StackPointerTracking::EMPTY) {
1653     if (FIE.IsLoad) {
1654       BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
1655                                      FIE.StackOffset - SPVal, FIE.RegOrImm,
1656                                      FIE.Size);
1657     } else {
1658       BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
1659                                 FIE.StackOffset - SPVal, FIE.RegOrImm,
1660                                 FIE.Size);
1661     }
1662     if (CreatePushOrPop)
1663       BC.MIB->changeToPushOrPop(NewInst);
1664     return NewInst;
1665   }
1666   assert(FPVal != StackPointerTracking::SUPERPOSITION &&
1667          FPVal != StackPointerTracking::EMPTY);
1668 
1669   if (FIE.IsLoad) {
1670     BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
1671                                    FIE.StackOffset - FPVal, FIE.RegOrImm,
1672                                    FIE.Size);
1673   } else {
1674     BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
1675                               FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
1676   }
1677   return NewInst;
1678 }
1679 
updateCFIInstOffset(MCInst & Inst,int64_t NewOffset)1680 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
1681   const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1682   if (UpdatedCFIs.count(CFI))
1683     return;
1684 
1685   switch (CFI->getOperation()) {
1686   case MCCFIInstruction::OpDefCfa:
1687   case MCCFIInstruction::OpDefCfaRegister:
1688   case MCCFIInstruction::OpDefCfaOffset:
1689     CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
1690     break;
1691   case MCCFIInstruction::OpOffset:
1692   default:
1693     break;
1694   }
1695 
1696   UpdatedCFIs.insert(CFI);
1697 }
1698 
insertCFIsForPushOrPop(BinaryBasicBlock & BB,BBIterTy Pos,unsigned Reg,bool isPush,int Sz,int64_t NewOffset)1699 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
1700                                                 BBIterTy Pos, unsigned Reg,
1701                                                 bool isPush, int Sz,
1702                                                 int64_t NewOffset) {
1703   if (isPush) {
1704     for (uint32_t Idx : DeletedPushCFIs[Reg]) {
1705       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1706       updateCFIInstOffset(*Pos++, NewOffset);
1707     }
1708     if (HasDeletedOffsetCFIs[Reg]) {
1709       Pos = BF.addCFIInstruction(
1710           &BB, Pos,
1711           MCCFIInstruction::createOffset(
1712               nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
1713       ++Pos;
1714     }
1715   } else {
1716     for (uint32_t Idx : DeletedPopCFIs[Reg]) {
1717       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1718       updateCFIInstOffset(*Pos++, NewOffset);
1719     }
1720     if (HasDeletedOffsetCFIs[Reg]) {
1721       Pos = BF.addCFIInstruction(
1722           &BB, Pos,
1723           MCCFIInstruction::createSameValue(
1724               nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
1725       ++Pos;
1726     }
1727   }
1728   return Pos;
1729 }
1730 
processInsertion(BBIterTy InsertionPoint,BinaryBasicBlock * CurBB,const WorklistItem & Item,int64_t SPVal,int64_t FPVal)1731 Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
1732                                                     BinaryBasicBlock *CurBB,
1733                                                     const WorklistItem &Item,
1734                                                     int64_t SPVal,
1735                                                     int64_t FPVal) {
1736   // Trigger CFI reconstruction for this CSR if necessary - writing to
1737   // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
1738   if ((Item.FIEToInsert.IsStore &&
1739        !DeletedPushCFIs[Item.AffectedReg].empty()) ||
1740       (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
1741       HasDeletedOffsetCFIs[Item.AffectedReg]) {
1742     if (Item.Action == WorklistItem::InsertPushOrPop) {
1743       if (Item.FIEToInsert.IsStore)
1744         PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
1745       else
1746         PopOffsetByReg[Item.AffectedReg] = SPVal;
1747     } else {
1748       if (Item.FIEToInsert.IsStore)
1749         PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1750       else
1751         PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1752     }
1753   }
1754 
1755   LLVM_DEBUG({
1756     dbgs() << "Creating stack access with SPVal = " << SPVal
1757            << "; stack offset = " << Item.FIEToInsert.StackOffset
1758            << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
1759            << "\n";
1760   });
1761   Expected<MCInst> NewInstOrErr =
1762       createStackAccess(SPVal, FPVal, Item.FIEToInsert,
1763                         Item.Action == WorklistItem::InsertPushOrPop);
1764   if (auto E = NewInstOrErr.takeError())
1765     return Error(std::move(E));
1766   MCInst &NewInst = *NewInstOrErr;
1767   if (InsertionPoint != CurBB->end()) {
1768     LLVM_DEBUG({
1769       dbgs() << "Adding before Inst: ";
1770       InsertionPoint->dump();
1771       dbgs() << "the following inst: ";
1772       NewInst.dump();
1773     });
1774     BBIterTy Iter =
1775         CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
1776     return ++Iter;
1777   }
1778   CurBB->addInstruction(std::move(NewInst));
1779   LLVM_DEBUG(dbgs() << "Adding to BB!\n");
1780   return CurBB->end();
1781 }
1782 
/// Emit all insertion items in \p TodoList at \p InsertionPoint of \p CurBB,
/// given the SP/FP values at that point, keeping SPVal consistent as
/// pushes/pops are materialized. Returns the iterator just past the last
/// inserted instruction (unchanged if the list had no insertions).
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  // We need at least one known base register value to address the slots.
  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location, we want SP Value before
  // insertions
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      // Undo the SP delta that predictive tracking already attributed to
      // this pending push (store) or pop (load).
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  // (non-pop items sort before pops; among pops, the register with the
  // higher DomOrder is emitted first).
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    // Track the SP value past each emitted push/pop so subsequent items in
    // this list use the correct base value.
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}
1846 
/// Materialize every insertion recorded by the pass: per-instruction todo
/// lists (attached as annotations) and per-block lists in the Todo map
/// (insertions at block ends). Returns true if anything was inserted.
Expected<bool> ShrinkWrapping::processInsertions() {
  // Predict SP/FP values as if the pending insertions were already applied,
  // so the new stack accesses are given correct offsets.
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      // State at the program point just before I: for the first instruction
      // use the block itself as the program point, otherwise the previous
      // instruction.
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      // Resume iteration past the newly inserted instructions.
      I = *IterOrErr;
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      if (auto E = processInsertionsList(BB.end(), &BB, WRI->second,
                                         SPTState.first, SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}
1889 
/// Apply all Erase/ChangeToAdjustment work items: moved saves/restores are
/// removed, or rewritten in place as plain SP adjustments when only the
/// stack-pointer displacement of the original push/pop must be preserved.
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is flag reg alive across this func?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          // Rewrite the push/pop in place as an SP adjustment of the same
          // size, avoiding flag-clobbering encodings when flags are live.
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        // eraseInstruction returns the next position; step back so the
        // enclosing ++II lands on it, and stop scanning this (now dead)
        // instruction's todo list.
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}
1928 
rebuildCFI()1929 void ShrinkWrapping::rebuildCFI() {
1930   const bool FP = Info.getStackPointerTracking().HasFramePointer;
1931   Info.invalidateAll();
1932   if (!FP) {
1933     rebuildCFIForSP();
1934     Info.invalidateAll();
1935   }
1936   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1937     if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
1938       continue;
1939     const int64_t SPValPush = PushOffsetByReg[I];
1940     const int64_t SPValPop = PopOffsetByReg[I];
1941     insertUpdatedCFI(I, SPValPush, SPValPop);
1942     Info.invalidateAll();
1943   }
1944 }
1945 
/// Run the shrink-wrapping optimization on this function. When \p HotOnly is
/// set, cold functions (below the hot threshold) are skipped. Returns true
/// if the function was modified.
Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Dynamic counts: each instruction weighted by its block's profile.
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  // Bail out early when prerequisites for the transformation are not met.
  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could regress
    // dcache misses). Moreover, ambiguous JTs are rare and coming from code
    // written in assembly language. Just bail.
    return false;
  }
  // Analysis phase: compute callee-saved registers, their uses, and the best
  // (coldest dominating) locations for their saves/restores.
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  // Transformation phase: apply stack-layout changes, then insertions and
  // deletions scheduled by the analysis above.
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}
2022 
printStats(BinaryContext & BC)2023 void ShrinkWrapping::printStats(BinaryContext &BC) {
2024   BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
2025             << " spills inserting load/stores and " << SpillsMovedPushPopMode
2026             << " spills inserting push/pops\n";
2027   if (!InstrDynamicCount || !StoreDynamicCount)
2028     return;
2029   BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
2030             << " store executions ("
2031             << format("%.1lf%%",
2032                       (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
2033             << " total instructions executed, "
2034             << format("%.1lf%%",
2035                       (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
2036             << " store instructions)\n";
2037   BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
2038             << SpillsFailedDynamicCount << " store executions ("
2039             << format("%.1lf%%",
2040                       (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
2041             << " total instructions executed, "
2042             << format("%.1lf%%",
2043                       (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
2044             << " store instructions)\n";
2045 }
2046 
2047 // Operators necessary as a result of using MCAnnotation
operator <<(raw_ostream & OS,const std::vector<ShrinkWrapping::WorklistItem> & Vec)2048 raw_ostream &operator<<(raw_ostream &OS,
2049                         const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
2050   OS << "SWTodo[";
2051   const char *Sep = "";
2052   for (const ShrinkWrapping::WorklistItem &Item : Vec) {
2053     OS << Sep;
2054     switch (Item.Action) {
2055     case ShrinkWrapping::WorklistItem::Erase:
2056       OS << "Erase";
2057       break;
2058     case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
2059       OS << "ChangeToAdjustment";
2060       break;
2061     case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
2062       OS << "InsertLoadOrStore";
2063       break;
2064     case ShrinkWrapping::WorklistItem::InsertPushOrPop:
2065       OS << "InsertPushOrPop";
2066       break;
2067     }
2068     Sep = ", ";
2069   }
2070   OS << "]";
2071   return OS;
2072 }
2073 
2074 raw_ostream &
operator <<(raw_ostream & OS,const std::vector<StackLayoutModifier::WorklistItem> & Vec)2075 operator<<(raw_ostream &OS,
2076            const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
2077   OS << "SLMTodo[";
2078   const char *Sep = "";
2079   for (const StackLayoutModifier::WorklistItem &Item : Vec) {
2080     OS << Sep;
2081     switch (Item.Action) {
2082     case StackLayoutModifier::WorklistItem::None:
2083       OS << "None";
2084       break;
2085     case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
2086       OS << "AdjustLoadStoreOffset";
2087       break;
2088     case StackLayoutModifier::WorklistItem::AdjustCFI:
2089       OS << "AdjustCFI";
2090       break;
2091     }
2092     Sep = ", ";
2093   }
2094   OS << "]";
2095   return OS;
2096 }
2097 
operator ==(const ShrinkWrapping::WorklistItem & A,const ShrinkWrapping::WorklistItem & B)2098 bool operator==(const ShrinkWrapping::WorklistItem &A,
2099                 const ShrinkWrapping::WorklistItem &B) {
2100   return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
2101           A.Adjustment == B.Adjustment &&
2102           A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
2103           A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
2104           A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
2105           A.FIEToInsert.Size == B.FIEToInsert.Size &&
2106           A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
2107           A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
2108 }
2109 
operator ==(const StackLayoutModifier::WorklistItem & A,const StackLayoutModifier::WorklistItem & B)2110 bool operator==(const StackLayoutModifier::WorklistItem &A,
2111                 const StackLayoutModifier::WorklistItem &B) {
2112   return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
2113 }
2114 
2115 } // end namespace bolt
2116 } // end namespace llvm
2117