12f09f445SMaksim Panchenko //===- bolt/Passes/FrameOptimizer.cpp -------------------------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // This file implements the FrameOptimizerPass class. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Passes/FrameOptimizer.h" 14a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h" 15a34c753fSRafael Auler #include "bolt/Passes/BinaryFunctionCallGraph.h" 16a34c753fSRafael Auler #include "bolt/Passes/DataflowInfoManager.h" 17a34c753fSRafael Auler #include "bolt/Passes/ShrinkWrapping.h" 18a34c753fSRafael Auler #include "bolt/Passes/StackAvailableExpressions.h" 19a34c753fSRafael Auler #include "bolt/Passes/StackReachingUses.h" 20a34c753fSRafael Auler #include "llvm/Support/Timer.h" 21a34c753fSRafael Auler #include <deque> 22a34c753fSRafael Auler #include <unordered_map> 23a34c753fSRafael Auler 24a34c753fSRafael Auler #define DEBUG_TYPE "fop" 25a34c753fSRafael Auler 26a34c753fSRafael Auler using namespace llvm; 27a34c753fSRafael Auler 28a34c753fSRafael Auler namespace opts { 29a34c753fSRafael Auler extern cl::opt<unsigned> Verbosity; 30a34c753fSRafael Auler extern cl::opt<bool> TimeOpts; 31a34c753fSRafael Auler extern cl::OptionCategory BoltOptCategory; 32a34c753fSRafael Auler 33a34c753fSRafael Auler using namespace bolt; 34a34c753fSRafael Auler 35a34c753fSRafael Auler cl::opt<FrameOptimizationType> 36a34c753fSRafael Auler FrameOptimization("frame-opt", 37a34c753fSRafael Auler cl::init(FOP_NONE), 38a34c753fSRafael Auler cl::desc("optimize stack frame accesses"), 39a34c753fSRafael Auler cl::values( 40a34c753fSRafael Auler clEnumValN(FOP_NONE, "none", "do not perform frame optimization"), 41a34c753fSRafael Auler clEnumValN(FOP_HOT, "hot", "perform FOP on hot functions"), 42a34c753fSRafael Auler clEnumValN(FOP_ALL, "all", "perform FOP on all functions")), 43a34c753fSRafael Auler cl::ZeroOrMore, 44a34c753fSRafael Auler cl::cat(BoltOptCategory)); 45a34c753fSRafael Auler 46b92436efSFangrui Song cl::opt<bool> RemoveStores( 47b92436efSFangrui Song "frame-opt-rm-stores", cl::init(FOP_NONE), 48a34c753fSRafael Auler cl::desc("apply additional analysis to remove stores (experimental)"), 49a34c753fSRafael Auler cl::cat(BoltOptCategory)); 50a34c753fSRafael Auler 51a34c753fSRafael Auler } // namespace opts 52a34c753fSRafael Auler 53a34c753fSRafael Auler namespace llvm { 54a34c753fSRafael Auler namespace bolt { 55a34c753fSRafael Auler 56a34c753fSRafael Auler void FrameOptimizerPass::removeUnnecessaryLoads(const RegAnalysis &RA, 57a34c753fSRafael Auler const FrameAnalysis &FA, 58a34c753fSRafael Auler BinaryFunction &BF) { 5960b09997SMaksim Panchenko StackAvailableExpressions SAE(RA, FA, BF); 60a34c753fSRafael Auler SAE.run(); 61a34c753fSRafael Auler 62a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Performing unnecessary loads removal\n"); 63a34c753fSRafael Auler std::deque<std::pair<BinaryBasicBlock *, MCInst *>> ToErase; 64a34c753fSRafael Auler bool Changed = false; 65a34c753fSRafael Auler const auto ExprEnd = SAE.expr_end(); 6660b09997SMaksim Panchenko MCPlusBuilder *MIB = BF.getBinaryContext().MIB.get(); 67a34c753fSRafael Auler for (BinaryBasicBlock &BB : BF) { 68a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 69a34c753fSRafael Auler const MCInst *Prev = nullptr; 70a34c753fSRafael Auler for (MCInst &Inst : BB) { 71a34c753fSRafael Auler LLVM_DEBUG({ 72a34c753fSRafael Auler dbgs() << "\t\tNow at "; 73a34c753fSRafael Auler Inst.dump(); 74a34c753fSRafael Auler for (auto I = Prev ? SAE.expr_begin(*Prev) : SAE.expr_begin(BB); 75a34c753fSRafael Auler I != ExprEnd; ++I) { 76a34c753fSRafael Auler dbgs() << "\t\t\tReached by: "; 77a34c753fSRafael Auler (*I)->dump(); 78a34c753fSRafael Auler } 79a34c753fSRafael Auler }); 80a34c753fSRafael Auler // if Inst is a load from stack and the current available expressions show 81a34c753fSRafael Auler // this value is available in a register or immediate, replace this load 82a34c753fSRafael Auler // with move from register or from immediate. 83a34c753fSRafael Auler ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); 84a34c753fSRafael Auler if (!FIEX) { 85a34c753fSRafael Auler Prev = &Inst; 86a34c753fSRafael Auler continue; 87a34c753fSRafael Auler } 88a34c753fSRafael Auler // FIXME: Change to remove IsSimple == 0. We're being conservative here, 89a34c753fSRafael Auler // but once replaceMemOperandWithReg is ready, we should feed it with all 90a34c753fSRafael Auler // sorts of complex instructions. 91a34c753fSRafael Auler if (FIEX->IsLoad == false || FIEX->IsSimple == false || 92a34c753fSRafael Auler FIEX->StackOffset >= 0) { 93a34c753fSRafael Auler Prev = &Inst; 94a34c753fSRafael Auler continue; 95a34c753fSRafael Auler } 96a34c753fSRafael Auler 97a34c753fSRafael Auler for (auto I = Prev ? SAE.expr_begin(*Prev) : SAE.expr_begin(BB); 98a34c753fSRafael Auler I != ExprEnd; ++I) { 99a34c753fSRafael Auler const MCInst *AvailableInst = *I; 100a34c753fSRafael Auler ErrorOr<const FrameIndexEntry &> FIEY = FA.getFIEFor(*AvailableInst); 101a34c753fSRafael Auler if (!FIEY) 102a34c753fSRafael Auler continue; 103a34c753fSRafael Auler assert(FIEY->IsStore && FIEY->IsSimple); 104a34c753fSRafael Auler if (FIEX->StackOffset != FIEY->StackOffset || FIEX->Size != FIEY->Size) 105a34c753fSRafael Auler continue; 106a34c753fSRafael Auler // TODO: Change push/pops to stack adjustment instruction 10760b09997SMaksim Panchenko if (MIB->isPop(Inst)) 108a34c753fSRafael Auler continue; 109a34c753fSRafael Auler 110a34c753fSRafael Auler ++NumRedundantLoads; 111*42465efdSRafael Auler FreqRedundantLoads += BB.getKnownExecutionCount(); 112a34c753fSRafael Auler Changed = true; 113a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Redundant load instruction: "); 114a34c753fSRafael Auler LLVM_DEBUG(Inst.dump()); 115a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Related store instruction: "); 116a34c753fSRafael Auler LLVM_DEBUG(AvailableInst->dump()); 117a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "@BB: " << BB.getName() << "\n"); 118a34c753fSRafael Auler // Replace load 119a34c753fSRafael Auler if (FIEY->IsStoreFromReg) { 12060b09997SMaksim Panchenko if (!MIB->replaceMemOperandWithReg(Inst, FIEY->RegOrImm)) { 121a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "FAILED to change operand to a reg\n"); 122a34c753fSRafael Auler break; 123a34c753fSRafael Auler } 124*42465efdSRafael Auler FreqLoadsChangedToReg += BB.getKnownExecutionCount(); 12560b09997SMaksim Panchenko MIB->removeAnnotation(Inst, "FrameAccessEntry"); 126a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Changed operand to a reg\n"); 12760b09997SMaksim Panchenko if (MIB->isRedundantMove(Inst)) { 128a34c753fSRafael Auler ++NumLoadsDeleted; 129*42465efdSRafael Auler FreqLoadsDeleted += BB.getKnownExecutionCount(); 130a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Created a redundant move\n"); 131a34c753fSRafael Auler // Delete it! 132a34c753fSRafael Auler ToErase.push_front(std::make_pair(&BB, &Inst)); 133a34c753fSRafael Auler } 134a34c753fSRafael Auler } else { 135a34c753fSRafael Auler char Buf[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 136a34c753fSRafael Auler support::ulittle64_t::ref(Buf + 0) = FIEY->RegOrImm; 137a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Changing operand to an imm... "); 13860b09997SMaksim Panchenko if (!MIB->replaceMemOperandWithImm(Inst, StringRef(Buf, 8), 0)) { 139a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "FAILED\n"); 140a34c753fSRafael Auler } else { 141*42465efdSRafael Auler FreqLoadsChangedToImm += BB.getKnownExecutionCount(); 14260b09997SMaksim Panchenko MIB->removeAnnotation(Inst, "FrameAccessEntry"); 143a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Ok\n"); 144a34c753fSRafael Auler } 145a34c753fSRafael Auler } 146a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Changed to: "); 147a34c753fSRafael Auler LLVM_DEBUG(Inst.dump()); 148a34c753fSRafael Auler break; 149a34c753fSRafael Auler } 150a34c753fSRafael Auler Prev = &Inst; 151a34c753fSRafael Auler } 152a34c753fSRafael Auler } 153f92ab6afSAmir Ayupov if (Changed) 154a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "FOP modified \"" << BF.getPrintName() << "\"\n"); 155f92ab6afSAmir Ayupov 156a34c753fSRafael Auler // TODO: Implement an interface of eraseInstruction that works out the 157a34c753fSRafael Auler // complete list of elements to remove. 158f92ab6afSAmir Ayupov for (std::pair<BinaryBasicBlock *, MCInst *> I : ToErase) 159a34c753fSRafael Auler I.first->eraseInstruction(I.first->findInstruction(I.second)); 160a34c753fSRafael Auler } 161a34c753fSRafael Auler 162a34c753fSRafael Auler void FrameOptimizerPass::removeUnusedStores(const FrameAnalysis &FA, 163a34c753fSRafael Auler BinaryFunction &BF) { 16460b09997SMaksim Panchenko StackReachingUses SRU(FA, BF); 165a34c753fSRafael Auler SRU.run(); 166a34c753fSRafael Auler 167a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Performing unused stores removal\n"); 168a34c753fSRafael Auler std::vector<std::pair<BinaryBasicBlock *, MCInst *>> ToErase; 169a34c753fSRafael Auler bool Changed = false; 170a34c753fSRafael Auler for (BinaryBasicBlock &BB : BF) { 171a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 172a34c753fSRafael Auler const MCInst *Prev = nullptr; 173a34c753fSRafael Auler for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 174a34c753fSRafael Auler MCInst &Inst = *I; 175a34c753fSRafael Auler LLVM_DEBUG({ 176a34c753fSRafael Auler dbgs() << "\t\tNow at "; 177a34c753fSRafael Auler Inst.dump(); 178a34c753fSRafael Auler for (auto I = Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB); 179a34c753fSRafael Auler I != SRU.expr_end(); ++I) { 180a34c753fSRafael Auler dbgs() << "\t\t\tReached by: "; 181a34c753fSRafael Auler (*I)->dump(); 182a34c753fSRafael Auler } 183a34c753fSRafael Auler }); 184a34c753fSRafael Auler ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); 185a34c753fSRafael Auler if (!FIEX) { 186a34c753fSRafael Auler Prev = &Inst; 187a34c753fSRafael Auler continue; 188a34c753fSRafael Auler } 189a34c753fSRafael Auler if (FIEX->IsLoad || !FIEX->IsSimple || FIEX->StackOffset >= 0) { 190a34c753fSRafael Auler Prev = &Inst; 191a34c753fSRafael Auler continue; 192a34c753fSRafael Auler } 193a34c753fSRafael Auler 194a34c753fSRafael Auler if (SRU.isStoreUsed(*FIEX, 195a34c753fSRafael Auler Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB))) { 196a34c753fSRafael Auler Prev = &Inst; 197a34c753fSRafael Auler continue; 198a34c753fSRafael Auler } 199a34c753fSRafael Auler // TODO: Change push/pops to stack adjustment instruction 20060b09997SMaksim Panchenko if (BF.getBinaryContext().MIB->isPush(Inst)) 201a34c753fSRafael Auler continue; 202a34c753fSRafael Auler 203a34c753fSRafael Auler ++NumRedundantStores; 204*42465efdSRafael Auler FreqRedundantStores += BB.getKnownExecutionCount(); 205a34c753fSRafael Auler Changed = true; 206a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Unused store instruction: "); 207a34c753fSRafael Auler LLVM_DEBUG(Inst.dump()); 208a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "@BB: " << BB.getName() << "\n"); 209a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "FIE offset = " << FIEX->StackOffset 210a34c753fSRafael Auler << " size = " << (int)FIEX->Size << "\n"); 211a34c753fSRafael Auler // Delete it! 212a34c753fSRafael Auler ToErase.emplace_back(&BB, &Inst); 213a34c753fSRafael Auler Prev = &Inst; 214a34c753fSRafael Auler } 215a34c753fSRafael Auler } 216a34c753fSRafael Auler 217f92ab6afSAmir Ayupov for (std::pair<BinaryBasicBlock *, MCInst *> I : ToErase) 218a34c753fSRafael Auler I.first->eraseInstruction(I.first->findInstruction(I.second)); 219f92ab6afSAmir Ayupov 220f92ab6afSAmir Ayupov if (Changed) 221a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "FOP modified \"" << BF.getPrintName() << "\"\n"); 222a34c753fSRafael Auler } 223a34c753fSRafael Auler 224a34c753fSRafael Auler void FrameOptimizerPass::runOnFunctions(BinaryContext &BC) { 225a34c753fSRafael Auler if (opts::FrameOptimization == FOP_NONE) 226a34c753fSRafael Auler return; 227a34c753fSRafael Auler 228a34c753fSRafael Auler std::unique_ptr<BinaryFunctionCallGraph> CG; 229a34c753fSRafael Auler std::unique_ptr<FrameAnalysis> FA; 230a34c753fSRafael Auler std::unique_ptr<RegAnalysis> RA; 231a34c753fSRafael Auler 232a34c753fSRafael Auler { 233a34c753fSRafael Auler NamedRegionTimer T1("callgraph", "create call graph", "FOP", 234a34c753fSRafael Auler "FOP breakdown", opts::TimeOpts); 235a34c753fSRafael Auler CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC)); 236a34c753fSRafael Auler } 237a34c753fSRafael Auler 238a34c753fSRafael Auler { 239a34c753fSRafael Auler NamedRegionTimer T1("frameanalysis", "frame analysis", "FOP", 240a34c753fSRafael Auler "FOP breakdown", opts::TimeOpts); 241a34c753fSRafael Auler FA = std::make_unique<FrameAnalysis>(BC, *CG); 242a34c753fSRafael Auler } 243a34c753fSRafael Auler 244a34c753fSRafael Auler { 24540c2e0faSMaksim Panchenko NamedRegionTimer T1("reganalysis", "reg analysis", "FOP", "FOP breakdown", 24640c2e0faSMaksim Panchenko opts::TimeOpts); 247a34c753fSRafael Auler RA = std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get()); 248a34c753fSRafael Auler } 249a34c753fSRafael Auler 250a34c753fSRafael Auler // Perform caller-saved register optimizations, then callee-saved register 251a34c753fSRafael Auler // optimizations (shrink wrapping) 252a34c753fSRafael Auler for (auto &I : BC.getBinaryFunctions()) { 253a34c753fSRafael Auler if (!FA->hasFrameInfo(I.second)) 254a34c753fSRafael Auler continue; 255a34c753fSRafael Auler // Restrict pass execution if user asked to only run on hot functions 256a34c753fSRafael Auler if (opts::FrameOptimization == FOP_HOT) { 257a34c753fSRafael Auler if (I.second.getKnownExecutionCount() < BC.getHotThreshold()) 258a34c753fSRafael Auler continue; 259a34c753fSRafael Auler LLVM_DEBUG( 260a34c753fSRafael Auler dbgs() << "Considering " << I.second.getPrintName() 261a34c753fSRafael Auler << " for frame optimizations because its execution count ( " 262a34c753fSRafael Auler << I.second.getKnownExecutionCount() 263a34c753fSRafael Auler << " ) exceeds our hotness threshold ( " 264a34c753fSRafael Auler << BC.getHotThreshold() << " )\n"); 265a34c753fSRafael Auler } 266a34c753fSRafael Auler 267a34c753fSRafael Auler { 268a34c753fSRafael Auler NamedRegionTimer T1("removeloads", "remove loads", "FOP", "FOP breakdown", 269a34c753fSRafael Auler opts::TimeOpts); 27060b09997SMaksim Panchenko removeUnnecessaryLoads(*RA, *FA, I.second); 271a34c753fSRafael Auler } 272a34c753fSRafael Auler 273a34c753fSRafael Auler if (opts::RemoveStores) { 274a34c753fSRafael Auler NamedRegionTimer T1("removestores", "remove stores", "FOP", 275a34c753fSRafael Auler "FOP breakdown", opts::TimeOpts); 27660b09997SMaksim Panchenko removeUnusedStores(*FA, I.second); 277a34c753fSRafael Auler } 278a34c753fSRafael Auler // Don't even start shrink wrapping if no profiling info is available 279a34c753fSRafael Auler if (I.second.getKnownExecutionCount() == 0) 280a34c753fSRafael Auler continue; 281a34c753fSRafael Auler } 282a34c753fSRafael Auler 283a34c753fSRafael Auler { 284a34c753fSRafael Auler NamedRegionTimer T1("shrinkwrapping", "shrink wrapping", "FOP", 285a34c753fSRafael Auler "FOP breakdown", opts::TimeOpts); 286a34c753fSRafael Auler performShrinkWrapping(*RA, *FA, BC); 287a34c753fSRafael Auler } 288a34c753fSRafael Auler 289a34c753fSRafael Auler outs() << "BOLT-INFO: FOP optimized " << NumRedundantLoads 290a34c753fSRafael Auler << " redundant load(s) and " << NumRedundantStores 291a34c753fSRafael Auler << " unused store(s)\n"; 292*42465efdSRafael Auler outs() << "BOLT-INFO: Frequency of redundant loads is " << FreqRedundantLoads 293*42465efdSRafael Auler << " and frequency of unused stores is " << FreqRedundantStores 294*42465efdSRafael Auler << "\n"; 295*42465efdSRafael Auler outs() << "BOLT-INFO: Frequency of loads changed to use a register is " 296*42465efdSRafael Auler << FreqLoadsChangedToReg 297*42465efdSRafael Auler << " and frequency of loads changed to use an immediate is " 298*42465efdSRafael Auler << FreqLoadsChangedToImm << "\n"; 299*42465efdSRafael Auler outs() << "BOLT-INFO: FOP deleted " << NumLoadsDeleted 300*42465efdSRafael Auler << " load(s) (dyn count: " << FreqLoadsDeleted << ") and " 301*42465efdSRafael Auler << NumRedundantStores << " store(s)\n"; 302a34c753fSRafael Auler FA->printStats(); 303a34c753fSRafael Auler ShrinkWrapping::printStats(); 304a34c753fSRafael Auler } 305a34c753fSRafael Auler 306a34c753fSRafael Auler void FrameOptimizerPass::performShrinkWrapping(const RegAnalysis &RA, 307a34c753fSRafael Auler const FrameAnalysis &FA, 308a34c753fSRafael Auler BinaryContext &BC) { 309a34c753fSRafael Auler // Initialize necessary annotations to allow safe parallel accesses to 310a34c753fSRafael Auler // annotation index in MIB 311a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex(CalleeSavedAnalysis::getSaveTagName()); 312a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex(CalleeSavedAnalysis::getRestoreTagName()); 313a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex(StackLayoutModifier::getTodoTagName()); 314a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex(StackLayoutModifier::getSlotTagName()); 315a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex( 316a34c753fSRafael Auler StackLayoutModifier::getOffsetCFIRegTagName()); 317a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("ReachingDefs"); 318a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("ReachingUses"); 319a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("LivenessAnalysis"); 320a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("StackReachingUses"); 321a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("PostDominatorAnalysis"); 322a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("DominatorAnalysis"); 323a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("StackPointerTracking"); 324a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("StackPointerTrackingForInternalCalls"); 325a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("StackAvailableExpressions"); 326a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("StackAllocationAnalysis"); 327a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("ShrinkWrap-Todo"); 328a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("PredictiveStackPointerTracking"); 329a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("ReachingInsnsBackward"); 330a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("ReachingInsns"); 331a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("AccessesDeletedPos"); 332a34c753fSRafael Auler BC.MIB->getOrCreateAnnotationIndex("DeleteMe"); 333a34c753fSRafael Auler 334*42465efdSRafael Auler std::vector<std::pair<uint64_t, const BinaryFunction *>> Top10Funcs; 335*42465efdSRafael Auler auto LogFunc = [&](BinaryFunction &BF) { 336*42465efdSRafael Auler auto Lower = std::lower_bound( 337*42465efdSRafael Auler Top10Funcs.begin(), Top10Funcs.end(), BF.getKnownExecutionCount(), 338*42465efdSRafael Auler [](const std::pair<uint64_t, const BinaryFunction *> &Elmt, 339*42465efdSRafael Auler uint64_t Value) { return Elmt.first > Value; }); 340*42465efdSRafael Auler if (Lower == Top10Funcs.end() && Top10Funcs.size() >= 10) 341*42465efdSRafael Auler return; 342*42465efdSRafael Auler Top10Funcs.insert(Lower, 343*42465efdSRafael Auler std::make_pair<>(BF.getKnownExecutionCount(), &BF)); 344*42465efdSRafael Auler if (Top10Funcs.size() > 10) 345*42465efdSRafael Auler Top10Funcs.resize(10); 346*42465efdSRafael Auler }; 347*42465efdSRafael Auler (void)LogFunc; 348*42465efdSRafael Auler 349a34c753fSRafael Auler ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 350*42465efdSRafael Auler if (BF.getFunctionScore() == 0) 351a34c753fSRafael Auler return true; 352a34c753fSRafael Auler 353a34c753fSRafael Auler return false; 354a34c753fSRafael Auler }; 355a34c753fSRafael Auler 356*42465efdSRafael Auler const bool HotOnly = opts::FrameOptimization == FOP_HOT; 357*42465efdSRafael Auler 358a34c753fSRafael Auler ParallelUtilities::WorkFuncWithAllocTy WorkFunction = 359a34c753fSRafael Auler [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) { 36060b09997SMaksim Panchenko DataflowInfoManager Info(BF, &RA, &FA, AllocatorId); 36160b09997SMaksim Panchenko ShrinkWrapping SW(FA, BF, Info, AllocatorId); 362a34c753fSRafael Auler 363*42465efdSRafael Auler if (SW.perform(HotOnly)) { 364a34c753fSRafael Auler std::lock_guard<std::mutex> Lock(FuncsChangedMutex); 365a34c753fSRafael Auler FuncsChanged.insert(&BF); 366*42465efdSRafael Auler LLVM_DEBUG(LogFunc(BF)); 367a34c753fSRafael Auler } 368a34c753fSRafael Auler }; 369a34c753fSRafael Auler 370a34c753fSRafael Auler ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 371a34c753fSRafael Auler BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFunction, 372a34c753fSRafael Auler SkipPredicate, "shrink-wrapping"); 373*42465efdSRafael Auler 374*42465efdSRafael Auler if (!Top10Funcs.empty()) { 375*42465efdSRafael Auler outs() << "BOLT-INFO: top 10 functions changed by shrink wrapping:\n"; 376*42465efdSRafael Auler for (const auto &Elmt : Top10Funcs) 377*42465efdSRafael Auler outs() << Elmt.first << " : " << Elmt.second->getPrintName() << "\n"; 378*42465efdSRafael Auler } 379a34c753fSRafael Auler } 380a34c753fSRafael Auler 381a34c753fSRafael Auler } // namespace bolt 382a34c753fSRafael Auler } // namespace llvm 383