1 //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The set of optimization/analysis passes that run on BinaryFunctions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef BOLT_PASSES_BINARY_PASSES_H 14 #define BOLT_PASSES_BINARY_PASSES_H 15 16 #include "bolt/Core/BinaryContext.h" 17 #include "bolt/Core/BinaryFunction.h" 18 #include "bolt/Core/DynoStats.h" 19 #include "bolt/Profile/BoltAddressTranslation.h" 20 #include "llvm/Support/CommandLine.h" 21 #include <atomic> 22 #include <set> 23 #include <string> 24 #include <unordered_set> 25 26 namespace llvm { 27 namespace bolt { 28 29 /// An optimization/analysis pass that runs on functions. 30 class BinaryFunctionPass { 31 protected: 32 bool PrintPass; 33 BinaryFunctionPass(const bool PrintPass)34 explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {} 35 36 /// Control whether a specific function should be skipped during 37 /// optimization. 38 virtual bool shouldOptimize(const BinaryFunction &BF) const; 39 40 public: 41 virtual ~BinaryFunctionPass() = default; 42 43 /// The name of this pass 44 virtual const char *getName() const = 0; 45 46 /// Control whether debug info is printed after this pass is completed. printPass()47 bool printPass() const { return PrintPass; } 48 49 /// Control whether debug info is printed for an individual function after 50 /// this pass is completed (printPass() must have returned true). 51 virtual bool shouldPrint(const BinaryFunction &BF) const; 52 53 virtual Error runOnFunctions(BinaryContext &BC) = 0; 54 }; 55 56 /// A pass to set initial program-wide dynostats. 57 class DynoStatsSetPass : public BinaryFunctionPass { 58 public: DynoStatsSetPass()59 DynoStatsSetPass() : BinaryFunctionPass(false) {} 60 getName()61 const char *getName() const override { 62 return "set dyno-stats before optimizations"; 63 } 64 shouldPrint(const BinaryFunction & BF)65 bool shouldPrint(const BinaryFunction &BF) const override { return false; } 66 runOnFunctions(BinaryContext & BC)67 Error runOnFunctions(BinaryContext &BC) override { 68 BC.InitialDynoStats = getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 69 return Error::success(); 70 } 71 }; 72 73 /// A pass to print program-wide dynostats. 74 class DynoStatsPrintPass : public BinaryFunctionPass { 75 protected: 76 std::string Title; 77 78 public: DynoStatsPrintPass(const char * Title)79 DynoStatsPrintPass(const char *Title) 80 : BinaryFunctionPass(false), Title(Title) {} 81 getName()82 const char *getName() const override { 83 return "print dyno-stats after optimizations"; 84 } 85 shouldPrint(const BinaryFunction & BF)86 bool shouldPrint(const BinaryFunction &BF) const override { return false; } 87 runOnFunctions(BinaryContext & BC)88 Error runOnFunctions(BinaryContext &BC) override { 89 const DynoStats PrevDynoStats = BC.InitialDynoStats; 90 const DynoStats NewDynoStats = 91 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 92 const bool Changed = (NewDynoStats != PrevDynoStats); 93 BC.outs() << "BOLT-INFO: program-wide dynostats " << Title 94 << (Changed ? "" : " (no change)") << ":\n\n" 95 << PrevDynoStats; 96 if (Changed) { 97 BC.outs() << '\n'; 98 NewDynoStats.print(BC.outs(), &PrevDynoStats, BC.InstPrinter.get()); 99 } 100 BC.outs() << '\n'; 101 return Error::success(); 102 } 103 }; 104 105 /// The pass normalizes CFG by performing the following transformations: 106 /// * removes empty basic blocks 107 /// * merges duplicate edges and updates jump instructions 108 class NormalizeCFG : public BinaryFunctionPass { 109 std::atomic<uint64_t> NumBlocksRemoved{0}; 110 std::atomic<uint64_t> NumDuplicateEdgesMerged{0}; 111 112 void runOnFunction(BinaryFunction &BF); 113 114 public: NormalizeCFG(const cl::opt<bool> & PrintPass)115 NormalizeCFG(const cl::opt<bool> &PrintPass) 116 : BinaryFunctionPass(PrintPass) {} 117 getName()118 const char *getName() const override { return "normalize CFG"; } 119 120 Error runOnFunctions(BinaryContext &) override; 121 }; 122 123 /// Detect and eliminate unreachable basic blocks. We could have those 124 /// filled with nops and they are used for alignment. 125 class EliminateUnreachableBlocks : public BinaryFunctionPass { 126 std::unordered_set<const BinaryFunction *> Modified; 127 std::atomic<unsigned> DeletedBlocks{0}; 128 std::atomic<uint64_t> DeletedBytes{0}; 129 void runOnFunction(BinaryFunction &Function); 130 131 public: EliminateUnreachableBlocks(const cl::opt<bool> & PrintPass)132 EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass) 133 : BinaryFunctionPass(PrintPass) {} 134 getName()135 const char *getName() const override { return "eliminate-unreachable"; } shouldPrint(const BinaryFunction & BF)136 bool shouldPrint(const BinaryFunction &BF) const override { 137 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 138 } 139 Error runOnFunctions(BinaryContext &) override; 140 }; 141 142 // Reorder the basic blocks for each function based on hotness. 143 class ReorderBasicBlocks : public BinaryFunctionPass { 144 public: 145 /// Choose which strategy should the block layout heuristic prioritize when 146 /// facing conflicting goals. 147 enum LayoutType : char { 148 /// LT_NONE - do not change layout of basic blocks 149 LT_NONE = 0, /// no reordering 150 /// LT_REVERSE - reverse the order of basic blocks, meant for testing 151 /// purposes. The first basic block is left intact and the rest are 152 /// put in the reverse order. 153 LT_REVERSE, 154 /// LT_OPTIMIZE - optimize layout of basic blocks based on profile. 155 LT_OPTIMIZE, 156 /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis' 157 /// paper (PLDI '90) about block reordering, trying to minimize branch 158 /// mispredictions. 159 LT_OPTIMIZE_BRANCH, 160 /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04) 161 /// that suggests putting frequently executed chains first in the layout. 162 LT_OPTIMIZE_CACHE, 163 // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation. 164 LT_OPTIMIZE_CACHE_PLUS, 165 /// Block reordering guided by the extended TSP metric. 166 LT_OPTIMIZE_EXT_TSP, 167 /// Create clusters and use random order for them. 168 LT_OPTIMIZE_SHUFFLE, 169 }; 170 171 private: 172 /// Run the specified layout algorithm on the given function. Returns `true` 173 /// if the order of blocks was changed. 174 bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type, 175 bool MinBranchClusters) const; 176 177 public: ReorderBasicBlocks(const cl::opt<bool> & PrintPass)178 explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass) 179 : BinaryFunctionPass(PrintPass) {} 180 181 bool shouldOptimize(const BinaryFunction &BF) const override; 182 getName()183 const char *getName() const override { return "reorder-blocks"; } 184 bool shouldPrint(const BinaryFunction &BF) const override; 185 Error runOnFunctions(BinaryContext &BC) override; 186 }; 187 188 /// Sync local branches with CFG. 189 class FixupBranches : public BinaryFunctionPass { 190 public: FixupBranches(const cl::opt<bool> & PrintPass)191 explicit FixupBranches(const cl::opt<bool> &PrintPass) 192 : BinaryFunctionPass(PrintPass) {} 193 getName()194 const char *getName() const override { return "fix-branches"; } 195 Error runOnFunctions(BinaryContext &BC) override; 196 }; 197 198 /// Fix the CFI state and exception handling information after all other 199 /// passes have completed. 200 class FinalizeFunctions : public BinaryFunctionPass { 201 public: FinalizeFunctions(const cl::opt<bool> & PrintPass)202 explicit FinalizeFunctions(const cl::opt<bool> &PrintPass) 203 : BinaryFunctionPass(PrintPass) {} 204 getName()205 const char *getName() const override { return "finalize-functions"; } 206 Error runOnFunctions(BinaryContext &BC) override; 207 }; 208 209 /// Perform any necessary adjustments for functions that do not fit into their 210 /// original space in non-relocation mode. 211 class CheckLargeFunctions : public BinaryFunctionPass { 212 public: CheckLargeFunctions(const cl::opt<bool> & PrintPass)213 explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass) 214 : BinaryFunctionPass(PrintPass) {} 215 getName()216 const char *getName() const override { return "check-large-functions"; } 217 218 Error runOnFunctions(BinaryContext &BC) override; 219 220 bool shouldOptimize(const BinaryFunction &BF) const override; 221 }; 222 223 /// Convert and remove all BOLT-related annotations before LLVM code emission. 224 class LowerAnnotations : public BinaryFunctionPass { 225 public: LowerAnnotations(const cl::opt<bool> & PrintPass)226 explicit LowerAnnotations(const cl::opt<bool> &PrintPass) 227 : BinaryFunctionPass(PrintPass) {} 228 getName()229 const char *getName() const override { return "lower-annotations"; } 230 Error runOnFunctions(BinaryContext &BC) override; 231 }; 232 233 /// Clean the state of the MC representation before sending it to emission 234 class CleanMCState : public BinaryFunctionPass { 235 public: CleanMCState(const cl::opt<bool> & PrintPass)236 explicit CleanMCState(const cl::opt<bool> &PrintPass) 237 : BinaryFunctionPass(PrintPass) {} 238 getName()239 const char *getName() const override { return "clean-mc-state"; } 240 Error runOnFunctions(BinaryContext &BC) override; 241 }; 242 243 /// An optimization to simplify conditional tail calls by removing 244 /// unnecessary branches. 245 /// 246 /// This optimization considers both of the following cases: 247 /// 248 /// foo: ... 249 /// jcc L1 original 250 /// ... 251 /// L1: jmp bar # TAILJMP 252 /// 253 /// -> 254 /// 255 /// foo: ... 256 /// jcc bar iff jcc L1 is expected 257 /// ... 258 /// 259 /// L1 is unreachable 260 /// 261 /// OR 262 /// 263 /// foo: ... 264 /// jcc L2 265 /// L1: jmp dest # TAILJMP 266 /// L2: ... 267 /// 268 /// -> 269 /// 270 /// foo: jncc dest # TAILJMP 271 /// L2: ... 272 /// 273 /// L1 is unreachable 274 /// 275 /// For this particular case, the first basic block ends with 276 /// a conditional branch and has two successors, one fall-through 277 /// and one for when the condition is true. 278 /// The target of the conditional is a basic block with a single 279 /// unconditional branch (i.e. tail call) to another function. 280 /// We don't care about the contents of the fall-through block. 281 /// We assume that the target of the conditional branch is the 282 /// first successor. 283 class SimplifyConditionalTailCalls : public BinaryFunctionPass { 284 uint64_t NumCandidateTailCalls{0}; 285 uint64_t NumTailCallsPatched{0}; 286 uint64_t CTCExecCount{0}; 287 uint64_t CTCTakenCount{0}; 288 uint64_t NumOrigForwardBranches{0}; 289 uint64_t NumOrigBackwardBranches{0}; 290 uint64_t NumDoubleJumps{0}; 291 uint64_t DeletedBlocks{0}; 292 uint64_t DeletedBytes{0}; 293 std::unordered_set<const BinaryFunction *> Modified; 294 std::set<const BinaryBasicBlock *> BeenOptimized; 295 296 bool shouldRewriteBranch(const BinaryBasicBlock *PredBB, 297 const MCInst &CondBranch, const BinaryBasicBlock *BB, 298 const bool DirectionFlag); 299 300 uint64_t fixTailCalls(BinaryFunction &BF); 301 302 public: SimplifyConditionalTailCalls(const cl::opt<bool> & PrintPass)303 explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass) 304 : BinaryFunctionPass(PrintPass) {} 305 getName()306 const char *getName() const override { 307 return "simplify-conditional-tail-calls"; 308 } shouldPrint(const BinaryFunction & BF)309 bool shouldPrint(const BinaryFunction &BF) const override { 310 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 311 } 312 Error runOnFunctions(BinaryContext &BC) override; 313 }; 314 315 /// Convert instructions to the form with the minimum operand width. 316 class ShortenInstructions : public BinaryFunctionPass { 317 uint64_t shortenInstructions(BinaryFunction &Function); 318 319 public: ShortenInstructions(const cl::opt<bool> & PrintPass)320 explicit ShortenInstructions(const cl::opt<bool> &PrintPass) 321 : BinaryFunctionPass(PrintPass) {} 322 getName()323 const char *getName() const override { return "shorten-instructions"; } 324 325 Error runOnFunctions(BinaryContext &BC) override; 326 }; 327 328 /// Perform simple peephole optimizations. 329 class Peepholes : public BinaryFunctionPass { 330 public: 331 enum PeepholeOpts : char { 332 PEEP_NONE = 0x0, 333 PEEP_DOUBLE_JUMPS = 0x2, 334 PEEP_TAILCALL_TRAPS = 0x4, 335 PEEP_USELESS_BRANCHES = 0x8, 336 PEEP_ALL = 0xf 337 }; 338 339 private: 340 uint64_t NumDoubleJumps{0}; 341 uint64_t TailCallTraps{0}; 342 uint64_t NumUselessCondBranches{0}; 343 344 /// Add trap instructions immediately after indirect tail calls to prevent 345 /// the processor from decoding instructions immediate following the 346 /// tailcall. 347 void addTailcallTraps(BinaryFunction &Function); 348 349 /// Remove useless duplicate successors. When the conditional 350 /// successor is the same as the unconditional successor, we can 351 /// remove the conditional successor and branch instruction. 352 void removeUselessCondBranches(BinaryFunction &Function); 353 354 public: Peepholes(const cl::opt<bool> & PrintPass)355 explicit Peepholes(const cl::opt<bool> &PrintPass) 356 : BinaryFunctionPass(PrintPass) {} 357 getName()358 const char *getName() const override { return "peepholes"; } 359 Error runOnFunctions(BinaryContext &BC) override; 360 }; 361 362 /// An optimization to simplify loads from read-only sections.The pass converts 363 /// load instructions with statically computed target address such as: 364 /// 365 /// mov 0x12f(%rip), %eax 366 /// 367 /// to their counterparts that use immediate operands instead of memory loads: 368 /// 369 /// mov $0x4007dc, %eax 370 /// 371 /// when the target address points somewhere inside a read-only section. 372 /// 373 class SimplifyRODataLoads : public BinaryFunctionPass { 374 uint64_t NumLoadsSimplified{0}; 375 uint64_t NumDynamicLoadsSimplified{0}; 376 uint64_t NumLoadsFound{0}; 377 uint64_t NumDynamicLoadsFound{0}; 378 std::unordered_set<const BinaryFunction *> Modified; 379 380 bool simplifyRODataLoads(BinaryFunction &BF); 381 382 public: SimplifyRODataLoads(const cl::opt<bool> & PrintPass)383 explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass) 384 : BinaryFunctionPass(PrintPass) {} 385 getName()386 const char *getName() const override { return "simplify-read-only-loads"; } shouldPrint(const BinaryFunction & BF)387 bool shouldPrint(const BinaryFunction &BF) const override { 388 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; 389 } 390 Error runOnFunctions(BinaryContext &BC) override; 391 }; 392 393 /// Assign output sections to all functions. 394 class AssignSections : public BinaryFunctionPass { 395 public: AssignSections()396 explicit AssignSections() : BinaryFunctionPass(false) {} 397 getName()398 const char *getName() const override { return "assign-sections"; } 399 Error runOnFunctions(BinaryContext &BC) override; 400 }; 401 402 /// Compute and report to the user the imbalance in flow equations for all 403 /// CFGs, so we can detect bad quality profile. Prints average and standard 404 /// deviation of the absolute differences of outgoing flow minus incoming flow 405 /// for blocks of interest (excluding prologues, epilogues, and BB frequency 406 /// lower than 100). 407 class PrintProfileStats : public BinaryFunctionPass { 408 public: PrintProfileStats(const cl::opt<bool> & PrintPass)409 explicit PrintProfileStats(const cl::opt<bool> &PrintPass) 410 : BinaryFunctionPass(PrintPass) {} 411 getName()412 const char *getName() const override { return "profile-stats"; } shouldPrint(const BinaryFunction &)413 bool shouldPrint(const BinaryFunction &) const override { return false; } 414 Error runOnFunctions(BinaryContext &BC) override; 415 }; 416 417 /// Prints a list of the top 100 functions sorted by a set of 418 /// dyno stats categories. 419 class PrintProgramStats : public BinaryFunctionPass { 420 BoltAddressTranslation *BAT = nullptr; 421 422 public: 423 explicit PrintProgramStats(BoltAddressTranslation *BAT = nullptr) BinaryFunctionPass(false)424 : BinaryFunctionPass(false), BAT(BAT) {} 425 getName()426 const char *getName() const override { return "print-stats"; } shouldPrint(const BinaryFunction &)427 bool shouldPrint(const BinaryFunction &) const override { return false; } 428 Error runOnFunctions(BinaryContext &BC) override; 429 }; 430 431 /// Pass for lowering any instructions that we have raised and that have 432 /// to be lowered. 433 class InstructionLowering : public BinaryFunctionPass { 434 public: InstructionLowering(const cl::opt<bool> & PrintPass)435 explicit InstructionLowering(const cl::opt<bool> &PrintPass) 436 : BinaryFunctionPass(PrintPass) {} 437 getName()438 const char *getName() const override { return "inst-lowering"; } 439 440 Error runOnFunctions(BinaryContext &BC) override; 441 }; 442 443 /// Pass for stripping 'repz' from 'repz retq' sequence of instructions. 444 class StripRepRet : public BinaryFunctionPass { 445 public: StripRepRet(const cl::opt<bool> & PrintPass)446 explicit StripRepRet(const cl::opt<bool> &PrintPass) 447 : BinaryFunctionPass(PrintPass) {} 448 getName()449 const char *getName() const override { return "strip-rep-ret"; } 450 451 Error runOnFunctions(BinaryContext &BC) override; 452 }; 453 454 /// Pass for inlining calls to memcpy using 'rep movsb' on X86. 455 class InlineMemcpy : public BinaryFunctionPass { 456 public: InlineMemcpy(const cl::opt<bool> & PrintPass)457 explicit InlineMemcpy(const cl::opt<bool> &PrintPass) 458 : BinaryFunctionPass(PrintPass) {} 459 getName()460 const char *getName() const override { return "inline-memcpy"; } 461 462 Error runOnFunctions(BinaryContext &BC) override; 463 }; 464 465 /// Pass for specializing memcpy for a size of 1 byte. 466 class SpecializeMemcpy1 : public BinaryFunctionPass { 467 private: 468 std::vector<std::string> Spec; 469 470 /// Return indices of the call sites to optimize. Count starts at 1. 471 /// Returns an empty set for all call sites in the function. 472 std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const; 473 474 public: SpecializeMemcpy1(const cl::opt<bool> & PrintPass,cl::list<std::string> & Spec)475 explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass, 476 cl::list<std::string> &Spec) 477 : BinaryFunctionPass(PrintPass), Spec(Spec) {} 478 479 bool shouldOptimize(const BinaryFunction &BF) const override; 480 getName()481 const char *getName() const override { return "specialize-memcpy"; } 482 483 Error runOnFunctions(BinaryContext &BC) override; 484 }; 485 486 /// Pass to remove nops in code 487 class RemoveNops : public BinaryFunctionPass { 488 void runOnFunction(BinaryFunction &Function); 489 490 public: RemoveNops(const cl::opt<bool> & PrintPass)491 explicit RemoveNops(const cl::opt<bool> &PrintPass) 492 : BinaryFunctionPass(PrintPass) {} 493 getName()494 const char *getName() const override { return "remove-nops"; } 495 496 /// Pass entry point 497 Error runOnFunctions(BinaryContext &BC) override; 498 }; 499 500 enum FrameOptimizationType : char { 501 FOP_NONE, /// Don't perform FOP. 502 FOP_HOT, /// Perform FOP on hot functions. 503 FOP_ALL /// Perform FOP on all functions. 504 }; 505 506 } // namespace bolt 507 } // namespace llvm 508 509 #endif 510