1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FixRISCVCallsPass.h" 16 #include "bolt/Passes/FixRelaxationPass.h" 17 #include "bolt/Passes/FrameOptimizer.h" 18 #include "bolt/Passes/Hugify.h" 19 #include "bolt/Passes/IdenticalCodeFolding.h" 20 #include "bolt/Passes/IndirectCallPromotion.h" 21 #include "bolt/Passes/Inliner.h" 22 #include "bolt/Passes/Instrumentation.h" 23 #include "bolt/Passes/JTFootprintReduction.h" 24 #include "bolt/Passes/LongJmp.h" 25 #include "bolt/Passes/LoopInversionPass.h" 26 #include "bolt/Passes/PLTCall.h" 27 #include "bolt/Passes/PatchEntries.h" 28 #include "bolt/Passes/RegReAssign.h" 29 #include "bolt/Passes/ReorderData.h" 30 #include "bolt/Passes/ReorderFunctions.h" 31 #include "bolt/Passes/RetpolineInsertion.h" 32 #include "bolt/Passes/SplitFunctions.h" 33 #include "bolt/Passes/StokeInfo.h" 34 #include "bolt/Passes/TailDuplication.h" 35 #include "bolt/Passes/ThreeWayBranch.h" 36 #include "bolt/Passes/ValidateInternalCalls.h" 37 #include "bolt/Passes/ValidateMemRefs.h" 38 #include "bolt/Passes/VeneerElimination.h" 39 #include "bolt/Utils/CommandLineOpts.h" 40 #include "llvm/Support/FormatVariadic.h" 41 #include "llvm/Support/Timer.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <memory> 44 #include <numeric> 45 46 using namespace llvm; 47 48 namespace opts { 49 50 extern cl::opt<bool> PrintAll; 51 extern cl::opt<bool> PrintDynoStats; 52 extern cl::opt<bool> DumpDotAll; 53 extern cl::opt<std::string> AsmDump; 54 extern cl::opt<bolt::PLTCall::OptType> PLT; 55 56 static cl::opt<bool> 57 DynoStatsAll("dyno-stats-all", 58 cl::desc("print dyno stats after each stage"), 59 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 60 61 static cl::opt<bool> 62 EliminateUnreachable("eliminate-unreachable", 63 cl::desc("eliminate unreachable code"), cl::init(true), 64 cl::cat(BoltOptCategory)); 65 66 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<bool> JTFootprintReductionFlag( 70 "jt-footprint-reduction", 71 cl::desc("make jump tables size smaller at the cost of using more " 72 "instructions at jump sites"), 73 cl::cat(BoltOptCategory)); 74 75 static cl::opt<bool> 76 KeepNops("keep-nops", 77 cl::desc("keep no-op instructions. By default they are removed."), 78 cl::Hidden, cl::cat(BoltOptCategory)); 79 80 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 81 cl::ReallyHidden, cl::cat(BoltOptCategory)); 82 83 cl::opt<bool> 84 PrintAfterBranchFixup("print-after-branch-fixup", 85 cl::desc("print function after fixing local branches"), 86 cl::Hidden, cl::cat(BoltOptCategory)); 87 88 static cl::opt<bool> 89 PrintAfterLowering("print-after-lowering", 90 cl::desc("print function after instruction lowering"), 91 cl::Hidden, cl::cat(BoltOptCategory)); 92 93 cl::opt<bool> 94 PrintFinalized("print-finalized", 95 cl::desc("print function after CFG is finalized"), 96 cl::Hidden, cl::cat(BoltOptCategory)); 97 98 static cl::opt<bool> 99 PrintFOP("print-fop", 100 cl::desc("print functions after frame optimizer pass"), cl::Hidden, 101 cl::cat(BoltOptCategory)); 102 103 static cl::opt<bool> 104 PrintICF("print-icf", cl::desc("print functions after ICF optimization"), 105 cl::Hidden, cl::cat(BoltOptCategory)); 106 107 static cl::opt<bool> 108 PrintICP("print-icp", 109 cl::desc("print functions after indirect call promotion"), 110 cl::Hidden, cl::cat(BoltOptCategory)); 111 112 static cl::opt<bool> 113 PrintInline("print-inline", 114 cl::desc("print functions after inlining optimization"), 115 cl::Hidden, cl::cat(BoltOptCategory)); 116 117 static cl::opt<bool> PrintJTFootprintReduction( 118 "print-after-jt-footprint-reduction", 119 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden, 120 cl::cat(BoltOptCategory)); 121 122 static cl::opt<bool> 123 PrintLongJmp("print-longjmp", 124 cl::desc("print functions after longjmp pass"), cl::Hidden, 125 cl::cat(BoltOptCategory)); 126 127 cl::opt<bool> 128 PrintNormalized("print-normalized", 129 cl::desc("print functions after CFG is normalized"), 130 cl::Hidden, cl::cat(BoltCategory)); 131 132 static cl::opt<bool> PrintOptimizeBodyless( 133 "print-optimize-bodyless", 134 cl::desc("print functions after bodyless optimization"), cl::Hidden, 135 cl::cat(BoltOptCategory)); 136 137 static cl::opt<bool> 138 PrintPeepholes("print-peepholes", 139 cl::desc("print functions after peephole optimization"), 140 cl::Hidden, cl::cat(BoltOptCategory)); 141 142 static cl::opt<bool> 143 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), 144 cl::Hidden, cl::cat(BoltOptCategory)); 145 146 static cl::opt<bool> 147 PrintProfileStats("print-profile-stats", 148 cl::desc("print profile quality/bias analysis"), 149 cl::cat(BoltCategory)); 150 151 static cl::opt<bool> 152 PrintRegReAssign("print-regreassign", 153 cl::desc("print functions after regreassign pass"), 154 cl::Hidden, cl::cat(BoltOptCategory)); 155 156 cl::opt<bool> 157 PrintReordered("print-reordered", 158 cl::desc("print functions after layout optimization"), 159 cl::Hidden, cl::cat(BoltOptCategory)); 160 161 static cl::opt<bool> 162 PrintReorderedFunctions("print-reordered-functions", 163 cl::desc("print functions after clustering"), 164 cl::Hidden, cl::cat(BoltOptCategory)); 165 166 static cl::opt<bool> PrintRetpolineInsertion( 167 "print-retpoline-insertion", 168 cl::desc("print functions after retpoline insertion pass"), cl::Hidden, 169 cl::cat(BoltCategory)); 170 171 static cl::opt<bool> PrintSCTC( 172 "print-sctc", 173 cl::desc("print functions after conditional tail call simplification"), 174 cl::Hidden, cl::cat(BoltOptCategory)); 175 176 static cl::opt<bool> PrintSimplifyROLoads( 177 "print-simplify-rodata-loads", 178 cl::desc("print functions after simplification of RO data loads"), 179 cl::Hidden, cl::cat(BoltOptCategory)); 180 181 static cl::opt<bool> 182 PrintSplit("print-split", cl::desc("print functions after code splitting"), 183 cl::Hidden, cl::cat(BoltOptCategory)); 184 185 static cl::opt<bool> 186 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 187 cl::Hidden, cl::cat(BoltOptCategory)); 188 189 static cl::opt<bool> 190 PrintFixRelaxations("print-fix-relaxations", 191 cl::desc("print functions after fix relaxations pass"), 192 cl::Hidden, cl::cat(BoltOptCategory)); 193 194 static cl::opt<bool> 195 PrintFixRISCVCalls("print-fix-riscv-calls", 196 cl::desc("print functions after fix RISCV calls pass"), 197 cl::Hidden, cl::cat(BoltOptCategory)); 198 199 static cl::opt<bool> PrintVeneerElimination( 200 "print-veneer-elimination", 201 cl::desc("print functions after veneer elimination pass"), cl::Hidden, 202 cl::cat(BoltOptCategory)); 203 204 static cl::opt<bool> 205 PrintUCE("print-uce", 206 cl::desc("print functions after unreachable code elimination"), 207 cl::Hidden, cl::cat(BoltOptCategory)); 208 209 static cl::opt<bool> RegReAssign( 210 "reg-reassign", 211 cl::desc( 212 "reassign registers so as to avoid using REX prefixes in hot code"), 213 cl::cat(BoltOptCategory)); 214 215 static cl::opt<bool> SimplifyConditionalTailCalls( 216 "simplify-conditional-tail-calls", 217 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 218 cl::init(true), cl::cat(BoltOptCategory)); 219 220 static cl::opt<bool> SimplifyRODataLoads( 221 "simplify-rodata-loads", 222 cl::desc("simplify loads from read-only sections by replacing the memory " 223 "operand with the constant found in the corresponding section"), 224 cl::cat(BoltOptCategory)); 225 226 static cl::list<std::string> 227 SpecializeMemcpy1("memcpy1-spec", 228 cl::desc("list of functions with call sites for which to specialize memcpy() " 229 "for size 1"), 230 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 231 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 232 233 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 234 cl::cat(BoltOptCategory)); 235 236 static cl::opt<bool> StringOps( 237 "inline-memcpy", 238 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 239 cl::cat(BoltOptCategory)); 240 241 static cl::opt<bool> StripRepRet( 242 "strip-rep-ret", 243 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 244 cl::init(true), cl::cat(BoltOptCategory)); 245 246 static cl::opt<bool> VerifyCFG("verify-cfg", 247 cl::desc("verify the CFG after every pass"), 248 cl::Hidden, cl::cat(BoltOptCategory)); 249 250 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", 251 cl::desc("reorder three way branches"), 252 cl::ReallyHidden, 253 cl::cat(BoltOptCategory)); 254 255 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 256 cl::desc("fold jcc+mov into cmov"), 257 cl::ReallyHidden, 258 cl::cat(BoltOptCategory)); 259 260 } // namespace opts 261 262 namespace llvm { 263 namespace bolt { 264 265 using namespace opts; 266 267 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 268 const char BinaryFunctionPassManager::TimerGroupDesc[] = 269 "Binary Function Pass Manager"; 270 271 void BinaryFunctionPassManager::runPasses() { 272 auto &BFs = BC.getBinaryFunctions(); 273 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 274 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 275 &OptPassPair = Passes[PassIdx]; 276 if (!OptPassPair.first) 277 continue; 278 279 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 280 std::string PassIdName = 281 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 282 283 if (opts::Verbosity > 0) 284 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 285 286 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 287 TimerGroupDesc, TimeOpts); 288 289 callWithDynoStats([this, &Pass] { cantFail(Pass->runOnFunctions(BC)); }, 290 BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64()); 291 292 if (opts::VerifyCFG && 293 !std::accumulate( 294 BFs.begin(), BFs.end(), true, 295 [](const bool Valid, 296 const std::pair<const uint64_t, BinaryFunction> &It) { 297 return Valid && It.second.validateCFG(); 298 })) { 299 errs() << "BOLT-ERROR: Invalid CFG detected after pass " 300 << Pass->getName() << "\n"; 301 exit(1); 302 } 303 304 if (opts::Verbosity > 0) 305 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 306 307 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 308 continue; 309 310 const std::string Message = std::string("after ") + Pass->getName(); 311 312 for (auto &It : BFs) { 313 BinaryFunction &Function = It.second; 314 315 if (!Pass->shouldPrint(Function)) 316 continue; 317 318 Function.print(outs(), Message); 319 320 if (opts::DumpDotAll) 321 Function.dumpGraphForPass(PassIdName); 322 } 323 } 324 } 325 326 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 327 BinaryFunctionPassManager Manager(BC); 328 329 const DynoStats InitialDynoStats = 330 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 331 332 Manager.registerPass(std::make_unique<AsmDumpPass>(), 333 opts::AsmDump.getNumOccurrences()); 334 335 if (BC.isAArch64()) { 336 Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations)); 337 338 Manager.registerPass( 339 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 340 } 341 342 if (BC.isRISCV()) { 343 Manager.registerPass( 344 std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls)); 345 } 346 347 // Here we manage dependencies/order manually, since passes are run in the 348 // order they're registered. 349 350 // Run this pass first to use stats for the original functions. 351 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); 352 353 if (opts::PrintProfileStats) 354 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 355 356 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 357 358 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint)); 359 360 if (opts::Instrument) 361 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 362 else if (opts::Hugify) 363 Manager.registerPass(std::make_unique<HugePage>(NeverPrint)); 364 365 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 366 367 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint), 368 !opts::KeepNops); 369 370 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 371 372 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 373 opts::StripRepRet); 374 375 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 376 opts::ICF); 377 378 Manager.registerPass( 379 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 380 !opts::SpecializeMemcpy1.empty()); 381 382 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 383 opts::StringOps); 384 385 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 386 387 Manager.registerPass( 388 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 389 opts::JTFootprintReductionFlag); 390 391 Manager.registerPass( 392 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 393 opts::SimplifyRODataLoads); 394 395 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 396 opts::RegReAssign); 397 398 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 399 400 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 401 opts::ICF); 402 403 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 404 405 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 406 opts::ThreeWayBranchFlag); 407 408 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 409 410 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 411 opts::EliminateUnreachable); 412 413 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 414 415 Manager.registerPass(std::make_unique<LoopInversionPass>()); 416 417 Manager.registerPass(std::make_unique<TailDuplication>()); 418 419 Manager.registerPass(std::make_unique<CMOVConversion>(), 420 opts::CMOVConversionFlag); 421 422 // This pass syncs local branches with CFG. If any of the following 423 // passes breaks the sync - they either need to re-run the pass or 424 // fix branches consistency internally. 425 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 426 427 // This pass should come close to last since it uses the estimated hot 428 // size of a function to determine the order. It should definitely 429 // also happen after any changes to the call graph are made, e.g. inlining. 430 Manager.registerPass( 431 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 432 433 // This is the second run of the SplitFunctions pass required by certain 434 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again 435 // after ReorderFunctions allows the finalized function order to be utilized 436 // to make more sophisticated splitting decisions, like hot-warm-cold 437 // splitting. 438 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 439 440 // Print final dyno stats right while CFG and instruction analysis are intact. 441 Manager.registerPass( 442 std::make_unique<DynoStatsPrintPass>( 443 InitialDynoStats, "after all optimizations before SCTC and FOP"), 444 opts::PrintDynoStats || opts::DynoStatsAll); 445 446 // Add the StokeInfo pass, which extract functions for stoke optimization and 447 // get the liveness information for them 448 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 449 450 // This pass introduces conditional jumps into external functions. 451 // Between extending CFG to support this and isolating this pass we chose 452 // the latter. Thus this pass will do double jump removal and unreachable 453 // code elimination if necessary and won't rely on peepholes/UCE for these 454 // optimizations. 455 // More generally this pass should be the last optimization pass that 456 // modifies branches/control flow. This pass is run after function 457 // reordering so that it can tell whether calls are forward/backward 458 // accurately. 459 Manager.registerPass( 460 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 461 opts::SimplifyConditionalTailCalls); 462 463 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 464 465 Manager.registerPass(std::make_unique<AlignerPass>()); 466 467 // Perform reordering on data contained in one or more sections using 468 // memory profiling data. 469 Manager.registerPass(std::make_unique<ReorderData>()); 470 471 if (BC.isAArch64()) { 472 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 473 474 // Tighten branches according to offset differences between branch and 475 // targets. No extra instructions after this pass, otherwise we may have 476 // relocations out of range and crash during linking. 477 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 478 } 479 480 // This pass should always run last.* 481 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 482 483 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 484 // FrameOptimizer move values around and needs to update CFIs. To do this, it 485 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 486 // placed according to the final layout. 487 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 488 489 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 490 491 Manager.registerPass( 492 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 493 494 // Assign each function an output section. 495 Manager.registerPass(std::make_unique<AssignSections>()); 496 497 // Patch original function entries 498 if (BC.HasRelocations) 499 Manager.registerPass(std::make_unique<PatchEntries>()); 500 501 // This pass turns tail calls into jumps which makes them invisible to 502 // function reordering. It's unsafe to use any CFG or instruction analysis 503 // after this point. 504 Manager.registerPass( 505 std::make_unique<InstructionLowering>(PrintAfterLowering)); 506 507 // In non-relocation mode, mark functions that do not fit into their original 508 // space as non-simple if we have to (e.g. for correct debug info update). 509 // NOTE: this pass depends on finalized code. 510 if (!BC.HasRelocations) 511 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 512 513 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 514 515 // Check for dirty state of MCSymbols caused by running calculateEmittedSize 516 // in parallel and restore them 517 Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint)); 518 519 Manager.runPasses(); 520 } 521 522 } // namespace bolt 523 } // namespace llvm 524