1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/ContinuityStats.h" 16 #include "bolt/Passes/FixRISCVCallsPass.h" 17 #include "bolt/Passes/FixRelaxationPass.h" 18 #include "bolt/Passes/FrameOptimizer.h" 19 #include "bolt/Passes/Hugify.h" 20 #include "bolt/Passes/IdenticalCodeFolding.h" 21 #include "bolt/Passes/IndirectCallPromotion.h" 22 #include "bolt/Passes/Inliner.h" 23 #include "bolt/Passes/Instrumentation.h" 24 #include "bolt/Passes/JTFootprintReduction.h" 25 #include "bolt/Passes/LongJmp.h" 26 #include "bolt/Passes/LoopInversionPass.h" 27 #include "bolt/Passes/MCF.h" 28 #include "bolt/Passes/PLTCall.h" 29 #include "bolt/Passes/PatchEntries.h" 30 #include "bolt/Passes/RegReAssign.h" 31 #include "bolt/Passes/ReorderData.h" 32 #include "bolt/Passes/ReorderFunctions.h" 33 #include "bolt/Passes/RetpolineInsertion.h" 34 #include "bolt/Passes/SplitFunctions.h" 35 #include "bolt/Passes/StokeInfo.h" 36 #include "bolt/Passes/TailDuplication.h" 37 #include "bolt/Passes/ThreeWayBranch.h" 38 #include "bolt/Passes/ValidateInternalCalls.h" 39 #include "bolt/Passes/ValidateMemRefs.h" 40 #include "bolt/Passes/VeneerElimination.h" 41 #include "bolt/Utils/CommandLineOpts.h" 42 #include "llvm/Support/FormatVariadic.h" 43 #include "llvm/Support/Timer.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include <memory> 46 #include <numeric> 47 48 using namespace llvm; 49 50 namespace opts { 51 52 extern cl::opt<bool> PrintAll; 53 extern cl::opt<bool> PrintDynoStats; 54 extern cl::opt<bool> DumpDotAll; 55 extern cl::opt<std::string> AsmDump; 56 extern cl::opt<bolt::PLTCall::OptType> PLT; 57 extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, 58 llvm::bolt::DeprecatedICFNumericOptionParser> 59 ICF; 60 61 static cl::opt<bool> 62 DynoStatsAll("dyno-stats-all", 63 cl::desc("print dyno stats after each stage"), 64 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 65 66 static cl::opt<bool> 67 EliminateUnreachable("eliminate-unreachable", 68 cl::desc("eliminate unreachable code"), cl::init(true), 69 cl::cat(BoltOptCategory)); 70 71 static cl::opt<bool> JTFootprintReductionFlag( 72 "jt-footprint-reduction", 73 cl::desc("make jump tables size smaller at the cost of using more " 74 "instructions at jump sites"), 75 cl::cat(BoltOptCategory)); 76 77 cl::opt<bool> 78 KeepNops("keep-nops", 79 cl::desc("keep no-op instructions. By default they are removed."), 80 cl::Hidden, cl::cat(BoltOptCategory)); 81 82 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 83 cl::ReallyHidden, cl::cat(BoltOptCategory)); 84 85 cl::opt<bool> 86 PrintAfterBranchFixup("print-after-branch-fixup", 87 cl::desc("print function after fixing local branches"), 88 cl::Hidden, cl::cat(BoltOptCategory)); 89 90 static cl::opt<bool> 91 PrintAfterLowering("print-after-lowering", 92 cl::desc("print function after instruction lowering"), 93 cl::Hidden, cl::cat(BoltOptCategory)); 94 95 static cl::opt<bool> PrintEstimateEdgeCounts( 96 "print-estimate-edge-counts", 97 cl::desc("print function after edge counts are set for no-LBR profile"), 98 cl::Hidden, cl::cat(BoltOptCategory)); 99 100 cl::opt<bool> 101 PrintFinalized("print-finalized", 102 cl::desc("print function after CFG is finalized"), 103 cl::Hidden, cl::cat(BoltOptCategory)); 104 105 static cl::opt<bool> 106 PrintFOP("print-fop", 107 cl::desc("print functions after frame optimizer pass"), cl::Hidden, 108 cl::cat(BoltOptCategory)); 109 110 static cl::opt<bool> 111 PrintICF("print-icf", cl::desc("print functions after ICF optimization"), 112 cl::Hidden, cl::cat(BoltOptCategory)); 113 114 static cl::opt<bool> 115 PrintICP("print-icp", 116 cl::desc("print functions after indirect call promotion"), 117 cl::Hidden, cl::cat(BoltOptCategory)); 118 119 static cl::opt<bool> 120 PrintInline("print-inline", 121 cl::desc("print functions after inlining optimization"), 122 cl::Hidden, cl::cat(BoltOptCategory)); 123 124 static cl::opt<bool> PrintJTFootprintReduction( 125 "print-after-jt-footprint-reduction", 126 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden, 127 cl::cat(BoltOptCategory)); 128 129 static cl::opt<bool> 130 PrintAdrRelaxation("print-adr-relaxation", 131 cl::desc("print functions after ADR Relaxation pass"), 132 cl::Hidden, cl::cat(BoltOptCategory)); 133 134 static cl::opt<bool> 135 PrintLongJmp("print-longjmp", 136 cl::desc("print functions after longjmp pass"), cl::Hidden, 137 cl::cat(BoltOptCategory)); 138 139 cl::opt<bool> 140 PrintNormalized("print-normalized", 141 cl::desc("print functions after CFG is normalized"), 142 cl::Hidden, cl::cat(BoltCategory)); 143 144 static cl::opt<bool> PrintOptimizeBodyless( 145 "print-optimize-bodyless", 146 cl::desc("print functions after bodyless optimization"), cl::Hidden, 147 cl::cat(BoltOptCategory)); 148 149 static cl::opt<bool> 150 PrintPeepholes("print-peepholes", 151 cl::desc("print functions after peephole optimization"), 152 cl::Hidden, cl::cat(BoltOptCategory)); 153 154 static cl::opt<bool> 155 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), 156 cl::Hidden, cl::cat(BoltOptCategory)); 157 158 static cl::opt<bool> 159 PrintProfileStats("print-profile-stats", 160 cl::desc("print profile quality/bias analysis"), 161 cl::cat(BoltCategory)); 162 163 static cl::opt<bool> 164 PrintRegReAssign("print-regreassign", 165 cl::desc("print functions after regreassign pass"), 166 cl::Hidden, cl::cat(BoltOptCategory)); 167 168 cl::opt<bool> 169 PrintReordered("print-reordered", 170 cl::desc("print functions after layout optimization"), 171 cl::Hidden, cl::cat(BoltOptCategory)); 172 173 static cl::opt<bool> 174 PrintReorderedFunctions("print-reordered-functions", 175 cl::desc("print functions after clustering"), 176 cl::Hidden, cl::cat(BoltOptCategory)); 177 178 static cl::opt<bool> PrintRetpolineInsertion( 179 "print-retpoline-insertion", 180 cl::desc("print functions after retpoline insertion pass"), cl::Hidden, 181 cl::cat(BoltCategory)); 182 183 static cl::opt<bool> PrintSCTC( 184 "print-sctc", 185 cl::desc("print functions after conditional tail call simplification"), 186 cl::Hidden, cl::cat(BoltOptCategory)); 187 188 static cl::opt<bool> PrintSimplifyROLoads( 189 "print-simplify-rodata-loads", 190 cl::desc("print functions after simplification of RO data loads"), 191 cl::Hidden, cl::cat(BoltOptCategory)); 192 193 static cl::opt<bool> 194 PrintSplit("print-split", cl::desc("print functions after code splitting"), 195 cl::Hidden, cl::cat(BoltOptCategory)); 196 197 static cl::opt<bool> 198 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 199 cl::Hidden, cl::cat(BoltOptCategory)); 200 201 static cl::opt<bool> 202 PrintFixRelaxations("print-fix-relaxations", 203 cl::desc("print functions after fix relaxations pass"), 204 cl::Hidden, cl::cat(BoltOptCategory)); 205 206 static cl::opt<bool> 207 PrintFixRISCVCalls("print-fix-riscv-calls", 208 cl::desc("print functions after fix RISCV calls pass"), 209 cl::Hidden, cl::cat(BoltOptCategory)); 210 211 static cl::opt<bool> PrintVeneerElimination( 212 "print-veneer-elimination", 213 cl::desc("print functions after veneer elimination pass"), cl::Hidden, 214 cl::cat(BoltOptCategory)); 215 216 static cl::opt<bool> 217 PrintUCE("print-uce", 218 cl::desc("print functions after unreachable code elimination"), 219 cl::Hidden, cl::cat(BoltOptCategory)); 220 221 static cl::opt<bool> RegReAssign( 222 "reg-reassign", 223 cl::desc( 224 "reassign registers so as to avoid using REX prefixes in hot code"), 225 cl::cat(BoltOptCategory)); 226 227 static cl::opt<bool> SimplifyConditionalTailCalls( 228 "simplify-conditional-tail-calls", 229 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 230 cl::init(true), cl::cat(BoltOptCategory)); 231 232 static cl::opt<bool> SimplifyRODataLoads( 233 "simplify-rodata-loads", 234 cl::desc("simplify loads from read-only sections by replacing the memory " 235 "operand with the constant found in the corresponding section"), 236 cl::cat(BoltOptCategory)); 237 238 static cl::list<std::string> 239 SpecializeMemcpy1("memcpy1-spec", 240 cl::desc("list of functions with call sites for which to specialize memcpy() " 241 "for size 1"), 242 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 243 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 244 245 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 246 cl::cat(BoltOptCategory)); 247 248 static cl::opt<bool> StringOps( 249 "inline-memcpy", 250 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 251 cl::cat(BoltOptCategory)); 252 253 static cl::opt<bool> StripRepRet( 254 "strip-rep-ret", 255 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 256 cl::init(true), cl::cat(BoltOptCategory)); 257 258 static cl::opt<bool> VerifyCFG("verify-cfg", 259 cl::desc("verify the CFG after every pass"), 260 cl::Hidden, cl::cat(BoltOptCategory)); 261 262 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", 263 cl::desc("reorder three way branches"), 264 cl::ReallyHidden, 265 cl::cat(BoltOptCategory)); 266 267 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 268 cl::desc("fold jcc+mov into cmov"), 269 cl::ReallyHidden, 270 cl::cat(BoltOptCategory)); 271 272 static cl::opt<bool> ShortenInstructions("shorten-instructions", 273 cl::desc("shorten instructions"), 274 cl::init(true), 275 cl::cat(BoltOptCategory)); 276 } // namespace opts 277 278 namespace llvm { 279 namespace bolt { 280 281 using namespace opts; 282 283 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 284 const char BinaryFunctionPassManager::TimerGroupDesc[] = 285 "Binary Function Pass Manager"; 286 287 Error BinaryFunctionPassManager::runPasses() { 288 auto &BFs = BC.getBinaryFunctions(); 289 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 290 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 291 &OptPassPair = Passes[PassIdx]; 292 if (!OptPassPair.first) 293 continue; 294 295 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 296 std::string PassIdName = 297 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 298 299 if (opts::Verbosity > 0) 300 BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 301 302 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 303 TimerGroupDesc, TimeOpts); 304 305 Error E = Error::success(); 306 callWithDynoStats( 307 BC.outs(), 308 [this, &E, &Pass] { 309 E = joinErrors(std::move(E), Pass->runOnFunctions(BC)); 310 }, 311 BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64()); 312 if (E) 313 return Error(std::move(E)); 314 315 if (opts::VerifyCFG && 316 !std::accumulate( 317 BFs.begin(), BFs.end(), true, 318 [](const bool Valid, 319 const std::pair<const uint64_t, BinaryFunction> &It) { 320 return Valid && It.second.validateCFG(); 321 })) { 322 return createFatalBOLTError( 323 Twine("BOLT-ERROR: Invalid CFG detected after pass ") + 324 Twine(Pass->getName()) + Twine("\n")); 325 } 326 327 if (opts::Verbosity > 0) 328 BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 329 330 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 331 continue; 332 333 const std::string Message = std::string("after ") + Pass->getName(); 334 335 for (auto &It : BFs) { 336 BinaryFunction &Function = It.second; 337 338 if (!Pass->shouldPrint(Function)) 339 continue; 340 341 Function.print(BC.outs(), Message); 342 343 if (opts::DumpDotAll) 344 Function.dumpGraphForPass(PassIdName); 345 } 346 } 347 return Error::success(); 348 } 349 350 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 351 BinaryFunctionPassManager Manager(BC); 352 353 Manager.registerPass( 354 std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts)); 355 356 Manager.registerPass(std::make_unique<DynoStatsSetPass>()); 357 358 Manager.registerPass(std::make_unique<AsmDumpPass>(), 359 opts::AsmDump.getNumOccurrences()); 360 361 if (BC.isAArch64()) { 362 Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations)); 363 364 Manager.registerPass( 365 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 366 } 367 368 if (BC.isRISCV()) { 369 Manager.registerPass( 370 std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls)); 371 } 372 373 // Here we manage dependencies/order manually, since passes are run in the 374 // order they're registered. 375 376 // Run this pass first to use stats for the original functions. 377 Manager.registerPass(std::make_unique<PrintProgramStats>()); 378 379 if (opts::PrintProfileStats) 380 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 381 382 Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint)); 383 384 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 385 386 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint)); 387 388 if (opts::Instrument) 389 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 390 else if (opts::Hugify) 391 Manager.registerPass(std::make_unique<HugePage>(NeverPrint)); 392 393 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint), 394 opts::ShortenInstructions); 395 396 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint), 397 !opts::KeepNops); 398 399 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 400 401 if (BC.isX86()) 402 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 403 opts::StripRepRet); 404 405 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 406 opts::ICF != IdenticalCodeFolding::ICFLevel::None); 407 408 Manager.registerPass( 409 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 410 !opts::SpecializeMemcpy1.empty()); 411 412 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 413 opts::StringOps); 414 415 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 416 417 Manager.registerPass( 418 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 419 opts::JTFootprintReductionFlag); 420 421 Manager.registerPass( 422 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 423 opts::SimplifyRODataLoads); 424 425 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 426 opts::RegReAssign); 427 428 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 429 430 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 431 opts::ICF != IdenticalCodeFolding::ICFLevel::None); 432 433 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 434 435 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 436 opts::ThreeWayBranchFlag); 437 438 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 439 440 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 441 opts::EliminateUnreachable); 442 443 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 444 445 Manager.registerPass(std::make_unique<LoopInversionPass>()); 446 447 Manager.registerPass(std::make_unique<TailDuplication>()); 448 449 Manager.registerPass(std::make_unique<CMOVConversion>(), 450 opts::CMOVConversionFlag); 451 452 // This pass syncs local branches with CFG. If any of the following 453 // passes breaks the sync - they either need to re-run the pass or 454 // fix branches consistency internally. 455 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 456 457 // This pass should come close to last since it uses the estimated hot 458 // size of a function to determine the order. It should definitely 459 // also happen after any changes to the call graph are made, e.g. inlining. 460 Manager.registerPass( 461 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 462 463 // This is the second run of the SplitFunctions pass required by certain 464 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again 465 // after ReorderFunctions allows the finalized function order to be utilized 466 // to make more sophisticated splitting decisions, like hot-warm-cold 467 // splitting. 468 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 469 470 // Print final dyno stats right while CFG and instruction analysis are intact. 471 Manager.registerPass(std::make_unique<DynoStatsPrintPass>( 472 "after all optimizations before SCTC and FOP"), 473 opts::PrintDynoStats || opts::DynoStatsAll); 474 475 // Add the StokeInfo pass, which extract functions for stoke optimization and 476 // get the liveness information for them 477 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 478 479 // This pass introduces conditional jumps into external functions. 480 // Between extending CFG to support this and isolating this pass we chose 481 // the latter. Thus this pass will do double jump removal and unreachable 482 // code elimination if necessary and won't rely on peepholes/UCE for these 483 // optimizations. 484 // More generally this pass should be the last optimization pass that 485 // modifies branches/control flow. This pass is run after function 486 // reordering so that it can tell whether calls are forward/backward 487 // accurately. 488 Manager.registerPass( 489 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 490 opts::SimplifyConditionalTailCalls); 491 492 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 493 494 Manager.registerPass(std::make_unique<AlignerPass>()); 495 496 // Perform reordering on data contained in one or more sections using 497 // memory profiling data. 498 Manager.registerPass(std::make_unique<ReorderData>()); 499 500 if (BC.isAArch64()) { 501 Manager.registerPass( 502 std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation)); 503 504 // Tighten branches according to offset differences between branch and 505 // targets. No extra instructions after this pass, otherwise we may have 506 // relocations out of range and crash during linking. 507 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 508 } 509 510 // This pass should always run last.* 511 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 512 513 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 514 // FrameOptimizer move values around and needs to update CFIs. To do this, it 515 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 516 // placed according to the final layout. 517 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 518 519 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 520 521 Manager.registerPass( 522 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 523 524 // Assign each function an output section. 525 Manager.registerPass(std::make_unique<AssignSections>()); 526 527 // Patch original function entries 528 if (BC.HasRelocations) 529 Manager.registerPass(std::make_unique<PatchEntries>()); 530 531 // This pass turns tail calls into jumps which makes them invisible to 532 // function reordering. It's unsafe to use any CFG or instruction analysis 533 // after this point. 534 Manager.registerPass( 535 std::make_unique<InstructionLowering>(PrintAfterLowering)); 536 537 // In non-relocation mode, mark functions that do not fit into their original 538 // space as non-simple if we have to (e.g. for correct debug info update). 539 // NOTE: this pass depends on finalized code. 540 if (!BC.HasRelocations) 541 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 542 543 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 544 545 // Check for dirty state of MCSymbols caused by running calculateEmittedSize 546 // in parallel and restore them 547 Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint)); 548 549 return Manager.runPasses(); 550 } 551 552 } // namespace bolt 553 } // namespace llvm 554