1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FixRISCVCallsPass.h" 16 #include "bolt/Passes/FixRelaxationPass.h" 17 #include "bolt/Passes/FrameOptimizer.h" 18 #include "bolt/Passes/Hugify.h" 19 #include "bolt/Passes/IdenticalCodeFolding.h" 20 #include "bolt/Passes/IndirectCallPromotion.h" 21 #include "bolt/Passes/Inliner.h" 22 #include "bolt/Passes/Instrumentation.h" 23 #include "bolt/Passes/JTFootprintReduction.h" 24 #include "bolt/Passes/LongJmp.h" 25 #include "bolt/Passes/LoopInversionPass.h" 26 #include "bolt/Passes/MCF.h" 27 #include "bolt/Passes/PLTCall.h" 28 #include "bolt/Passes/PatchEntries.h" 29 #include "bolt/Passes/RegReAssign.h" 30 #include "bolt/Passes/ReorderData.h" 31 #include "bolt/Passes/ReorderFunctions.h" 32 #include "bolt/Passes/RetpolineInsertion.h" 33 #include "bolt/Passes/SplitFunctions.h" 34 #include "bolt/Passes/StokeInfo.h" 35 #include "bolt/Passes/TailDuplication.h" 36 #include "bolt/Passes/ThreeWayBranch.h" 37 #include "bolt/Passes/ValidateInternalCalls.h" 38 #include "bolt/Passes/ValidateMemRefs.h" 39 #include "bolt/Passes/VeneerElimination.h" 40 #include "bolt/Utils/CommandLineOpts.h" 41 #include "llvm/Support/FormatVariadic.h" 42 #include "llvm/Support/Timer.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include <memory> 45 #include <numeric> 46 47 using namespace llvm; 48 49 namespace opts { 50 51 extern cl::opt<bool> PrintAll; 52 extern cl::opt<bool> PrintDynoStats; 53 extern cl::opt<bool> DumpDotAll; 54 extern cl::opt<std::string> AsmDump; 55 extern cl::opt<bolt::PLTCall::OptType> PLT; 56 57 static cl::opt<bool> 58 DynoStatsAll("dyno-stats-all", 59 cl::desc("print dyno stats after each stage"), 60 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 61 62 static cl::opt<bool> 63 EliminateUnreachable("eliminate-unreachable", 64 cl::desc("eliminate unreachable code"), cl::init(true), 65 cl::cat(BoltOptCategory)); 66 67 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), 68 cl::cat(BoltOptCategory)); 69 70 static cl::opt<bool> JTFootprintReductionFlag( 71 "jt-footprint-reduction", 72 cl::desc("make jump tables size smaller at the cost of using more " 73 "instructions at jump sites"), 74 cl::cat(BoltOptCategory)); 75 76 cl::opt<bool> 77 KeepNops("keep-nops", 78 cl::desc("keep no-op instructions. By default they are removed."), 79 cl::Hidden, cl::cat(BoltOptCategory)); 80 81 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 82 cl::ReallyHidden, cl::cat(BoltOptCategory)); 83 84 cl::opt<bool> 85 PrintAfterBranchFixup("print-after-branch-fixup", 86 cl::desc("print function after fixing local branches"), 87 cl::Hidden, cl::cat(BoltOptCategory)); 88 89 static cl::opt<bool> 90 PrintAfterLowering("print-after-lowering", 91 cl::desc("print function after instruction lowering"), 92 cl::Hidden, cl::cat(BoltOptCategory)); 93 94 static cl::opt<bool> PrintEstimateEdgeCounts( 95 "print-estimate-edge-counts", 96 cl::desc("print function after edge counts are set for no-LBR profile"), 97 cl::Hidden, cl::cat(BoltOptCategory)); 98 99 cl::opt<bool> 100 PrintFinalized("print-finalized", 101 cl::desc("print function after CFG is finalized"), 102 cl::Hidden, cl::cat(BoltOptCategory)); 103 104 static cl::opt<bool> 105 PrintFOP("print-fop", 106 cl::desc("print functions after frame optimizer pass"), cl::Hidden, 107 cl::cat(BoltOptCategory)); 108 109 static cl::opt<bool> 110 PrintICF("print-icf", cl::desc("print functions after ICF optimization"), 111 cl::Hidden, cl::cat(BoltOptCategory)); 112 113 static cl::opt<bool> 114 PrintICP("print-icp", 115 cl::desc("print functions after indirect call promotion"), 116 cl::Hidden, cl::cat(BoltOptCategory)); 117 118 static cl::opt<bool> 119 PrintInline("print-inline", 120 cl::desc("print functions after inlining optimization"), 121 cl::Hidden, cl::cat(BoltOptCategory)); 122 123 static cl::opt<bool> PrintJTFootprintReduction( 124 "print-after-jt-footprint-reduction", 125 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden, 126 cl::cat(BoltOptCategory)); 127 128 static cl::opt<bool> 129 PrintLongJmp("print-longjmp", 130 cl::desc("print functions after longjmp pass"), cl::Hidden, 131 cl::cat(BoltOptCategory)); 132 133 cl::opt<bool> 134 PrintNormalized("print-normalized", 135 cl::desc("print functions after CFG is normalized"), 136 cl::Hidden, cl::cat(BoltCategory)); 137 138 static cl::opt<bool> PrintOptimizeBodyless( 139 "print-optimize-bodyless", 140 cl::desc("print functions after bodyless optimization"), cl::Hidden, 141 cl::cat(BoltOptCategory)); 142 143 static cl::opt<bool> 144 PrintPeepholes("print-peepholes", 145 cl::desc("print functions after peephole optimization"), 146 cl::Hidden, cl::cat(BoltOptCategory)); 147 148 static cl::opt<bool> 149 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), 150 cl::Hidden, cl::cat(BoltOptCategory)); 151 152 static cl::opt<bool> 153 PrintProfileStats("print-profile-stats", 154 cl::desc("print profile quality/bias analysis"), 155 cl::cat(BoltCategory)); 156 157 static cl::opt<bool> 158 PrintRegReAssign("print-regreassign", 159 cl::desc("print functions after regreassign pass"), 160 cl::Hidden, cl::cat(BoltOptCategory)); 161 162 cl::opt<bool> 163 PrintReordered("print-reordered", 164 cl::desc("print functions after layout optimization"), 165 cl::Hidden, cl::cat(BoltOptCategory)); 166 167 static cl::opt<bool> 168 PrintReorderedFunctions("print-reordered-functions", 169 cl::desc("print functions after clustering"), 170 cl::Hidden, cl::cat(BoltOptCategory)); 171 172 static cl::opt<bool> PrintRetpolineInsertion( 173 "print-retpoline-insertion", 174 cl::desc("print functions after retpoline insertion pass"), cl::Hidden, 175 cl::cat(BoltCategory)); 176 177 static cl::opt<bool> PrintSCTC( 178 "print-sctc", 179 cl::desc("print functions after conditional tail call simplification"), 180 cl::Hidden, cl::cat(BoltOptCategory)); 181 182 static cl::opt<bool> PrintSimplifyROLoads( 183 "print-simplify-rodata-loads", 184 cl::desc("print functions after simplification of RO data loads"), 185 cl::Hidden, cl::cat(BoltOptCategory)); 186 187 static cl::opt<bool> 188 PrintSplit("print-split", cl::desc("print functions after code splitting"), 189 cl::Hidden, cl::cat(BoltOptCategory)); 190 191 static cl::opt<bool> 192 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 193 cl::Hidden, cl::cat(BoltOptCategory)); 194 195 static cl::opt<bool> 196 PrintFixRelaxations("print-fix-relaxations", 197 cl::desc("print functions after fix relaxations pass"), 198 cl::Hidden, cl::cat(BoltOptCategory)); 199 200 static cl::opt<bool> 201 PrintFixRISCVCalls("print-fix-riscv-calls", 202 cl::desc("print functions after fix RISCV calls pass"), 203 cl::Hidden, cl::cat(BoltOptCategory)); 204 205 static cl::opt<bool> PrintVeneerElimination( 206 "print-veneer-elimination", 207 cl::desc("print functions after veneer elimination pass"), cl::Hidden, 208 cl::cat(BoltOptCategory)); 209 210 static cl::opt<bool> 211 PrintUCE("print-uce", 212 cl::desc("print functions after unreachable code elimination"), 213 cl::Hidden, cl::cat(BoltOptCategory)); 214 215 static cl::opt<bool> RegReAssign( 216 "reg-reassign", 217 cl::desc( 218 "reassign registers so as to avoid using REX prefixes in hot code"), 219 cl::cat(BoltOptCategory)); 220 221 static cl::opt<bool> SimplifyConditionalTailCalls( 222 "simplify-conditional-tail-calls", 223 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 224 cl::init(true), cl::cat(BoltOptCategory)); 225 226 static cl::opt<bool> SimplifyRODataLoads( 227 "simplify-rodata-loads", 228 cl::desc("simplify loads from read-only sections by replacing the memory " 229 "operand with the constant found in the corresponding section"), 230 cl::cat(BoltOptCategory)); 231 232 static cl::list<std::string> 233 SpecializeMemcpy1("memcpy1-spec", 234 cl::desc("list of functions with call sites for which to specialize memcpy() " 235 "for size 1"), 236 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 237 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 238 239 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 240 cl::cat(BoltOptCategory)); 241 242 static cl::opt<bool> StringOps( 243 "inline-memcpy", 244 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 245 cl::cat(BoltOptCategory)); 246 247 static cl::opt<bool> StripRepRet( 248 "strip-rep-ret", 249 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 250 cl::init(true), cl::cat(BoltOptCategory)); 251 252 static cl::opt<bool> VerifyCFG("verify-cfg", 253 cl::desc("verify the CFG after every pass"), 254 cl::Hidden, cl::cat(BoltOptCategory)); 255 256 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", 257 cl::desc("reorder three way branches"), 258 cl::ReallyHidden, 259 cl::cat(BoltOptCategory)); 260 261 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 262 cl::desc("fold jcc+mov into cmov"), 263 cl::ReallyHidden, 264 cl::cat(BoltOptCategory)); 265 266 } // namespace opts 267 268 namespace llvm { 269 namespace bolt { 270 271 using namespace opts; 272 273 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 274 const char BinaryFunctionPassManager::TimerGroupDesc[] = 275 "Binary Function Pass Manager"; 276 277 Error BinaryFunctionPassManager::runPasses() { 278 auto &BFs = BC.getBinaryFunctions(); 279 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 280 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 281 &OptPassPair = Passes[PassIdx]; 282 if (!OptPassPair.first) 283 continue; 284 285 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 286 std::string PassIdName = 287 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 288 289 if (opts::Verbosity > 0) 290 BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 291 292 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 293 TimerGroupDesc, TimeOpts); 294 295 Error E = Error::success(); 296 callWithDynoStats( 297 BC.outs(), 298 [this, &E, &Pass] { 299 E = joinErrors(std::move(E), Pass->runOnFunctions(BC)); 300 }, 301 BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64()); 302 if (E) 303 return Error(std::move(E)); 304 305 if (opts::VerifyCFG && 306 !std::accumulate( 307 BFs.begin(), BFs.end(), true, 308 [](const bool Valid, 309 const std::pair<const uint64_t, BinaryFunction> &It) { 310 return Valid && It.second.validateCFG(); 311 })) { 312 return createFatalBOLTError( 313 Twine("BOLT-ERROR: Invalid CFG detected after pass ") + 314 Twine(Pass->getName()) + Twine("\n")); 315 } 316 317 if (opts::Verbosity > 0) 318 BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 319 320 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 321 continue; 322 323 const std::string Message = std::string("after ") + Pass->getName(); 324 325 for (auto &It : BFs) { 326 BinaryFunction &Function = It.second; 327 328 if (!Pass->shouldPrint(Function)) 329 continue; 330 331 Function.print(BC.outs(), Message); 332 333 if (opts::DumpDotAll) 334 Function.dumpGraphForPass(PassIdName); 335 } 336 } 337 return Error::success(); 338 } 339 340 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 341 BinaryFunctionPassManager Manager(BC); 342 343 Manager.registerPass( 344 std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts)); 345 346 const DynoStats InitialDynoStats = 347 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 348 349 Manager.registerPass(std::make_unique<AsmDumpPass>(), 350 opts::AsmDump.getNumOccurrences()); 351 352 if (BC.isAArch64()) { 353 Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations)); 354 355 Manager.registerPass( 356 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 357 } 358 359 if (BC.isRISCV()) { 360 Manager.registerPass( 361 std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls)); 362 } 363 364 // Here we manage dependencies/order manually, since passes are run in the 365 // order they're registered. 366 367 // Run this pass first to use stats for the original functions. 368 Manager.registerPass(std::make_unique<PrintProgramStats>()); 369 370 if (opts::PrintProfileStats) 371 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 372 373 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 374 375 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint)); 376 377 if (opts::Instrument) 378 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 379 else if (opts::Hugify) 380 Manager.registerPass(std::make_unique<HugePage>(NeverPrint)); 381 382 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 383 384 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint), 385 !opts::KeepNops); 386 387 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 388 389 if (BC.isX86()) 390 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 391 opts::StripRepRet); 392 393 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 394 opts::ICF); 395 396 Manager.registerPass( 397 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 398 !opts::SpecializeMemcpy1.empty()); 399 400 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 401 opts::StringOps); 402 403 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 404 405 Manager.registerPass( 406 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 407 opts::JTFootprintReductionFlag); 408 409 Manager.registerPass( 410 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 411 opts::SimplifyRODataLoads); 412 413 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 414 opts::RegReAssign); 415 416 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 417 418 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 419 opts::ICF); 420 421 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 422 423 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 424 opts::ThreeWayBranchFlag); 425 426 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 427 428 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 429 opts::EliminateUnreachable); 430 431 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 432 433 Manager.registerPass(std::make_unique<LoopInversionPass>()); 434 435 Manager.registerPass(std::make_unique<TailDuplication>()); 436 437 Manager.registerPass(std::make_unique<CMOVConversion>(), 438 opts::CMOVConversionFlag); 439 440 // This pass syncs local branches with CFG. If any of the following 441 // passes breaks the sync - they either need to re-run the pass or 442 // fix branches consistency internally. 443 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 444 445 // This pass should come close to last since it uses the estimated hot 446 // size of a function to determine the order. It should definitely 447 // also happen after any changes to the call graph are made, e.g. inlining. 448 Manager.registerPass( 449 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 450 451 // This is the second run of the SplitFunctions pass required by certain 452 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again 453 // after ReorderFunctions allows the finalized function order to be utilized 454 // to make more sophisticated splitting decisions, like hot-warm-cold 455 // splitting. 456 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 457 458 // Print final dyno stats right while CFG and instruction analysis are intact. 459 Manager.registerPass( 460 std::make_unique<DynoStatsPrintPass>( 461 InitialDynoStats, "after all optimizations before SCTC and FOP"), 462 opts::PrintDynoStats || opts::DynoStatsAll); 463 464 // Add the StokeInfo pass, which extract functions for stoke optimization and 465 // get the liveness information for them 466 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 467 468 // This pass introduces conditional jumps into external functions. 469 // Between extending CFG to support this and isolating this pass we chose 470 // the latter. Thus this pass will do double jump removal and unreachable 471 // code elimination if necessary and won't rely on peepholes/UCE for these 472 // optimizations. 473 // More generally this pass should be the last optimization pass that 474 // modifies branches/control flow. This pass is run after function 475 // reordering so that it can tell whether calls are forward/backward 476 // accurately. 477 Manager.registerPass( 478 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 479 opts::SimplifyConditionalTailCalls); 480 481 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 482 483 Manager.registerPass(std::make_unique<AlignerPass>()); 484 485 // Perform reordering on data contained in one or more sections using 486 // memory profiling data. 487 Manager.registerPass(std::make_unique<ReorderData>()); 488 489 if (BC.isAArch64()) { 490 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 491 492 // Tighten branches according to offset differences between branch and 493 // targets. No extra instructions after this pass, otherwise we may have 494 // relocations out of range and crash during linking. 495 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 496 } 497 498 // This pass should always run last.* 499 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 500 501 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 502 // FrameOptimizer move values around and needs to update CFIs. To do this, it 503 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 504 // placed according to the final layout. 505 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 506 507 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 508 509 Manager.registerPass( 510 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 511 512 // Assign each function an output section. 513 Manager.registerPass(std::make_unique<AssignSections>()); 514 515 // Patch original function entries 516 if (BC.HasRelocations) 517 Manager.registerPass(std::make_unique<PatchEntries>()); 518 519 // This pass turns tail calls into jumps which makes them invisible to 520 // function reordering. It's unsafe to use any CFG or instruction analysis 521 // after this point. 522 Manager.registerPass( 523 std::make_unique<InstructionLowering>(PrintAfterLowering)); 524 525 // In non-relocation mode, mark functions that do not fit into their original 526 // space as non-simple if we have to (e.g. for correct debug info update). 527 // NOTE: this pass depends on finalized code. 528 if (!BC.HasRelocations) 529 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 530 531 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 532 533 // Check for dirty state of MCSymbols caused by running calculateEmittedSize 534 // in parallel and restore them 535 Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint)); 536 537 return Manager.runPasses(); 538 } 539 540 } // namespace bolt 541 } // namespace llvm 542