1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FixRISCVCallsPass.h" 16 #include "bolt/Passes/FixRelaxationPass.h" 17 #include "bolt/Passes/FrameOptimizer.h" 18 #include "bolt/Passes/Hugify.h" 19 #include "bolt/Passes/IdenticalCodeFolding.h" 20 #include "bolt/Passes/IndirectCallPromotion.h" 21 #include "bolt/Passes/Inliner.h" 22 #include "bolt/Passes/Instrumentation.h" 23 #include "bolt/Passes/JTFootprintReduction.h" 24 #include "bolt/Passes/LongJmp.h" 25 #include "bolt/Passes/LoopInversionPass.h" 26 #include "bolt/Passes/PLTCall.h" 27 #include "bolt/Passes/PatchEntries.h" 28 #include "bolt/Passes/RegReAssign.h" 29 #include "bolt/Passes/ReorderData.h" 30 #include "bolt/Passes/ReorderFunctions.h" 31 #include "bolt/Passes/RetpolineInsertion.h" 32 #include "bolt/Passes/SplitFunctions.h" 33 #include "bolt/Passes/StokeInfo.h" 34 #include "bolt/Passes/TailDuplication.h" 35 #include "bolt/Passes/ThreeWayBranch.h" 36 #include "bolt/Passes/ValidateInternalCalls.h" 37 #include "bolt/Passes/ValidateMemRefs.h" 38 #include "bolt/Passes/VeneerElimination.h" 39 #include "bolt/Utils/CommandLineOpts.h" 40 #include "llvm/Support/FormatVariadic.h" 41 #include "llvm/Support/Timer.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include <memory> 44 #include <numeric> 45 46 using namespace llvm; 47 48 namespace opts { 49 50 extern cl::opt<bool> PrintAll; 51 extern cl::opt<bool> PrintDynoStats; 52 extern cl::opt<bool> DumpDotAll; 53 extern cl::opt<std::string> AsmDump; 54 extern cl::opt<bolt::PLTCall::OptType> PLT; 55 56 static cl::opt<bool> 57 DynoStatsAll("dyno-stats-all", 58 cl::desc("print dyno stats after each stage"), 59 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 60 61 static cl::opt<bool> 62 EliminateUnreachable("eliminate-unreachable", 63 cl::desc("eliminate unreachable code"), cl::init(true), 64 cl::cat(BoltOptCategory)); 65 66 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), 67 cl::cat(BoltOptCategory)); 68 69 static cl::opt<bool> JTFootprintReductionFlag( 70 "jt-footprint-reduction", 71 cl::desc("make jump tables size smaller at the cost of using more " 72 "instructions at jump sites"), 73 cl::cat(BoltOptCategory)); 74 75 cl::opt<bool> 76 KeepNops("keep-nops", 77 cl::desc("keep no-op instructions. By default they are removed."), 78 cl::Hidden, cl::cat(BoltOptCategory)); 79 80 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 81 cl::ReallyHidden, cl::cat(BoltOptCategory)); 82 83 cl::opt<bool> 84 PrintAfterBranchFixup("print-after-branch-fixup", 85 cl::desc("print function after fixing local branches"), 86 cl::Hidden, cl::cat(BoltOptCategory)); 87 88 static cl::opt<bool> 89 PrintAfterLowering("print-after-lowering", 90 cl::desc("print function after instruction lowering"), 91 cl::Hidden, cl::cat(BoltOptCategory)); 92 93 cl::opt<bool> 94 PrintFinalized("print-finalized", 95 cl::desc("print function after CFG is finalized"), 96 cl::Hidden, cl::cat(BoltOptCategory)); 97 98 static cl::opt<bool> 99 PrintFOP("print-fop", 100 cl::desc("print functions after frame optimizer pass"), cl::Hidden, 101 cl::cat(BoltOptCategory)); 102 103 static cl::opt<bool> 104 PrintICF("print-icf", cl::desc("print functions after ICF optimization"), 105 cl::Hidden, cl::cat(BoltOptCategory)); 106 107 static cl::opt<bool> 108 PrintICP("print-icp", 109 cl::desc("print functions after indirect call promotion"), 110 cl::Hidden, cl::cat(BoltOptCategory)); 111 112 static cl::opt<bool> 113 PrintInline("print-inline", 114 cl::desc("print functions after inlining optimization"), 115 cl::Hidden, cl::cat(BoltOptCategory)); 116 117 static cl::opt<bool> PrintJTFootprintReduction( 118 "print-after-jt-footprint-reduction", 119 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden, 120 cl::cat(BoltOptCategory)); 121 122 static cl::opt<bool> 123 PrintLongJmp("print-longjmp", 124 cl::desc("print functions after longjmp pass"), cl::Hidden, 125 cl::cat(BoltOptCategory)); 126 127 cl::opt<bool> 128 PrintNormalized("print-normalized", 129 cl::desc("print functions after CFG is normalized"), 130 cl::Hidden, cl::cat(BoltCategory)); 131 132 static cl::opt<bool> PrintOptimizeBodyless( 133 "print-optimize-bodyless", 134 cl::desc("print functions after bodyless optimization"), cl::Hidden, 135 cl::cat(BoltOptCategory)); 136 137 static cl::opt<bool> 138 PrintPeepholes("print-peepholes", 139 cl::desc("print functions after peephole optimization"), 140 cl::Hidden, cl::cat(BoltOptCategory)); 141 142 static cl::opt<bool> 143 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), 144 cl::Hidden, cl::cat(BoltOptCategory)); 145 146 static cl::opt<bool> 147 PrintProfileStats("print-profile-stats", 148 cl::desc("print profile quality/bias analysis"), 149 cl::cat(BoltCategory)); 150 151 static cl::opt<bool> 152 PrintRegReAssign("print-regreassign", 153 cl::desc("print functions after regreassign pass"), 154 cl::Hidden, cl::cat(BoltOptCategory)); 155 156 cl::opt<bool> 157 PrintReordered("print-reordered", 158 cl::desc("print functions after layout optimization"), 159 cl::Hidden, cl::cat(BoltOptCategory)); 160 161 static cl::opt<bool> 162 PrintReorderedFunctions("print-reordered-functions", 163 cl::desc("print functions after clustering"), 164 cl::Hidden, cl::cat(BoltOptCategory)); 165 166 static cl::opt<bool> PrintRetpolineInsertion( 167 "print-retpoline-insertion", 168 cl::desc("print functions after retpoline insertion pass"), cl::Hidden, 169 cl::cat(BoltCategory)); 170 171 static cl::opt<bool> PrintSCTC( 172 "print-sctc", 173 cl::desc("print functions after conditional tail call simplification"), 174 cl::Hidden, cl::cat(BoltOptCategory)); 175 176 static cl::opt<bool> PrintSimplifyROLoads( 177 "print-simplify-rodata-loads", 178 cl::desc("print functions after simplification of RO data loads"), 179 cl::Hidden, cl::cat(BoltOptCategory)); 180 181 static cl::opt<bool> 182 PrintSplit("print-split", cl::desc("print functions after code splitting"), 183 cl::Hidden, cl::cat(BoltOptCategory)); 184 185 static cl::opt<bool> 186 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 187 cl::Hidden, cl::cat(BoltOptCategory)); 188 189 static cl::opt<bool> 190 PrintFixRelaxations("print-fix-relaxations", 191 cl::desc("print functions after fix relaxations pass"), 192 cl::Hidden, cl::cat(BoltOptCategory)); 193 194 static cl::opt<bool> 195 PrintFixRISCVCalls("print-fix-riscv-calls", 196 cl::desc("print functions after fix RISCV calls pass"), 197 cl::Hidden, cl::cat(BoltOptCategory)); 198 199 static cl::opt<bool> PrintVeneerElimination( 200 "print-veneer-elimination", 201 cl::desc("print functions after veneer elimination pass"), cl::Hidden, 202 cl::cat(BoltOptCategory)); 203 204 static cl::opt<bool> 205 PrintUCE("print-uce", 206 cl::desc("print functions after unreachable code elimination"), 207 cl::Hidden, cl::cat(BoltOptCategory)); 208 209 static cl::opt<bool> RegReAssign( 210 "reg-reassign", 211 cl::desc( 212 "reassign registers so as to avoid using REX prefixes in hot code"), 213 cl::cat(BoltOptCategory)); 214 215 static cl::opt<bool> SimplifyConditionalTailCalls( 216 "simplify-conditional-tail-calls", 217 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 218 cl::init(true), cl::cat(BoltOptCategory)); 219 220 static cl::opt<bool> SimplifyRODataLoads( 221 "simplify-rodata-loads", 222 cl::desc("simplify loads from read-only sections by replacing the memory " 223 "operand with the constant found in the corresponding section"), 224 cl::cat(BoltOptCategory)); 225 226 static cl::list<std::string> 227 SpecializeMemcpy1("memcpy1-spec", 228 cl::desc("list of functions with call sites for which to specialize memcpy() " 229 "for size 1"), 230 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 231 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 232 233 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 234 cl::cat(BoltOptCategory)); 235 236 static cl::opt<bool> StringOps( 237 "inline-memcpy", 238 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 239 cl::cat(BoltOptCategory)); 240 241 static cl::opt<bool> StripRepRet( 242 "strip-rep-ret", 243 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 244 cl::init(true), cl::cat(BoltOptCategory)); 245 246 static cl::opt<bool> VerifyCFG("verify-cfg", 247 cl::desc("verify the CFG after every pass"), 248 cl::Hidden, cl::cat(BoltOptCategory)); 249 250 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", 251 cl::desc("reorder three way branches"), 252 cl::ReallyHidden, 253 cl::cat(BoltOptCategory)); 254 255 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 256 cl::desc("fold jcc+mov into cmov"), 257 cl::ReallyHidden, 258 cl::cat(BoltOptCategory)); 259 260 } // namespace opts 261 262 namespace llvm { 263 namespace bolt { 264 265 using namespace opts; 266 267 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 268 const char BinaryFunctionPassManager::TimerGroupDesc[] = 269 "Binary Function Pass Manager"; 270 271 Error BinaryFunctionPassManager::runPasses() { 272 auto &BFs = BC.getBinaryFunctions(); 273 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 274 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 275 &OptPassPair = Passes[PassIdx]; 276 if (!OptPassPair.first) 277 continue; 278 279 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 280 std::string PassIdName = 281 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 282 283 if (opts::Verbosity > 0) 284 BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 285 286 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 287 TimerGroupDesc, TimeOpts); 288 289 Error E = Error::success(); 290 callWithDynoStats( 291 BC.outs(), 292 [this, &E, &Pass] { 293 E = joinErrors(std::move(E), Pass->runOnFunctions(BC)); 294 }, 295 BFs, Pass->getName(), opts::DynoStatsAll, BC.isAArch64()); 296 if (E) 297 return Error(std::move(E)); 298 299 if (opts::VerifyCFG && 300 !std::accumulate( 301 BFs.begin(), BFs.end(), true, 302 [](const bool Valid, 303 const std::pair<const uint64_t, BinaryFunction> &It) { 304 return Valid && It.second.validateCFG(); 305 })) { 306 return createFatalBOLTError( 307 Twine("BOLT-ERROR: Invalid CFG detected after pass ") + 308 Twine(Pass->getName()) + Twine("\n")); 309 } 310 311 if (opts::Verbosity > 0) 312 BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 313 314 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 315 continue; 316 317 const std::string Message = std::string("after ") + Pass->getName(); 318 319 for (auto &It : BFs) { 320 BinaryFunction &Function = It.second; 321 322 if (!Pass->shouldPrint(Function)) 323 continue; 324 325 Function.print(BC.outs(), Message); 326 327 if (opts::DumpDotAll) 328 Function.dumpGraphForPass(PassIdName); 329 } 330 } 331 return Error::success(); 332 } 333 334 Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 335 BinaryFunctionPassManager Manager(BC); 336 337 const DynoStats InitialDynoStats = 338 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 339 340 Manager.registerPass(std::make_unique<AsmDumpPass>(), 341 opts::AsmDump.getNumOccurrences()); 342 343 if (BC.isAArch64()) { 344 Manager.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations)); 345 346 Manager.registerPass( 347 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 348 } 349 350 if (BC.isRISCV()) { 351 Manager.registerPass( 352 std::make_unique<FixRISCVCallsPass>(PrintFixRISCVCalls)); 353 } 354 355 // Here we manage dependencies/order manually, since passes are run in the 356 // order they're registered. 357 358 // Run this pass first to use stats for the original functions. 359 Manager.registerPass(std::make_unique<PrintProgramStats>()); 360 361 if (opts::PrintProfileStats) 362 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 363 364 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 365 366 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint)); 367 368 if (opts::Instrument) 369 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 370 else if (opts::Hugify) 371 Manager.registerPass(std::make_unique<HugePage>(NeverPrint)); 372 373 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 374 375 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint), 376 !opts::KeepNops); 377 378 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 379 380 if (BC.isX86()) 381 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 382 opts::StripRepRet); 383 384 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 385 opts::ICF); 386 387 Manager.registerPass( 388 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 389 !opts::SpecializeMemcpy1.empty()); 390 391 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 392 opts::StringOps); 393 394 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 395 396 Manager.registerPass( 397 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 398 opts::JTFootprintReductionFlag); 399 400 Manager.registerPass( 401 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 402 opts::SimplifyRODataLoads); 403 404 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 405 opts::RegReAssign); 406 407 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 408 409 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 410 opts::ICF); 411 412 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 413 414 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 415 opts::ThreeWayBranchFlag); 416 417 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 418 419 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 420 opts::EliminateUnreachable); 421 422 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 423 424 Manager.registerPass(std::make_unique<LoopInversionPass>()); 425 426 Manager.registerPass(std::make_unique<TailDuplication>()); 427 428 Manager.registerPass(std::make_unique<CMOVConversion>(), 429 opts::CMOVConversionFlag); 430 431 // This pass syncs local branches with CFG. If any of the following 432 // passes breaks the sync - they either need to re-run the pass or 433 // fix branches consistency internally. 434 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 435 436 // This pass should come close to last since it uses the estimated hot 437 // size of a function to determine the order. It should definitely 438 // also happen after any changes to the call graph are made, e.g. inlining. 439 Manager.registerPass( 440 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 441 442 // This is the second run of the SplitFunctions pass required by certain 443 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again 444 // after ReorderFunctions allows the finalized function order to be utilized 445 // to make more sophisticated splitting decisions, like hot-warm-cold 446 // splitting. 447 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 448 449 // Print final dyno stats right while CFG and instruction analysis are intact. 450 Manager.registerPass( 451 std::make_unique<DynoStatsPrintPass>( 452 InitialDynoStats, "after all optimizations before SCTC and FOP"), 453 opts::PrintDynoStats || opts::DynoStatsAll); 454 455 // Add the StokeInfo pass, which extract functions for stoke optimization and 456 // get the liveness information for them 457 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 458 459 // This pass introduces conditional jumps into external functions. 460 // Between extending CFG to support this and isolating this pass we chose 461 // the latter. Thus this pass will do double jump removal and unreachable 462 // code elimination if necessary and won't rely on peepholes/UCE for these 463 // optimizations. 464 // More generally this pass should be the last optimization pass that 465 // modifies branches/control flow. This pass is run after function 466 // reordering so that it can tell whether calls are forward/backward 467 // accurately. 468 Manager.registerPass( 469 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 470 opts::SimplifyConditionalTailCalls); 471 472 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 473 474 Manager.registerPass(std::make_unique<AlignerPass>()); 475 476 // Perform reordering on data contained in one or more sections using 477 // memory profiling data. 478 Manager.registerPass(std::make_unique<ReorderData>()); 479 480 if (BC.isAArch64()) { 481 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 482 483 // Tighten branches according to offset differences between branch and 484 // targets. No extra instructions after this pass, otherwise we may have 485 // relocations out of range and crash during linking. 486 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 487 } 488 489 // This pass should always run last.* 490 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 491 492 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 493 // FrameOptimizer move values around and needs to update CFIs. To do this, it 494 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 495 // placed according to the final layout. 496 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 497 498 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 499 500 Manager.registerPass( 501 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 502 503 // Assign each function an output section. 504 Manager.registerPass(std::make_unique<AssignSections>()); 505 506 // Patch original function entries 507 if (BC.HasRelocations) 508 Manager.registerPass(std::make_unique<PatchEntries>()); 509 510 // This pass turns tail calls into jumps which makes them invisible to 511 // function reordering. It's unsafe to use any CFG or instruction analysis 512 // after this point. 513 Manager.registerPass( 514 std::make_unique<InstructionLowering>(PrintAfterLowering)); 515 516 // In non-relocation mode, mark functions that do not fit into their original 517 // space as non-simple if we have to (e.g. for correct debug info update). 518 // NOTE: this pass depends on finalized code. 519 if (!BC.HasRelocations) 520 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 521 522 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 523 524 // Check for dirty state of MCSymbols caused by running calculateEmittedSize 525 // in parallel and restore them 526 Manager.registerPass(std::make_unique<CleanMCState>(NeverPrint)); 527 528 return Manager.runPasses(); 529 } 530 531 } // namespace bolt 532 } // namespace llvm 533