1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FrameOptimizer.h" 16 #include "bolt/Passes/Hugify.h" 17 #include "bolt/Passes/IdenticalCodeFolding.h" 18 #include "bolt/Passes/IndirectCallPromotion.h" 19 #include "bolt/Passes/Inliner.h" 20 #include "bolt/Passes/Instrumentation.h" 21 #include "bolt/Passes/JTFootprintReduction.h" 22 #include "bolt/Passes/LongJmp.h" 23 #include "bolt/Passes/LoopInversionPass.h" 24 #include "bolt/Passes/PLTCall.h" 25 #include "bolt/Passes/PatchEntries.h" 26 #include "bolt/Passes/RegReAssign.h" 27 #include "bolt/Passes/ReorderData.h" 28 #include "bolt/Passes/ReorderFunctions.h" 29 #include "bolt/Passes/RetpolineInsertion.h" 30 #include "bolt/Passes/SplitFunctions.h" 31 #include "bolt/Passes/StokeInfo.h" 32 #include "bolt/Passes/TailDuplication.h" 33 #include "bolt/Passes/ThreeWayBranch.h" 34 #include "bolt/Passes/ValidateInternalCalls.h" 35 #include "bolt/Passes/ValidateMemRefs.h" 36 #include "bolt/Passes/VeneerElimination.h" 37 #include "bolt/Utils/CommandLineOpts.h" 38 #include "llvm/Support/FormatVariadic.h" 39 #include "llvm/Support/Timer.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <memory> 42 #include <numeric> 43 44 using namespace llvm; 45 46 namespace opts { 47 48 extern cl::opt<bool> PrintAll; 49 extern cl::opt<bool> PrintDynoStats; 50 extern cl::opt<bool> DumpDotAll; 51 extern cl::opt<std::string> AsmDump; 52 extern cl::opt<bolt::PLTCall::OptType> PLT; 53 54 static cl::opt<bool> 55 DynoStatsAll("dyno-stats-all", 56 cl::desc("print dyno stats after each stage"), 57 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 58 59 static cl::opt<bool> 60 EliminateUnreachable("eliminate-unreachable", 61 cl::desc("eliminate unreachable code"), cl::init(true), 62 cl::cat(BoltOptCategory)); 63 64 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), 65 cl::cat(BoltOptCategory)); 66 67 static cl::opt<bool> JTFootprintReductionFlag( 68 "jt-footprint-reduction", 69 cl::desc("make jump tables size smaller at the cost of using more " 70 "instructions at jump sites"), 71 cl::cat(BoltOptCategory)); 72 73 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 74 cl::ReallyHidden, cl::cat(BoltOptCategory)); 75 76 cl::opt<bool> 77 PrintAfterBranchFixup("print-after-branch-fixup", 78 cl::desc("print function after fixing local branches"), 79 cl::Hidden, cl::cat(BoltOptCategory)); 80 81 static cl::opt<bool> 82 PrintAfterLowering("print-after-lowering", 83 cl::desc("print function after instruction lowering"), 84 cl::Hidden, cl::cat(BoltOptCategory)); 85 86 cl::opt<bool> 87 PrintFinalized("print-finalized", 88 cl::desc("print function after CFG is finalized"), 89 cl::Hidden, cl::cat(BoltOptCategory)); 90 91 static cl::opt<bool> 92 PrintFOP("print-fop", 93 cl::desc("print functions after frame optimizer pass"), cl::Hidden, 94 cl::cat(BoltOptCategory)); 95 96 static cl::opt<bool> 97 PrintICF("print-icf", cl::desc("print functions after ICF optimization"), 98 cl::Hidden, cl::cat(BoltOptCategory)); 99 100 static cl::opt<bool> 101 PrintICP("print-icp", 102 cl::desc("print functions after indirect call promotion"), 103 cl::Hidden, cl::cat(BoltOptCategory)); 104 105 static cl::opt<bool> 106 PrintInline("print-inline", 107 cl::desc("print functions after inlining optimization"), 108 cl::Hidden, cl::cat(BoltOptCategory)); 109 110 static cl::opt<bool> PrintJTFootprintReduction( 111 "print-after-jt-footprint-reduction", 112 cl::desc("print function after jt-footprint-reduction pass"), 113 cl::cat(BoltOptCategory)); 114 115 static cl::opt<bool> 116 PrintLongJmp("print-longjmp", 117 cl::desc("print functions after longjmp pass"), cl::Hidden, 118 cl::cat(BoltOptCategory)); 119 120 cl::opt<bool> 121 PrintNormalized("print-normalized", 122 cl::desc("print functions after CFG is normalized"), 123 cl::Hidden, cl::cat(BoltCategory)); 124 125 static cl::opt<bool> PrintOptimizeBodyless( 126 "print-optimize-bodyless", 127 cl::desc("print functions after bodyless optimization"), cl::Hidden, 128 cl::cat(BoltOptCategory)); 129 130 static cl::opt<bool> 131 PrintPeepholes("print-peepholes", 132 cl::desc("print functions after peephole optimization"), 133 cl::Hidden, cl::cat(BoltOptCategory)); 134 135 static cl::opt<bool> 136 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), 137 cl::Hidden, cl::cat(BoltOptCategory)); 138 139 static cl::opt<bool> 140 PrintProfileStats("print-profile-stats", 141 cl::desc("print profile quality/bias analysis"), 142 cl::cat(BoltCategory)); 143 144 static cl::opt<bool> 145 PrintRegReAssign("print-regreassign", 146 cl::desc("print functions after regreassign pass"), 147 cl::Hidden, cl::cat(BoltOptCategory)); 148 149 cl::opt<bool> 150 PrintReordered("print-reordered", 151 cl::desc("print functions after layout optimization"), 152 cl::Hidden, cl::cat(BoltOptCategory)); 153 154 static cl::opt<bool> 155 PrintReorderedFunctions("print-reordered-functions", 156 cl::desc("print functions after clustering"), 157 cl::Hidden, cl::cat(BoltOptCategory)); 158 159 static cl::opt<bool> PrintRetpolineInsertion( 160 "print-retpoline-insertion", 161 cl::desc("print functions after retpoline insertion pass"), 162 cl::cat(BoltCategory)); 163 164 static cl::opt<bool> PrintSCTC( 165 "print-sctc", 166 cl::desc("print functions after conditional tail call simplification"), 167 cl::Hidden, cl::cat(BoltOptCategory)); 168 169 static cl::opt<bool> PrintSimplifyROLoads( 170 "print-simplify-rodata-loads", 171 cl::desc("print functions after simplification of RO data loads"), 172 cl::Hidden, cl::cat(BoltOptCategory)); 173 174 static cl::opt<bool> 175 PrintSplit("print-split", cl::desc("print functions after code splitting"), 176 cl::Hidden, cl::cat(BoltOptCategory)); 177 178 static cl::opt<bool> 179 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 180 cl::cat(BoltOptCategory)); 181 182 static cl::opt<bool> PrintVeneerElimination( 183 "print-veneer-elimination", 184 cl::desc("print functions after veneer elimination pass"), 185 cl::cat(BoltOptCategory)); 186 187 static cl::opt<bool> 188 PrintUCE("print-uce", 189 cl::desc("print functions after unreachable code elimination"), 190 cl::Hidden, cl::cat(BoltOptCategory)); 191 192 static cl::opt<bool> RegReAssign( 193 "reg-reassign", 194 cl::desc( 195 "reassign registers so as to avoid using REX prefixes in hot code"), 196 cl::cat(BoltOptCategory)); 197 198 static cl::opt<bool> SimplifyConditionalTailCalls( 199 "simplify-conditional-tail-calls", 200 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 201 cl::init(true), cl::cat(BoltOptCategory)); 202 203 static cl::opt<bool> SimplifyRODataLoads( 204 "simplify-rodata-loads", 205 cl::desc("simplify loads from read-only sections by replacing the memory " 206 "operand with the constant found in the corresponding section"), 207 cl::cat(BoltOptCategory)); 208 209 static cl::list<std::string> 210 SpecializeMemcpy1("memcpy1-spec", 211 cl::desc("list of functions with call sites for which to specialize memcpy() " 212 "for size 1"), 213 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 214 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 215 216 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 217 cl::cat(BoltOptCategory)); 218 219 static cl::opt<bool> StringOps( 220 "inline-memcpy", 221 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 222 cl::cat(BoltOptCategory)); 223 224 static cl::opt<bool> StripRepRet( 225 "strip-rep-ret", 226 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 227 cl::init(true), cl::cat(BoltOptCategory)); 228 229 static cl::opt<bool> VerifyCFG("verify-cfg", 230 cl::desc("verify the CFG after every pass"), 231 cl::Hidden, cl::cat(BoltOptCategory)); 232 233 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", 234 cl::desc("reorder three way branches"), 235 cl::ReallyHidden, 236 cl::cat(BoltOptCategory)); 237 238 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 239 cl::desc("fold jcc+mov into cmov"), 240 cl::ReallyHidden, 241 cl::cat(BoltOptCategory)); 242 243 } // namespace opts 244 245 namespace llvm { 246 namespace bolt { 247 248 using namespace opts; 249 250 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 251 const char BinaryFunctionPassManager::TimerGroupDesc[] = 252 "Binary Function Pass Manager"; 253 254 void BinaryFunctionPassManager::runPasses() { 255 auto &BFs = BC.getBinaryFunctions(); 256 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 257 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 258 &OptPassPair = Passes[PassIdx]; 259 if (!OptPassPair.first) 260 continue; 261 262 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 263 std::string PassIdName = 264 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 265 266 if (opts::Verbosity > 0) 267 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 268 269 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 270 TimerGroupDesc, TimeOpts); 271 272 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs, 273 Pass->getName(), opts::DynoStatsAll, BC.isAArch64()); 274 275 if (opts::VerifyCFG && 276 !std::accumulate( 277 BFs.begin(), BFs.end(), true, 278 [](const bool Valid, 279 const std::pair<const uint64_t, BinaryFunction> &It) { 280 return Valid && It.second.validateCFG(); 281 })) { 282 errs() << "BOLT-ERROR: Invalid CFG detected after pass " 283 << Pass->getName() << "\n"; 284 exit(1); 285 } 286 287 if (opts::Verbosity > 0) 288 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 289 290 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 291 continue; 292 293 const std::string Message = std::string("after ") + Pass->getName(); 294 295 for (auto &It : BFs) { 296 BinaryFunction &Function = It.second; 297 298 if (!Pass->shouldPrint(Function)) 299 continue; 300 301 Function.print(outs(), Message); 302 303 if (opts::DumpDotAll) 304 Function.dumpGraphForPass(PassIdName); 305 } 306 } 307 } 308 309 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 310 BinaryFunctionPassManager Manager(BC); 311 312 const DynoStats InitialDynoStats = 313 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 314 315 Manager.registerPass(std::make_unique<AsmDumpPass>(), 316 opts::AsmDump.getNumOccurrences()); 317 318 if (BC.isAArch64()) 319 Manager.registerPass( 320 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 321 322 // Here we manage dependencies/order manually, since passes are run in the 323 // order they're registered. 324 325 // Run this pass first to use stats for the original functions. 326 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); 327 328 if (opts::PrintProfileStats) 329 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 330 331 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 332 333 Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint)); 334 335 if (opts::Instrument) 336 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 337 else if (opts::Hugify) 338 Manager.registerPass(std::make_unique<HugePage>(NeverPrint)); 339 340 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 341 342 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint)); 343 344 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 345 346 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 347 opts::StripRepRet); 348 349 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 350 opts::ICF); 351 352 Manager.registerPass( 353 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 354 !opts::SpecializeMemcpy1.empty()); 355 356 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 357 opts::StringOps); 358 359 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 360 361 Manager.registerPass( 362 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 363 opts::JTFootprintReductionFlag); 364 365 Manager.registerPass( 366 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 367 opts::SimplifyRODataLoads); 368 369 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 370 opts::RegReAssign); 371 372 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 373 374 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 375 opts::ICF); 376 377 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 378 379 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 380 opts::ThreeWayBranchFlag); 381 382 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 383 384 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 385 opts::EliminateUnreachable); 386 387 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 388 389 Manager.registerPass(std::make_unique<LoopInversionPass>()); 390 391 Manager.registerPass(std::make_unique<TailDuplication>()); 392 393 Manager.registerPass(std::make_unique<CMOVConversion>(), 394 opts::CMOVConversionFlag); 395 396 // This pass syncs local branches with CFG. If any of the following 397 // passes breaks the sync - they either need to re-run the pass or 398 // fix branches consistency internally. 399 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 400 401 // This pass should come close to last since it uses the estimated hot 402 // size of a function to determine the order. It should definitely 403 // also happen after any changes to the call graph are made, e.g. inlining. 404 Manager.registerPass( 405 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 406 407 // Print final dyno stats right while CFG and instruction analysis are intact. 408 Manager.registerPass( 409 std::make_unique<DynoStatsPrintPass>( 410 InitialDynoStats, "after all optimizations before SCTC and FOP"), 411 opts::PrintDynoStats || opts::DynoStatsAll); 412 413 // Add the StokeInfo pass, which extract functions for stoke optimization and 414 // get the liveness information for them 415 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 416 417 // This pass introduces conditional jumps into external functions. 418 // Between extending CFG to support this and isolating this pass we chose 419 // the latter. Thus this pass will do double jump removal and unreachable 420 // code elimination if necessary and won't rely on peepholes/UCE for these 421 // optimizations. 422 // More generally this pass should be the last optimization pass that 423 // modifies branches/control flow. This pass is run after function 424 // reordering so that it can tell whether calls are forward/backward 425 // accurately. 426 Manager.registerPass( 427 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 428 opts::SimplifyConditionalTailCalls); 429 430 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 431 432 Manager.registerPass(std::make_unique<AlignerPass>()); 433 434 // Perform reordering on data contained in one or more sections using 435 // memory profiling data. 436 Manager.registerPass(std::make_unique<ReorderData>()); 437 438 if (BC.isAArch64()) { 439 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 440 441 // Tighten branches according to offset differences between branch and 442 // targets. No extra instructions after this pass, otherwise we may have 443 // relocations out of range and crash during linking. 444 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 445 } 446 447 // This pass should always run last.* 448 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 449 450 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 451 // FrameOptimizer move values around and needs to update CFIs. To do this, it 452 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 453 // placed according to the final layout. 454 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 455 456 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 457 458 Manager.registerPass( 459 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 460 461 // Assign each function an output section. 462 Manager.registerPass(std::make_unique<AssignSections>()); 463 464 // Patch original function entries 465 if (BC.HasRelocations) 466 Manager.registerPass(std::make_unique<PatchEntries>()); 467 468 // This pass turns tail calls into jumps which makes them invisible to 469 // function reordering. It's unsafe to use any CFG or instruction analysis 470 // after this point. 471 Manager.registerPass( 472 std::make_unique<InstructionLowering>(PrintAfterLowering)); 473 474 // In non-relocation mode, mark functions that do not fit into their original 475 // space as non-simple if we have to (e.g. for correct debug info update). 476 // NOTE: this pass depends on finalized code. 477 if (!BC.HasRelocations) 478 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 479 480 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 481 482 Manager.runPasses(); 483 } 484 485 } // namespace bolt 486 } // namespace llvm 487