1 //===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "bolt/Rewrite/BinaryPassManager.h" 10 #include "bolt/Passes/ADRRelaxationPass.h" 11 #include "bolt/Passes/Aligner.h" 12 #include "bolt/Passes/AllocCombiner.h" 13 #include "bolt/Passes/AsmDump.h" 14 #include "bolt/Passes/CMOVConversion.h" 15 #include "bolt/Passes/FrameOptimizer.h" 16 #include "bolt/Passes/IdenticalCodeFolding.h" 17 #include "bolt/Passes/IndirectCallPromotion.h" 18 #include "bolt/Passes/Inliner.h" 19 #include "bolt/Passes/Instrumentation.h" 20 #include "bolt/Passes/JTFootprintReduction.h" 21 #include "bolt/Passes/LongJmp.h" 22 #include "bolt/Passes/LoopInversionPass.h" 23 #include "bolt/Passes/PLTCall.h" 24 #include "bolt/Passes/PatchEntries.h" 25 #include "bolt/Passes/RegReAssign.h" 26 #include "bolt/Passes/ReorderData.h" 27 #include "bolt/Passes/ReorderFunctions.h" 28 #include "bolt/Passes/RetpolineInsertion.h" 29 #include "bolt/Passes/SplitFunctions.h" 30 #include "bolt/Passes/StokeInfo.h" 31 #include "bolt/Passes/TailDuplication.h" 32 #include "bolt/Passes/ThreeWayBranch.h" 33 #include "bolt/Passes/ValidateInternalCalls.h" 34 #include "bolt/Passes/VeneerElimination.h" 35 #include "bolt/Utils/CommandLineOpts.h" 36 #include "llvm/Support/FormatVariadic.h" 37 #include "llvm/Support/Timer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <memory> 40 #include <numeric> 41 42 using namespace llvm; 43 44 namespace opts { 45 46 extern cl::opt<bool> PrintAll; 47 extern cl::opt<bool> PrintDynoStats; 48 extern cl::opt<bool> DumpDotAll; 49 extern cl::opt<std::string> AsmDump; 50 extern cl::opt<bolt::PLTCall::OptType> PLT; 51 52 static cl::opt<bool> 53 DynoStatsAll("dyno-stats-all", 54 cl::desc("print dyno stats after each stage"), 55 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); 56 57 static cl::opt<bool> 58 EliminateUnreachable("eliminate-unreachable", 59 cl::desc("eliminate unreachable code"), cl::init(true), 60 cl::cat(BoltOptCategory)); 61 62 cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), 63 cl::cat(BoltOptCategory)); 64 65 static cl::opt<bool> JTFootprintReductionFlag( 66 "jt-footprint-reduction", 67 cl::desc("make jump tables size smaller at the cost of using more " 68 "instructions at jump sites"), 69 cl::cat(BoltOptCategory)); 70 71 cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), 72 cl::ReallyHidden, cl::cat(BoltOptCategory)); 73 74 cl::opt<bool> 75 PrintAfterBranchFixup("print-after-branch-fixup", 76 cl::desc("print function after fixing local branches"), 77 cl::Hidden, cl::cat(BoltOptCategory)); 78 79 static cl::opt<bool> 80 PrintAfterLowering("print-after-lowering", 81 cl::desc("print function after instruction lowering"), 82 cl::Hidden, cl::cat(BoltOptCategory)); 83 84 cl::opt<bool> 85 PrintFinalized("print-finalized", 86 cl::desc("print function after CFG is finalized"), 87 cl::Hidden, cl::cat(BoltOptCategory)); 88 89 static cl::opt<bool> 90 PrintFOP("print-fop", 91 cl::desc("print functions after frame optimizer pass"), cl::Hidden, 92 cl::cat(BoltOptCategory)); 93 94 static cl::opt<bool> 95 PrintICF("print-icf", cl::desc("print functions after ICF optimization"), 96 cl::Hidden, cl::cat(BoltOptCategory)); 97 98 static cl::opt<bool> 99 PrintICP("print-icp", 100 cl::desc("print functions after indirect call promotion"), 101 cl::Hidden, cl::cat(BoltOptCategory)); 102 103 static cl::opt<bool> 104 PrintInline("print-inline", 105 cl::desc("print functions after inlining optimization"), 106 cl::Hidden, cl::cat(BoltOptCategory)); 107 108 static cl::opt<bool> PrintJTFootprintReduction( 109 "print-after-jt-footprint-reduction", 110 cl::desc("print function after jt-footprint-reduction pass"), 111 cl::cat(BoltOptCategory)); 112 113 static cl::opt<bool> 114 PrintLongJmp("print-longjmp", 115 cl::desc("print functions after longjmp pass"), cl::Hidden, 116 cl::cat(BoltOptCategory)); 117 118 cl::opt<bool> 119 PrintNormalized("print-normalized", 120 cl::desc("print functions after CFG is normalized"), 121 cl::Hidden, cl::cat(BoltCategory)); 122 123 static cl::opt<bool> PrintOptimizeBodyless( 124 "print-optimize-bodyless", 125 cl::desc("print functions after bodyless optimization"), cl::Hidden, 126 cl::cat(BoltOptCategory)); 127 128 static cl::opt<bool> 129 PrintPeepholes("print-peepholes", 130 cl::desc("print functions after peephole optimization"), 131 cl::Hidden, cl::cat(BoltOptCategory)); 132 133 static cl::opt<bool> 134 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), 135 cl::Hidden, cl::cat(BoltOptCategory)); 136 137 static cl::opt<bool> 138 PrintProfileStats("print-profile-stats", 139 cl::desc("print profile quality/bias analysis"), 140 cl::cat(BoltCategory)); 141 142 static cl::opt<bool> 143 PrintRegReAssign("print-regreassign", 144 cl::desc("print functions after regreassign pass"), 145 cl::Hidden, cl::cat(BoltOptCategory)); 146 147 cl::opt<bool> 148 PrintReordered("print-reordered", 149 cl::desc("print functions after layout optimization"), 150 cl::Hidden, cl::cat(BoltOptCategory)); 151 152 static cl::opt<bool> 153 PrintReorderedFunctions("print-reordered-functions", 154 cl::desc("print functions after clustering"), 155 cl::Hidden, cl::cat(BoltOptCategory)); 156 157 static cl::opt<bool> PrintRetpolineInsertion( 158 "print-retpoline-insertion", 159 cl::desc("print functions after retpoline insertion pass"), 160 cl::cat(BoltCategory)); 161 162 static cl::opt<bool> PrintSCTC( 163 "print-sctc", 164 cl::desc("print functions after conditional tail call simplification"), 165 cl::Hidden, cl::cat(BoltOptCategory)); 166 167 static cl::opt<bool> PrintSimplifyROLoads( 168 "print-simplify-rodata-loads", 169 cl::desc("print functions after simplification of RO data loads"), 170 cl::Hidden, cl::cat(BoltOptCategory)); 171 172 static cl::opt<bool> 173 PrintSplit("print-split", cl::desc("print functions after code splitting"), 174 cl::Hidden, cl::cat(BoltOptCategory)); 175 176 static cl::opt<bool> 177 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), 178 cl::cat(BoltOptCategory)); 179 180 static cl::opt<bool> PrintVeneerElimination( 181 "print-veneer-elimination", 182 cl::desc("print functions after veneer elimination pass"), 183 cl::cat(BoltOptCategory)); 184 185 static cl::opt<bool> 186 PrintUCE("print-uce", 187 cl::desc("print functions after unreachable code elimination"), 188 cl::Hidden, cl::cat(BoltOptCategory)); 189 190 static cl::opt<bool> RegReAssign( 191 "reg-reassign", 192 cl::desc( 193 "reassign registers so as to avoid using REX prefixes in hot code"), 194 cl::cat(BoltOptCategory)); 195 196 static cl::opt<bool> SimplifyConditionalTailCalls( 197 "simplify-conditional-tail-calls", 198 cl::desc("simplify conditional tail calls by removing unnecessary jumps"), 199 cl::init(true), cl::cat(BoltOptCategory)); 200 201 static cl::opt<bool> SimplifyRODataLoads( 202 "simplify-rodata-loads", 203 cl::desc("simplify loads from read-only sections by replacing the memory " 204 "operand with the constant found in the corresponding section"), 205 cl::cat(BoltOptCategory)); 206 207 static cl::list<std::string> 208 SpecializeMemcpy1("memcpy1-spec", 209 cl::desc("list of functions with call sites for which to specialize memcpy() " 210 "for size 1"), 211 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), 212 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 213 214 static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), 215 cl::cat(BoltOptCategory)); 216 217 static cl::opt<bool> StringOps( 218 "inline-memcpy", 219 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), 220 cl::cat(BoltOptCategory)); 221 222 static cl::opt<bool> StripRepRet( 223 "strip-rep-ret", 224 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), 225 cl::init(true), cl::cat(BoltOptCategory)); 226 227 static cl::opt<bool> VerifyCFG("verify-cfg", 228 cl::desc("verify the CFG after every pass"), 229 cl::Hidden, cl::cat(BoltOptCategory)); 230 231 static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", 232 cl::desc("reorder three way branches"), 233 cl::ReallyHidden, 234 cl::cat(BoltOptCategory)); 235 236 static cl::opt<bool> CMOVConversionFlag("cmov-conversion", 237 cl::desc("fold jcc+mov into cmov"), 238 cl::ReallyHidden, 239 cl::cat(BoltOptCategory)); 240 241 } // namespace opts 242 243 namespace llvm { 244 namespace bolt { 245 246 using namespace opts; 247 248 const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; 249 const char BinaryFunctionPassManager::TimerGroupDesc[] = 250 "Binary Function Pass Manager"; 251 252 void BinaryFunctionPassManager::runPasses() { 253 auto &BFs = BC.getBinaryFunctions(); 254 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { 255 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> 256 &OptPassPair = Passes[PassIdx]; 257 if (!OptPassPair.first) 258 continue; 259 260 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; 261 std::string PassIdName = 262 formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); 263 264 if (opts::Verbosity > 0) 265 outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; 266 267 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, 268 TimerGroupDesc, TimeOpts); 269 270 callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs, 271 Pass->getName(), opts::DynoStatsAll, BC.isAArch64()); 272 273 if (opts::VerifyCFG && 274 !std::accumulate( 275 BFs.begin(), BFs.end(), true, 276 [](const bool Valid, 277 const std::pair<const uint64_t, BinaryFunction> &It) { 278 return Valid && It.second.validateCFG(); 279 })) { 280 errs() << "BOLT-ERROR: Invalid CFG detected after pass " 281 << Pass->getName() << "\n"; 282 exit(1); 283 } 284 285 if (opts::Verbosity > 0) 286 outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; 287 288 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) 289 continue; 290 291 const std::string Message = std::string("after ") + Pass->getName(); 292 293 for (auto &It : BFs) { 294 BinaryFunction &Function = It.second; 295 296 if (!Pass->shouldPrint(Function)) 297 continue; 298 299 Function.print(outs(), Message, true); 300 301 if (opts::DumpDotAll) 302 Function.dumpGraphForPass(PassIdName); 303 } 304 } 305 } 306 307 void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { 308 BinaryFunctionPassManager Manager(BC); 309 310 const DynoStats InitialDynoStats = 311 getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); 312 313 Manager.registerPass(std::make_unique<AsmDumpPass>(), 314 opts::AsmDump.getNumOccurrences()); 315 316 if (opts::Instrument) 317 Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); 318 319 // Here we manage dependencies/order manually, since passes are run in the 320 // order they're registered. 321 322 // Run this pass first to use stats for the original functions. 323 Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); 324 325 if (opts::PrintProfileStats) 326 Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); 327 328 Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); 329 330 Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); 331 332 Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint)); 333 334 Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); 335 336 Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), 337 opts::StripRepRet); 338 339 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 340 opts::ICF); 341 342 if (BC.isAArch64()) 343 Manager.registerPass( 344 std::make_unique<VeneerElimination>(PrintVeneerElimination)); 345 346 Manager.registerPass( 347 std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), 348 !opts::SpecializeMemcpy1.empty()); 349 350 Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), 351 opts::StringOps); 352 353 Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); 354 355 Manager.registerPass( 356 std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), 357 opts::JTFootprintReductionFlag); 358 359 Manager.registerPass( 360 std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), 361 opts::SimplifyRODataLoads); 362 363 Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), 364 opts::RegReAssign); 365 366 Manager.registerPass(std::make_unique<Inliner>(PrintInline)); 367 368 Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), 369 opts::ICF); 370 371 Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); 372 373 Manager.registerPass(std::make_unique<ThreeWayBranch>(), 374 opts::ThreeWayBranchFlag); 375 376 Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); 377 378 Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), 379 opts::EliminateUnreachable); 380 381 Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); 382 383 Manager.registerPass(std::make_unique<LoopInversionPass>()); 384 385 Manager.registerPass(std::make_unique<TailDuplication>()); 386 387 Manager.registerPass(std::make_unique<CMOVConversion>(), 388 opts::CMOVConversionFlag); 389 390 // This pass syncs local branches with CFG. If any of the following 391 // passes breaks the sync - they either need to re-run the pass or 392 // fix branches consistency internally. 393 Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); 394 395 // This pass should come close to last since it uses the estimated hot 396 // size of a function to determine the order. It should definitely 397 // also happen after any changes to the call graph are made, e.g. inlining. 398 Manager.registerPass( 399 std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); 400 401 // Print final dyno stats right while CFG and instruction analysis are intact. 402 Manager.registerPass( 403 std::make_unique<DynoStatsPrintPass>( 404 InitialDynoStats, "after all optimizations before SCTC and FOP"), 405 opts::PrintDynoStats || opts::DynoStatsAll); 406 407 // Add the StokeInfo pass, which extract functions for stoke optimization and 408 // get the liveness information for them 409 Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); 410 411 // This pass introduces conditional jumps into external functions. 412 // Between extending CFG to support this and isolating this pass we chose 413 // the latter. Thus this pass will do double jump removal and unreachable 414 // code elimination if necessary and won't rely on peepholes/UCE for these 415 // optimizations. 416 // More generally this pass should be the last optimization pass that 417 // modifies branches/control flow. This pass is run after function 418 // reordering so that it can tell whether calls are forward/backward 419 // accurately. 420 Manager.registerPass( 421 std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), 422 opts::SimplifyConditionalTailCalls); 423 424 Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); 425 426 Manager.registerPass(std::make_unique<AlignerPass>()); 427 428 // Perform reordering on data contained in one or more sections using 429 // memory profiling data. 430 Manager.registerPass(std::make_unique<ReorderData>()); 431 432 if (BC.isAArch64()) { 433 Manager.registerPass(std::make_unique<ADRRelaxationPass>()); 434 435 // Tighten branches according to offset differences between branch and 436 // targets. No extra instructions after this pass, otherwise we may have 437 // relocations out of range and crash during linking. 438 Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); 439 } 440 441 // This pass should always run last.* 442 Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); 443 444 // FrameOptimizer has an implicit dependency on FinalizeFunctions. 445 // FrameOptimizer move values around and needs to update CFIs. To do this, it 446 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly 447 // placed according to the final layout. 448 Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); 449 450 Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); 451 452 Manager.registerPass( 453 std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); 454 455 // Assign each function an output section. 456 Manager.registerPass(std::make_unique<AssignSections>()); 457 458 // Patch original function entries 459 if (BC.HasRelocations) 460 Manager.registerPass(std::make_unique<PatchEntries>()); 461 462 // This pass turns tail calls into jumps which makes them invisible to 463 // function reordering. It's unsafe to use any CFG or instruction analysis 464 // after this point. 465 Manager.registerPass( 466 std::make_unique<InstructionLowering>(PrintAfterLowering)); 467 468 // In non-relocation mode, mark functions that do not fit into their original 469 // space as non-simple if we have to (e.g. for correct debug info update). 470 // NOTE: this pass depends on finalized code. 471 if (!BC.HasRelocations) 472 Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); 473 474 Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); 475 476 Manager.runPasses(); 477 } 478 479 } // namespace bolt 480 } // namespace llvm 481