1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the IdenticalCodeFolding class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/IdenticalCodeFolding.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/ThreadPool.h" 17 #include "llvm/Support/Timer.h" 18 #include <atomic> 19 #include <map> 20 #include <set> 21 #include <unordered_map> 22 23 #define DEBUG_TYPE "bolt-icf" 24 25 using namespace llvm; 26 using namespace bolt; 27 28 namespace opts { 29 30 extern cl::OptionCategory BoltOptCategory; 31 32 static cl::opt<bool> UseDFS("icf-dfs", 33 cl::desc("use DFS ordering when using -icf option"), 34 cl::ReallyHidden, cl::cat(BoltOptCategory)); 35 36 static cl::opt<bool> 37 TimeICF("time-icf", 38 cl::desc("time icf steps"), 39 cl::ReallyHidden, 40 cl::ZeroOrMore, 41 cl::cat(BoltOptCategory)); 42 } // namespace opts 43 44 namespace { 45 using JumpTable = bolt::JumpTable; 46 47 /// Compare two jump tables in 2 functions. The function relies on consistent 48 /// ordering of basic blocks in both binary functions (e.g. DFS). 49 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB, 50 const BinaryFunction &FunctionA, 51 const BinaryFunction &FunctionB) { 52 if (JumpTableA.EntrySize != JumpTableB.EntrySize) 53 return false; 54 55 if (JumpTableA.Type != JumpTableB.Type) 56 return false; 57 58 if (JumpTableA.getSize() != JumpTableB.getSize()) 59 return false; 60 61 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { 62 const MCSymbol *LabelA = JumpTableA.Entries[Index]; 63 const MCSymbol *LabelB = JumpTableB.Entries[Index]; 64 65 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA); 66 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB); 67 68 if (!TargetA || !TargetB) { 69 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && 70 "no target basic block found"); 71 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && 72 "no target basic block found"); 73 74 if (TargetA != TargetB) 75 return false; 76 77 continue; 78 } 79 80 assert(TargetA && TargetB && "cannot locate target block(s)"); 81 82 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) 83 return false; 84 } 85 86 return true; 87 } 88 89 /// Helper function that compares an instruction of this function to the 90 /// given instruction of the given function. The functions should have 91 /// identical CFG. 92 template <class Compare> 93 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, 94 const MCInst &InstB, const BinaryBasicBlock &BBB, 95 Compare Comp) { 96 if (InstA.getOpcode() != InstB.getOpcode()) 97 return false; 98 99 const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); 100 101 // In this function we check for special conditions: 102 // 103 // * instructions with landing pads 104 // 105 // Most of the common cases should be handled by MCPlus::equals() 106 // that compares regular instruction operands. 107 // 108 // NB: there's no need to compare jump table indirect jump instructions 109 // separately as jump tables are handled by comparing corresponding 110 // symbols. 111 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA); 112 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB); 113 114 if (EHInfoA || EHInfoB) { 115 if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) 116 return false; 117 118 if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) 119 return false; 120 121 if (EHInfoA && EHInfoB) { 122 // Action indices should match. 123 if (EHInfoA->second != EHInfoB->second) 124 return false; 125 126 if (!EHInfoA->first != !EHInfoB->first) 127 return false; 128 129 if (EHInfoA->first && EHInfoB->first) { 130 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first); 131 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first); 132 assert(LPA && LPB && "cannot locate landing pad(s)"); 133 134 if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) 135 return false; 136 } 137 } 138 } 139 140 return BC.MIB->equals(InstA, InstB, Comp); 141 } 142 143 /// Returns true if this function has identical code and CFG with 144 /// the given function \p BF. 145 /// 146 /// If \p CongruentSymbols is set to true, then symbolic operands that reference 147 /// potentially identical but different functions are ignored during the 148 /// comparison. 149 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, 150 bool CongruentSymbols) { 151 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG"); 152 153 // Compare the two functions, one basic block at a time. 154 // Currently we require two identical basic blocks to have identical 155 // instruction sequences and the same index in their corresponding 156 // functions. The latter is important for CFG equality. 157 158 if (A.layout_size() != B.layout_size()) 159 return false; 160 161 // Comparing multi-entry functions could be non-trivial. 162 if (A.isMultiEntry() || B.isMultiEntry()) 163 return false; 164 165 // Process both functions in either DFS or existing order. 166 const BinaryFunction::BasicBlockOrderType &OrderA = 167 opts::UseDFS ? A.dfs() : A.getLayout(); 168 const BinaryFunction::BasicBlockOrderType &OrderB = 169 opts::UseDFS ? B.dfs() : B.getLayout(); 170 171 const BinaryContext &BC = A.getBinaryContext(); 172 173 auto BBI = OrderB.begin(); 174 for (const BinaryBasicBlock *BB : OrderA) { 175 const BinaryBasicBlock *OtherBB = *BBI; 176 177 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) 178 return false; 179 180 // Compare successor basic blocks. 181 // NOTE: the comparison for jump tables is only partially verified here. 182 if (BB->succ_size() != OtherBB->succ_size()) 183 return false; 184 185 auto SuccBBI = OtherBB->succ_begin(); 186 for (const BinaryBasicBlock *SuccBB : BB->successors()) { 187 const BinaryBasicBlock *SuccOtherBB = *SuccBBI; 188 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) 189 return false; 190 ++SuccBBI; 191 } 192 193 // Compare all instructions including pseudos. 194 auto I = BB->begin(), E = BB->end(); 195 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); 196 while (I != E && OtherI != OtherE) { 197 // Compare symbols. 198 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, 199 const MCSymbol *SymbolB) { 200 if (SymbolA == SymbolB) 201 return true; 202 203 // All local symbols are considered identical since they affect a 204 // control flow and we check the control flow separately. 205 // If a local symbol is escaped, then the function (potentially) has 206 // multiple entry points and we exclude such functions from 207 // comparison. 208 if (SymbolA->isTemporary() && SymbolB->isTemporary()) 209 return true; 210 211 // Compare symbols as functions. 212 uint64_t EntryIDA = 0; 213 uint64_t EntryIDB = 0; 214 const BinaryFunction *FunctionA = 215 BC.getFunctionForSymbol(SymbolA, &EntryIDA); 216 const BinaryFunction *FunctionB = 217 BC.getFunctionForSymbol(SymbolB, &EntryIDB); 218 if (FunctionA && EntryIDA) 219 FunctionA = nullptr; 220 if (FunctionB && EntryIDB) 221 FunctionB = nullptr; 222 if (FunctionA && FunctionB) { 223 // Self-referencing functions and recursive calls. 224 if (FunctionA == &A && FunctionB == &B) 225 return true; 226 227 // Functions with different hash values can never become identical, 228 // hence A and B are different. 229 if (CongruentSymbols) 230 return FunctionA->getHash() == FunctionB->getHash(); 231 232 return FunctionA == FunctionB; 233 } 234 235 // One of the symbols represents a function, the other one does not. 236 if (FunctionA != FunctionB) 237 return false; 238 239 // Check if symbols are jump tables. 240 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName()); 241 if (!SIA) 242 return false; 243 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName()); 244 if (!SIB) 245 return false; 246 247 assert((SIA->getAddress() != SIB->getAddress()) && 248 "different symbols should not have the same value"); 249 250 const JumpTable *JumpTableA = 251 A.getJumpTableContainingAddress(SIA->getAddress()); 252 if (!JumpTableA) 253 return false; 254 255 const JumpTable *JumpTableB = 256 B.getJumpTableContainingAddress(SIB->getAddress()); 257 if (!JumpTableB) 258 return false; 259 260 if ((SIA->getAddress() - JumpTableA->getAddress()) != 261 (SIB->getAddress() - JumpTableB->getAddress())) 262 return false; 263 264 return equalJumpTables(*JumpTableA, *JumpTableB, A, B); 265 }; 266 267 if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, 268 AreSymbolsIdentical)) 269 return false; 270 271 ++I; 272 ++OtherI; 273 } 274 275 // One of the identical blocks may have a trailing unconditional jump that 276 // is ignored for CFG purposes. 277 const MCInst *TrailingInstr = 278 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0)); 279 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) 280 return false; 281 282 ++BBI; 283 } 284 285 // Compare exceptions action tables. 286 if (A.getLSDAActionTable() != B.getLSDAActionTable() || 287 A.getLSDATypeTable() != B.getLSDATypeTable() || 288 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) 289 return false; 290 291 return true; 292 } 293 294 // This hash table is used to identify identical functions. It maps 295 // a function to a bucket of functions identical to it. 296 struct KeyHash { 297 size_t operator()(const BinaryFunction *F) const { return F->getHash(); } 298 }; 299 300 /// Identify two congruent functions. Two functions are considered congruent, 301 /// if they are identical/equal except for some of their instruction operands 302 /// that reference potentially identical functions, i.e. functions that could 303 /// be folded later. Congruent functions are candidates for folding in our 304 /// iterative ICF algorithm. 305 /// 306 /// Congruent functions are required to have identical hash. 307 struct KeyCongruent { 308 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 309 if (A == B) 310 return true; 311 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true); 312 } 313 }; 314 315 struct KeyEqual { 316 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 317 if (A == B) 318 return true; 319 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false); 320 } 321 }; 322 323 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, 324 KeyHash, KeyCongruent> 325 CongruentBucketsMap; 326 327 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, 328 KeyHash, KeyEqual> 329 IdenticalBucketsMap; 330 331 std::string hashInteger(uint64_t Value) { 332 std::string HashString; 333 if (Value == 0) 334 HashString.push_back(0); 335 336 while (Value) { 337 uint8_t LSB = Value & 0xff; 338 HashString.push_back(LSB); 339 Value >>= 8; 340 } 341 342 return HashString; 343 } 344 345 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 346 std::string HashString; 347 348 // Ignore function references. 349 if (BC.getFunctionForSymbol(&Symbol)) 350 return HashString; 351 352 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 353 if (!ErrorOrValue) 354 return HashString; 355 356 // Ignore jump table references. 357 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 358 return HashString; 359 360 return HashString.append(hashInteger(*ErrorOrValue)); 361 } 362 363 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 364 switch (Expr.getKind()) { 365 case MCExpr::Constant: 366 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 367 case MCExpr::SymbolRef: 368 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 369 case MCExpr::Unary: { 370 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 371 return hashInteger(UnaryExpr.getOpcode()) 372 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 373 } 374 case MCExpr::Binary: { 375 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 376 return hashExpr(BC, *BinaryExpr.getLHS()) 377 .append(hashInteger(BinaryExpr.getOpcode())) 378 .append(hashExpr(BC, *BinaryExpr.getRHS())); 379 } 380 case MCExpr::Target: 381 return std::string(); 382 } 383 384 llvm_unreachable("invalid expression kind"); 385 } 386 387 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 388 if (Operand.isImm()) 389 return hashInteger(Operand.getImm()); 390 if (Operand.isReg()) 391 return hashInteger(Operand.getReg()); 392 if (Operand.isExpr()) 393 return hashExpr(BC, *Operand.getExpr()); 394 395 return std::string(); 396 } 397 398 } // namespace 399 400 namespace llvm { 401 namespace bolt { 402 403 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 404 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); 405 uint64_t NumFunctionsFolded = 0; 406 std::atomic<uint64_t> NumJTFunctionsFolded{0}; 407 std::atomic<uint64_t> BytesSavedEstimate{0}; 408 std::atomic<uint64_t> CallsSavedEstimate{0}; 409 std::atomic<uint64_t> NumFoldedLastIteration{0}; 410 CongruentBucketsMap CongruentBuckets; 411 412 // Hash all the functions 413 auto hashFunctions = [&]() { 414 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown", 415 "ICF breakdown", opts::TimeICF); 416 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 417 // Make sure indices are in-order. 418 BF.updateLayoutIndices(); 419 420 // Pre-compute hash before pushing into hashtable. 421 // Hash instruction operands to minimize hash collisions. 422 BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) { 423 return hashInstOperand(BC, Op); 424 }); 425 }; 426 427 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 428 return !shouldOptimize(BF); 429 }; 430 431 ParallelUtilities::runOnEachFunction( 432 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc, 433 "hashFunctions", /*ForceSequential*/ false, 2); 434 }; 435 436 // Creates buckets with congruent functions - functions that potentially 437 // could be folded. 438 auto createCongruentBuckets = [&]() { 439 NamedRegionTimer CongruentBucketsTimer("congruent buckets", 440 "congruent buckets", "ICF breakdown", 441 "ICF breakdown", opts::TimeICF); 442 for (auto &BFI : BC.getBinaryFunctions()) { 443 BinaryFunction &BF = BFI.second; 444 if (!this->shouldOptimize(BF)) 445 continue; 446 CongruentBuckets[&BF].emplace(&BF); 447 } 448 }; 449 450 // Partition each set of congruent functions into sets of identical functions 451 // and fold them 452 auto performFoldingPass = [&]() { 453 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes", 454 "ICF breakdown", "ICF breakdown", 455 opts::TimeICF); 456 Timer SinglePass("single fold pass", "single fold pass"); 457 LLVM_DEBUG(SinglePass.startTimer()); 458 459 ThreadPool *ThPool; 460 if (!opts::NoThreads) 461 ThPool = &ParallelUtilities::getThreadPool(); 462 463 // Fold identical functions within a single congruent bucket 464 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { 465 Timer T("folding single congruent list", "folding single congruent list"); 466 LLVM_DEBUG(T.startTimer()); 467 468 // Identical functions go into the same bucket. 469 IdenticalBucketsMap IdenticalBuckets; 470 for (BinaryFunction *BF : Candidates) { 471 IdenticalBuckets[BF].emplace_back(BF); 472 } 473 474 for (auto &IBI : IdenticalBuckets) { 475 // Functions identified as identical. 476 std::vector<BinaryFunction *> &Twins = IBI.second; 477 if (Twins.size() < 2) 478 continue; 479 480 // Fold functions. Keep the order consistent across invocations with 481 // different options. 482 llvm::stable_sort( 483 Twins, [](const BinaryFunction *A, const BinaryFunction *B) { 484 return A->getFunctionNumber() < B->getFunctionNumber(); 485 }); 486 487 BinaryFunction *ParentBF = Twins[0]; 488 for (unsigned I = 1; I < Twins.size(); ++I) { 489 BinaryFunction *ChildBF = Twins[I]; 490 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " 491 << *ParentBF << '\n'); 492 493 // Remove child function from the list of candidates. 494 auto FI = Candidates.find(ChildBF); 495 assert(FI != Candidates.end() && 496 "function expected to be in the set"); 497 Candidates.erase(FI); 498 499 // Fold the function and remove from the list of processed functions. 500 BytesSavedEstimate += ChildBF->getSize(); 501 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), 502 ParentBF->getKnownExecutionCount()); 503 BC.foldFunction(*ChildBF, *ParentBF); 504 505 ++NumFoldedLastIteration; 506 507 if (ParentBF->hasJumpTables()) 508 ++NumJTFunctionsFolded; 509 } 510 } 511 512 LLVM_DEBUG(T.stopTimer()); 513 }; 514 515 // Create a task for each congruent bucket 516 for (auto &Entry : CongruentBuckets) { 517 std::set<BinaryFunction *> &Bucket = Entry.second; 518 if (Bucket.size() < 2) 519 continue; 520 521 if (opts::NoThreads) 522 processSingleBucket(Bucket); 523 else 524 ThPool->async(processSingleBucket, std::ref(Bucket)); 525 } 526 527 if (!opts::NoThreads) 528 ThPool->wait(); 529 530 LLVM_DEBUG(SinglePass.stopTimer()); 531 }; 532 533 hashFunctions(); 534 createCongruentBuckets(); 535 536 unsigned Iteration = 1; 537 // We repeat the pass until no new modifications happen. 538 do { 539 NumFoldedLastIteration = 0; 540 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n"); 541 542 performFoldingPass(); 543 544 NumFunctionsFolded += NumFoldedLastIteration; 545 ++Iteration; 546 547 } while (NumFoldedLastIteration > 0); 548 549 LLVM_DEBUG({ 550 // Print functions that are congruent but not identical. 551 for (auto &CBI : CongruentBuckets) { 552 std::set<BinaryFunction *> &Candidates = CBI.second; 553 if (Candidates.size() < 2) 554 continue; 555 dbgs() << "BOLT-DEBUG: the following " << Candidates.size() 556 << " functions (each of size " << (*Candidates.begin())->getSize() 557 << " bytes) are congruent but not identical:\n"; 558 for (BinaryFunction *BF : Candidates) { 559 dbgs() << " " << *BF; 560 if (BF->getKnownExecutionCount()) 561 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)"; 562 dbgs() << '\n'; 563 } 564 } 565 }); 566 567 if (NumFunctionsFolded) 568 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " 569 << OriginalFunctionCount << " functions in " << Iteration 570 << " passes. " << NumJTFunctionsFolded 571 << " functions had jump tables.\n" 572 << "BOLT-INFO: Removing all identical functions will save " 573 << format("%.2lf", (double)BytesSavedEstimate / 1024) 574 << " KB of code space. Folded functions were called " 575 << CallsSavedEstimate << " times based on profile.\n"; 576 } 577 578 } // namespace bolt 579 } // namespace llvm 580