1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the IdenticalCodeFolding class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/IdenticalCodeFolding.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/ThreadPool.h" 18 #include "llvm/Support/Timer.h" 19 #include <atomic> 20 #include <iterator> 21 #include <map> 22 #include <set> 23 #include <unordered_map> 24 25 #define DEBUG_TYPE "bolt-icf" 26 27 using namespace llvm; 28 using namespace bolt; 29 30 namespace opts { 31 32 extern cl::OptionCategory BoltOptCategory; 33 34 static cl::opt<bool> UseDFS("icf-dfs", 35 cl::desc("use DFS ordering when using -icf option"), 36 cl::ReallyHidden, cl::cat(BoltOptCategory)); 37 38 static cl::opt<bool> 39 TimeICF("time-icf", 40 cl::desc("time icf steps"), 41 cl::ReallyHidden, 42 cl::ZeroOrMore, 43 cl::cat(BoltOptCategory)); 44 } // namespace opts 45 46 namespace { 47 using JumpTable = bolt::JumpTable; 48 49 /// Compare two jump tables in 2 functions. The function relies on consistent 50 /// ordering of basic blocks in both binary functions (e.g. DFS). 51 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB, 52 const BinaryFunction &FunctionA, 53 const BinaryFunction &FunctionB) { 54 if (JumpTableA.EntrySize != JumpTableB.EntrySize) 55 return false; 56 57 if (JumpTableA.Type != JumpTableB.Type) 58 return false; 59 60 if (JumpTableA.getSize() != JumpTableB.getSize()) 61 return false; 62 63 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { 64 const MCSymbol *LabelA = JumpTableA.Entries[Index]; 65 const MCSymbol *LabelB = JumpTableB.Entries[Index]; 66 67 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA); 68 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB); 69 70 if (!TargetA || !TargetB) { 71 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && 72 "no target basic block found"); 73 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && 74 "no target basic block found"); 75 76 if (TargetA != TargetB) 77 return false; 78 79 continue; 80 } 81 82 assert(TargetA && TargetB && "cannot locate target block(s)"); 83 84 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) 85 return false; 86 } 87 88 return true; 89 } 90 91 /// Helper function that compares an instruction of this function to the 92 /// given instruction of the given function. The functions should have 93 /// identical CFG. 94 template <class Compare> 95 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, 96 const MCInst &InstB, const BinaryBasicBlock &BBB, 97 Compare Comp) { 98 if (InstA.getOpcode() != InstB.getOpcode()) 99 return false; 100 101 const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); 102 103 // In this function we check for special conditions: 104 // 105 // * instructions with landing pads 106 // 107 // Most of the common cases should be handled by MCPlus::equals() 108 // that compares regular instruction operands. 109 // 110 // NB: there's no need to compare jump table indirect jump instructions 111 // separately as jump tables are handled by comparing corresponding 112 // symbols. 113 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA); 114 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB); 115 116 if (EHInfoA || EHInfoB) { 117 if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) 118 return false; 119 120 if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) 121 return false; 122 123 if (EHInfoA && EHInfoB) { 124 // Action indices should match. 125 if (EHInfoA->second != EHInfoB->second) 126 return false; 127 128 if (!EHInfoA->first != !EHInfoB->first) 129 return false; 130 131 if (EHInfoA->first && EHInfoB->first) { 132 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first); 133 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first); 134 assert(LPA && LPB && "cannot locate landing pad(s)"); 135 136 if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) 137 return false; 138 } 139 } 140 } 141 142 return BC.MIB->equals(InstA, InstB, Comp); 143 } 144 145 /// Returns true if this function has identical code and CFG with 146 /// the given function \p BF. 147 /// 148 /// If \p CongruentSymbols is set to true, then symbolic operands that reference 149 /// potentially identical but different functions are ignored during the 150 /// comparison. 151 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, 152 bool CongruentSymbols) { 153 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG"); 154 155 // Compare the two functions, one basic block at a time. 156 // Currently we require two identical basic blocks to have identical 157 // instruction sequences and the same index in their corresponding 158 // functions. The latter is important for CFG equality. 159 160 if (A.getLayout().block_size() != B.getLayout().block_size()) 161 return false; 162 163 // Comparing multi-entry functions could be non-trivial. 164 if (A.isMultiEntry() || B.isMultiEntry()) 165 return false; 166 167 if (A.hasIslandsInfo() || B.hasIslandsInfo()) 168 return false; 169 170 // Process both functions in either DFS or existing order. 171 SmallVector<const BinaryBasicBlock *, 0> OrderA; 172 SmallVector<const BinaryBasicBlock *, 0> OrderB; 173 if (opts::UseDFS) { 174 copy(A.dfs(), std::back_inserter(OrderA)); 175 copy(B.dfs(), std::back_inserter(OrderB)); 176 } else { 177 copy(A.getLayout().blocks(), std::back_inserter(OrderA)); 178 copy(B.getLayout().blocks(), std::back_inserter(OrderB)); 179 } 180 181 const BinaryContext &BC = A.getBinaryContext(); 182 183 auto BBI = OrderB.begin(); 184 for (const BinaryBasicBlock *BB : OrderA) { 185 const BinaryBasicBlock *OtherBB = *BBI; 186 187 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) 188 return false; 189 190 // Compare successor basic blocks. 191 // NOTE: the comparison for jump tables is only partially verified here. 192 if (BB->succ_size() != OtherBB->succ_size()) 193 return false; 194 195 auto SuccBBI = OtherBB->succ_begin(); 196 for (const BinaryBasicBlock *SuccBB : BB->successors()) { 197 const BinaryBasicBlock *SuccOtherBB = *SuccBBI; 198 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) 199 return false; 200 ++SuccBBI; 201 } 202 203 // Compare all instructions including pseudos. 204 auto I = BB->begin(), E = BB->end(); 205 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); 206 while (I != E && OtherI != OtherE) { 207 // Compare symbols. 208 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, 209 const MCSymbol *SymbolB) { 210 if (SymbolA == SymbolB) 211 return true; 212 213 // All local symbols are considered identical since they affect a 214 // control flow and we check the control flow separately. 215 // If a local symbol is escaped, then the function (potentially) has 216 // multiple entry points and we exclude such functions from 217 // comparison. 218 if (SymbolA->isTemporary() && SymbolB->isTemporary()) 219 return true; 220 221 // Compare symbols as functions. 222 uint64_t EntryIDA = 0; 223 uint64_t EntryIDB = 0; 224 const BinaryFunction *FunctionA = 225 BC.getFunctionForSymbol(SymbolA, &EntryIDA); 226 const BinaryFunction *FunctionB = 227 BC.getFunctionForSymbol(SymbolB, &EntryIDB); 228 if (FunctionA && EntryIDA) 229 FunctionA = nullptr; 230 if (FunctionB && EntryIDB) 231 FunctionB = nullptr; 232 if (FunctionA && FunctionB) { 233 // Self-referencing functions and recursive calls. 234 if (FunctionA == &A && FunctionB == &B) 235 return true; 236 237 // Functions with different hash values can never become identical, 238 // hence A and B are different. 239 if (CongruentSymbols) 240 return FunctionA->getHash() == FunctionB->getHash(); 241 242 return FunctionA == FunctionB; 243 } 244 245 // One of the symbols represents a function, the other one does not. 246 if (FunctionA != FunctionB) 247 return false; 248 249 // Check if symbols are jump tables. 250 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName()); 251 if (!SIA) 252 return false; 253 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName()); 254 if (!SIB) 255 return false; 256 257 assert((SIA->getAddress() != SIB->getAddress()) && 258 "different symbols should not have the same value"); 259 260 const JumpTable *JumpTableA = 261 A.getJumpTableContainingAddress(SIA->getAddress()); 262 if (!JumpTableA) 263 return false; 264 265 const JumpTable *JumpTableB = 266 B.getJumpTableContainingAddress(SIB->getAddress()); 267 if (!JumpTableB) 268 return false; 269 270 if ((SIA->getAddress() - JumpTableA->getAddress()) != 271 (SIB->getAddress() - JumpTableB->getAddress())) 272 return false; 273 274 return equalJumpTables(*JumpTableA, *JumpTableB, A, B); 275 }; 276 277 if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, 278 AreSymbolsIdentical)) 279 return false; 280 281 ++I; 282 ++OtherI; 283 } 284 285 // One of the identical blocks may have a trailing unconditional jump that 286 // is ignored for CFG purposes. 287 const MCInst *TrailingInstr = 288 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr)); 289 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) 290 return false; 291 292 ++BBI; 293 } 294 295 // Compare exceptions action tables. 296 if (A.getLSDAActionTable() != B.getLSDAActionTable() || 297 A.getLSDATypeTable() != B.getLSDATypeTable() || 298 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) 299 return false; 300 301 return true; 302 } 303 304 // This hash table is used to identify identical functions. It maps 305 // a function to a bucket of functions identical to it. 306 struct KeyHash { 307 size_t operator()(const BinaryFunction *F) const { return F->getHash(); } 308 }; 309 310 /// Identify two congruent functions. Two functions are considered congruent, 311 /// if they are identical/equal except for some of their instruction operands 312 /// that reference potentially identical functions, i.e. functions that could 313 /// be folded later. Congruent functions are candidates for folding in our 314 /// iterative ICF algorithm. 315 /// 316 /// Congruent functions are required to have identical hash. 317 struct KeyCongruent { 318 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 319 if (A == B) 320 return true; 321 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true); 322 } 323 }; 324 325 struct KeyEqual { 326 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 327 if (A == B) 328 return true; 329 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false); 330 } 331 }; 332 333 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, 334 KeyHash, KeyCongruent> 335 CongruentBucketsMap; 336 337 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, 338 KeyHash, KeyEqual> 339 IdenticalBucketsMap; 340 341 std::string hashInteger(uint64_t Value) { 342 std::string HashString; 343 if (Value == 0) 344 HashString.push_back(0); 345 346 while (Value) { 347 uint8_t LSB = Value & 0xff; 348 HashString.push_back(LSB); 349 Value >>= 8; 350 } 351 352 return HashString; 353 } 354 355 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 356 std::string HashString; 357 358 // Ignore function references. 359 if (BC.getFunctionForSymbol(&Symbol)) 360 return HashString; 361 362 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 363 if (!ErrorOrValue) 364 return HashString; 365 366 // Ignore jump table references. 367 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 368 return HashString; 369 370 return HashString.append(hashInteger(*ErrorOrValue)); 371 } 372 373 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 374 switch (Expr.getKind()) { 375 case MCExpr::Constant: 376 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 377 case MCExpr::SymbolRef: 378 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 379 case MCExpr::Unary: { 380 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 381 return hashInteger(UnaryExpr.getOpcode()) 382 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 383 } 384 case MCExpr::Binary: { 385 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 386 return hashExpr(BC, *BinaryExpr.getLHS()) 387 .append(hashInteger(BinaryExpr.getOpcode())) 388 .append(hashExpr(BC, *BinaryExpr.getRHS())); 389 } 390 case MCExpr::Target: 391 return std::string(); 392 } 393 394 llvm_unreachable("invalid expression kind"); 395 } 396 397 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 398 if (Operand.isImm()) 399 return hashInteger(Operand.getImm()); 400 if (Operand.isReg()) 401 return hashInteger(Operand.getReg()); 402 if (Operand.isExpr()) 403 return hashExpr(BC, *Operand.getExpr()); 404 405 return std::string(); 406 } 407 408 } // namespace 409 410 namespace llvm { 411 namespace bolt { 412 413 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 414 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); 415 uint64_t NumFunctionsFolded = 0; 416 std::atomic<uint64_t> NumJTFunctionsFolded{0}; 417 std::atomic<uint64_t> BytesSavedEstimate{0}; 418 std::atomic<uint64_t> CallsSavedEstimate{0}; 419 std::atomic<uint64_t> NumFoldedLastIteration{0}; 420 CongruentBucketsMap CongruentBuckets; 421 422 // Hash all the functions 423 auto hashFunctions = [&]() { 424 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown", 425 "ICF breakdown", opts::TimeICF); 426 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 427 // Make sure indices are in-order. 428 BF.getLayout().updateLayoutIndices(); 429 430 // Pre-compute hash before pushing into hashtable. 431 // Hash instruction operands to minimize hash collisions. 432 BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) { 433 return hashInstOperand(BC, Op); 434 }); 435 }; 436 437 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 438 return !shouldOptimize(BF); 439 }; 440 441 ParallelUtilities::runOnEachFunction( 442 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc, 443 "hashFunctions", /*ForceSequential*/ false, 2); 444 }; 445 446 // Creates buckets with congruent functions - functions that potentially 447 // could be folded. 448 auto createCongruentBuckets = [&]() { 449 NamedRegionTimer CongruentBucketsTimer("congruent buckets", 450 "congruent buckets", "ICF breakdown", 451 "ICF breakdown", opts::TimeICF); 452 for (auto &BFI : BC.getBinaryFunctions()) { 453 BinaryFunction &BF = BFI.second; 454 if (!this->shouldOptimize(BF)) 455 continue; 456 CongruentBuckets[&BF].emplace(&BF); 457 } 458 }; 459 460 // Partition each set of congruent functions into sets of identical functions 461 // and fold them 462 auto performFoldingPass = [&]() { 463 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes", 464 "ICF breakdown", "ICF breakdown", 465 opts::TimeICF); 466 Timer SinglePass("single fold pass", "single fold pass"); 467 LLVM_DEBUG(SinglePass.startTimer()); 468 469 ThreadPool *ThPool; 470 if (!opts::NoThreads) 471 ThPool = &ParallelUtilities::getThreadPool(); 472 473 // Fold identical functions within a single congruent bucket 474 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { 475 Timer T("folding single congruent list", "folding single congruent list"); 476 LLVM_DEBUG(T.startTimer()); 477 478 // Identical functions go into the same bucket. 479 IdenticalBucketsMap IdenticalBuckets; 480 for (BinaryFunction *BF : Candidates) { 481 IdenticalBuckets[BF].emplace_back(BF); 482 } 483 484 for (auto &IBI : IdenticalBuckets) { 485 // Functions identified as identical. 486 std::vector<BinaryFunction *> &Twins = IBI.second; 487 if (Twins.size() < 2) 488 continue; 489 490 // Fold functions. Keep the order consistent across invocations with 491 // different options. 492 llvm::stable_sort( 493 Twins, [](const BinaryFunction *A, const BinaryFunction *B) { 494 return A->getFunctionNumber() < B->getFunctionNumber(); 495 }); 496 497 BinaryFunction *ParentBF = Twins[0]; 498 for (unsigned I = 1; I < Twins.size(); ++I) { 499 BinaryFunction *ChildBF = Twins[I]; 500 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " 501 << *ParentBF << '\n'); 502 503 // Remove child function from the list of candidates. 504 auto FI = Candidates.find(ChildBF); 505 assert(FI != Candidates.end() && 506 "function expected to be in the set"); 507 Candidates.erase(FI); 508 509 // Fold the function and remove from the list of processed functions. 510 BytesSavedEstimate += ChildBF->getSize(); 511 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), 512 ParentBF->getKnownExecutionCount()); 513 BC.foldFunction(*ChildBF, *ParentBF); 514 515 ++NumFoldedLastIteration; 516 517 if (ParentBF->hasJumpTables()) 518 ++NumJTFunctionsFolded; 519 } 520 } 521 522 LLVM_DEBUG(T.stopTimer()); 523 }; 524 525 // Create a task for each congruent bucket 526 for (auto &Entry : CongruentBuckets) { 527 std::set<BinaryFunction *> &Bucket = Entry.second; 528 if (Bucket.size() < 2) 529 continue; 530 531 if (opts::NoThreads) 532 processSingleBucket(Bucket); 533 else 534 ThPool->async(processSingleBucket, std::ref(Bucket)); 535 } 536 537 if (!opts::NoThreads) 538 ThPool->wait(); 539 540 LLVM_DEBUG(SinglePass.stopTimer()); 541 }; 542 543 hashFunctions(); 544 createCongruentBuckets(); 545 546 unsigned Iteration = 1; 547 // We repeat the pass until no new modifications happen. 548 do { 549 NumFoldedLastIteration = 0; 550 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n"); 551 552 performFoldingPass(); 553 554 NumFunctionsFolded += NumFoldedLastIteration; 555 ++Iteration; 556 557 } while (NumFoldedLastIteration > 0); 558 559 LLVM_DEBUG({ 560 // Print functions that are congruent but not identical. 561 for (auto &CBI : CongruentBuckets) { 562 std::set<BinaryFunction *> &Candidates = CBI.second; 563 if (Candidates.size() < 2) 564 continue; 565 dbgs() << "BOLT-DEBUG: the following " << Candidates.size() 566 << " functions (each of size " << (*Candidates.begin())->getSize() 567 << " bytes) are congruent but not identical:\n"; 568 for (BinaryFunction *BF : Candidates) { 569 dbgs() << " " << *BF; 570 if (BF->getKnownExecutionCount()) 571 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)"; 572 dbgs() << '\n'; 573 } 574 } 575 }); 576 577 if (NumFunctionsFolded) 578 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " 579 << OriginalFunctionCount << " functions in " << Iteration 580 << " passes. " << NumJTFunctionsFolded 581 << " functions had jump tables.\n" 582 << "BOLT-INFO: Removing all identical functions will save " 583 << format("%.2lf", (double)BytesSavedEstimate / 1024) 584 << " KB of code space. Folded functions were called " 585 << CallsSavedEstimate << " times based on profile.\n"; 586 } 587 588 } // namespace bolt 589 } // namespace llvm 590