1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the IdenticalCodeFolding class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/IdenticalCodeFolding.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/ThreadPool.h" 17 #include "llvm/Support/Timer.h" 18 #include <atomic> 19 #include <map> 20 #include <set> 21 #include <unordered_map> 22 23 #define DEBUG_TYPE "bolt-icf" 24 25 using namespace llvm; 26 using namespace bolt; 27 28 namespace opts { 29 30 extern cl::OptionCategory BoltOptCategory; 31 32 static cl::opt<bool> UseDFS("icf-dfs", 33 cl::desc("use DFS ordering when using -icf option"), 34 cl::ReallyHidden, cl::cat(BoltOptCategory)); 35 36 static cl::opt<bool> 37 TimeICF("time-icf", 38 cl::desc("time icf steps"), 39 cl::ReallyHidden, 40 cl::ZeroOrMore, 41 cl::cat(BoltOptCategory)); 42 } // namespace opts 43 44 namespace { 45 using JumpTable = bolt::JumpTable; 46 47 /// Compare two jump tables in 2 functions. The function relies on consistent 48 /// ordering of basic blocks in both binary functions (e.g. DFS). 49 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB, 50 const BinaryFunction &FunctionA, 51 const BinaryFunction &FunctionB) { 52 if (JumpTableA.EntrySize != JumpTableB.EntrySize) 53 return false; 54 55 if (JumpTableA.Type != JumpTableB.Type) 56 return false; 57 58 if (JumpTableA.getSize() != JumpTableB.getSize()) 59 return false; 60 61 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { 62 const MCSymbol *LabelA = JumpTableA.Entries[Index]; 63 const MCSymbol *LabelB = JumpTableB.Entries[Index]; 64 65 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA); 66 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB); 67 68 if (!TargetA || !TargetB) { 69 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && 70 "no target basic block found"); 71 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && 72 "no target basic block found"); 73 74 if (TargetA != TargetB) 75 return false; 76 77 continue; 78 } 79 80 assert(TargetA && TargetB && "cannot locate target block(s)"); 81 82 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) 83 return false; 84 } 85 86 return true; 87 } 88 89 /// Helper function that compares an instruction of this function to the 90 /// given instruction of the given function. The functions should have 91 /// identical CFG. 92 template <class Compare> 93 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, 94 const MCInst &InstB, const BinaryBasicBlock &BBB, 95 Compare Comp) { 96 if (InstA.getOpcode() != InstB.getOpcode()) 97 return false; 98 99 const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); 100 101 // In this function we check for special conditions: 102 // 103 // * instructions with landing pads 104 // 105 // Most of the common cases should be handled by MCPlus::equals() 106 // that compares regular instruction operands. 107 // 108 // NB: there's no need to compare jump table indirect jump instructions 109 // separately as jump tables are handled by comparing corresponding 110 // symbols. 111 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA); 112 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB); 113 114 if (EHInfoA || EHInfoB) { 115 if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) 116 return false; 117 118 if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) 119 return false; 120 121 if (EHInfoA && EHInfoB) { 122 // Action indices should match. 123 if (EHInfoA->second != EHInfoB->second) 124 return false; 125 126 if (!EHInfoA->first != !EHInfoB->first) 127 return false; 128 129 if (EHInfoA->first && EHInfoB->first) { 130 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first); 131 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first); 132 assert(LPA && LPB && "cannot locate landing pad(s)"); 133 134 if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) 135 return false; 136 } 137 } 138 } 139 140 return BC.MIB->equals(InstA, InstB, Comp); 141 } 142 143 /// Returns true if this function has identical code and CFG with 144 /// the given function \p BF. 145 /// 146 /// If \p CongruentSymbols is set to true, then symbolic operands that reference 147 /// potentially identical but different functions are ignored during the 148 /// comparison. 149 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, 150 bool CongruentSymbols) { 151 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG"); 152 153 // Compare the two functions, one basic block at a time. 154 // Currently we require two identical basic blocks to have identical 155 // instruction sequences and the same index in their corresponding 156 // functions. The latter is important for CFG equality. 157 158 if (A.getLayout().block_size() != B.getLayout().block_size()) 159 return false; 160 161 // Comparing multi-entry functions could be non-trivial. 162 if (A.isMultiEntry() || B.isMultiEntry()) 163 return false; 164 165 if (A.hasIslandsInfo() || B.hasIslandsInfo()) 166 return false; 167 168 // Process both functions in either DFS or existing order. 169 const BinaryFunction::BasicBlockOrderType OrderA = 170 opts::UseDFS 171 ? A.dfs() 172 : BinaryFunction::BasicBlockOrderType(A.getLayout().block_begin(), 173 A.getLayout().block_end()); 174 const BinaryFunction::BasicBlockOrderType OrderB = 175 opts::UseDFS 176 ? B.dfs() 177 : BinaryFunction::BasicBlockOrderType(B.getLayout().block_begin(), 178 B.getLayout().block_end()); 179 180 const BinaryContext &BC = A.getBinaryContext(); 181 182 auto BBI = OrderB.begin(); 183 for (const BinaryBasicBlock *BB : OrderA) { 184 const BinaryBasicBlock *OtherBB = *BBI; 185 186 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) 187 return false; 188 189 // Compare successor basic blocks. 190 // NOTE: the comparison for jump tables is only partially verified here. 191 if (BB->succ_size() != OtherBB->succ_size()) 192 return false; 193 194 auto SuccBBI = OtherBB->succ_begin(); 195 for (const BinaryBasicBlock *SuccBB : BB->successors()) { 196 const BinaryBasicBlock *SuccOtherBB = *SuccBBI; 197 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) 198 return false; 199 ++SuccBBI; 200 } 201 202 // Compare all instructions including pseudos. 203 auto I = BB->begin(), E = BB->end(); 204 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); 205 while (I != E && OtherI != OtherE) { 206 // Compare symbols. 207 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, 208 const MCSymbol *SymbolB) { 209 if (SymbolA == SymbolB) 210 return true; 211 212 // All local symbols are considered identical since they affect a 213 // control flow and we check the control flow separately. 214 // If a local symbol is escaped, then the function (potentially) has 215 // multiple entry points and we exclude such functions from 216 // comparison. 217 if (SymbolA->isTemporary() && SymbolB->isTemporary()) 218 return true; 219 220 // Compare symbols as functions. 221 uint64_t EntryIDA = 0; 222 uint64_t EntryIDB = 0; 223 const BinaryFunction *FunctionA = 224 BC.getFunctionForSymbol(SymbolA, &EntryIDA); 225 const BinaryFunction *FunctionB = 226 BC.getFunctionForSymbol(SymbolB, &EntryIDB); 227 if (FunctionA && EntryIDA) 228 FunctionA = nullptr; 229 if (FunctionB && EntryIDB) 230 FunctionB = nullptr; 231 if (FunctionA && FunctionB) { 232 // Self-referencing functions and recursive calls. 233 if (FunctionA == &A && FunctionB == &B) 234 return true; 235 236 // Functions with different hash values can never become identical, 237 // hence A and B are different. 238 if (CongruentSymbols) 239 return FunctionA->getHash() == FunctionB->getHash(); 240 241 return FunctionA == FunctionB; 242 } 243 244 // One of the symbols represents a function, the other one does not. 245 if (FunctionA != FunctionB) 246 return false; 247 248 // Check if symbols are jump tables. 249 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName()); 250 if (!SIA) 251 return false; 252 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName()); 253 if (!SIB) 254 return false; 255 256 assert((SIA->getAddress() != SIB->getAddress()) && 257 "different symbols should not have the same value"); 258 259 const JumpTable *JumpTableA = 260 A.getJumpTableContainingAddress(SIA->getAddress()); 261 if (!JumpTableA) 262 return false; 263 264 const JumpTable *JumpTableB = 265 B.getJumpTableContainingAddress(SIB->getAddress()); 266 if (!JumpTableB) 267 return false; 268 269 if ((SIA->getAddress() - JumpTableA->getAddress()) != 270 (SIB->getAddress() - JumpTableB->getAddress())) 271 return false; 272 273 return equalJumpTables(*JumpTableA, *JumpTableB, A, B); 274 }; 275 276 if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, 277 AreSymbolsIdentical)) 278 return false; 279 280 ++I; 281 ++OtherI; 282 } 283 284 // One of the identical blocks may have a trailing unconditional jump that 285 // is ignored for CFG purposes. 286 const MCInst *TrailingInstr = 287 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr)); 288 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) 289 return false; 290 291 ++BBI; 292 } 293 294 // Compare exceptions action tables. 295 if (A.getLSDAActionTable() != B.getLSDAActionTable() || 296 A.getLSDATypeTable() != B.getLSDATypeTable() || 297 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) 298 return false; 299 300 return true; 301 } 302 303 // This hash table is used to identify identical functions. It maps 304 // a function to a bucket of functions identical to it. 305 struct KeyHash { 306 size_t operator()(const BinaryFunction *F) const { return F->getHash(); } 307 }; 308 309 /// Identify two congruent functions. Two functions are considered congruent, 310 /// if they are identical/equal except for some of their instruction operands 311 /// that reference potentially identical functions, i.e. functions that could 312 /// be folded later. Congruent functions are candidates for folding in our 313 /// iterative ICF algorithm. 314 /// 315 /// Congruent functions are required to have identical hash. 316 struct KeyCongruent { 317 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 318 if (A == B) 319 return true; 320 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true); 321 } 322 }; 323 324 struct KeyEqual { 325 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 326 if (A == B) 327 return true; 328 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false); 329 } 330 }; 331 332 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, 333 KeyHash, KeyCongruent> 334 CongruentBucketsMap; 335 336 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, 337 KeyHash, KeyEqual> 338 IdenticalBucketsMap; 339 340 std::string hashInteger(uint64_t Value) { 341 std::string HashString; 342 if (Value == 0) 343 HashString.push_back(0); 344 345 while (Value) { 346 uint8_t LSB = Value & 0xff; 347 HashString.push_back(LSB); 348 Value >>= 8; 349 } 350 351 return HashString; 352 } 353 354 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 355 std::string HashString; 356 357 // Ignore function references. 358 if (BC.getFunctionForSymbol(&Symbol)) 359 return HashString; 360 361 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 362 if (!ErrorOrValue) 363 return HashString; 364 365 // Ignore jump table references. 366 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 367 return HashString; 368 369 return HashString.append(hashInteger(*ErrorOrValue)); 370 } 371 372 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 373 switch (Expr.getKind()) { 374 case MCExpr::Constant: 375 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 376 case MCExpr::SymbolRef: 377 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 378 case MCExpr::Unary: { 379 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 380 return hashInteger(UnaryExpr.getOpcode()) 381 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 382 } 383 case MCExpr::Binary: { 384 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 385 return hashExpr(BC, *BinaryExpr.getLHS()) 386 .append(hashInteger(BinaryExpr.getOpcode())) 387 .append(hashExpr(BC, *BinaryExpr.getRHS())); 388 } 389 case MCExpr::Target: 390 return std::string(); 391 } 392 393 llvm_unreachable("invalid expression kind"); 394 } 395 396 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 397 if (Operand.isImm()) 398 return hashInteger(Operand.getImm()); 399 if (Operand.isReg()) 400 return hashInteger(Operand.getReg()); 401 if (Operand.isExpr()) 402 return hashExpr(BC, *Operand.getExpr()); 403 404 return std::string(); 405 } 406 407 } // namespace 408 409 namespace llvm { 410 namespace bolt { 411 412 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 413 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); 414 uint64_t NumFunctionsFolded = 0; 415 std::atomic<uint64_t> NumJTFunctionsFolded{0}; 416 std::atomic<uint64_t> BytesSavedEstimate{0}; 417 std::atomic<uint64_t> CallsSavedEstimate{0}; 418 std::atomic<uint64_t> NumFoldedLastIteration{0}; 419 CongruentBucketsMap CongruentBuckets; 420 421 // Hash all the functions 422 auto hashFunctions = [&]() { 423 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown", 424 "ICF breakdown", opts::TimeICF); 425 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 426 // Make sure indices are in-order. 427 BF.getLayout().updateLayoutIndices(); 428 429 // Pre-compute hash before pushing into hashtable. 430 // Hash instruction operands to minimize hash collisions. 431 BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) { 432 return hashInstOperand(BC, Op); 433 }); 434 }; 435 436 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 437 return !shouldOptimize(BF); 438 }; 439 440 ParallelUtilities::runOnEachFunction( 441 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc, 442 "hashFunctions", /*ForceSequential*/ false, 2); 443 }; 444 445 // Creates buckets with congruent functions - functions that potentially 446 // could be folded. 447 auto createCongruentBuckets = [&]() { 448 NamedRegionTimer CongruentBucketsTimer("congruent buckets", 449 "congruent buckets", "ICF breakdown", 450 "ICF breakdown", opts::TimeICF); 451 for (auto &BFI : BC.getBinaryFunctions()) { 452 BinaryFunction &BF = BFI.second; 453 if (!this->shouldOptimize(BF)) 454 continue; 455 CongruentBuckets[&BF].emplace(&BF); 456 } 457 }; 458 459 // Partition each set of congruent functions into sets of identical functions 460 // and fold them 461 auto performFoldingPass = [&]() { 462 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes", 463 "ICF breakdown", "ICF breakdown", 464 opts::TimeICF); 465 Timer SinglePass("single fold pass", "single fold pass"); 466 LLVM_DEBUG(SinglePass.startTimer()); 467 468 ThreadPool *ThPool; 469 if (!opts::NoThreads) 470 ThPool = &ParallelUtilities::getThreadPool(); 471 472 // Fold identical functions within a single congruent bucket 473 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { 474 Timer T("folding single congruent list", "folding single congruent list"); 475 LLVM_DEBUG(T.startTimer()); 476 477 // Identical functions go into the same bucket. 478 IdenticalBucketsMap IdenticalBuckets; 479 for (BinaryFunction *BF : Candidates) { 480 IdenticalBuckets[BF].emplace_back(BF); 481 } 482 483 for (auto &IBI : IdenticalBuckets) { 484 // Functions identified as identical. 485 std::vector<BinaryFunction *> &Twins = IBI.second; 486 if (Twins.size() < 2) 487 continue; 488 489 // Fold functions. Keep the order consistent across invocations with 490 // different options. 491 llvm::stable_sort( 492 Twins, [](const BinaryFunction *A, const BinaryFunction *B) { 493 return A->getFunctionNumber() < B->getFunctionNumber(); 494 }); 495 496 BinaryFunction *ParentBF = Twins[0]; 497 for (unsigned I = 1; I < Twins.size(); ++I) { 498 BinaryFunction *ChildBF = Twins[I]; 499 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " 500 << *ParentBF << '\n'); 501 502 // Remove child function from the list of candidates. 503 auto FI = Candidates.find(ChildBF); 504 assert(FI != Candidates.end() && 505 "function expected to be in the set"); 506 Candidates.erase(FI); 507 508 // Fold the function and remove from the list of processed functions. 509 BytesSavedEstimate += ChildBF->getSize(); 510 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), 511 ParentBF->getKnownExecutionCount()); 512 BC.foldFunction(*ChildBF, *ParentBF); 513 514 ++NumFoldedLastIteration; 515 516 if (ParentBF->hasJumpTables()) 517 ++NumJTFunctionsFolded; 518 } 519 } 520 521 LLVM_DEBUG(T.stopTimer()); 522 }; 523 524 // Create a task for each congruent bucket 525 for (auto &Entry : CongruentBuckets) { 526 std::set<BinaryFunction *> &Bucket = Entry.second; 527 if (Bucket.size() < 2) 528 continue; 529 530 if (opts::NoThreads) 531 processSingleBucket(Bucket); 532 else 533 ThPool->async(processSingleBucket, std::ref(Bucket)); 534 } 535 536 if (!opts::NoThreads) 537 ThPool->wait(); 538 539 LLVM_DEBUG(SinglePass.stopTimer()); 540 }; 541 542 hashFunctions(); 543 createCongruentBuckets(); 544 545 unsigned Iteration = 1; 546 // We repeat the pass until no new modifications happen. 547 do { 548 NumFoldedLastIteration = 0; 549 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n"); 550 551 performFoldingPass(); 552 553 NumFunctionsFolded += NumFoldedLastIteration; 554 ++Iteration; 555 556 } while (NumFoldedLastIteration > 0); 557 558 LLVM_DEBUG({ 559 // Print functions that are congruent but not identical. 560 for (auto &CBI : CongruentBuckets) { 561 std::set<BinaryFunction *> &Candidates = CBI.second; 562 if (Candidates.size() < 2) 563 continue; 564 dbgs() << "BOLT-DEBUG: the following " << Candidates.size() 565 << " functions (each of size " << (*Candidates.begin())->getSize() 566 << " bytes) are congruent but not identical:\n"; 567 for (BinaryFunction *BF : Candidates) { 568 dbgs() << " " << *BF; 569 if (BF->getKnownExecutionCount()) 570 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)"; 571 dbgs() << '\n'; 572 } 573 } 574 }); 575 576 if (NumFunctionsFolded) 577 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " 578 << OriginalFunctionCount << " functions in " << Iteration 579 << " passes. " << NumJTFunctionsFolded 580 << " functions had jump tables.\n" 581 << "BOLT-INFO: Removing all identical functions will save " 582 << format("%.2lf", (double)BytesSavedEstimate / 1024) 583 << " KB of code space. Folded functions were called " 584 << CallsSavedEstimate << " times based on profile.\n"; 585 } 586 587 } // namespace bolt 588 } // namespace llvm 589