1 //===--- IdenticalCodeFolding.cpp -----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Passes/IdenticalCodeFolding.h" 12 #include "bolt/Core/ParallelUtilities.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/ThreadPool.h" 15 #include "llvm/Support/Timer.h" 16 #include <atomic> 17 #include <map> 18 #include <set> 19 #include <unordered_map> 20 21 #define DEBUG_TYPE "bolt-icf" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 static cl::opt<bool> 31 UseDFS("icf-dfs", 32 cl::desc("use DFS ordering when using -icf option"), 33 cl::ReallyHidden, 34 cl::ZeroOrMore, 35 cl::cat(BoltOptCategory)); 36 37 static cl::opt<bool> 38 TimeICF("time-icf", 39 cl::desc("time icf steps"), 40 cl::ReallyHidden, 41 cl::ZeroOrMore, 42 cl::cat(BoltOptCategory)); 43 } // namespace opts 44 45 namespace { 46 using JumpTable = bolt::JumpTable; 47 48 /// Compare two jump tables in 2 functions. The function relies on consistent 49 /// ordering of basic blocks in both binary functions (e.g. DFS). 50 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB, 51 const BinaryFunction &FunctionA, 52 const BinaryFunction &FunctionB) { 53 if (JumpTableA.EntrySize != JumpTableB.EntrySize) 54 return false; 55 56 if (JumpTableA.Type != JumpTableB.Type) 57 return false; 58 59 if (JumpTableA.getSize() != JumpTableB.getSize()) 60 return false; 61 62 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { 63 const MCSymbol *LabelA = JumpTableA.Entries[Index]; 64 const MCSymbol *LabelB = JumpTableB.Entries[Index]; 65 66 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA); 67 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB); 68 69 if (!TargetA || !TargetB) { 70 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && 71 "no target basic block found"); 72 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && 73 "no target basic block found"); 74 75 if (TargetA != TargetB) 76 return false; 77 78 continue; 79 } 80 81 assert(TargetA && TargetB && "cannot locate target block(s)"); 82 83 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) 84 return false; 85 } 86 87 return true; 88 } 89 90 /// Helper function that compares an instruction of this function to the 91 /// given instruction of the given function. The functions should have 92 /// identical CFG. 93 template <class Compare> 94 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, 95 const MCInst &InstB, const BinaryBasicBlock &BBB, 96 Compare Comp) { 97 if (InstA.getOpcode() != InstB.getOpcode()) { 98 return false; 99 } 100 101 const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); 102 103 // In this function we check for special conditions: 104 // 105 // * instructions with landing pads 106 // 107 // Most of the common cases should be handled by MCPlus::equals() 108 // that compares regular instruction operands. 109 // 110 // NB: there's no need to compare jump table indirect jump instructions 111 // separately as jump tables are handled by comparing corresponding 112 // symbols. 113 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA); 114 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB); 115 116 if (EHInfoA || EHInfoB) { 117 if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) 118 return false; 119 120 if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) 121 return false; 122 123 if (EHInfoA && EHInfoB) { 124 // Action indices should match. 125 if (EHInfoA->second != EHInfoB->second) 126 return false; 127 128 if (!EHInfoA->first != !EHInfoB->first) 129 return false; 130 131 if (EHInfoA->first && EHInfoB->first) { 132 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first); 133 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first); 134 assert(LPA && LPB && "cannot locate landing pad(s)"); 135 136 if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) 137 return false; 138 } 139 } 140 } 141 142 return BC.MIB->equals(InstA, InstB, Comp); 143 } 144 145 /// Returns true if this function has identical code and CFG with 146 /// the given function \p BF. 147 /// 148 /// If \p CongruentSymbols is set to true, then symbolic operands that reference 149 /// potentially identical but different functions are ignored during the 150 /// comparison. 151 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, 152 bool CongruentSymbols) { 153 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG"); 154 155 // Compare the two functions, one basic block at a time. 156 // Currently we require two identical basic blocks to have identical 157 // instruction sequences and the same index in their corresponding 158 // functions. The latter is important for CFG equality. 159 160 if (A.layout_size() != B.layout_size()) 161 return false; 162 163 // Comparing multi-entry functions could be non-trivial. 164 if (A.isMultiEntry() || B.isMultiEntry()) 165 return false; 166 167 // Process both functions in either DFS or existing order. 168 const BinaryFunction::BasicBlockOrderType &OrderA = 169 opts::UseDFS ? A.dfs() : A.getLayout(); 170 const BinaryFunction::BasicBlockOrderType &OrderB = 171 opts::UseDFS ? B.dfs() : B.getLayout(); 172 173 const BinaryContext &BC = A.getBinaryContext(); 174 175 auto BBI = OrderB.begin(); 176 for (const BinaryBasicBlock *BB : OrderA) { 177 const BinaryBasicBlock *OtherBB = *BBI; 178 179 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) 180 return false; 181 182 // Compare successor basic blocks. 183 // NOTE: the comparison for jump tables is only partially verified here. 184 if (BB->succ_size() != OtherBB->succ_size()) 185 return false; 186 187 auto SuccBBI = OtherBB->succ_begin(); 188 for (const BinaryBasicBlock *SuccBB : BB->successors()) { 189 const BinaryBasicBlock *SuccOtherBB = *SuccBBI; 190 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) 191 return false; 192 ++SuccBBI; 193 } 194 195 // Compare all instructions including pseudos. 196 auto I = BB->begin(), E = BB->end(); 197 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); 198 while (I != E && OtherI != OtherE) { 199 // Compare symbols. 200 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, 201 const MCSymbol *SymbolB) { 202 if (SymbolA == SymbolB) 203 return true; 204 205 // All local symbols are considered identical since they affect a 206 // control flow and we check the control flow separately. 207 // If a local symbol is escaped, then the function (potentially) has 208 // multiple entry points and we exclude such functions from 209 // comparison. 210 if (SymbolA->isTemporary() && SymbolB->isTemporary()) 211 return true; 212 213 // Compare symbols as functions. 214 uint64_t EntryIDA = 0; 215 uint64_t EntryIDB = 0; 216 const BinaryFunction *FunctionA = 217 BC.getFunctionForSymbol(SymbolA, &EntryIDA); 218 const BinaryFunction *FunctionB = 219 BC.getFunctionForSymbol(SymbolB, &EntryIDB); 220 if (FunctionA && EntryIDA) 221 FunctionA = nullptr; 222 if (FunctionB && EntryIDB) 223 FunctionB = nullptr; 224 if (FunctionA && FunctionB) { 225 // Self-referencing functions and recursive calls. 226 if (FunctionA == &A && FunctionB == &B) 227 return true; 228 229 // Functions with different hash values can never become identical, 230 // hence A and B are different. 231 if (CongruentSymbols) 232 return FunctionA->getHash() == FunctionB->getHash(); 233 234 return FunctionA == FunctionB; 235 } 236 237 // One of the symbols represents a function, the other one does not. 238 if (FunctionA != FunctionB) { 239 return false; 240 } 241 242 // Check if symbols are jump tables. 243 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName()); 244 if (!SIA) 245 return false; 246 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName()); 247 if (!SIB) 248 return false; 249 250 assert((SIA->getAddress() != SIB->getAddress()) && 251 "different symbols should not have the same value"); 252 253 const JumpTable *JumpTableA = 254 A.getJumpTableContainingAddress(SIA->getAddress()); 255 if (!JumpTableA) 256 return false; 257 258 const JumpTable *JumpTableB = 259 B.getJumpTableContainingAddress(SIB->getAddress()); 260 if (!JumpTableB) 261 return false; 262 263 if ((SIA->getAddress() - JumpTableA->getAddress()) != 264 (SIB->getAddress() - JumpTableB->getAddress())) 265 return false; 266 267 return equalJumpTables(*JumpTableA, *JumpTableB, A, B); 268 }; 269 270 if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, 271 AreSymbolsIdentical)) { 272 return false; 273 } 274 275 ++I; 276 ++OtherI; 277 } 278 279 // One of the identical blocks may have a trailing unconditional jump that 280 // is ignored for CFG purposes. 281 const MCInst *TrailingInstr = 282 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0)); 283 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) { 284 return false; 285 } 286 287 ++BBI; 288 } 289 290 // Compare exceptions action tables. 291 if (A.getLSDAActionTable() != B.getLSDAActionTable() || 292 A.getLSDATypeTable() != B.getLSDATypeTable() || 293 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) { 294 return false; 295 } 296 297 return true; 298 } 299 300 // This hash table is used to identify identical functions. It maps 301 // a function to a bucket of functions identical to it. 302 struct KeyHash { 303 size_t operator()(const BinaryFunction *F) const { return F->getHash(); } 304 }; 305 306 /// Identify two congruent functions. Two functions are considered congruent, 307 /// if they are identical/equal except for some of their instruction operands 308 /// that reference potentially identical functions, i.e. functions that could 309 /// be folded later. Congruent functions are candidates for folding in our 310 /// iterative ICF algorithm. 311 /// 312 /// Congruent functions are required to have identical hash. 313 struct KeyCongruent { 314 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 315 if (A == B) 316 return true; 317 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true); 318 } 319 }; 320 321 struct KeyEqual { 322 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 323 if (A == B) 324 return true; 325 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false); 326 } 327 }; 328 329 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, 330 KeyHash, KeyCongruent> 331 CongruentBucketsMap; 332 333 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, 334 KeyHash, KeyEqual> 335 IdenticalBucketsMap; 336 337 std::string hashInteger(uint64_t Value) { 338 std::string HashString; 339 if (Value == 0) { 340 HashString.push_back(0); 341 } 342 while (Value) { 343 uint8_t LSB = Value & 0xff; 344 HashString.push_back(LSB); 345 Value >>= 8; 346 } 347 348 return HashString; 349 } 350 351 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 352 std::string HashString; 353 354 // Ignore function references. 355 if (BC.getFunctionForSymbol(&Symbol)) 356 return HashString; 357 358 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 359 if (!ErrorOrValue) 360 return HashString; 361 362 // Ignore jump table references. 363 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 364 return HashString; 365 366 return HashString.append(hashInteger(*ErrorOrValue)); 367 } 368 369 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 370 switch (Expr.getKind()) { 371 case MCExpr::Constant: 372 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 373 case MCExpr::SymbolRef: 374 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 375 case MCExpr::Unary: { 376 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 377 return hashInteger(UnaryExpr.getOpcode()) 378 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 379 } 380 case MCExpr::Binary: { 381 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 382 return hashExpr(BC, *BinaryExpr.getLHS()) 383 .append(hashInteger(BinaryExpr.getOpcode())) 384 .append(hashExpr(BC, *BinaryExpr.getRHS())); 385 } 386 case MCExpr::Target: 387 return std::string(); 388 } 389 390 llvm_unreachable("invalid expression kind"); 391 } 392 393 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 394 if (Operand.isImm()) { 395 return hashInteger(Operand.getImm()); 396 } else if (Operand.isReg()) { 397 return hashInteger(Operand.getReg()); 398 } else if (Operand.isExpr()) { 399 return hashExpr(BC, *Operand.getExpr()); 400 } 401 402 return std::string(); 403 } 404 405 } // namespace 406 407 namespace llvm { 408 namespace bolt { 409 410 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 411 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); 412 uint64_t NumFunctionsFolded = 0; 413 std::atomic<uint64_t> NumJTFunctionsFolded{0}; 414 std::atomic<uint64_t> BytesSavedEstimate{0}; 415 std::atomic<uint64_t> CallsSavedEstimate{0}; 416 std::atomic<uint64_t> NumFoldedLastIteration{0}; 417 CongruentBucketsMap CongruentBuckets; 418 419 // Hash all the functions 420 auto hashFunctions = [&]() { 421 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown", 422 "ICF breakdown", opts::TimeICF); 423 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 424 // Make sure indices are in-order. 425 BF.updateLayoutIndices(); 426 427 // Pre-compute hash before pushing into hashtable. 428 // Hash instruction operands to minimize hash collisions. 429 BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) { 430 return hashInstOperand(BC, Op); 431 }); 432 }; 433 434 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 435 return !shouldOptimize(BF); 436 }; 437 438 ParallelUtilities::runOnEachFunction( 439 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc, 440 "hashFunctions", /*ForceSequential*/ false, 2); 441 }; 442 443 // Creates buckets with congruent functions - functions that potentially 444 // could be folded. 445 auto createCongruentBuckets = [&]() { 446 NamedRegionTimer CongruentBucketsTimer("congruent buckets", 447 "congruent buckets", "ICF breakdown", 448 "ICF breakdown", opts::TimeICF); 449 for (auto &BFI : BC.getBinaryFunctions()) { 450 BinaryFunction &BF = BFI.second; 451 if (!this->shouldOptimize(BF)) 452 continue; 453 CongruentBuckets[&BF].emplace(&BF); 454 } 455 }; 456 457 // Partition each set of congruent functions into sets of identical functions 458 // and fold them 459 auto performFoldingPass = [&]() { 460 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes", 461 "ICF breakdown", "ICF breakdown", 462 opts::TimeICF); 463 Timer SinglePass("single fold pass", "single fold pass"); 464 LLVM_DEBUG(SinglePass.startTimer()); 465 466 ThreadPool *ThPool; 467 if (!opts::NoThreads) 468 ThPool = &ParallelUtilities::getThreadPool(); 469 470 // Fold identical functions within a single congruent bucket 471 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { 472 Timer T("folding single congruent list", "folding single congruent list"); 473 LLVM_DEBUG(T.startTimer()); 474 475 // Identical functions go into the same bucket. 476 IdenticalBucketsMap IdenticalBuckets; 477 for (BinaryFunction *BF : Candidates) { 478 IdenticalBuckets[BF].emplace_back(BF); 479 } 480 481 for (auto &IBI : IdenticalBuckets) { 482 // Functions identified as identical. 483 std::vector<BinaryFunction *> &Twins = IBI.second; 484 if (Twins.size() < 2) 485 continue; 486 487 // Fold functions. Keep the order consistent across invocations with 488 // different options. 489 std::stable_sort(Twins.begin(), Twins.end(), 490 [](const BinaryFunction *A, const BinaryFunction *B) { 491 return A->getFunctionNumber() < 492 B->getFunctionNumber(); 493 }); 494 495 BinaryFunction *ParentBF = Twins[0]; 496 for (unsigned I = 1; I < Twins.size(); ++I) { 497 BinaryFunction *ChildBF = Twins[I]; 498 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " 499 << *ParentBF << '\n'); 500 501 // Remove child function from the list of candidates. 502 auto FI = Candidates.find(ChildBF); 503 assert(FI != Candidates.end() && 504 "function expected to be in the set"); 505 Candidates.erase(FI); 506 507 // Fold the function and remove from the list of processed functions. 508 BytesSavedEstimate += ChildBF->getSize(); 509 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), 510 ParentBF->getKnownExecutionCount()); 511 BC.foldFunction(*ChildBF, *ParentBF); 512 513 ++NumFoldedLastIteration; 514 515 if (ParentBF->hasJumpTables()) 516 ++NumJTFunctionsFolded; 517 } 518 } 519 520 LLVM_DEBUG(T.stopTimer()); 521 }; 522 523 // Create a task for each congruent bucket 524 for (auto &Entry : CongruentBuckets) { 525 std::set<BinaryFunction *> &Bucket = Entry.second; 526 if (Bucket.size() < 2) 527 continue; 528 529 if (opts::NoThreads) 530 processSingleBucket(Bucket); 531 else 532 ThPool->async(processSingleBucket, std::ref(Bucket)); 533 } 534 535 if (!opts::NoThreads) 536 ThPool->wait(); 537 538 LLVM_DEBUG(SinglePass.stopTimer()); 539 }; 540 541 hashFunctions(); 542 createCongruentBuckets(); 543 544 unsigned Iteration = 1; 545 // We repeat the pass until no new modifications happen. 546 do { 547 NumFoldedLastIteration = 0; 548 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n"); 549 550 performFoldingPass(); 551 552 NumFunctionsFolded += NumFoldedLastIteration; 553 ++Iteration; 554 555 } while (NumFoldedLastIteration > 0); 556 557 LLVM_DEBUG({ 558 // Print functions that are congruent but not identical. 559 for (auto &CBI : CongruentBuckets) { 560 std::set<BinaryFunction *> &Candidates = CBI.second; 561 if (Candidates.size() < 2) 562 continue; 563 dbgs() << "BOLT-DEBUG: the following " << Candidates.size() 564 << " functions (each of size " << (*Candidates.begin())->getSize() 565 << " bytes) are congruent but not identical:\n"; 566 for (BinaryFunction *BF : Candidates) { 567 dbgs() << " " << *BF; 568 if (BF->getKnownExecutionCount()) { 569 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)"; 570 } 571 dbgs() << '\n'; 572 } 573 } 574 }); 575 576 if (NumFunctionsFolded) { 577 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " 578 << OriginalFunctionCount << " functions in " << Iteration 579 << " passes. " << NumJTFunctionsFolded 580 << " functions had jump tables.\n" 581 << "BOLT-INFO: Removing all identical functions will save " 582 << format("%.2lf", (double)BytesSavedEstimate / 1024) 583 << " KB of code space. Folded functions were called " 584 << CallsSavedEstimate << " times based on profile.\n"; 585 } 586 } 587 588 } // namespace bolt 589 } // namespace llvm 590