1 //===--- IdenticalCodeFolding.cpp -----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Passes/IdenticalCodeFolding.h" 12 #include "bolt/Core/ParallelUtilities.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/ThreadPool.h" 15 #include "llvm/Support/Timer.h" 16 #include <atomic> 17 #include <map> 18 #include <set> 19 #include <unordered_map> 20 21 #define DEBUG_TYPE "bolt-icf" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 static cl::opt<bool> 31 UseDFS("icf-dfs", 32 cl::desc("use DFS ordering when using -icf option"), 33 cl::ReallyHidden, 34 cl::ZeroOrMore, 35 cl::cat(BoltOptCategory)); 36 37 static cl::opt<bool> 38 TimeICF("time-icf", 39 cl::desc("time icf steps"), 40 cl::ReallyHidden, 41 cl::ZeroOrMore, 42 cl::cat(BoltOptCategory)); 43 } // namespace opts 44 45 namespace { 46 using JumpTable = bolt::JumpTable; 47 48 /// Compare two jump tables in 2 functions. The function relies on consistent 49 /// ordering of basic blocks in both binary functions (e.g. DFS). 50 bool equalJumpTables(const JumpTable &JumpTableA, 51 const JumpTable &JumpTableB, 52 const BinaryFunction &FunctionA, 53 const BinaryFunction &FunctionB) { 54 if (JumpTableA.EntrySize != JumpTableB.EntrySize) 55 return false; 56 57 if (JumpTableA.Type != JumpTableB.Type) 58 return false; 59 60 if (JumpTableA.getSize() != JumpTableB.getSize()) 61 return false; 62 63 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { 64 const MCSymbol *LabelA = JumpTableA.Entries[Index]; 65 const MCSymbol *LabelB = JumpTableB.Entries[Index]; 66 67 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA); 68 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB); 69 70 if (!TargetA || !TargetB) { 71 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && 72 "no target basic block found"); 73 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && 74 "no target basic block found"); 75 76 if (TargetA != TargetB) 77 return false; 78 79 continue; 80 } 81 82 assert(TargetA && TargetB && "cannot locate target block(s)"); 83 84 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) 85 return false; 86 } 87 88 return true; 89 } 90 91 /// Helper function that compares an instruction of this function to the 92 /// given instruction of the given function. The functions should have 93 /// identical CFG. 94 template <class Compare> 95 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, 96 const MCInst &InstB, const BinaryBasicBlock &BBB, 97 Compare Comp) { 98 if (InstA.getOpcode() != InstB.getOpcode()) { 99 return false; 100 } 101 102 const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); 103 104 // In this function we check for special conditions: 105 // 106 // * instructions with landing pads 107 // 108 // Most of the common cases should be handled by MCPlus::equals() 109 // that compares regular instruction operands. 110 // 111 // NB: there's no need to compare jump table indirect jump instructions 112 // separately as jump tables are handled by comparing corresponding 113 // symbols. 114 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA); 115 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB); 116 117 if (EHInfoA || EHInfoB) { 118 if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) 119 return false; 120 121 if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) 122 return false; 123 124 if (EHInfoA && EHInfoB) { 125 // Action indices should match. 126 if (EHInfoA->second != EHInfoB->second) 127 return false; 128 129 if (!EHInfoA->first != !EHInfoB->first) 130 return false; 131 132 if (EHInfoA->first && EHInfoB->first) { 133 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first); 134 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first); 135 assert(LPA && LPB && "cannot locate landing pad(s)"); 136 137 if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) 138 return false; 139 } 140 } 141 } 142 143 return BC.MIB->equals(InstA, InstB, Comp); 144 } 145 146 147 /// Returns true if this function has identical code and CFG with 148 /// the given function \p BF. 149 /// 150 /// If \p CongruentSymbols is set to true, then symbolic operands that reference 151 /// potentially identical but different functions are ignored during the 152 /// comparison. 153 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, 154 bool CongruentSymbols) { 155 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG"); 156 157 // Compare the two functions, one basic block at a time. 158 // Currently we require two identical basic blocks to have identical 159 // instruction sequences and the same index in their corresponding 160 // functions. The latter is important for CFG equality. 161 162 if (A.layout_size() != B.layout_size()) 163 return false; 164 165 // Comparing multi-entry functions could be non-trivial. 166 if (A.isMultiEntry() || B.isMultiEntry()) 167 return false; 168 169 // Process both functions in either DFS or existing order. 170 const std::vector<BinaryBasicBlock *> &OrderA = 171 opts::UseDFS ? A.dfs() : A.getLayout(); 172 const std::vector<BinaryBasicBlock *> &OrderB = 173 opts::UseDFS ? B.dfs() : B.getLayout(); 174 175 const BinaryContext &BC = A.getBinaryContext(); 176 177 auto BBI = OrderB.begin(); 178 for (const BinaryBasicBlock *BB : OrderA) { 179 const BinaryBasicBlock *OtherBB = *BBI; 180 181 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) 182 return false; 183 184 // Compare successor basic blocks. 185 // NOTE: the comparison for jump tables is only partially verified here. 186 if (BB->succ_size() != OtherBB->succ_size()) 187 return false; 188 189 auto SuccBBI = OtherBB->succ_begin(); 190 for (const BinaryBasicBlock *SuccBB : BB->successors()) { 191 const BinaryBasicBlock *SuccOtherBB = *SuccBBI; 192 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) 193 return false; 194 ++SuccBBI; 195 } 196 197 // Compare all instructions including pseudos. 198 auto I = BB->begin(), E = BB->end(); 199 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); 200 while (I != E && OtherI != OtherE) { 201 // Compare symbols. 202 auto AreSymbolsIdentical = [&] (const MCSymbol *SymbolA, 203 const MCSymbol *SymbolB) { 204 if (SymbolA == SymbolB) 205 return true; 206 207 // All local symbols are considered identical since they affect a 208 // control flow and we check the control flow separately. 209 // If a local symbol is escaped, then the function (potentially) has 210 // multiple entry points and we exclude such functions from 211 // comparison. 212 if (SymbolA->isTemporary() && SymbolB->isTemporary()) 213 return true; 214 215 // Compare symbols as functions. 216 uint64_t EntryIDA = 0; 217 uint64_t EntryIDB = 0; 218 const BinaryFunction *FunctionA = 219 BC.getFunctionForSymbol(SymbolA, &EntryIDA); 220 const BinaryFunction *FunctionB = 221 BC.getFunctionForSymbol(SymbolB, &EntryIDB); 222 if (FunctionA && EntryIDA) 223 FunctionA = nullptr; 224 if (FunctionB && EntryIDB) 225 FunctionB = nullptr; 226 if (FunctionA && FunctionB) { 227 // Self-referencing functions and recursive calls. 228 if (FunctionA == &A && FunctionB == &B) 229 return true; 230 231 // Functions with different hash values can never become identical, 232 // hence A and B are different. 233 if (CongruentSymbols) 234 return FunctionA->getHash() == FunctionB->getHash(); 235 236 return FunctionA == FunctionB; 237 } 238 239 // One of the symbols represents a function, the other one does not. 240 if (FunctionA != FunctionB) { 241 return false; 242 } 243 244 // Check if symbols are jump tables. 245 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName()); 246 if (!SIA) 247 return false; 248 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName()); 249 if (!SIB) 250 return false; 251 252 assert((SIA->getAddress() != SIB->getAddress()) && 253 "different symbols should not have the same value"); 254 255 const JumpTable *JumpTableA = 256 A.getJumpTableContainingAddress(SIA->getAddress()); 257 if (!JumpTableA) 258 return false; 259 260 const JumpTable *JumpTableB = 261 B.getJumpTableContainingAddress(SIB->getAddress()); 262 if (!JumpTableB) 263 return false; 264 265 if ((SIA->getAddress() - JumpTableA->getAddress()) != 266 (SIB->getAddress() - JumpTableB->getAddress())) 267 return false; 268 269 return equalJumpTables(*JumpTableA, *JumpTableB, A, B); 270 }; 271 272 if(!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, 273 AreSymbolsIdentical)) { 274 return false; 275 } 276 277 ++I; ++OtherI; 278 } 279 280 // One of the identical blocks may have a trailing unconditional jump that 281 // is ignored for CFG purposes. 282 const MCInst *TrailingInstr = 283 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0)); 284 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) { 285 return false; 286 } 287 288 ++BBI; 289 } 290 291 // Compare exceptions action tables. 292 if (A.getLSDAActionTable() != B.getLSDAActionTable() || 293 A.getLSDATypeTable() != B.getLSDATypeTable() || 294 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) { 295 return false; 296 } 297 298 return true; 299 } 300 301 // This hash table is used to identify identical functions. It maps 302 // a function to a bucket of functions identical to it. 303 struct KeyHash { 304 size_t operator()(const BinaryFunction *F) const { 305 return F->getHash(); 306 } 307 }; 308 309 /// Identify two congruent functions. Two functions are considered congruent, 310 /// if they are identical/equal except for some of their instruction operands 311 /// that reference potentially identical functions, i.e. functions that could 312 /// be folded later. Congruent functions are candidates for folding in our 313 /// iterative ICF algorithm. 314 /// 315 /// Congruent functions are required to have identical hash. 316 struct KeyCongruent { 317 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 318 if (A == B) 319 return true; 320 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true); 321 } 322 }; 323 324 struct KeyEqual { 325 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 326 if (A == B) 327 return true; 328 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false); 329 } 330 }; 331 332 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, 333 KeyHash, KeyCongruent> 334 CongruentBucketsMap; 335 336 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, 337 KeyHash, KeyEqual> 338 IdenticalBucketsMap; 339 340 std::string hashInteger(uint64_t Value) { 341 std::string HashString; 342 if (Value == 0) { 343 HashString.push_back(0); 344 } 345 while (Value) { 346 uint8_t LSB = Value & 0xff; 347 HashString.push_back(LSB); 348 Value >>= 8; 349 } 350 351 return HashString; 352 } 353 354 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 355 std::string HashString; 356 357 // Ignore function references. 358 if (BC.getFunctionForSymbol(&Symbol)) 359 return HashString; 360 361 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 362 if (!ErrorOrValue) 363 return HashString; 364 365 // Ignore jump table references. 366 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 367 return HashString; 368 369 return HashString.append(hashInteger(*ErrorOrValue)); 370 } 371 372 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 373 switch (Expr.getKind()) { 374 case MCExpr::Constant: 375 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 376 case MCExpr::SymbolRef: 377 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 378 case MCExpr::Unary: { 379 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 380 return hashInteger(UnaryExpr.getOpcode()) 381 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 382 } 383 case MCExpr::Binary: { 384 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 385 return hashExpr(BC, *BinaryExpr.getLHS()) 386 .append(hashInteger(BinaryExpr.getOpcode())) 387 .append(hashExpr(BC, *BinaryExpr.getRHS())); 388 } 389 case MCExpr::Target: 390 return std::string(); 391 } 392 393 llvm_unreachable("invalid expression kind"); 394 } 395 396 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 397 if (Operand.isImm()) { 398 return hashInteger(Operand.getImm()); 399 } else if (Operand.isReg()) { 400 return hashInteger(Operand.getReg()); 401 } else if (Operand.isExpr()) { 402 return hashExpr(BC, *Operand.getExpr()); 403 } 404 405 return std::string(); 406 } 407 408 } // namespace 409 410 namespace llvm { 411 namespace bolt { 412 413 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 414 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); 415 uint64_t NumFunctionsFolded = 0; 416 std::atomic<uint64_t> NumJTFunctionsFolded{0}; 417 std::atomic<uint64_t> BytesSavedEstimate{0}; 418 std::atomic<uint64_t> CallsSavedEstimate{0}; 419 std::atomic<uint64_t> NumFoldedLastIteration{0}; 420 CongruentBucketsMap CongruentBuckets; 421 422 // Hash all the functions 423 auto hashFunctions = [&]() { 424 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown", 425 "ICF breakdown", opts::TimeICF); 426 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 427 // Make sure indices are in-order. 428 BF.updateLayoutIndices(); 429 430 // Pre-compute hash before pushing into hashtable. 431 // Hash instruction operands to minimize hash collisions. 432 BF.computeHash(opts::UseDFS, 433 [&BC] (const MCOperand &Op) { 434 return hashInstOperand(BC, Op); 435 }); 436 }; 437 438 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 439 return !shouldOptimize(BF); 440 }; 441 442 ParallelUtilities::runOnEachFunction( 443 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc, 444 "hashFunctions", /*ForceSequential*/ false, 2); 445 }; 446 447 // Creates buckets with congruent functions - functions that potentially 448 // could be folded. 449 auto createCongruentBuckets = [&]() { 450 NamedRegionTimer CongruentBucketsTimer("congruent buckets", 451 "congruent buckets", "ICF breakdown", 452 "ICF breakdown", opts::TimeICF); 453 for (auto &BFI : BC.getBinaryFunctions()) { 454 BinaryFunction &BF = BFI.second; 455 if (!this->shouldOptimize(BF)) 456 continue; 457 CongruentBuckets[&BF].emplace(&BF); 458 } 459 }; 460 461 // Partition each set of congruent functions into sets of identical functions 462 // and fold them 463 auto performFoldingPass = [&]() { 464 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes", 465 "ICF breakdown", "ICF breakdown", 466 opts::TimeICF); 467 Timer SinglePass("single fold pass", "single fold pass"); 468 LLVM_DEBUG(SinglePass.startTimer()); 469 470 ThreadPool *ThPool; 471 if (!opts::NoThreads) 472 ThPool = &ParallelUtilities::getThreadPool(); 473 474 // Fold identical functions within a single congruent bucket 475 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { 476 Timer T("folding single congruent list", "folding single congruent list"); 477 LLVM_DEBUG(T.startTimer()); 478 479 // Identical functions go into the same bucket. 480 IdenticalBucketsMap IdenticalBuckets; 481 for (BinaryFunction *BF : Candidates) { 482 IdenticalBuckets[BF].emplace_back(BF); 483 } 484 485 for (auto &IBI : IdenticalBuckets) { 486 // Functions identified as identical. 487 std::vector<BinaryFunction *> &Twins = IBI.second; 488 if (Twins.size() < 2) 489 continue; 490 491 // Fold functions. Keep the order consistent across invocations with 492 // different options. 493 std::stable_sort(Twins.begin(), Twins.end(), 494 [](const BinaryFunction *A, const BinaryFunction *B) { 495 return A->getFunctionNumber() < 496 B->getFunctionNumber(); 497 }); 498 499 BinaryFunction *ParentBF = Twins[0]; 500 for (unsigned I = 1; I < Twins.size(); ++I) { 501 BinaryFunction *ChildBF = Twins[I]; 502 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " 503 << *ParentBF << '\n'); 504 505 // Remove child function from the list of candidates. 506 auto FI = Candidates.find(ChildBF); 507 assert(FI != Candidates.end() && 508 "function expected to be in the set"); 509 Candidates.erase(FI); 510 511 // Fold the function and remove from the list of processed functions. 512 BytesSavedEstimate += ChildBF->getSize(); 513 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), 514 ParentBF->getKnownExecutionCount()); 515 BC.foldFunction(*ChildBF, *ParentBF); 516 517 ++NumFoldedLastIteration; 518 519 if (ParentBF->hasJumpTables()) 520 ++NumJTFunctionsFolded; 521 } 522 } 523 524 LLVM_DEBUG(T.stopTimer()); 525 }; 526 527 // Create a task for each congruent bucket 528 for (auto &Entry : CongruentBuckets) { 529 std::set<BinaryFunction *> &Bucket = Entry.second; 530 if (Bucket.size() < 2) 531 continue; 532 533 if (opts::NoThreads) 534 processSingleBucket(Bucket); 535 else 536 ThPool->async(processSingleBucket, std::ref(Bucket)); 537 } 538 539 if (!opts::NoThreads) 540 ThPool->wait(); 541 542 LLVM_DEBUG(SinglePass.stopTimer()); 543 }; 544 545 hashFunctions(); 546 createCongruentBuckets(); 547 548 unsigned Iteration = 1; 549 // We repeat the pass until no new modifications happen. 550 do { 551 NumFoldedLastIteration = 0; 552 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n"); 553 554 performFoldingPass(); 555 556 NumFunctionsFolded += NumFoldedLastIteration; 557 ++Iteration; 558 559 } while (NumFoldedLastIteration > 0); 560 561 LLVM_DEBUG({ 562 // Print functions that are congruent but not identical. 563 for (auto &CBI : CongruentBuckets) { 564 std::set<BinaryFunction *> &Candidates = CBI.second; 565 if (Candidates.size() < 2) 566 continue; 567 dbgs() << "BOLT-DEBUG: the following " << Candidates.size() 568 << " functions (each of size " << (*Candidates.begin())->getSize() 569 << " bytes) are congruent but not identical:\n"; 570 for (BinaryFunction *BF : Candidates) { 571 dbgs() << " " << *BF; 572 if (BF->getKnownExecutionCount()) { 573 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)"; 574 } 575 dbgs() << '\n'; 576 } 577 } 578 }); 579 580 if (NumFunctionsFolded) { 581 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded 582 << " out of " << OriginalFunctionCount << " functions in " 583 << Iteration << " passes. " 584 << NumJTFunctionsFolded << " functions had jump tables.\n" 585 << "BOLT-INFO: Removing all identical functions will save " 586 << format("%.2lf", (double) BytesSavedEstimate / 1024) 587 << " KB of code space. Folded functions were called " 588 << CallsSavedEstimate << " times based on profile.\n"; 589 } 590 } 591 592 } // namespace bolt 593 } // namespace llvm 594