xref: /llvm-project/bolt/lib/Passes/IdenticalCodeFolding.cpp (revision d5c03def2465735ffc5f446eba728fc64408b8c1)
1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdenticalCodeFolding class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/IdenticalCodeFolding.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/ThreadPool.h"
18 #include "llvm/Support/Timer.h"
19 #include <atomic>
20 #include <iterator>
21 #include <map>
22 #include <set>
23 #include <unordered_map>
24 
25 #define DEBUG_TYPE "bolt-icf"
26 
27 using namespace llvm;
28 using namespace bolt;
29 
30 namespace opts {
31 
32 extern cl::OptionCategory BoltOptCategory;
33 
34 static cl::opt<bool> UseDFS("icf-dfs",
35                             cl::desc("use DFS ordering when using -icf option"),
36                             cl::ReallyHidden, cl::cat(BoltOptCategory));
37 
38 static cl::opt<bool>
39 TimeICF("time-icf",
40   cl::desc("time icf steps"),
41   cl::ReallyHidden,
42   cl::ZeroOrMore,
43   cl::cat(BoltOptCategory));
44 } // namespace opts
45 
46 namespace {
47 using JumpTable = bolt::JumpTable;
48 
49 /// Compare two jump tables in 2 functions. The function relies on consistent
50 /// ordering of basic blocks in both binary functions (e.g. DFS).
51 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB,
52                      const BinaryFunction &FunctionA,
53                      const BinaryFunction &FunctionB) {
54   if (JumpTableA.EntrySize != JumpTableB.EntrySize)
55     return false;
56 
57   if (JumpTableA.Type != JumpTableB.Type)
58     return false;
59 
60   if (JumpTableA.getSize() != JumpTableB.getSize())
61     return false;
62 
63   for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
64     const MCSymbol *LabelA = JumpTableA.Entries[Index];
65     const MCSymbol *LabelB = JumpTableB.Entries[Index];
66 
67     const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
68     const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
69 
70     if (!TargetA || !TargetB) {
71       assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
72              "no target basic block found");
73       assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
74              "no target basic block found");
75 
76       if (TargetA != TargetB)
77         return false;
78 
79       continue;
80     }
81 
82     assert(TargetA && TargetB && "cannot locate target block(s)");
83 
84     if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
85       return false;
86   }
87 
88   return true;
89 }
90 
91 /// Helper function that compares an instruction of this function to the
92 /// given instruction of the given function. The functions should have
93 /// identical CFG.
94 template <class Compare>
95 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
96                            const MCInst &InstB, const BinaryBasicBlock &BBB,
97                            Compare Comp) {
98   if (InstA.getOpcode() != InstB.getOpcode())
99     return false;
100 
101   const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
102 
103   // In this function we check for special conditions:
104   //
105   //    * instructions with landing pads
106   //
107   // Most of the common cases should be handled by MCPlus::equals()
108   // that compares regular instruction operands.
109   //
110   // NB: there's no need to compare jump table indirect jump instructions
111   //     separately as jump tables are handled by comparing corresponding
112   //     symbols.
113   const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
114   const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
115 
116   if (EHInfoA || EHInfoB) {
117     if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
118       return false;
119 
120     if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
121       return false;
122 
123     if (EHInfoA && EHInfoB) {
124       // Action indices should match.
125       if (EHInfoA->second != EHInfoB->second)
126         return false;
127 
128       if (!EHInfoA->first != !EHInfoB->first)
129         return false;
130 
131       if (EHInfoA->first && EHInfoB->first) {
132         const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
133         const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
134         assert(LPA && LPB && "cannot locate landing pad(s)");
135 
136         if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
137           return false;
138       }
139     }
140   }
141 
142   return BC.MIB->equals(InstA, InstB, Comp);
143 }
144 
145 /// Returns true if this function has identical code and CFG with
146 /// the given function \p BF.
147 ///
148 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
149 /// potentially identical but different functions are ignored during the
150 /// comparison.
151 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
152                      bool CongruentSymbols) {
153   assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
154 
155   // Compare the two functions, one basic block at a time.
156   // Currently we require two identical basic blocks to have identical
157   // instruction sequences and the same index in their corresponding
158   // functions. The latter is important for CFG equality.
159 
160   if (A.getLayout().block_size() != B.getLayout().block_size())
161     return false;
162 
163   // Comparing multi-entry functions could be non-trivial.
164   if (A.isMultiEntry() || B.isMultiEntry())
165     return false;
166 
167   if (A.hasIslandsInfo() || B.hasIslandsInfo())
168     return false;
169 
170   // Process both functions in either DFS or existing order.
171   SmallVector<const BinaryBasicBlock *, 0> OrderA;
172   SmallVector<const BinaryBasicBlock *, 0> OrderB;
173   if (opts::UseDFS) {
174     copy(A.dfs(), std::back_inserter(OrderA));
175     copy(B.dfs(), std::back_inserter(OrderB));
176   } else {
177     copy(A.getLayout().blocks(), std::back_inserter(OrderA));
178     copy(B.getLayout().blocks(), std::back_inserter(OrderB));
179   }
180 
181   const BinaryContext &BC = A.getBinaryContext();
182 
183   auto BBI = OrderB.begin();
184   for (const BinaryBasicBlock *BB : OrderA) {
185     const BinaryBasicBlock *OtherBB = *BBI;
186 
187     if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
188       return false;
189 
190     // Compare successor basic blocks.
191     // NOTE: the comparison for jump tables is only partially verified here.
192     if (BB->succ_size() != OtherBB->succ_size())
193       return false;
194 
195     auto SuccBBI = OtherBB->succ_begin();
196     for (const BinaryBasicBlock *SuccBB : BB->successors()) {
197       const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
198       if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
199         return false;
200       ++SuccBBI;
201     }
202 
203     // Compare all instructions including pseudos.
204     auto I = BB->begin(), E = BB->end();
205     auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
206     while (I != E && OtherI != OtherE) {
207       // Compare symbols.
208       auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
209                                      const MCSymbol *SymbolB) {
210         if (SymbolA == SymbolB)
211           return true;
212 
213         // All local symbols are considered identical since they affect a
214         // control flow and we check the control flow separately.
215         // If a local symbol is escaped, then the function (potentially) has
216         // multiple entry points and we exclude such functions from
217         // comparison.
218         if (SymbolA->isTemporary() && SymbolB->isTemporary())
219           return true;
220 
221         // Compare symbols as functions.
222         uint64_t EntryIDA = 0;
223         uint64_t EntryIDB = 0;
224         const BinaryFunction *FunctionA =
225             BC.getFunctionForSymbol(SymbolA, &EntryIDA);
226         const BinaryFunction *FunctionB =
227             BC.getFunctionForSymbol(SymbolB, &EntryIDB);
228         if (FunctionA && EntryIDA)
229           FunctionA = nullptr;
230         if (FunctionB && EntryIDB)
231           FunctionB = nullptr;
232         if (FunctionA && FunctionB) {
233           // Self-referencing functions and recursive calls.
234           if (FunctionA == &A && FunctionB == &B)
235             return true;
236 
237           // Functions with different hash values can never become identical,
238           // hence A and B are different.
239           if (CongruentSymbols)
240             return FunctionA->getHash() == FunctionB->getHash();
241 
242           return FunctionA == FunctionB;
243         }
244 
245         // One of the symbols represents a function, the other one does not.
246         if (FunctionA != FunctionB)
247           return false;
248 
249         // Check if symbols are jump tables.
250         const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
251         if (!SIA)
252           return false;
253         const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
254         if (!SIB)
255           return false;
256 
257         assert((SIA->getAddress() != SIB->getAddress()) &&
258                "different symbols should not have the same value");
259 
260         const JumpTable *JumpTableA =
261             A.getJumpTableContainingAddress(SIA->getAddress());
262         if (!JumpTableA)
263           return false;
264 
265         const JumpTable *JumpTableB =
266             B.getJumpTableContainingAddress(SIB->getAddress());
267         if (!JumpTableB)
268           return false;
269 
270         if ((SIA->getAddress() - JumpTableA->getAddress()) !=
271             (SIB->getAddress() - JumpTableB->getAddress()))
272           return false;
273 
274         return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
275       };
276 
277       if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
278                                  AreSymbolsIdentical))
279         return false;
280 
281       ++I;
282       ++OtherI;
283     }
284 
285     // One of the identical blocks may have a trailing unconditional jump that
286     // is ignored for CFG purposes.
287     const MCInst *TrailingInstr =
288         (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr));
289     if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr))
290       return false;
291 
292     ++BBI;
293   }
294 
295   // Compare exceptions action tables.
296   if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
297       A.getLSDATypeTable() != B.getLSDATypeTable() ||
298       A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable())
299     return false;
300 
301   return true;
302 }
303 
304 // This hash table is used to identify identical functions. It maps
305 // a function to a bucket of functions identical to it.
306 struct KeyHash {
307   size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
308 };
309 
310 /// Identify two congruent functions. Two functions are considered congruent,
311 /// if they are identical/equal except for some of their instruction operands
312 /// that reference potentially identical functions, i.e. functions that could
313 /// be folded later. Congruent functions are candidates for folding in our
314 /// iterative ICF algorithm.
315 ///
316 /// Congruent functions are required to have identical hash.
317 struct KeyCongruent {
318   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
319     if (A == B)
320       return true;
321     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
322   }
323 };
324 
325 struct KeyEqual {
326   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
327     if (A == B)
328       return true;
329     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
330   }
331 };
332 
333 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
334                            KeyHash, KeyCongruent>
335     CongruentBucketsMap;
336 
337 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
338                            KeyHash, KeyEqual>
339     IdenticalBucketsMap;
340 
341 std::string hashInteger(uint64_t Value) {
342   std::string HashString;
343   if (Value == 0)
344     HashString.push_back(0);
345 
346   while (Value) {
347     uint8_t LSB = Value & 0xff;
348     HashString.push_back(LSB);
349     Value >>= 8;
350   }
351 
352   return HashString;
353 }
354 
355 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
356   std::string HashString;
357 
358   // Ignore function references.
359   if (BC.getFunctionForSymbol(&Symbol))
360     return HashString;
361 
362   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
363   if (!ErrorOrValue)
364     return HashString;
365 
366   // Ignore jump table references.
367   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
368     return HashString;
369 
370   return HashString.append(hashInteger(*ErrorOrValue));
371 }
372 
373 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
374   switch (Expr.getKind()) {
375   case MCExpr::Constant:
376     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
377   case MCExpr::SymbolRef:
378     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
379   case MCExpr::Unary: {
380     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
381     return hashInteger(UnaryExpr.getOpcode())
382         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
383   }
384   case MCExpr::Binary: {
385     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
386     return hashExpr(BC, *BinaryExpr.getLHS())
387         .append(hashInteger(BinaryExpr.getOpcode()))
388         .append(hashExpr(BC, *BinaryExpr.getRHS()));
389   }
390   case MCExpr::Target:
391     return std::string();
392   }
393 
394   llvm_unreachable("invalid expression kind");
395 }
396 
397 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
398   if (Operand.isImm())
399     return hashInteger(Operand.getImm());
400   if (Operand.isReg())
401     return hashInteger(Operand.getReg());
402   if (Operand.isExpr())
403     return hashExpr(BC, *Operand.getExpr());
404 
405   return std::string();
406 }
407 
408 } // namespace
409 
410 namespace llvm {
411 namespace bolt {
412 
413 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
414   const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
415   uint64_t NumFunctionsFolded = 0;
416   std::atomic<uint64_t> NumJTFunctionsFolded{0};
417   std::atomic<uint64_t> BytesSavedEstimate{0};
418   std::atomic<uint64_t> CallsSavedEstimate{0};
419   std::atomic<uint64_t> NumFoldedLastIteration{0};
420   CongruentBucketsMap CongruentBuckets;
421 
422   // Hash all the functions
423   auto hashFunctions = [&]() {
424     NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
425                                         "ICF breakdown", opts::TimeICF);
426     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
427       // Make sure indices are in-order.
428       BF.getLayout().updateLayoutIndices();
429 
430       // Pre-compute hash before pushing into hashtable.
431       // Hash instruction operands to minimize hash collisions.
432       BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) {
433         return hashInstOperand(BC, Op);
434       });
435     };
436 
437     ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
438       return !shouldOptimize(BF);
439     };
440 
441     ParallelUtilities::runOnEachFunction(
442         BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
443         "hashFunctions", /*ForceSequential*/ false, 2);
444   };
445 
446   // Creates buckets with congruent functions - functions that potentially
447   // could  be folded.
448   auto createCongruentBuckets = [&]() {
449     NamedRegionTimer CongruentBucketsTimer("congruent buckets",
450                                            "congruent buckets", "ICF breakdown",
451                                            "ICF breakdown", opts::TimeICF);
452     for (auto &BFI : BC.getBinaryFunctions()) {
453       BinaryFunction &BF = BFI.second;
454       if (!this->shouldOptimize(BF))
455         continue;
456       CongruentBuckets[&BF].emplace(&BF);
457     }
458   };
459 
460   // Partition each set of congruent functions into sets of identical functions
461   // and fold them
462   auto performFoldingPass = [&]() {
463     NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
464                                         "ICF breakdown", "ICF breakdown",
465                                         opts::TimeICF);
466     Timer SinglePass("single fold pass", "single fold pass");
467     LLVM_DEBUG(SinglePass.startTimer());
468 
469     ThreadPool *ThPool;
470     if (!opts::NoThreads)
471       ThPool = &ParallelUtilities::getThreadPool();
472 
473     // Fold identical functions within a single congruent bucket
474     auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
475       Timer T("folding single congruent list", "folding single congruent list");
476       LLVM_DEBUG(T.startTimer());
477 
478       // Identical functions go into the same bucket.
479       IdenticalBucketsMap IdenticalBuckets;
480       for (BinaryFunction *BF : Candidates) {
481         IdenticalBuckets[BF].emplace_back(BF);
482       }
483 
484       for (auto &IBI : IdenticalBuckets) {
485         // Functions identified as identical.
486         std::vector<BinaryFunction *> &Twins = IBI.second;
487         if (Twins.size() < 2)
488           continue;
489 
490         // Fold functions. Keep the order consistent across invocations with
491         // different options.
492         llvm::stable_sort(
493             Twins, [](const BinaryFunction *A, const BinaryFunction *B) {
494               return A->getFunctionNumber() < B->getFunctionNumber();
495             });
496 
497         BinaryFunction *ParentBF = Twins[0];
498         for (unsigned I = 1; I < Twins.size(); ++I) {
499           BinaryFunction *ChildBF = Twins[I];
500           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
501                             << *ParentBF << '\n');
502 
503           // Remove child function from the list of candidates.
504           auto FI = Candidates.find(ChildBF);
505           assert(FI != Candidates.end() &&
506                  "function expected to be in the set");
507           Candidates.erase(FI);
508 
509           // Fold the function and remove from the list of processed functions.
510           BytesSavedEstimate += ChildBF->getSize();
511           CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
512                                          ParentBF->getKnownExecutionCount());
513           BC.foldFunction(*ChildBF, *ParentBF);
514 
515           ++NumFoldedLastIteration;
516 
517           if (ParentBF->hasJumpTables())
518             ++NumJTFunctionsFolded;
519         }
520       }
521 
522       LLVM_DEBUG(T.stopTimer());
523     };
524 
525     // Create a task for each congruent bucket
526     for (auto &Entry : CongruentBuckets) {
527       std::set<BinaryFunction *> &Bucket = Entry.second;
528       if (Bucket.size() < 2)
529         continue;
530 
531       if (opts::NoThreads)
532         processSingleBucket(Bucket);
533       else
534         ThPool->async(processSingleBucket, std::ref(Bucket));
535     }
536 
537     if (!opts::NoThreads)
538       ThPool->wait();
539 
540     LLVM_DEBUG(SinglePass.stopTimer());
541   };
542 
543   hashFunctions();
544   createCongruentBuckets();
545 
546   unsigned Iteration = 1;
547   // We repeat the pass until no new modifications happen.
548   do {
549     NumFoldedLastIteration = 0;
550     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
551 
552     performFoldingPass();
553 
554     NumFunctionsFolded += NumFoldedLastIteration;
555     ++Iteration;
556 
557   } while (NumFoldedLastIteration > 0);
558 
559   LLVM_DEBUG({
560     // Print functions that are congruent but not identical.
561     for (auto &CBI : CongruentBuckets) {
562       std::set<BinaryFunction *> &Candidates = CBI.second;
563       if (Candidates.size() < 2)
564         continue;
565       dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
566              << " functions (each of size " << (*Candidates.begin())->getSize()
567              << " bytes) are congruent but not identical:\n";
568       for (BinaryFunction *BF : Candidates) {
569         dbgs() << "  " << *BF;
570         if (BF->getKnownExecutionCount())
571           dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
572         dbgs() << '\n';
573       }
574     }
575   });
576 
577   if (NumFunctionsFolded)
578     outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
579            << OriginalFunctionCount << " functions in " << Iteration
580            << " passes. " << NumJTFunctionsFolded
581            << " functions had jump tables.\n"
582            << "BOLT-INFO: Removing all identical functions will save "
583            << format("%.2lf", (double)BytesSavedEstimate / 1024)
584            << " KB of code space. Folded functions were called "
585            << CallsSavedEstimate << " times based on profile.\n";
586 }
587 
588 } // namespace bolt
589 } // namespace llvm
590