xref: /llvm-project/bolt/lib/Passes/IdenticalCodeFolding.cpp (revision a34c753fe709a624f5b087397fb05adeac2311e4)
1 //===--- IdenticalCodeFolding.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Passes/IdenticalCodeFolding.h"
12 #include "bolt/Core/ParallelUtilities.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/ThreadPool.h"
15 #include "llvm/Support/Timer.h"
16 #include <atomic>
17 #include <map>
18 #include <set>
19 #include <unordered_map>
20 
21 #define DEBUG_TYPE "bolt-icf"
22 
23 using namespace llvm;
24 using namespace bolt;
25 
26 namespace opts {
27 
28 extern cl::OptionCategory BoltOptCategory;
29 
30 static cl::opt<bool>
31 UseDFS("icf-dfs",
32   cl::desc("use DFS ordering when using -icf option"),
33   cl::ReallyHidden,
34   cl::ZeroOrMore,
35   cl::cat(BoltOptCategory));
36 
37 static cl::opt<bool>
38 TimeICF("time-icf",
39   cl::desc("time icf steps"),
40   cl::ReallyHidden,
41   cl::ZeroOrMore,
42   cl::cat(BoltOptCategory));
43 } // namespace opts
44 
45 namespace {
46 using JumpTable = bolt::JumpTable;
47 
48 /// Compare two jump tables in 2 functions. The function relies on consistent
49 /// ordering of basic blocks in both binary functions (e.g. DFS).
50 bool equalJumpTables(const JumpTable &JumpTableA,
51                      const JumpTable &JumpTableB,
52                      const BinaryFunction &FunctionA,
53                      const BinaryFunction &FunctionB) {
54   if (JumpTableA.EntrySize != JumpTableB.EntrySize)
55     return false;
56 
57   if (JumpTableA.Type != JumpTableB.Type)
58     return false;
59 
60   if (JumpTableA.getSize() != JumpTableB.getSize())
61     return false;
62 
63   for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
64     const MCSymbol *LabelA = JumpTableA.Entries[Index];
65     const MCSymbol *LabelB = JumpTableB.Entries[Index];
66 
67     const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
68     const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
69 
70     if (!TargetA || !TargetB) {
71       assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
72              "no target basic block found");
73       assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
74              "no target basic block found");
75 
76       if (TargetA != TargetB)
77         return false;
78 
79       continue;
80     }
81 
82     assert(TargetA && TargetB && "cannot locate target block(s)");
83 
84     if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
85       return false;
86   }
87 
88   return true;
89 }
90 
91 /// Helper function that compares an instruction of this function to the
92 /// given instruction of the given function. The functions should have
93 /// identical CFG.
94 template <class Compare>
95 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
96                            const MCInst &InstB, const BinaryBasicBlock &BBB,
97                            Compare Comp) {
98   if (InstA.getOpcode() != InstB.getOpcode()) {
99     return false;
100   }
101 
102   const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
103 
104   // In this function we check for special conditions:
105   //
106   //    * instructions with landing pads
107   //
108   // Most of the common cases should be handled by MCPlus::equals()
109   // that compares regular instruction operands.
110   //
111   // NB: there's no need to compare jump table indirect jump instructions
112   //     separately as jump tables are handled by comparing corresponding
113   //     symbols.
114   const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
115   const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
116 
117   if (EHInfoA || EHInfoB) {
118     if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
119       return false;
120 
121     if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
122       return false;
123 
124     if (EHInfoA && EHInfoB) {
125       // Action indices should match.
126       if (EHInfoA->second != EHInfoB->second)
127         return false;
128 
129       if (!EHInfoA->first != !EHInfoB->first)
130         return false;
131 
132       if (EHInfoA->first && EHInfoB->first) {
133         const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
134         const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
135         assert(LPA && LPB && "cannot locate landing pad(s)");
136 
137         if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
138           return false;
139       }
140     }
141   }
142 
143   return BC.MIB->equals(InstA, InstB, Comp);
144 }
145 
146 
147 /// Returns true if this function has identical code and CFG with
148 /// the given function \p BF.
149 ///
150 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
151 /// potentially identical but different functions are ignored during the
152 /// comparison.
153 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
154                      bool CongruentSymbols) {
155   assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
156 
157   // Compare the two functions, one basic block at a time.
158   // Currently we require two identical basic blocks to have identical
159   // instruction sequences and the same index in their corresponding
160   // functions. The latter is important for CFG equality.
161 
162   if (A.layout_size() != B.layout_size())
163     return false;
164 
165   // Comparing multi-entry functions could be non-trivial.
166   if (A.isMultiEntry() || B.isMultiEntry())
167     return false;
168 
169   // Process both functions in either DFS or existing order.
170   const std::vector<BinaryBasicBlock *> &OrderA =
171       opts::UseDFS ? A.dfs() : A.getLayout();
172   const std::vector<BinaryBasicBlock *> &OrderB =
173       opts::UseDFS ? B.dfs() : B.getLayout();
174 
175   const BinaryContext &BC = A.getBinaryContext();
176 
177   auto BBI = OrderB.begin();
178   for (const BinaryBasicBlock *BB : OrderA) {
179     const BinaryBasicBlock *OtherBB = *BBI;
180 
181     if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
182       return false;
183 
184     // Compare successor basic blocks.
185     // NOTE: the comparison for jump tables is only partially verified here.
186     if (BB->succ_size() != OtherBB->succ_size())
187       return false;
188 
189     auto SuccBBI = OtherBB->succ_begin();
190     for (const BinaryBasicBlock *SuccBB : BB->successors()) {
191       const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
192       if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
193         return false;
194       ++SuccBBI;
195     }
196 
197     // Compare all instructions including pseudos.
198     auto I = BB->begin(), E = BB->end();
199     auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
200     while (I != E && OtherI != OtherE) {
201       // Compare symbols.
202       auto AreSymbolsIdentical = [&] (const MCSymbol *SymbolA,
203                                       const MCSymbol *SymbolB) {
204         if (SymbolA == SymbolB)
205           return true;
206 
207         // All local symbols are considered identical since they affect a
208         // control flow and we check the control flow separately.
209         // If a local symbol is escaped, then the function (potentially) has
210         // multiple entry points and we exclude such functions from
211         // comparison.
212         if (SymbolA->isTemporary() && SymbolB->isTemporary())
213           return true;
214 
215         // Compare symbols as functions.
216         uint64_t EntryIDA = 0;
217         uint64_t EntryIDB = 0;
218         const BinaryFunction *FunctionA =
219             BC.getFunctionForSymbol(SymbolA, &EntryIDA);
220         const BinaryFunction *FunctionB =
221             BC.getFunctionForSymbol(SymbolB, &EntryIDB);
222         if (FunctionA && EntryIDA)
223           FunctionA = nullptr;
224         if (FunctionB && EntryIDB)
225           FunctionB = nullptr;
226         if (FunctionA && FunctionB) {
227           // Self-referencing functions and recursive calls.
228           if (FunctionA == &A && FunctionB == &B)
229             return true;
230 
231           // Functions with different hash values can never become identical,
232           // hence A and B are different.
233           if (CongruentSymbols)
234             return FunctionA->getHash() == FunctionB->getHash();
235 
236           return FunctionA == FunctionB;
237         }
238 
239         // One of the symbols represents a function, the other one does not.
240         if (FunctionA != FunctionB) {
241           return false;
242         }
243 
244         // Check if symbols are jump tables.
245         const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
246         if (!SIA)
247           return false;
248         const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
249         if (!SIB)
250           return false;
251 
252         assert((SIA->getAddress() != SIB->getAddress()) &&
253                "different symbols should not have the same value");
254 
255         const JumpTable *JumpTableA =
256             A.getJumpTableContainingAddress(SIA->getAddress());
257         if (!JumpTableA)
258           return false;
259 
260         const JumpTable *JumpTableB =
261             B.getJumpTableContainingAddress(SIB->getAddress());
262         if (!JumpTableB)
263           return false;
264 
265         if ((SIA->getAddress() - JumpTableA->getAddress()) !=
266             (SIB->getAddress() - JumpTableB->getAddress()))
267           return false;
268 
269         return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
270       };
271 
272       if(!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
273                                 AreSymbolsIdentical)) {
274         return false;
275       }
276 
277       ++I; ++OtherI;
278     }
279 
280     // One of the identical blocks may have a trailing unconditional jump that
281     // is ignored for CFG purposes.
282     const MCInst *TrailingInstr =
283         (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0));
284     if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) {
285       return false;
286     }
287 
288     ++BBI;
289   }
290 
291   // Compare exceptions action tables.
292   if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
293       A.getLSDATypeTable() != B.getLSDATypeTable() ||
294       A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) {
295     return false;
296   }
297 
298   return true;
299 }
300 
301 // This hash table is used to identify identical functions. It maps
302 // a function to a bucket of functions identical to it.
303 struct KeyHash {
304   size_t operator()(const BinaryFunction *F) const {
305     return F->getHash();
306   }
307 };
308 
309 /// Identify two congruent functions. Two functions are considered congruent,
310 /// if they are identical/equal except for some of their instruction operands
311 /// that reference potentially identical functions, i.e. functions that could
312 /// be folded later. Congruent functions are candidates for folding in our
313 /// iterative ICF algorithm.
314 ///
315 /// Congruent functions are required to have identical hash.
316 struct KeyCongruent {
317   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
318     if (A == B)
319       return true;
320     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
321   }
322 };
323 
324 struct KeyEqual {
325   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
326     if (A == B)
327       return true;
328     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
329   }
330 };
331 
332 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
333                            KeyHash, KeyCongruent>
334     CongruentBucketsMap;
335 
336 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
337                            KeyHash, KeyEqual>
338     IdenticalBucketsMap;
339 
340 std::string hashInteger(uint64_t Value) {
341   std::string HashString;
342   if (Value == 0) {
343     HashString.push_back(0);
344   }
345   while (Value) {
346     uint8_t LSB = Value & 0xff;
347     HashString.push_back(LSB);
348     Value >>= 8;
349   }
350 
351   return HashString;
352 }
353 
354 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
355   std::string HashString;
356 
357   // Ignore function references.
358   if (BC.getFunctionForSymbol(&Symbol))
359     return HashString;
360 
361   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
362   if (!ErrorOrValue)
363     return HashString;
364 
365   // Ignore jump table references.
366   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
367     return HashString;
368 
369   return HashString.append(hashInteger(*ErrorOrValue));
370 }
371 
372 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
373   switch (Expr.getKind()) {
374   case MCExpr::Constant:
375     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
376   case MCExpr::SymbolRef:
377     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
378   case MCExpr::Unary: {
379     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
380     return hashInteger(UnaryExpr.getOpcode())
381         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
382   }
383   case MCExpr::Binary: {
384     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
385     return hashExpr(BC, *BinaryExpr.getLHS())
386         .append(hashInteger(BinaryExpr.getOpcode()))
387         .append(hashExpr(BC, *BinaryExpr.getRHS()));
388   }
389   case MCExpr::Target:
390     return std::string();
391   }
392 
393   llvm_unreachable("invalid expression kind");
394 }
395 
396 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
397   if (Operand.isImm()) {
398     return hashInteger(Operand.getImm());
399   } else if (Operand.isReg()) {
400     return hashInteger(Operand.getReg());
401   } else if (Operand.isExpr()) {
402     return hashExpr(BC, *Operand.getExpr());
403   }
404 
405   return std::string();
406 }
407 
408 } // namespace
409 
410 namespace llvm {
411 namespace bolt {
412 
413 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
414   const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
415   uint64_t NumFunctionsFolded = 0;
416   std::atomic<uint64_t> NumJTFunctionsFolded{0};
417   std::atomic<uint64_t> BytesSavedEstimate{0};
418   std::atomic<uint64_t> CallsSavedEstimate{0};
419   std::atomic<uint64_t> NumFoldedLastIteration{0};
420   CongruentBucketsMap CongruentBuckets;
421 
422   // Hash all the functions
423   auto hashFunctions = [&]() {
424     NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
425                                         "ICF breakdown", opts::TimeICF);
426     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
427       // Make sure indices are in-order.
428       BF.updateLayoutIndices();
429 
430       // Pre-compute hash before pushing into hashtable.
431       // Hash instruction operands to minimize hash collisions.
432       BF.computeHash(opts::UseDFS,
433                      [&BC] (const MCOperand &Op) {
434                        return hashInstOperand(BC, Op);
435                      });
436     };
437 
438     ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
439       return !shouldOptimize(BF);
440     };
441 
442     ParallelUtilities::runOnEachFunction(
443         BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
444         "hashFunctions", /*ForceSequential*/ false, 2);
445   };
446 
447   // Creates buckets with congruent functions - functions that potentially
448   // could  be folded.
449   auto createCongruentBuckets = [&]() {
450     NamedRegionTimer CongruentBucketsTimer("congruent buckets",
451                                            "congruent buckets", "ICF breakdown",
452                                            "ICF breakdown", opts::TimeICF);
453     for (auto &BFI : BC.getBinaryFunctions()) {
454       BinaryFunction &BF = BFI.second;
455       if (!this->shouldOptimize(BF))
456         continue;
457       CongruentBuckets[&BF].emplace(&BF);
458     }
459   };
460 
461   // Partition each set of congruent functions into sets of identical functions
462   // and fold them
463   auto performFoldingPass = [&]() {
464     NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
465                                         "ICF breakdown", "ICF breakdown",
466                                         opts::TimeICF);
467     Timer SinglePass("single fold pass", "single fold pass");
468     LLVM_DEBUG(SinglePass.startTimer());
469 
470     ThreadPool *ThPool;
471     if (!opts::NoThreads)
472       ThPool = &ParallelUtilities::getThreadPool();
473 
474     // Fold identical functions within a single congruent bucket
475     auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
476       Timer T("folding single congruent list", "folding single congruent list");
477       LLVM_DEBUG(T.startTimer());
478 
479       // Identical functions go into the same bucket.
480       IdenticalBucketsMap IdenticalBuckets;
481       for (BinaryFunction *BF : Candidates) {
482         IdenticalBuckets[BF].emplace_back(BF);
483       }
484 
485       for (auto &IBI : IdenticalBuckets) {
486         // Functions identified as identical.
487         std::vector<BinaryFunction *> &Twins = IBI.second;
488         if (Twins.size() < 2)
489           continue;
490 
491         // Fold functions. Keep the order consistent across invocations with
492         // different options.
493         std::stable_sort(Twins.begin(), Twins.end(),
494                          [](const BinaryFunction *A, const BinaryFunction *B) {
495                            return A->getFunctionNumber() <
496                                   B->getFunctionNumber();
497                          });
498 
499         BinaryFunction *ParentBF = Twins[0];
500         for (unsigned I = 1; I < Twins.size(); ++I) {
501           BinaryFunction *ChildBF = Twins[I];
502           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
503                             << *ParentBF << '\n');
504 
505           // Remove child function from the list of candidates.
506           auto FI = Candidates.find(ChildBF);
507           assert(FI != Candidates.end() &&
508                  "function expected to be in the set");
509           Candidates.erase(FI);
510 
511           // Fold the function and remove from the list of processed functions.
512           BytesSavedEstimate += ChildBF->getSize();
513           CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
514                                          ParentBF->getKnownExecutionCount());
515           BC.foldFunction(*ChildBF, *ParentBF);
516 
517           ++NumFoldedLastIteration;
518 
519           if (ParentBF->hasJumpTables())
520             ++NumJTFunctionsFolded;
521         }
522       }
523 
524       LLVM_DEBUG(T.stopTimer());
525     };
526 
527     // Create a task for each congruent bucket
528     for (auto &Entry : CongruentBuckets) {
529       std::set<BinaryFunction *> &Bucket = Entry.second;
530       if (Bucket.size() < 2)
531         continue;
532 
533       if (opts::NoThreads)
534         processSingleBucket(Bucket);
535       else
536         ThPool->async(processSingleBucket, std::ref(Bucket));
537     }
538 
539     if (!opts::NoThreads)
540       ThPool->wait();
541 
542     LLVM_DEBUG(SinglePass.stopTimer());
543   };
544 
545   hashFunctions();
546   createCongruentBuckets();
547 
548   unsigned Iteration = 1;
549   // We repeat the pass until no new modifications happen.
550   do {
551     NumFoldedLastIteration = 0;
552     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
553 
554     performFoldingPass();
555 
556     NumFunctionsFolded += NumFoldedLastIteration;
557     ++Iteration;
558 
559   } while (NumFoldedLastIteration > 0);
560 
561   LLVM_DEBUG({
562     // Print functions that are congruent but not identical.
563     for (auto &CBI : CongruentBuckets) {
564       std::set<BinaryFunction *> &Candidates = CBI.second;
565       if (Candidates.size() < 2)
566         continue;
567       dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
568              << " functions (each of size " << (*Candidates.begin())->getSize()
569              << " bytes) are congruent but not identical:\n";
570       for (BinaryFunction *BF : Candidates) {
571         dbgs() << "  " << *BF;
572         if (BF->getKnownExecutionCount()) {
573           dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
574         }
575         dbgs() << '\n';
576       }
577     }
578   });
579 
580   if (NumFunctionsFolded) {
581     outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded
582            << " out of " << OriginalFunctionCount << " functions in "
583            << Iteration << " passes. "
584            << NumJTFunctionsFolded << " functions had jump tables.\n"
585            << "BOLT-INFO: Removing all identical functions will save "
586            << format("%.2lf", (double) BytesSavedEstimate / 1024)
587            << " KB of code space. Folded functions were called "
588            << CallsSavedEstimate << " times based on profile.\n";
589   }
590 }
591 
592 } // namespace bolt
593 } // namespace llvm
594