xref: /llvm-project/bolt/lib/Passes/IdenticalCodeFolding.cpp (revision 40c2e0fafe5675306f4ad43910bf6e2fd2025ff3)
1 //===--- IdenticalCodeFolding.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Passes/IdenticalCodeFolding.h"
12 #include "bolt/Core/ParallelUtilities.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/ThreadPool.h"
15 #include "llvm/Support/Timer.h"
16 #include <atomic>
17 #include <map>
18 #include <set>
19 #include <unordered_map>
20 
21 #define DEBUG_TYPE "bolt-icf"
22 
23 using namespace llvm;
24 using namespace bolt;
25 
26 namespace opts {
27 
28 extern cl::OptionCategory BoltOptCategory;
29 
30 static cl::opt<bool>
31 UseDFS("icf-dfs",
32   cl::desc("use DFS ordering when using -icf option"),
33   cl::ReallyHidden,
34   cl::ZeroOrMore,
35   cl::cat(BoltOptCategory));
36 
37 static cl::opt<bool>
38 TimeICF("time-icf",
39   cl::desc("time icf steps"),
40   cl::ReallyHidden,
41   cl::ZeroOrMore,
42   cl::cat(BoltOptCategory));
43 } // namespace opts
44 
45 namespace {
46 using JumpTable = bolt::JumpTable;
47 
48 /// Compare two jump tables in 2 functions. The function relies on consistent
49 /// ordering of basic blocks in both binary functions (e.g. DFS).
50 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB,
51                      const BinaryFunction &FunctionA,
52                      const BinaryFunction &FunctionB) {
53   if (JumpTableA.EntrySize != JumpTableB.EntrySize)
54     return false;
55 
56   if (JumpTableA.Type != JumpTableB.Type)
57     return false;
58 
59   if (JumpTableA.getSize() != JumpTableB.getSize())
60     return false;
61 
62   for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
63     const MCSymbol *LabelA = JumpTableA.Entries[Index];
64     const MCSymbol *LabelB = JumpTableB.Entries[Index];
65 
66     const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
67     const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
68 
69     if (!TargetA || !TargetB) {
70       assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
71              "no target basic block found");
72       assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
73              "no target basic block found");
74 
75       if (TargetA != TargetB)
76         return false;
77 
78       continue;
79     }
80 
81     assert(TargetA && TargetB && "cannot locate target block(s)");
82 
83     if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
84       return false;
85   }
86 
87   return true;
88 }
89 
90 /// Helper function that compares an instruction of this function to the
91 /// given instruction of the given function. The functions should have
92 /// identical CFG.
93 template <class Compare>
94 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
95                            const MCInst &InstB, const BinaryBasicBlock &BBB,
96                            Compare Comp) {
97   if (InstA.getOpcode() != InstB.getOpcode()) {
98     return false;
99   }
100 
101   const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
102 
103   // In this function we check for special conditions:
104   //
105   //    * instructions with landing pads
106   //
107   // Most of the common cases should be handled by MCPlus::equals()
108   // that compares regular instruction operands.
109   //
110   // NB: there's no need to compare jump table indirect jump instructions
111   //     separately as jump tables are handled by comparing corresponding
112   //     symbols.
113   const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
114   const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
115 
116   if (EHInfoA || EHInfoB) {
117     if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
118       return false;
119 
120     if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
121       return false;
122 
123     if (EHInfoA && EHInfoB) {
124       // Action indices should match.
125       if (EHInfoA->second != EHInfoB->second)
126         return false;
127 
128       if (!EHInfoA->first != !EHInfoB->first)
129         return false;
130 
131       if (EHInfoA->first && EHInfoB->first) {
132         const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
133         const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
134         assert(LPA && LPB && "cannot locate landing pad(s)");
135 
136         if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
137           return false;
138       }
139     }
140   }
141 
142   return BC.MIB->equals(InstA, InstB, Comp);
143 }
144 
145 /// Returns true if this function has identical code and CFG with
146 /// the given function \p BF.
147 ///
148 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
149 /// potentially identical but different functions are ignored during the
150 /// comparison.
151 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
152                      bool CongruentSymbols) {
153   assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
154 
155   // Compare the two functions, one basic block at a time.
156   // Currently we require two identical basic blocks to have identical
157   // instruction sequences and the same index in their corresponding
158   // functions. The latter is important for CFG equality.
159 
160   if (A.layout_size() != B.layout_size())
161     return false;
162 
163   // Comparing multi-entry functions could be non-trivial.
164   if (A.isMultiEntry() || B.isMultiEntry())
165     return false;
166 
167   // Process both functions in either DFS or existing order.
168   const BinaryFunction::BasicBlockOrderType &OrderA =
169       opts::UseDFS ? A.dfs() : A.getLayout();
170   const BinaryFunction::BasicBlockOrderType &OrderB =
171       opts::UseDFS ? B.dfs() : B.getLayout();
172 
173   const BinaryContext &BC = A.getBinaryContext();
174 
175   auto BBI = OrderB.begin();
176   for (const BinaryBasicBlock *BB : OrderA) {
177     const BinaryBasicBlock *OtherBB = *BBI;
178 
179     if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
180       return false;
181 
182     // Compare successor basic blocks.
183     // NOTE: the comparison for jump tables is only partially verified here.
184     if (BB->succ_size() != OtherBB->succ_size())
185       return false;
186 
187     auto SuccBBI = OtherBB->succ_begin();
188     for (const BinaryBasicBlock *SuccBB : BB->successors()) {
189       const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
190       if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
191         return false;
192       ++SuccBBI;
193     }
194 
195     // Compare all instructions including pseudos.
196     auto I = BB->begin(), E = BB->end();
197     auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
198     while (I != E && OtherI != OtherE) {
199       // Compare symbols.
200       auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
201                                      const MCSymbol *SymbolB) {
202         if (SymbolA == SymbolB)
203           return true;
204 
205         // All local symbols are considered identical since they affect a
206         // control flow and we check the control flow separately.
207         // If a local symbol is escaped, then the function (potentially) has
208         // multiple entry points and we exclude such functions from
209         // comparison.
210         if (SymbolA->isTemporary() && SymbolB->isTemporary())
211           return true;
212 
213         // Compare symbols as functions.
214         uint64_t EntryIDA = 0;
215         uint64_t EntryIDB = 0;
216         const BinaryFunction *FunctionA =
217             BC.getFunctionForSymbol(SymbolA, &EntryIDA);
218         const BinaryFunction *FunctionB =
219             BC.getFunctionForSymbol(SymbolB, &EntryIDB);
220         if (FunctionA && EntryIDA)
221           FunctionA = nullptr;
222         if (FunctionB && EntryIDB)
223           FunctionB = nullptr;
224         if (FunctionA && FunctionB) {
225           // Self-referencing functions and recursive calls.
226           if (FunctionA == &A && FunctionB == &B)
227             return true;
228 
229           // Functions with different hash values can never become identical,
230           // hence A and B are different.
231           if (CongruentSymbols)
232             return FunctionA->getHash() == FunctionB->getHash();
233 
234           return FunctionA == FunctionB;
235         }
236 
237         // One of the symbols represents a function, the other one does not.
238         if (FunctionA != FunctionB) {
239           return false;
240         }
241 
242         // Check if symbols are jump tables.
243         const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
244         if (!SIA)
245           return false;
246         const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
247         if (!SIB)
248           return false;
249 
250         assert((SIA->getAddress() != SIB->getAddress()) &&
251                "different symbols should not have the same value");
252 
253         const JumpTable *JumpTableA =
254             A.getJumpTableContainingAddress(SIA->getAddress());
255         if (!JumpTableA)
256           return false;
257 
258         const JumpTable *JumpTableB =
259             B.getJumpTableContainingAddress(SIB->getAddress());
260         if (!JumpTableB)
261           return false;
262 
263         if ((SIA->getAddress() - JumpTableA->getAddress()) !=
264             (SIB->getAddress() - JumpTableB->getAddress()))
265           return false;
266 
267         return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
268       };
269 
270       if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
271                                  AreSymbolsIdentical)) {
272         return false;
273       }
274 
275       ++I;
276       ++OtherI;
277     }
278 
279     // One of the identical blocks may have a trailing unconditional jump that
280     // is ignored for CFG purposes.
281     const MCInst *TrailingInstr =
282         (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0));
283     if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) {
284       return false;
285     }
286 
287     ++BBI;
288   }
289 
290   // Compare exceptions action tables.
291   if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
292       A.getLSDATypeTable() != B.getLSDATypeTable() ||
293       A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) {
294     return false;
295   }
296 
297   return true;
298 }
299 
300 // This hash table is used to identify identical functions. It maps
301 // a function to a bucket of functions identical to it.
302 struct KeyHash {
303   size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
304 };
305 
306 /// Identify two congruent functions. Two functions are considered congruent,
307 /// if they are identical/equal except for some of their instruction operands
308 /// that reference potentially identical functions, i.e. functions that could
309 /// be folded later. Congruent functions are candidates for folding in our
310 /// iterative ICF algorithm.
311 ///
312 /// Congruent functions are required to have identical hash.
313 struct KeyCongruent {
314   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
315     if (A == B)
316       return true;
317     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
318   }
319 };
320 
321 struct KeyEqual {
322   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
323     if (A == B)
324       return true;
325     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
326   }
327 };
328 
329 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
330                            KeyHash, KeyCongruent>
331     CongruentBucketsMap;
332 
333 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
334                            KeyHash, KeyEqual>
335     IdenticalBucketsMap;
336 
337 std::string hashInteger(uint64_t Value) {
338   std::string HashString;
339   if (Value == 0) {
340     HashString.push_back(0);
341   }
342   while (Value) {
343     uint8_t LSB = Value & 0xff;
344     HashString.push_back(LSB);
345     Value >>= 8;
346   }
347 
348   return HashString;
349 }
350 
351 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
352   std::string HashString;
353 
354   // Ignore function references.
355   if (BC.getFunctionForSymbol(&Symbol))
356     return HashString;
357 
358   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
359   if (!ErrorOrValue)
360     return HashString;
361 
362   // Ignore jump table references.
363   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
364     return HashString;
365 
366   return HashString.append(hashInteger(*ErrorOrValue));
367 }
368 
369 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
370   switch (Expr.getKind()) {
371   case MCExpr::Constant:
372     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
373   case MCExpr::SymbolRef:
374     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
375   case MCExpr::Unary: {
376     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
377     return hashInteger(UnaryExpr.getOpcode())
378         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
379   }
380   case MCExpr::Binary: {
381     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
382     return hashExpr(BC, *BinaryExpr.getLHS())
383         .append(hashInteger(BinaryExpr.getOpcode()))
384         .append(hashExpr(BC, *BinaryExpr.getRHS()));
385   }
386   case MCExpr::Target:
387     return std::string();
388   }
389 
390   llvm_unreachable("invalid expression kind");
391 }
392 
393 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
394   if (Operand.isImm()) {
395     return hashInteger(Operand.getImm());
396   } else if (Operand.isReg()) {
397     return hashInteger(Operand.getReg());
398   } else if (Operand.isExpr()) {
399     return hashExpr(BC, *Operand.getExpr());
400   }
401 
402   return std::string();
403 }
404 
405 } // namespace
406 
407 namespace llvm {
408 namespace bolt {
409 
410 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
411   const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
412   uint64_t NumFunctionsFolded = 0;
413   std::atomic<uint64_t> NumJTFunctionsFolded{0};
414   std::atomic<uint64_t> BytesSavedEstimate{0};
415   std::atomic<uint64_t> CallsSavedEstimate{0};
416   std::atomic<uint64_t> NumFoldedLastIteration{0};
417   CongruentBucketsMap CongruentBuckets;
418 
419   // Hash all the functions
420   auto hashFunctions = [&]() {
421     NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
422                                         "ICF breakdown", opts::TimeICF);
423     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
424       // Make sure indices are in-order.
425       BF.updateLayoutIndices();
426 
427       // Pre-compute hash before pushing into hashtable.
428       // Hash instruction operands to minimize hash collisions.
429       BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) {
430         return hashInstOperand(BC, Op);
431       });
432     };
433 
434     ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
435       return !shouldOptimize(BF);
436     };
437 
438     ParallelUtilities::runOnEachFunction(
439         BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
440         "hashFunctions", /*ForceSequential*/ false, 2);
441   };
442 
443   // Creates buckets with congruent functions - functions that potentially
444   // could  be folded.
445   auto createCongruentBuckets = [&]() {
446     NamedRegionTimer CongruentBucketsTimer("congruent buckets",
447                                            "congruent buckets", "ICF breakdown",
448                                            "ICF breakdown", opts::TimeICF);
449     for (auto &BFI : BC.getBinaryFunctions()) {
450       BinaryFunction &BF = BFI.second;
451       if (!this->shouldOptimize(BF))
452         continue;
453       CongruentBuckets[&BF].emplace(&BF);
454     }
455   };
456 
457   // Partition each set of congruent functions into sets of identical functions
458   // and fold them
459   auto performFoldingPass = [&]() {
460     NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
461                                         "ICF breakdown", "ICF breakdown",
462                                         opts::TimeICF);
463     Timer SinglePass("single fold pass", "single fold pass");
464     LLVM_DEBUG(SinglePass.startTimer());
465 
466     ThreadPool *ThPool;
467     if (!opts::NoThreads)
468       ThPool = &ParallelUtilities::getThreadPool();
469 
470     // Fold identical functions within a single congruent bucket
471     auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
472       Timer T("folding single congruent list", "folding single congruent list");
473       LLVM_DEBUG(T.startTimer());
474 
475       // Identical functions go into the same bucket.
476       IdenticalBucketsMap IdenticalBuckets;
477       for (BinaryFunction *BF : Candidates) {
478         IdenticalBuckets[BF].emplace_back(BF);
479       }
480 
481       for (auto &IBI : IdenticalBuckets) {
482         // Functions identified as identical.
483         std::vector<BinaryFunction *> &Twins = IBI.second;
484         if (Twins.size() < 2)
485           continue;
486 
487         // Fold functions. Keep the order consistent across invocations with
488         // different options.
489         std::stable_sort(Twins.begin(), Twins.end(),
490                          [](const BinaryFunction *A, const BinaryFunction *B) {
491                            return A->getFunctionNumber() <
492                                   B->getFunctionNumber();
493                          });
494 
495         BinaryFunction *ParentBF = Twins[0];
496         for (unsigned I = 1; I < Twins.size(); ++I) {
497           BinaryFunction *ChildBF = Twins[I];
498           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
499                             << *ParentBF << '\n');
500 
501           // Remove child function from the list of candidates.
502           auto FI = Candidates.find(ChildBF);
503           assert(FI != Candidates.end() &&
504                  "function expected to be in the set");
505           Candidates.erase(FI);
506 
507           // Fold the function and remove from the list of processed functions.
508           BytesSavedEstimate += ChildBF->getSize();
509           CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
510                                          ParentBF->getKnownExecutionCount());
511           BC.foldFunction(*ChildBF, *ParentBF);
512 
513           ++NumFoldedLastIteration;
514 
515           if (ParentBF->hasJumpTables())
516             ++NumJTFunctionsFolded;
517         }
518       }
519 
520       LLVM_DEBUG(T.stopTimer());
521     };
522 
523     // Create a task for each congruent bucket
524     for (auto &Entry : CongruentBuckets) {
525       std::set<BinaryFunction *> &Bucket = Entry.second;
526       if (Bucket.size() < 2)
527         continue;
528 
529       if (opts::NoThreads)
530         processSingleBucket(Bucket);
531       else
532         ThPool->async(processSingleBucket, std::ref(Bucket));
533     }
534 
535     if (!opts::NoThreads)
536       ThPool->wait();
537 
538     LLVM_DEBUG(SinglePass.stopTimer());
539   };
540 
541   hashFunctions();
542   createCongruentBuckets();
543 
544   unsigned Iteration = 1;
545   // We repeat the pass until no new modifications happen.
546   do {
547     NumFoldedLastIteration = 0;
548     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
549 
550     performFoldingPass();
551 
552     NumFunctionsFolded += NumFoldedLastIteration;
553     ++Iteration;
554 
555   } while (NumFoldedLastIteration > 0);
556 
557   LLVM_DEBUG({
558     // Print functions that are congruent but not identical.
559     for (auto &CBI : CongruentBuckets) {
560       std::set<BinaryFunction *> &Candidates = CBI.second;
561       if (Candidates.size() < 2)
562         continue;
563       dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
564              << " functions (each of size " << (*Candidates.begin())->getSize()
565              << " bytes) are congruent but not identical:\n";
566       for (BinaryFunction *BF : Candidates) {
567         dbgs() << "  " << *BF;
568         if (BF->getKnownExecutionCount()) {
569           dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
570         }
571         dbgs() << '\n';
572       }
573     }
574   });
575 
576   if (NumFunctionsFolded) {
577     outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
578            << OriginalFunctionCount << " functions in " << Iteration
579            << " passes. " << NumJTFunctionsFolded
580            << " functions had jump tables.\n"
581            << "BOLT-INFO: Removing all identical functions will save "
582            << format("%.2lf", (double)BytesSavedEstimate / 1024)
583            << " KB of code space. Folded functions were called "
584            << CallsSavedEstimate << " times based on profile.\n";
585   }
586 }
587 
588 } // namespace bolt
589 } // namespace llvm
590