xref: /llvm-project/bolt/lib/Passes/IdenticalCodeFolding.cpp (revision 8477bc67614a45d9bbd5caa407bb376069789c7b)
1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdenticalCodeFolding class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/IdenticalCodeFolding.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/ThreadPool.h"
17 #include "llvm/Support/Timer.h"
18 #include <atomic>
19 #include <map>
20 #include <set>
21 #include <unordered_map>
22 
23 #define DEBUG_TYPE "bolt-icf"
24 
25 using namespace llvm;
26 using namespace bolt;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltOptCategory;
31 
32 static cl::opt<bool> UseDFS("icf-dfs",
33                             cl::desc("use DFS ordering when using -icf option"),
34                             cl::ReallyHidden, cl::cat(BoltOptCategory));
35 
36 static cl::opt<bool>
37 TimeICF("time-icf",
38   cl::desc("time icf steps"),
39   cl::ReallyHidden,
40   cl::ZeroOrMore,
41   cl::cat(BoltOptCategory));
42 } // namespace opts
43 
44 namespace {
45 using JumpTable = bolt::JumpTable;
46 
47 /// Compare two jump tables in 2 functions. The function relies on consistent
48 /// ordering of basic blocks in both binary functions (e.g. DFS).
49 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB,
50                      const BinaryFunction &FunctionA,
51                      const BinaryFunction &FunctionB) {
52   if (JumpTableA.EntrySize != JumpTableB.EntrySize)
53     return false;
54 
55   if (JumpTableA.Type != JumpTableB.Type)
56     return false;
57 
58   if (JumpTableA.getSize() != JumpTableB.getSize())
59     return false;
60 
61   for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
62     const MCSymbol *LabelA = JumpTableA.Entries[Index];
63     const MCSymbol *LabelB = JumpTableB.Entries[Index];
64 
65     const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
66     const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
67 
68     if (!TargetA || !TargetB) {
69       assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
70              "no target basic block found");
71       assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
72              "no target basic block found");
73 
74       if (TargetA != TargetB)
75         return false;
76 
77       continue;
78     }
79 
80     assert(TargetA && TargetB && "cannot locate target block(s)");
81 
82     if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
83       return false;
84   }
85 
86   return true;
87 }
88 
89 /// Helper function that compares an instruction of this function to the
90 /// given instruction of the given function. The functions should have
91 /// identical CFG.
92 template <class Compare>
93 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
94                            const MCInst &InstB, const BinaryBasicBlock &BBB,
95                            Compare Comp) {
96   if (InstA.getOpcode() != InstB.getOpcode())
97     return false;
98 
99   const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
100 
101   // In this function we check for special conditions:
102   //
103   //    * instructions with landing pads
104   //
105   // Most of the common cases should be handled by MCPlus::equals()
106   // that compares regular instruction operands.
107   //
108   // NB: there's no need to compare jump table indirect jump instructions
109   //     separately as jump tables are handled by comparing corresponding
110   //     symbols.
111   const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
112   const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
113 
114   if (EHInfoA || EHInfoB) {
115     if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
116       return false;
117 
118     if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
119       return false;
120 
121     if (EHInfoA && EHInfoB) {
122       // Action indices should match.
123       if (EHInfoA->second != EHInfoB->second)
124         return false;
125 
126       if (!EHInfoA->first != !EHInfoB->first)
127         return false;
128 
129       if (EHInfoA->first && EHInfoB->first) {
130         const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
131         const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
132         assert(LPA && LPB && "cannot locate landing pad(s)");
133 
134         if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
135           return false;
136       }
137     }
138   }
139 
140   return BC.MIB->equals(InstA, InstB, Comp);
141 }
142 
143 /// Returns true if this function has identical code and CFG with
144 /// the given function \p BF.
145 ///
146 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
147 /// potentially identical but different functions are ignored during the
148 /// comparison.
149 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
150                      bool CongruentSymbols) {
151   assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
152 
153   // Compare the two functions, one basic block at a time.
154   // Currently we require two identical basic blocks to have identical
155   // instruction sequences and the same index in their corresponding
156   // functions. The latter is important for CFG equality.
157 
158   if (A.getLayout().block_size() != B.getLayout().block_size())
159     return false;
160 
161   // Comparing multi-entry functions could be non-trivial.
162   if (A.isMultiEntry() || B.isMultiEntry())
163     return false;
164 
165   // Process both functions in either DFS or existing order.
166   const BinaryFunction::BasicBlockOrderType OrderA =
167       opts::UseDFS
168           ? A.dfs()
169           : BinaryFunction::BasicBlockOrderType(A.getLayout().block_begin(),
170                                                 A.getLayout().block_end());
171   const BinaryFunction::BasicBlockOrderType OrderB =
172       opts::UseDFS
173           ? B.dfs()
174           : BinaryFunction::BasicBlockOrderType(B.getLayout().block_begin(),
175                                                 B.getLayout().block_end());
176 
177   const BinaryContext &BC = A.getBinaryContext();
178 
179   auto BBI = OrderB.begin();
180   for (const BinaryBasicBlock *BB : OrderA) {
181     const BinaryBasicBlock *OtherBB = *BBI;
182 
183     if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
184       return false;
185 
186     // Compare successor basic blocks.
187     // NOTE: the comparison for jump tables is only partially verified here.
188     if (BB->succ_size() != OtherBB->succ_size())
189       return false;
190 
191     auto SuccBBI = OtherBB->succ_begin();
192     for (const BinaryBasicBlock *SuccBB : BB->successors()) {
193       const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
194       if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
195         return false;
196       ++SuccBBI;
197     }
198 
199     // Compare all instructions including pseudos.
200     auto I = BB->begin(), E = BB->end();
201     auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
202     while (I != E && OtherI != OtherE) {
203       // Compare symbols.
204       auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
205                                      const MCSymbol *SymbolB) {
206         if (SymbolA == SymbolB)
207           return true;
208 
209         // All local symbols are considered identical since they affect a
210         // control flow and we check the control flow separately.
211         // If a local symbol is escaped, then the function (potentially) has
212         // multiple entry points and we exclude such functions from
213         // comparison.
214         if (SymbolA->isTemporary() && SymbolB->isTemporary())
215           return true;
216 
217         // Compare symbols as functions.
218         uint64_t EntryIDA = 0;
219         uint64_t EntryIDB = 0;
220         const BinaryFunction *FunctionA =
221             BC.getFunctionForSymbol(SymbolA, &EntryIDA);
222         const BinaryFunction *FunctionB =
223             BC.getFunctionForSymbol(SymbolB, &EntryIDB);
224         if (FunctionA && EntryIDA)
225           FunctionA = nullptr;
226         if (FunctionB && EntryIDB)
227           FunctionB = nullptr;
228         if (FunctionA && FunctionB) {
229           // Self-referencing functions and recursive calls.
230           if (FunctionA == &A && FunctionB == &B)
231             return true;
232 
233           // Functions with different hash values can never become identical,
234           // hence A and B are different.
235           if (CongruentSymbols)
236             return FunctionA->getHash() == FunctionB->getHash();
237 
238           return FunctionA == FunctionB;
239         }
240 
241         // One of the symbols represents a function, the other one does not.
242         if (FunctionA != FunctionB)
243           return false;
244 
245         // Check if symbols are jump tables.
246         const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
247         if (!SIA)
248           return false;
249         const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
250         if (!SIB)
251           return false;
252 
253         assert((SIA->getAddress() != SIB->getAddress()) &&
254                "different symbols should not have the same value");
255 
256         const JumpTable *JumpTableA =
257             A.getJumpTableContainingAddress(SIA->getAddress());
258         if (!JumpTableA)
259           return false;
260 
261         const JumpTable *JumpTableB =
262             B.getJumpTableContainingAddress(SIB->getAddress());
263         if (!JumpTableB)
264           return false;
265 
266         if ((SIA->getAddress() - JumpTableA->getAddress()) !=
267             (SIB->getAddress() - JumpTableB->getAddress()))
268           return false;
269 
270         return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
271       };
272 
273       if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
274                                  AreSymbolsIdentical))
275         return false;
276 
277       ++I;
278       ++OtherI;
279     }
280 
281     // One of the identical blocks may have a trailing unconditional jump that
282     // is ignored for CFG purposes.
283     const MCInst *TrailingInstr =
284         (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0));
285     if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr))
286       return false;
287 
288     ++BBI;
289   }
290 
291   // Compare exceptions action tables.
292   if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
293       A.getLSDATypeTable() != B.getLSDATypeTable() ||
294       A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable())
295     return false;
296 
297   return true;
298 }
299 
300 // This hash table is used to identify identical functions. It maps
301 // a function to a bucket of functions identical to it.
302 struct KeyHash {
303   size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
304 };
305 
306 /// Identify two congruent functions. Two functions are considered congruent,
307 /// if they are identical/equal except for some of their instruction operands
308 /// that reference potentially identical functions, i.e. functions that could
309 /// be folded later. Congruent functions are candidates for folding in our
310 /// iterative ICF algorithm.
311 ///
312 /// Congruent functions are required to have identical hash.
313 struct KeyCongruent {
314   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
315     if (A == B)
316       return true;
317     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
318   }
319 };
320 
321 struct KeyEqual {
322   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
323     if (A == B)
324       return true;
325     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
326   }
327 };
328 
329 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
330                            KeyHash, KeyCongruent>
331     CongruentBucketsMap;
332 
333 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
334                            KeyHash, KeyEqual>
335     IdenticalBucketsMap;
336 
337 std::string hashInteger(uint64_t Value) {
338   std::string HashString;
339   if (Value == 0)
340     HashString.push_back(0);
341 
342   while (Value) {
343     uint8_t LSB = Value & 0xff;
344     HashString.push_back(LSB);
345     Value >>= 8;
346   }
347 
348   return HashString;
349 }
350 
351 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
352   std::string HashString;
353 
354   // Ignore function references.
355   if (BC.getFunctionForSymbol(&Symbol))
356     return HashString;
357 
358   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
359   if (!ErrorOrValue)
360     return HashString;
361 
362   // Ignore jump table references.
363   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
364     return HashString;
365 
366   return HashString.append(hashInteger(*ErrorOrValue));
367 }
368 
369 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
370   switch (Expr.getKind()) {
371   case MCExpr::Constant:
372     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
373   case MCExpr::SymbolRef:
374     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
375   case MCExpr::Unary: {
376     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
377     return hashInteger(UnaryExpr.getOpcode())
378         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
379   }
380   case MCExpr::Binary: {
381     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
382     return hashExpr(BC, *BinaryExpr.getLHS())
383         .append(hashInteger(BinaryExpr.getOpcode()))
384         .append(hashExpr(BC, *BinaryExpr.getRHS()));
385   }
386   case MCExpr::Target:
387     return std::string();
388   }
389 
390   llvm_unreachable("invalid expression kind");
391 }
392 
393 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
394   if (Operand.isImm())
395     return hashInteger(Operand.getImm());
396   if (Operand.isReg())
397     return hashInteger(Operand.getReg());
398   if (Operand.isExpr())
399     return hashExpr(BC, *Operand.getExpr());
400 
401   return std::string();
402 }
403 
404 } // namespace
405 
406 namespace llvm {
407 namespace bolt {
408 
409 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
410   const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
411   uint64_t NumFunctionsFolded = 0;
412   std::atomic<uint64_t> NumJTFunctionsFolded{0};
413   std::atomic<uint64_t> BytesSavedEstimate{0};
414   std::atomic<uint64_t> CallsSavedEstimate{0};
415   std::atomic<uint64_t> NumFoldedLastIteration{0};
416   CongruentBucketsMap CongruentBuckets;
417 
418   // Hash all the functions
419   auto hashFunctions = [&]() {
420     NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
421                                         "ICF breakdown", opts::TimeICF);
422     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
423       // Make sure indices are in-order.
424       BF.getLayout().updateLayoutIndices();
425 
426       // Pre-compute hash before pushing into hashtable.
427       // Hash instruction operands to minimize hash collisions.
428       BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) {
429         return hashInstOperand(BC, Op);
430       });
431     };
432 
433     ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
434       return !shouldOptimize(BF);
435     };
436 
437     ParallelUtilities::runOnEachFunction(
438         BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
439         "hashFunctions", /*ForceSequential*/ false, 2);
440   };
441 
442   // Creates buckets with congruent functions - functions that potentially
443   // could  be folded.
444   auto createCongruentBuckets = [&]() {
445     NamedRegionTimer CongruentBucketsTimer("congruent buckets",
446                                            "congruent buckets", "ICF breakdown",
447                                            "ICF breakdown", opts::TimeICF);
448     for (auto &BFI : BC.getBinaryFunctions()) {
449       BinaryFunction &BF = BFI.second;
450       if (!this->shouldOptimize(BF))
451         continue;
452       CongruentBuckets[&BF].emplace(&BF);
453     }
454   };
455 
456   // Partition each set of congruent functions into sets of identical functions
457   // and fold them
458   auto performFoldingPass = [&]() {
459     NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
460                                         "ICF breakdown", "ICF breakdown",
461                                         opts::TimeICF);
462     Timer SinglePass("single fold pass", "single fold pass");
463     LLVM_DEBUG(SinglePass.startTimer());
464 
465     ThreadPool *ThPool;
466     if (!opts::NoThreads)
467       ThPool = &ParallelUtilities::getThreadPool();
468 
469     // Fold identical functions within a single congruent bucket
470     auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
471       Timer T("folding single congruent list", "folding single congruent list");
472       LLVM_DEBUG(T.startTimer());
473 
474       // Identical functions go into the same bucket.
475       IdenticalBucketsMap IdenticalBuckets;
476       for (BinaryFunction *BF : Candidates) {
477         IdenticalBuckets[BF].emplace_back(BF);
478       }
479 
480       for (auto &IBI : IdenticalBuckets) {
481         // Functions identified as identical.
482         std::vector<BinaryFunction *> &Twins = IBI.second;
483         if (Twins.size() < 2)
484           continue;
485 
486         // Fold functions. Keep the order consistent across invocations with
487         // different options.
488         llvm::stable_sort(
489             Twins, [](const BinaryFunction *A, const BinaryFunction *B) {
490               return A->getFunctionNumber() < B->getFunctionNumber();
491             });
492 
493         BinaryFunction *ParentBF = Twins[0];
494         for (unsigned I = 1; I < Twins.size(); ++I) {
495           BinaryFunction *ChildBF = Twins[I];
496           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
497                             << *ParentBF << '\n');
498 
499           // Remove child function from the list of candidates.
500           auto FI = Candidates.find(ChildBF);
501           assert(FI != Candidates.end() &&
502                  "function expected to be in the set");
503           Candidates.erase(FI);
504 
505           // Fold the function and remove from the list of processed functions.
506           BytesSavedEstimate += ChildBF->getSize();
507           CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
508                                          ParentBF->getKnownExecutionCount());
509           BC.foldFunction(*ChildBF, *ParentBF);
510 
511           ++NumFoldedLastIteration;
512 
513           if (ParentBF->hasJumpTables())
514             ++NumJTFunctionsFolded;
515         }
516       }
517 
518       LLVM_DEBUG(T.stopTimer());
519     };
520 
521     // Create a task for each congruent bucket
522     for (auto &Entry : CongruentBuckets) {
523       std::set<BinaryFunction *> &Bucket = Entry.second;
524       if (Bucket.size() < 2)
525         continue;
526 
527       if (opts::NoThreads)
528         processSingleBucket(Bucket);
529       else
530         ThPool->async(processSingleBucket, std::ref(Bucket));
531     }
532 
533     if (!opts::NoThreads)
534       ThPool->wait();
535 
536     LLVM_DEBUG(SinglePass.stopTimer());
537   };
538 
539   hashFunctions();
540   createCongruentBuckets();
541 
542   unsigned Iteration = 1;
543   // We repeat the pass until no new modifications happen.
544   do {
545     NumFoldedLastIteration = 0;
546     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
547 
548     performFoldingPass();
549 
550     NumFunctionsFolded += NumFoldedLastIteration;
551     ++Iteration;
552 
553   } while (NumFoldedLastIteration > 0);
554 
555   LLVM_DEBUG({
556     // Print functions that are congruent but not identical.
557     for (auto &CBI : CongruentBuckets) {
558       std::set<BinaryFunction *> &Candidates = CBI.second;
559       if (Candidates.size() < 2)
560         continue;
561       dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
562              << " functions (each of size " << (*Candidates.begin())->getSize()
563              << " bytes) are congruent but not identical:\n";
564       for (BinaryFunction *BF : Candidates) {
565         dbgs() << "  " << *BF;
566         if (BF->getKnownExecutionCount())
567           dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
568         dbgs() << '\n';
569       }
570     }
571   });
572 
573   if (NumFunctionsFolded)
574     outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
575            << OriginalFunctionCount << " functions in " << Iteration
576            << " passes. " << NumJTFunctionsFolded
577            << " functions had jump tables.\n"
578            << "BOLT-INFO: Removing all identical functions will save "
579            << format("%.2lf", (double)BytesSavedEstimate / 1024)
580            << " KB of code space. Folded functions were called "
581            << CallsSavedEstimate << " times based on profile.\n";
582 }
583 
584 } // namespace bolt
585 } // namespace llvm
586