xref: /llvm-project/bolt/lib/Passes/RegReAssign.cpp (revision 2430a354bfb9e8c08e0dd5f294012b40afb75ce0)
1 //===- bolt/Passes/RegReAssign.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the RegReAssign class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/RegReAssign.h"
14 #include "bolt/Core/BinaryFunctionCallGraph.h"
15 #include "bolt/Core/MCPlus.h"
16 #include "bolt/Passes/DataflowAnalysis.h"
17 #include "bolt/Passes/DataflowInfoManager.h"
18 #include "bolt/Utils/Utils.h"
19 #include <numeric>
20 
21 #define DEBUG_TYPE "regreassign"
22 
23 using namespace llvm;
24 
25 namespace opts {
26 extern cl::OptionCategory BoltOptCategory;
27 extern cl::opt<bool> UpdateDebugSections;
28 
29 static cl::opt<bool> AggressiveReAssign(
30     "use-aggr-reg-reassign",
31     cl::desc("use register liveness analysis to try to find more opportunities "
32              "for -reg-reassign optimization"),
33     cl::cat(BoltOptCategory));
34 }
35 
36 namespace llvm {
37 namespace bolt {
38 
swap(BinaryFunction & Function,MCPhysReg A,MCPhysReg B)39 void RegReAssign::swap(BinaryFunction &Function, MCPhysReg A, MCPhysReg B) {
40   BinaryContext &BC = Function.getBinaryContext();
41   const BitVector &AliasA = BC.MIB->getAliases(A, false);
42   const BitVector &AliasB = BC.MIB->getAliases(B, false);
43 
44   // Regular instructions
45   for (BinaryBasicBlock &BB : Function) {
46     for (MCInst &Inst : BB) {
47       for (MCOperand &Operand : MCPlus::primeOperands(Inst)) {
48         if (!Operand.isReg())
49           continue;
50 
51         unsigned Reg = Operand.getReg();
52         if (AliasA.test(Reg)) {
53           Operand.setReg(BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)));
54           --StaticBytesSaved;
55           DynBytesSaved -= BB.getKnownExecutionCount();
56           continue;
57         }
58         if (!AliasB.test(Reg))
59           continue;
60         Operand.setReg(BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)));
61         ++StaticBytesSaved;
62         DynBytesSaved += BB.getKnownExecutionCount();
63       }
64     }
65   }
66 
67   // CFI
68   DenseSet<const MCCFIInstruction *> Changed;
69   for (BinaryBasicBlock &BB : Function) {
70     for (MCInst &Inst : BB) {
71       if (!BC.MIB->isCFI(Inst))
72         continue;
73       const MCCFIInstruction *CFI = Function.getCFIFor(Inst);
74       if (Changed.count(CFI))
75         continue;
76       Changed.insert(CFI);
77 
78       switch (CFI->getOperation()) {
79       case MCCFIInstruction::OpRegister: {
80         const unsigned CFIReg2 = CFI->getRegister2();
81         const MCPhysReg Reg2 = *BC.MRI->getLLVMRegNum(CFIReg2, /*isEH=*/false);
82         if (AliasA.test(Reg2)) {
83           Function.setCFIFor(
84               Inst, MCCFIInstruction::createRegister(
85                         nullptr, CFI->getRegister(),
86                         BC.MRI->getDwarfRegNum(
87                             BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg2)),
88                             false)));
89         } else if (AliasB.test(Reg2)) {
90           Function.setCFIFor(
91               Inst, MCCFIInstruction::createRegister(
92                         nullptr, CFI->getRegister(),
93                         BC.MRI->getDwarfRegNum(
94                             BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg2)),
95                             false)));
96         }
97       }
98       [[fallthrough]];
99       case MCCFIInstruction::OpUndefined:
100       case MCCFIInstruction::OpDefCfa:
101       case MCCFIInstruction::OpOffset:
102       case MCCFIInstruction::OpRestore:
103       case MCCFIInstruction::OpSameValue:
104       case MCCFIInstruction::OpDefCfaRegister:
105       case MCCFIInstruction::OpRelOffset:
106       case MCCFIInstruction::OpEscape: {
107         unsigned CFIReg;
108         if (CFI->getOperation() != MCCFIInstruction::OpEscape) {
109           CFIReg = CFI->getRegister();
110         } else {
111           std::optional<uint8_t> Reg =
112               readDWARFExpressionTargetReg(CFI->getValues());
113           // Handle DW_CFA_def_cfa_expression
114           if (!Reg)
115             break;
116           CFIReg = *Reg;
117         }
118         const MCPhysReg Reg = *BC.MRI->getLLVMRegNum(CFIReg, /*isEH=*/false);
119         if (AliasA.test(Reg))
120           Function.mutateCFIRegisterFor(
121               Inst,
122               BC.MRI->getDwarfRegNum(
123                   BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)), false));
124         else if (AliasB.test(Reg))
125           Function.mutateCFIRegisterFor(
126               Inst,
127               BC.MRI->getDwarfRegNum(
128                   BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)), false));
129         break;
130       }
131       default:
132         break;
133       }
134     }
135   }
136 }
137 
rankRegisters(BinaryFunction & Function)138 void RegReAssign::rankRegisters(BinaryFunction &Function) {
139   BinaryContext &BC = Function.getBinaryContext();
140   std::fill(RegScore.begin(), RegScore.end(), 0);
141   std::fill(RankedRegs.begin(), RankedRegs.end(), 0);
142 
143   auto countRegScore = [&](BinaryBasicBlock &BB) {
144     for (MCInst &Inst : BB) {
145       const bool CannotUseREX = BC.MIB->cannotUseREX(Inst);
146       const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode());
147 
148       // Disallow substituitions involving regs in implicit uses lists
149       for (MCPhysReg ImplicitUse : Desc.implicit_uses()) {
150         const size_t RegEC =
151             BC.MIB->getAliases(ImplicitUse, false).find_first();
152         RegScore[RegEC] =
153             std::numeric_limits<decltype(RegScore)::value_type>::min();
154       }
155 
156       // Disallow substituitions involving regs in implicit defs lists
157       for (MCPhysReg ImplicitDef : Desc.implicit_defs()) {
158         const size_t RegEC =
159             BC.MIB->getAliases(ImplicitDef, false).find_first();
160         RegScore[RegEC] =
161             std::numeric_limits<decltype(RegScore)::value_type>::min();
162       }
163 
164       for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
165         const MCOperand &Operand = Inst.getOperand(I);
166         if (!Operand.isReg())
167           continue;
168 
169         if (Desc.getOperandConstraint(I, MCOI::TIED_TO) != -1)
170           continue;
171 
172         unsigned Reg = Operand.getReg();
173         size_t RegEC = BC.MIB->getAliases(Reg, false).find_first();
174         if (RegEC == 0)
175           continue;
176 
177         // Disallow substituitions involving regs in instrs that cannot use REX
178         // The relationship of X86 registers is shown in the diagram. BL and BH
179         // do not have a direct alias relationship. However, if the BH register
180         // cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as
181         // well, which means that BL register also cannot be swapped. Therefore,
182         // in the presence of BX/EBX/RBX registers, BL and BH have an alias
183         // relationship.
184         // ┌─────────────────┐
185         // │  RBX            │
186         // ├─────┬───────────┤
187         // │     │  EBX      │
188         // ├─────┴──┬────────┤
189         // │        │   BX   │
190         // ├────────┼───┬────┤
191         // │        │BH │BL  │
192         // └────────┴───┴────┘
193         if (CannotUseREX) {
194           RegScore[RegEC] =
195               std::numeric_limits<decltype(RegScore)::value_type>::min();
196           RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC];
197           continue;
198         }
199 
200         // Unsupported substitution, cannot swap BH with R* regs, bail
201         if (BC.MIB->isUpper8BitReg(Reg) && ClassicCSR.test(Reg)) {
202           RegScore[RegEC] =
203               std::numeric_limits<decltype(RegScore)::value_type>::min();
204           RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC];
205           continue;
206         }
207 
208         RegScore[RegEC] += BB.getKnownExecutionCount();
209       }
210     }
211   };
212   for (BinaryBasicBlock &BB : Function)
213     countRegScore(BB);
214 
215   for (BinaryFunction *ChildFrag : Function.getFragments()) {
216     for (BinaryBasicBlock &BB : *ChildFrag)
217       countRegScore(BB);
218   }
219 
220   std::iota(RankedRegs.begin(), RankedRegs.end(), 0); // 0, 1, 2, 3...
221   llvm::sort(RankedRegs,
222              [&](size_t A, size_t B) { return RegScore[A] > RegScore[B]; });
223 
224   LLVM_DEBUG({
225     for (size_t Reg : RankedRegs) {
226       if (RegScore[Reg] == 0)
227         continue;
228       dbgs() << Reg << " ";
229       if (RegScore[Reg] > 0)
230         dbgs() << BC.MRI->getName(Reg) << ": " << RegScore[Reg] << "\n";
231       else
232         dbgs() << BC.MRI->getName(Reg) << ": (blacklisted)\n";
233     }
234   });
235 }
236 
aggressivePassOverFunction(BinaryFunction & Function)237 void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
238   BinaryContext &BC = Function.getBinaryContext();
239   rankRegisters(Function);
240 
241   // If there is a situation where function:
242   //   A() -> A.cold()
243   //   A.localalias() -> A.cold()
244   // simply swapping these two calls can cause issues.
245   for (BinaryFunction *ChildFrag : Function.getFragments()) {
246     if (ChildFrag->getParentFragments()->size() > 1)
247       return;
248     if (ChildFrag->empty())
249       return;
250   }
251 
252   // Bail early if our registers are all black listed, before running expensive
253   // analysis passes
254   bool Bail = true;
255   int64_t LowScoreClassic = std::numeric_limits<int64_t>::max();
256   for (int J : ClassicRegs.set_bits()) {
257     if (RegScore[J] <= 0)
258       continue;
259     Bail = false;
260     if (RegScore[J] < LowScoreClassic)
261       LowScoreClassic = RegScore[J];
262   }
263   if (Bail)
264     return;
265   BitVector Extended = ClassicRegs;
266   Extended.flip();
267   Extended &= GPRegs;
268   Bail = true;
269   int64_t HighScoreExtended = 0;
270   for (int J : Extended.set_bits()) {
271     if (RegScore[J] <= 0)
272       continue;
273     Bail = false;
274     if (RegScore[J] > HighScoreExtended)
275       HighScoreExtended = RegScore[J];
276   }
277   // Also bail early if there is no profitable substitution even if we assume
278   // all registers can be exchanged
279   if (Bail || (LowScoreClassic << 1) >= HighScoreExtended)
280     return;
281 
282   // -- expensive pass -- determine all regs alive during func start
283   DataflowInfoManager Info(Function, RA.get(), nullptr);
284   BitVector AliveAtStart = *Info.getLivenessAnalysis().getStateAt(
285       ProgramPoint::getFirstPointAt(*Function.begin()));
286   for (BinaryBasicBlock &BB : Function)
287     if (BB.pred_size() == 0)
288       AliveAtStart |= *Info.getLivenessAnalysis().getStateAt(
289           ProgramPoint::getFirstPointAt(BB));
290 
291   // Mark frame pointer alive because of CFI
292   AliveAtStart |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false);
293   // Never touch return registers
294   BC.MIB->getDefaultLiveOut(AliveAtStart);
295 
296   // Try swapping more profitable options first
297   auto Begin = RankedRegs.begin();
298   auto End = std::prev(RankedRegs.end());
299   while (Begin != End) {
300     MCPhysReg ClassicReg = *End;
301     if (!ClassicRegs[ClassicReg] || RegScore[ClassicReg] <= 0) {
302       --End;
303       continue;
304     }
305 
306     MCPhysReg ExtReg = *Begin;
307     if (!Extended[ExtReg] || RegScore[ExtReg] <= 0) {
308       ++Begin;
309       continue;
310     }
311 
312     if (RegScore[ClassicReg] << 1 >= RegScore[ExtReg]) {
313       LLVM_DEBUG(dbgs() << " Ending at " << BC.MRI->getName(ClassicReg)
314                         << " with " << BC.MRI->getName(ExtReg)
315                         << " because exchange is not profitable\n");
316       break;
317     }
318 
319     BitVector AnyAliasAlive = AliveAtStart;
320     AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
321     if (AnyAliasAlive.any()) {
322       LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
323                         << " with " << BC.MRI->getName(ExtReg)
324                         << " because classic reg is alive\n");
325       --End;
326       continue;
327     }
328     AnyAliasAlive = AliveAtStart;
329     AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
330     if (AnyAliasAlive.any()) {
331       LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
332                         << " with " << BC.MRI->getName(ExtReg)
333                         << " because extended reg is alive\n");
334       ++Begin;
335       continue;
336     }
337 
338     // Opportunity detected. Swap.
339     LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(ClassicReg)
340                       << " with " << BC.MRI->getName(ExtReg) << "\n\n");
341     swap(Function, ClassicReg, ExtReg);
342     FuncsChanged.insert(&Function);
343     for (BinaryFunction *ChildFrag : Function.getFragments()) {
344       swap(*ChildFrag, ClassicReg, ExtReg);
345       FuncsChanged.insert(ChildFrag);
346     }
347     ++Begin;
348     if (Begin == End)
349       break;
350     --End;
351   }
352 }
353 
conservativePassOverFunction(BinaryFunction & Function)354 bool RegReAssign::conservativePassOverFunction(BinaryFunction &Function) {
355   BinaryContext &BC = Function.getBinaryContext();
356   rankRegisters(Function);
357 
358   for (BinaryFunction *ChildFrag : Function.getFragments()) {
359     if (ChildFrag->getParentFragments()->size() > 1)
360       return false;
361     if (ChildFrag->empty())
362       return false;
363   }
364 
365   // Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved
366   // regs except RBP)
367   MCPhysReg Candidate = 0;
368   for (int J : ExtendedCSR.set_bits())
369     if (RegScore[J] > RegScore[Candidate])
370       Candidate = J;
371 
372   if (!Candidate || RegScore[Candidate] < 0)
373     return false;
374 
375   // Check if our classic callee-saved reg (RBX is the only one) has lower
376   // score / utilization rate
377   MCPhysReg RBX = 0;
378   for (int I : ClassicCSR.set_bits()) {
379     int64_t ScoreRBX = RegScore[I];
380     if (ScoreRBX <= 0)
381       continue;
382 
383     if (RegScore[Candidate] > (ScoreRBX + 10))
384       RBX = I;
385   }
386 
387   if (!RBX)
388     return false;
389 
390   // The high 8 bits of the register will never be swapped. To prevent the high
391   // 8 bits from being swapped incorrectly, we should switched to swapping the
392   // low 8 bits of the register instead.
393   if (BC.MIB->isUpper8BitReg(RBX)) {
394     RBX = BC.MIB->getAliasSized(RBX, 1);
395     if (RegScore[RBX] < 0 || RegScore[RBX] > RegScore[Candidate])
396       return false;
397   }
398 
399   LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(RBX) << " with "
400                     << BC.MRI->getName(Candidate) << "\n\n");
401   (void)BC;
402   swap(Function, RBX, Candidate);
403   FuncsChanged.insert(&Function);
404   for (BinaryFunction *ChildFrag : Function.getFragments()) {
405     swap(*ChildFrag, RBX, Candidate);
406     FuncsChanged.insert(ChildFrag);
407   }
408   return true;
409 }
410 
setupAggressivePass(BinaryContext & BC,std::map<uint64_t,BinaryFunction> & BFs)411 void RegReAssign::setupAggressivePass(BinaryContext &BC,
412                                       std::map<uint64_t, BinaryFunction> &BFs) {
413   setupConservativePass(BC, BFs);
414   CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC)));
415   RA.reset(new RegAnalysis(BC, &BFs, &*CG));
416 
417   GPRegs = BitVector(BC.MRI->getNumRegs(), false);
418   BC.MIB->getGPRegs(GPRegs);
419 }
420 
setupConservativePass(BinaryContext & BC,std::map<uint64_t,BinaryFunction> & BFs)421 void RegReAssign::setupConservativePass(
422     BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {
423   // Set up constant bitvectors used throughout this analysis
424   ClassicRegs = BitVector(BC.MRI->getNumRegs(), false);
425   CalleeSaved = BitVector(BC.MRI->getNumRegs(), false);
426   ClassicCSR = BitVector(BC.MRI->getNumRegs(), false);
427   ExtendedCSR = BitVector(BC.MRI->getNumRegs(), false);
428   // Never consider the frame pointer
429   BC.MIB->getClassicGPRegs(ClassicRegs);
430   ClassicRegs.flip();
431   ClassicRegs |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false);
432   ClassicRegs.flip();
433   BC.MIB->getCalleeSavedRegs(CalleeSaved);
434   ClassicCSR |= ClassicRegs;
435   ClassicCSR &= CalleeSaved;
436   BC.MIB->getClassicGPRegs(ClassicRegs);
437   ExtendedCSR |= ClassicRegs;
438   ExtendedCSR.flip();
439   ExtendedCSR &= CalleeSaved;
440 
441   LLVM_DEBUG({
442     RegStatePrinter P(BC);
443     dbgs() << "Starting register reassignment\nClassicRegs: ";
444     P.print(dbgs(), ClassicRegs);
445     dbgs() << "\nCalleeSaved: ";
446     P.print(dbgs(), CalleeSaved);
447     dbgs() << "\nClassicCSR: ";
448     P.print(dbgs(), ClassicCSR);
449     dbgs() << "\nExtendedCSR: ";
450     P.print(dbgs(), ExtendedCSR);
451     dbgs() << "\n";
452   });
453 }
454 
runOnFunctions(BinaryContext & BC)455 Error RegReAssign::runOnFunctions(BinaryContext &BC) {
456   RegScore = std::vector<int64_t>(BC.MRI->getNumRegs(), 0);
457   RankedRegs = std::vector<size_t>(BC.MRI->getNumRegs(), 0);
458 
459   if (opts::AggressiveReAssign)
460     setupAggressivePass(BC, BC.getBinaryFunctions());
461   else
462     setupConservativePass(BC, BC.getBinaryFunctions());
463 
464   for (auto &I : BC.getBinaryFunctions()) {
465     BinaryFunction &Function = I.second;
466 
467     if (!Function.isSimple() || Function.isIgnored() || Function.isFragment())
468       continue;
469 
470     LLVM_DEBUG(dbgs() << "====================================\n");
471     LLVM_DEBUG(dbgs() << " - " << Function.getPrintName() << "\n");
472     if (!conservativePassOverFunction(Function) && opts::AggressiveReAssign) {
473       aggressivePassOverFunction(Function);
474       LLVM_DEBUG({
475         if (FuncsChanged.count(&Function))
476           dbgs() << "Aggressive pass successful on " << Function.getPrintName()
477                  << "\n";
478       });
479     }
480   }
481 
482   if (FuncsChanged.empty()) {
483     BC.outs() << "BOLT-INFO: Reg Reassignment Pass: no changes were made.\n";
484     return Error::success();
485   }
486   if (opts::UpdateDebugSections)
487     BC.outs()
488         << "BOLT-WARNING: You used -reg-reassign and -update-debug-sections."
489         << " Some registers were changed but associated AT_LOCATION for "
490         << "impacted variables were NOT updated! This operation is "
491         << "currently unsupported by BOLT.\n";
492   BC.outs() << "BOLT-INFO: Reg Reassignment Pass Stats:\n";
493   BC.outs() << "\t   " << FuncsChanged.size() << " functions affected.\n";
494   BC.outs() << "\t   " << StaticBytesSaved << " static bytes saved.\n";
495   BC.outs() << "\t   " << DynBytesSaved << " dynamic bytes saved.\n";
496   return Error::success();
497 }
498 
499 } // namespace bolt
500 } // namespace llvm
501