xref: /llvm-project/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp (revision a41922ad7530ef5e311afbff2721e69cbf520890)
1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AArch64.h"
10 #include "AArch64InstrInfo.h"
11 #include "AArch64MachineFunctionInfo.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/CodeGen/MachineFrameInfo.h"
15 #include "llvm/CodeGen/MachineFunction.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17 #include "llvm/CodeGen/MachineInstrBuilder.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/CodeGen/MachineTraceMetrics.h"
20 #include "llvm/CodeGen/Passes.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
31 
32 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
33 
34 cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
35     "stack-tagging-unchecked-ld-st", cl::Hidden,
36     cl::init(UncheckedSafe),
37     cl::desc(
38         "Unconditionally apply unchecked-ld-st optimization (even for large "
39         "stack frames, or in the presence of variable sized allocas)."),
40     cl::values(
41         clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
42         clEnumValN(
43             UncheckedSafe, "safe",
44             "apply unchecked-ld-st when the target is definitely within range"),
45         clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
46 
47 static cl::opt<bool>
48     ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
49                 cl::desc("Apply first slot optimization for stack tagging "
50                          "(eliminate ADDG Rt, Rn, 0, 0)."));
51 
52 namespace {
53 
54 class AArch64StackTaggingPreRA : public MachineFunctionPass {
55   MachineFunction *MF;
56   AArch64FunctionInfo *AFI;
57   MachineFrameInfo *MFI;
58   MachineRegisterInfo *MRI;
59   const AArch64RegisterInfo *TRI;
60   const AArch64InstrInfo *TII;
61 
62   SmallVector<MachineInstr*, 16> ReTags;
63 
64 public:
65   static char ID;
66   AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
67     initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
68   }
69 
70   bool mayUseUncheckedLoadStore();
71   void uncheckUsesOf(unsigned TaggedReg, int FI);
72   void uncheckLoadsAndStores();
73   std::optional<int> findFirstSlotCandidate();
74 
75   bool runOnMachineFunction(MachineFunction &Func) override;
76   StringRef getPassName() const override {
77     return "AArch64 Stack Tagging PreRA";
78   }
79 
80   void getAnalysisUsage(AnalysisUsage &AU) const override {
81     AU.setPreservesCFG();
82     MachineFunctionPass::getAnalysisUsage(AU);
83   }
84 };
85 } // end anonymous namespace
86 
87 char AArch64StackTaggingPreRA::ID = 0;
88 
89 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
90                       "AArch64 Stack Tagging PreRA Pass", false, false)
91 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
92                     "AArch64 Stack Tagging PreRA Pass", false, false)
93 
94 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
95   return new AArch64StackTaggingPreRA();
96 }
97 
98 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
99   switch (Opcode) {
100   case AArch64::LDRBBui:
101   case AArch64::LDRHHui:
102   case AArch64::LDRWui:
103   case AArch64::LDRXui:
104 
105   case AArch64::LDRBui:
106   case AArch64::LDRHui:
107   case AArch64::LDRSui:
108   case AArch64::LDRDui:
109   case AArch64::LDRQui:
110 
111   case AArch64::LDRSHWui:
112   case AArch64::LDRSHXui:
113 
114   case AArch64::LDRSBWui:
115   case AArch64::LDRSBXui:
116 
117   case AArch64::LDRSWui:
118 
119   case AArch64::STRBBui:
120   case AArch64::STRHHui:
121   case AArch64::STRWui:
122   case AArch64::STRXui:
123 
124   case AArch64::STRBui:
125   case AArch64::STRHui:
126   case AArch64::STRSui:
127   case AArch64::STRDui:
128   case AArch64::STRQui:
129 
130   case AArch64::LDPWi:
131   case AArch64::LDPXi:
132   case AArch64::LDPSi:
133   case AArch64::LDPDi:
134   case AArch64::LDPQi:
135 
136   case AArch64::LDPSWi:
137 
138   case AArch64::STPWi:
139   case AArch64::STPXi:
140   case AArch64::STPSi:
141   case AArch64::STPDi:
142   case AArch64::STPQi:
143     return true;
144   default:
145     return false;
146   }
147 }
148 
149 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
150   if (ClUncheckedLdSt == UncheckedNever)
151     return false;
152   else if (ClUncheckedLdSt == UncheckedAlways)
153     return true;
154 
155   // This estimate can be improved if we had harder guarantees about stack frame
156   // layout. With LocalStackAllocation we can estimate SP offset to any
157   // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
158   // objects ahead of non-tagged ones, but that's not always desirable.
159   //
160   // Underestimating SP offset here may require the use of LDG to materialize
161   // the tagged address of the stack slot, along with a scratch register
162   // allocation (post-regalloc!).
163   //
164   // For now we do the safe thing here and require that the entire stack frame
165   // is within range of the shortest of the unchecked instructions.
166   unsigned FrameSize = 0;
167   for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
168     FrameSize += MFI->getObjectSize(i);
169   bool EntireFrameReachableFromSP = FrameSize < 0xf00;
170   return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
171 }
172 
173 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
174   for (MachineInstr &UseI :
175        llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
176     if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
177       // FI operand is always the one before the immediate offset.
178       unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
179       if (UseI.getOperand(OpIdx).isReg() &&
180           UseI.getOperand(OpIdx).getReg() == TaggedReg) {
181         UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
182         UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
183       }
184     } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
185       uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
186     }
187   }
188 }
189 
190 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
191   for (auto *I : ReTags) {
192     Register TaggedReg = I->getOperand(0).getReg();
193     int FI = I->getOperand(1).getIndex();
194     uncheckUsesOf(TaggedReg, FI);
195   }
196 }
197 
198 namespace {
199 struct SlotWithTag {
200   int FI;
201   int Tag;
202   SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
203   explicit SlotWithTag(const MachineInstr &MI)
204       : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
205   bool operator==(const SlotWithTag &Other) const {
206     return FI == Other.FI && Tag == Other.Tag;
207   }
208 };
209 } // namespace
210 
211 namespace llvm {
212 template <> struct DenseMapInfo<SlotWithTag> {
213   static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
214   static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
215   static unsigned getHashValue(const SlotWithTag &V) {
216     return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
217                         DenseMapInfo<int>::getHashValue(V.Tag));
218   }
219   static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
220     return A == B;
221   }
222 };
223 } // namespace llvm
224 
225 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
226   return MFI->getUseLocalStackAllocationBlock() &&
227          MFI->isObjectPreAllocated(FI);
228 }
229 
230 // Pin one of the tagged slots to offset 0 from the tagged base pointer.
231 // This would make its address available in a virtual register (IRG's def), as
232 // opposed to requiring an ADDG instruction to materialize. This effectively
233 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually
234 // live almost everywhere anyway), and therefore needs to happen before
235 // regalloc.
236 std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
237   // Find the best (FI, Tag) pair to pin to offset 0.
238   // Looking at the possible uses of a tagged address, the advantage of pinning
239   // is:
240   // - COPY to physical register.
241   //   Does not matter, this would trade a MOV instruction for an ADDG.
242   // - ST*G matter, but those mostly appear near the function prologue where all
243   //   the tagged addresses need to be materialized anyway; also, counting ST*G
244   //   uses would overweight large allocas that require more than one ST*G
245   //   instruction.
246   // - Load/Store instructions in the address operand do not require a tagged
247   //   pointer, so they also do not benefit. These operands have already been
248   //   eliminated (see uncheckLoadsAndStores) so all remaining load/store
249   //   instructions count.
250   // - Any other instruction may benefit from being pinned to offset 0.
251   LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
252   if (!ClFirstSlot)
253     return std::nullopt;
254 
255   DenseMap<SlotWithTag, int> RetagScore;
256   SlotWithTag MaxScoreST{-1, -1};
257   int MaxScore = -1;
258   for (auto *I : ReTags) {
259     SlotWithTag ST{*I};
260     if (isSlotPreAllocated(MFI, ST.FI))
261       continue;
262 
263     Register RetagReg = I->getOperand(0).getReg();
264     if (!RetagReg.isVirtual())
265       continue;
266 
267     int Score = 0;
268     SmallVector<Register, 8> WorkList;
269     WorkList.push_back(RetagReg);
270 
271     while (!WorkList.empty()) {
272       Register UseReg = WorkList.pop_back_val();
273       for (auto &UseI : MRI->use_instructions(UseReg)) {
274         unsigned Opcode = UseI.getOpcode();
275         if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
276             Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
277             Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
278             Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
279             Opcode == AArch64::STZGloop_wback)
280           continue;
281         if (UseI.isCopy()) {
282           Register DstReg = UseI.getOperand(0).getReg();
283           if (DstReg.isVirtual())
284             WorkList.push_back(DstReg);
285           continue;
286         }
287         LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
288                           << Register::virtReg2Index(UseReg) << " in " << UseI
289                           << "\n");
290         Score++;
291       }
292     }
293 
294     int TotalScore = RetagScore[ST] += Score;
295     if (TotalScore > MaxScore ||
296         (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
297       MaxScore = TotalScore;
298       MaxScoreST = ST;
299     }
300   }
301 
302   if (MaxScoreST.FI < 0)
303     return std::nullopt;
304 
305   // If FI's tag is already 0, we are done.
306   if (MaxScoreST.Tag == 0)
307     return MaxScoreST.FI;
308 
309   // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
310   SlotWithTag SwapST{-1, -1};
311   for (auto *I : ReTags) {
312     SlotWithTag ST{*I};
313     if (ST.Tag == 0) {
314       SwapST = ST;
315       break;
316     }
317   }
318 
319   // Swap tags between the victim and the highest scoring pair.
320   // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
321   // the highest score slot without changing anything else.
322   for (auto *&I : ReTags) {
323     SlotWithTag ST{*I};
324     MachineOperand &TagOp = I->getOperand(4);
325     if (ST == MaxScoreST) {
326       TagOp.setImm(0);
327     } else if (ST == SwapST) {
328       TagOp.setImm(MaxScoreST.Tag);
329     }
330   }
331   return MaxScoreST.FI;
332 }
333 
334 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
335   MF = &Func;
336   MRI = &MF->getRegInfo();
337   AFI = MF->getInfo<AArch64FunctionInfo>();
338   TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
339   TRI = static_cast<const AArch64RegisterInfo *>(
340       MF->getSubtarget().getRegisterInfo());
341   MFI = &MF->getFrameInfo();
342   ReTags.clear();
343 
344   assert(MRI->isSSA());
345 
346   LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
347                     << "********** Function: " << MF->getName() << '\n');
348 
349   SmallSetVector<int, 8> TaggedSlots;
350   for (auto &BB : *MF) {
351     for (auto &I : BB) {
352       if (I.getOpcode() == AArch64::TAGPstack) {
353         ReTags.push_back(&I);
354         int FI = I.getOperand(1).getIndex();
355         TaggedSlots.insert(FI);
356         // There should be no offsets in TAGP yet.
357         assert(I.getOperand(2).getImm() == 0);
358       }
359     }
360   }
361 
362   // Take over from SSP. It does nothing for tagged slots, and should not really
363   // have been enabled in the first place.
364   for (int FI : TaggedSlots)
365     MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
366 
367   if (ReTags.empty())
368     return false;
369 
370   if (mayUseUncheckedLoadStore())
371     uncheckLoadsAndStores();
372 
373   // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
374   // If the base tagged pointer is set up to the address of this slot,
375   // the ADDG instruction can be eliminated.
376   std::optional<int> BaseSlot = findFirstSlotCandidate();
377   if (BaseSlot)
378     AFI->setTaggedBasePointerIndex(*BaseSlot);
379 
380   for (auto *I : ReTags) {
381     int FI = I->getOperand(1).getIndex();
382     int Tag = I->getOperand(4).getImm();
383     Register Base = I->getOperand(3).getReg();
384     if (Tag == 0 && FI == BaseSlot) {
385       BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
386               I->getOperand(0).getReg())
387           .addReg(Base);
388       I->eraseFromParent();
389     }
390   }
391 
392   return true;
393 }
394