1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9
10 #include "AArch64.h"
11 #include "AArch64MachineFunctionInfo.h"
12 #include "AArch64InstrInfo.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/MachineTraceMetrics.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetRegisterInfo.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/raw_ostream.h"
31
32 using namespace llvm;
33
34 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
35
36 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
37
38 cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
39 "stack-tagging-unchecked-ld-st", cl::Hidden,
40 cl::init(UncheckedSafe),
41 cl::desc(
42 "Unconditionally apply unchecked-ld-st optimization (even for large "
43 "stack frames, or in the presence of variable sized allocas)."),
44 cl::values(
45 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
46 clEnumValN(
47 UncheckedSafe, "safe",
48 "apply unchecked-ld-st when the target is definitely within range"),
49 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
50
51 static cl::opt<bool>
52 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
53 cl::desc("Apply first slot optimization for stack tagging "
54 "(eliminate ADDG Rt, Rn, 0, 0)."));
55
56 namespace {
57
58 class AArch64StackTaggingPreRA : public MachineFunctionPass {
59 MachineFunction *MF;
60 AArch64FunctionInfo *AFI;
61 MachineFrameInfo *MFI;
62 MachineRegisterInfo *MRI;
63 const AArch64RegisterInfo *TRI;
64 const AArch64InstrInfo *TII;
65
66 SmallVector<MachineInstr*, 16> ReTags;
67
68 public:
69 static char ID;
AArch64StackTaggingPreRA()70 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
71 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
72 }
73
74 bool mayUseUncheckedLoadStore();
75 void uncheckUsesOf(unsigned TaggedReg, int FI);
76 void uncheckLoadsAndStores();
77 std::optional<int> findFirstSlotCandidate();
78
79 bool runOnMachineFunction(MachineFunction &Func) override;
getPassName() const80 StringRef getPassName() const override {
81 return "AArch64 Stack Tagging PreRA";
82 }
83
getAnalysisUsage(AnalysisUsage & AU) const84 void getAnalysisUsage(AnalysisUsage &AU) const override {
85 AU.setPreservesCFG();
86 MachineFunctionPass::getAnalysisUsage(AU);
87 }
88 };
89 } // end anonymous namespace
90
91 char AArch64StackTaggingPreRA::ID = 0;
92
93 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
94 "AArch64 Stack Tagging PreRA Pass", false, false)
95 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
96 "AArch64 Stack Tagging PreRA Pass", false, false)
97
createAArch64StackTaggingPreRAPass()98 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
99 return new AArch64StackTaggingPreRA();
100 }
101
isUncheckedLoadOrStoreOpcode(unsigned Opcode)102 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
103 switch (Opcode) {
104 case AArch64::LDRBBui:
105 case AArch64::LDRHHui:
106 case AArch64::LDRWui:
107 case AArch64::LDRXui:
108
109 case AArch64::LDRBui:
110 case AArch64::LDRHui:
111 case AArch64::LDRSui:
112 case AArch64::LDRDui:
113 case AArch64::LDRQui:
114
115 case AArch64::LDRSHWui:
116 case AArch64::LDRSHXui:
117
118 case AArch64::LDRSBWui:
119 case AArch64::LDRSBXui:
120
121 case AArch64::LDRSWui:
122
123 case AArch64::STRBBui:
124 case AArch64::STRHHui:
125 case AArch64::STRWui:
126 case AArch64::STRXui:
127
128 case AArch64::STRBui:
129 case AArch64::STRHui:
130 case AArch64::STRSui:
131 case AArch64::STRDui:
132 case AArch64::STRQui:
133
134 case AArch64::LDPWi:
135 case AArch64::LDPXi:
136 case AArch64::LDPSi:
137 case AArch64::LDPDi:
138 case AArch64::LDPQi:
139
140 case AArch64::LDPSWi:
141
142 case AArch64::STPWi:
143 case AArch64::STPXi:
144 case AArch64::STPSi:
145 case AArch64::STPDi:
146 case AArch64::STPQi:
147 return true;
148 default:
149 return false;
150 }
151 }
152
mayUseUncheckedLoadStore()153 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
154 if (ClUncheckedLdSt == UncheckedNever)
155 return false;
156 else if (ClUncheckedLdSt == UncheckedAlways)
157 return true;
158
159 // This estimate can be improved if we had harder guarantees about stack frame
160 // layout. With LocalStackAllocation we can estimate SP offset to any
161 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
162 // objects ahead of non-tagged ones, but that's not always desirable.
163 //
164 // Underestimating SP offset here may require the use of LDG to materialize
165 // the tagged address of the stack slot, along with a scratch register
166 // allocation (post-regalloc!).
167 //
168 // For now we do the safe thing here and require that the entire stack frame
169 // is within range of the shortest of the unchecked instructions.
170 unsigned FrameSize = 0;
171 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
172 FrameSize += MFI->getObjectSize(i);
173 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
174 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
175 }
176
uncheckUsesOf(unsigned TaggedReg,int FI)177 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
178 for (MachineInstr &UseI :
179 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
180 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
181 // FI operand is always the one before the immediate offset.
182 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
183 if (UseI.getOperand(OpIdx).isReg() &&
184 UseI.getOperand(OpIdx).getReg() == TaggedReg) {
185 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
186 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
187 }
188 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
189 uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
190 }
191 }
192 }
193
uncheckLoadsAndStores()194 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
195 for (auto *I : ReTags) {
196 Register TaggedReg = I->getOperand(0).getReg();
197 int FI = I->getOperand(1).getIndex();
198 uncheckUsesOf(TaggedReg, FI);
199 }
200 }
201
202 namespace {
203 struct SlotWithTag {
204 int FI;
205 int Tag;
SlotWithTag__anon446628cf0211::SlotWithTag206 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
SlotWithTag__anon446628cf0211::SlotWithTag207 explicit SlotWithTag(const MachineInstr &MI)
208 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
operator ==__anon446628cf0211::SlotWithTag209 bool operator==(const SlotWithTag &Other) const {
210 return FI == Other.FI && Tag == Other.Tag;
211 }
212 };
213 } // namespace
214
215 namespace llvm {
216 template <> struct DenseMapInfo<SlotWithTag> {
getEmptyKeyllvm::DenseMapInfo217 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
getTombstoneKeyllvm::DenseMapInfo218 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
getHashValuellvm::DenseMapInfo219 static unsigned getHashValue(const SlotWithTag &V) {
220 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
221 DenseMapInfo<int>::getHashValue(V.Tag));
222 }
isEqualllvm::DenseMapInfo223 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
224 return A == B;
225 }
226 };
227 } // namespace llvm
228
isSlotPreAllocated(MachineFrameInfo * MFI,int FI)229 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
230 return MFI->getUseLocalStackAllocationBlock() &&
231 MFI->isObjectPreAllocated(FI);
232 }
233
234 // Pin one of the tagged slots to offset 0 from the tagged base pointer.
235 // This would make its address available in a virtual register (IRG's def), as
236 // opposed to requiring an ADDG instruction to materialize. This effectively
237 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually
238 // live almost everywhere anyway), and therefore needs to happen before
239 // regalloc.
findFirstSlotCandidate()240 std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
241 // Find the best (FI, Tag) pair to pin to offset 0.
242 // Looking at the possible uses of a tagged address, the advantage of pinning
243 // is:
244 // - COPY to physical register.
245 // Does not matter, this would trade a MOV instruction for an ADDG.
246 // - ST*G matter, but those mostly appear near the function prologue where all
247 // the tagged addresses need to be materialized anyway; also, counting ST*G
248 // uses would overweight large allocas that require more than one ST*G
249 // instruction.
250 // - Load/Store instructions in the address operand do not require a tagged
251 // pointer, so they also do not benefit. These operands have already been
252 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
253 // instructions count.
254 // - Any other instruction may benefit from being pinned to offset 0.
255 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
256 if (!ClFirstSlot)
257 return std::nullopt;
258
259 DenseMap<SlotWithTag, int> RetagScore;
260 SlotWithTag MaxScoreST{-1, -1};
261 int MaxScore = -1;
262 for (auto *I : ReTags) {
263 SlotWithTag ST{*I};
264 if (isSlotPreAllocated(MFI, ST.FI))
265 continue;
266
267 Register RetagReg = I->getOperand(0).getReg();
268 if (!RetagReg.isVirtual())
269 continue;
270
271 int Score = 0;
272 SmallVector<Register, 8> WorkList;
273 WorkList.push_back(RetagReg);
274
275 while (!WorkList.empty()) {
276 Register UseReg = WorkList.pop_back_val();
277 for (auto &UseI : MRI->use_instructions(UseReg)) {
278 unsigned Opcode = UseI.getOpcode();
279 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
280 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
281 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
282 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
283 Opcode == AArch64::STZGloop_wback)
284 continue;
285 if (UseI.isCopy()) {
286 Register DstReg = UseI.getOperand(0).getReg();
287 if (DstReg.isVirtual())
288 WorkList.push_back(DstReg);
289 continue;
290 }
291 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
292 << Register::virtReg2Index(UseReg) << " in " << UseI
293 << "\n");
294 Score++;
295 }
296 }
297
298 int TotalScore = RetagScore[ST] += Score;
299 if (TotalScore > MaxScore ||
300 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
301 MaxScore = TotalScore;
302 MaxScoreST = ST;
303 }
304 }
305
306 if (MaxScoreST.FI < 0)
307 return std::nullopt;
308
309 // If FI's tag is already 0, we are done.
310 if (MaxScoreST.Tag == 0)
311 return MaxScoreST.FI;
312
313 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
314 SlotWithTag SwapST{-1, -1};
315 for (auto *I : ReTags) {
316 SlotWithTag ST{*I};
317 if (ST.Tag == 0) {
318 SwapST = ST;
319 break;
320 }
321 }
322
323 // Swap tags between the victim and the highest scoring pair.
324 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
325 // the highest score slot without changing anything else.
326 for (auto *&I : ReTags) {
327 SlotWithTag ST{*I};
328 MachineOperand &TagOp = I->getOperand(4);
329 if (ST == MaxScoreST) {
330 TagOp.setImm(0);
331 } else if (ST == SwapST) {
332 TagOp.setImm(MaxScoreST.Tag);
333 }
334 }
335 return MaxScoreST.FI;
336 }
337
runOnMachineFunction(MachineFunction & Func)338 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
339 MF = &Func;
340 MRI = &MF->getRegInfo();
341 AFI = MF->getInfo<AArch64FunctionInfo>();
342 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
343 TRI = static_cast<const AArch64RegisterInfo *>(
344 MF->getSubtarget().getRegisterInfo());
345 MFI = &MF->getFrameInfo();
346 ReTags.clear();
347
348 assert(MRI->isSSA());
349
350 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
351 << "********** Function: " << MF->getName() << '\n');
352
353 SmallSetVector<int, 8> TaggedSlots;
354 for (auto &BB : *MF) {
355 for (auto &I : BB) {
356 if (I.getOpcode() == AArch64::TAGPstack) {
357 ReTags.push_back(&I);
358 int FI = I.getOperand(1).getIndex();
359 TaggedSlots.insert(FI);
360 // There should be no offsets in TAGP yet.
361 assert(I.getOperand(2).getImm() == 0);
362 }
363 }
364 }
365
366 // Take over from SSP. It does nothing for tagged slots, and should not really
367 // have been enabled in the first place.
368 for (int FI : TaggedSlots)
369 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
370
371 if (ReTags.empty())
372 return false;
373
374 if (mayUseUncheckedLoadStore())
375 uncheckLoadsAndStores();
376
377 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
378 // If the base tagged pointer is set up to the address of this slot,
379 // the ADDG instruction can be eliminated.
380 std::optional<int> BaseSlot = findFirstSlotCandidate();
381 if (BaseSlot)
382 AFI->setTaggedBasePointerIndex(*BaseSlot);
383
384 for (auto *I : ReTags) {
385 int FI = I->getOperand(1).getIndex();
386 int Tag = I->getOperand(4).getImm();
387 Register Base = I->getOperand(3).getReg();
388 if (Tag == 0 && FI == BaseSlot) {
389 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
390 I->getOperand(0).getReg())
391 .addReg(Base);
392 I->eraseFromParent();
393 }
394 }
395
396 return true;
397 }
398