//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"

enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };

cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
    "stack-tagging-unchecked-ld-st", cl::Hidden,
    cl::init(UncheckedSafe),
    cl::desc(
        "Unconditionally apply unchecked-ld-st optimization (even for large "
        "stack frames, or in the presence of variable sized allocas)."),
    cl::values(
        clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
        clEnumValN(
            UncheckedSafe, "safe",
            "apply unchecked-ld-st when the target is definitely within range"),
        clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));

static cl::opt<bool>
    ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
                cl::desc("Apply first slot optimization for stack tagging "
                         "(eliminate ADDG Rt, Rn, 0, 0)."));

namespace {

class AArch64StackTaggingPreRA : public MachineFunctionPass {
  MachineFunction *MF;
  AArch64FunctionInfo *AFI;
  MachineFrameInfo *MFI;
  MachineRegisterInfo *MRI;
  const AArch64RegisterInfo *TRI;
  const AArch64InstrInfo *TII;

  SmallVector<MachineInstr*, 16> ReTags;

public:
  static char ID;
  AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
    initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
  }

  bool mayUseUncheckedLoadStore();
  void uncheckUsesOf(unsigned TaggedReg, int FI);
  void uncheckLoadsAndStores();
  std::optional<int> findFirstSlotCandidate();

  bool runOnMachineFunction(MachineFunction &Func) override;
  StringRef getPassName() const override {
    return "AArch64 Stack Tagging PreRA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // end anonymous namespace

char AArch64StackTaggingPreRA::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                      "AArch64 Stack Tagging PreRA Pass", false, false)
INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
                    "AArch64 Stack Tagging PreRA Pass", false, false)

FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
  return new AArch64StackTaggingPreRA();
}
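
// Load and store opcodes with an unsigned scaled immediate offset. For these,
// uncheckUsesOf() below can replace a tagged-pointer base operand with the
// slot's frame index (flagged MO_TAGGED), so the access no longer needs the
// materialized tagged address.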
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
  switch (Opcode) {
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:

  case AArch64::LDRBui:
  case AArch64::LDRHui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:

  case AArch64::LDRSHWui:
  case AArch64::LDRSHXui:

  case AArch64::LDRSBWui:
  case AArch64::LDRSBXui:

  case AArch64::LDRSWui:

  case AArch64::STRBBui:
  case AArch64::STRHHui:
  case AArch64::STRWui:
  case AArch64::STRXui:

  case AArch64::STRBui:
  case AArch64::STRHui:
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:

  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::LDPSi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:

  case AArch64::LDPSWi:

  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
    return true;
  default:
    return false;
  }
}

bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
  if (ClUncheckedLdSt == UncheckedNever)
    return false;
  if (ClUncheckedLdSt == UncheckedAlways)
    return true;

  // This estimate could be improved if we had harder guarantees about stack
  // frame layout. With LocalStackAllocation we can estimate the SP offset to
  // any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
  // tagged objects ahead of non-tagged ones, but that's not always desirable.
  //
  // Underestimating the SP offset here may require the use of LDG to
  // materialize the tagged address of the stack slot, along with a scratch
  // register allocation (post-regalloc!).
  //
  // For now we do the safe thing and require that the entire stack frame is
  // within range of the shortest of the unchecked instructions.
  unsigned FrameSize = 0;
  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
    FrameSize += MFI->getObjectSize(i);
  bool EntireFrameReachableFromSP = FrameSize < 0xf00;
  return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
}
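
// Rewrite all uses of the tagged slot address in TaggedReg (looking through
// copies into other virtual registers) so that the unchecked loads and stores
// listed above reference the slot via frame index FI with the MO_TAGGED flag,
// instead of through the tagged pointer register.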
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
  for (MachineInstr &UseI :
       llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
    if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
      // The FI operand is always the one before the immediate offset.
      unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
      if (UseI.getOperand(OpIdx).isReg() &&
          UseI.getOperand(OpIdx).getReg() == TaggedReg) {
        UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
        UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
      }
    } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
      uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
    }
  }
}

void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
  for (auto *I : ReTags) {
    Register TaggedReg = I->getOperand(0).getReg();
    int FI = I->getOperand(1).getIndex();
    uncheckUsesOf(TaggedReg, FI);
  }
}

namespace {
struct SlotWithTag {
  int FI;
  int Tag;
  SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
  explicit SlotWithTag(const MachineInstr &MI)
      : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
  bool operator==(const SlotWithTag &Other) const {
    return FI == Other.FI && Tag == Other.Tag;
  }
};
} // namespace

namespace llvm {
template <> struct DenseMapInfo<SlotWithTag> {
  static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
  static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
  static unsigned getHashValue(const SlotWithTag &V) {
    return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
                        DenseMapInfo<int>::getHashValue(V.Tag));
  }
  static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
    return A == B;
  }
};
} // namespace llvm

static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
  return MFI->getUseLocalStackAllocationBlock() &&
         MFI->isObjectPreAllocated(FI);
}

// Pin one of the tagged slots to offset 0 from the tagged base pointer.
// This makes its address available in a virtual register (IRG's def), as
// opposed to requiring an ADDG instruction to materialize it. This effectively
// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
// live almost everywhere anyway), and therefore needs to happen before
// regalloc.
std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
  // Find the best (FI, Tag) pair to pin to offset 0.
  // Looking at the possible uses of a tagged address, the advantage of pinning
  // is:
  // - COPY to a physical register.
  //   Does not matter; this would trade a MOV instruction for an ADDG.
  // - ST*G uses matter, but those mostly appear near the function prologue
  //   where all the tagged addresses need to be materialized anyway; also,
  //   counting ST*G uses would overweight large allocas that require more than
  //   one ST*G instruction.
  // - Load/store instructions that use the tagged address only as an address
  //   operand do not require a tagged pointer, so they do not benefit either.
  //   Those operands have already been eliminated (see uncheckLoadsAndStores),
  //   so all remaining load/store uses count.
  // - Any other instruction may benefit from being pinned to offset 0.
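  //
  // For example, if a slot (FI=1, Tag=0) ends up with the highest score, its
  // TAGPstack is later replaced with a plain COPY of the IRG result (see
  // runOnMachineFunction), so no ADDG is needed to materialize that slot's
  // tagged address.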
  LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
  if (!ClFirstSlot)
    return std::nullopt;

  DenseMap<SlotWithTag, int> RetagScore;
  SlotWithTag MaxScoreST{-1, -1};
  int MaxScore = -1;
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (isSlotPreAllocated(MFI, ST.FI))
      continue;

    Register RetagReg = I->getOperand(0).getReg();
    if (!RetagReg.isVirtual())
      continue;

    int Score = 0;
    SmallVector<Register, 8> WorkList;
    WorkList.push_back(RetagReg);

    while (!WorkList.empty()) {
      Register UseReg = WorkList.pop_back_val();
      for (auto &UseI : MRI->use_instructions(UseReg)) {
        unsigned Opcode = UseI.getOpcode();
        if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
            Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
            Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
            Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
            Opcode == AArch64::STZGloop_wback)
          continue;
        if (UseI.isCopy()) {
          Register DstReg = UseI.getOperand(0).getReg();
          if (DstReg.isVirtual())
            WorkList.push_back(DstReg);
          continue;
        }
        LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
                          << Register::virtReg2Index(UseReg) << " in " << UseI
                          << "\n");
        Score++;
      }
    }

    int TotalScore = RetagScore[ST] += Score;
    if (TotalScore > MaxScore ||
        (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
      MaxScore = TotalScore;
      MaxScoreST = ST;
    }
  }

  if (MaxScoreST.FI < 0)
    return std::nullopt;

  // If the highest-scoring slot already has tag 0, we are done.
  if (MaxScoreST.Tag == 0)
    return MaxScoreST.FI;

  // Otherwise, find an arbitrary victim pair (FI, Tag) where Tag == 0.
  SlotWithTag SwapST{-1, -1};
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    if (ST.Tag == 0) {
      SwapST = ST;
      break;
    }
  }

  // Swap tags between the victim and the highest-scoring pair.
  // If SwapST is still (-1, -1), that's fine, too - we'll simply take tag 0 for
  // the highest-scoring slot without changing anything else.
  for (auto *I : ReTags) {
    SlotWithTag ST{*I};
    MachineOperand &TagOp = I->getOperand(4);
    if (ST == MaxScoreST) {
      TagOp.setImm(0);
    } else if (ST == SwapST) {
      TagOp.setImm(MaxScoreST.Tag);
    }
  }
  return MaxScoreST.FI;
}

bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  MRI = &MF->getRegInfo();
  AFI = MF->getInfo<AArch64FunctionInfo>();
  TII = static_cast<const AArch64InstrInfo *>(
      MF->getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF->getSubtarget().getRegisterInfo());
  MFI = &MF->getFrameInfo();
  ReTags.clear();

  assert(MRI->isSSA());

  LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
                    << "********** Function: " << MF->getName() << '\n');

  SmallSetVector<int, 8> TaggedSlots;
  for (auto &BB : *MF) {
    for (auto &I : BB) {
      if (I.getOpcode() == AArch64::TAGPstack) {
        ReTags.push_back(&I);
        int FI = I.getOperand(1).getIndex();
        TaggedSlots.insert(FI);
        // There should be no offsets in TAGP yet.
        assert(I.getOperand(2).getImm() == 0);
      }
    }
  }

  // Take over from SSP. It does nothing for tagged slots, and should not
  // really have been enabled in the first place.
  for (int FI : TaggedSlots)
    MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);

  if (ReTags.empty())
    return false;

  if (mayUseUncheckedLoadStore())
    uncheckLoadsAndStores();

  // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
  // If the base tagged pointer is set up to the address of this slot,
  // the ADDG instruction can be eliminated.
  std::optional<int> BaseSlot = findFirstSlotCandidate();
  if (BaseSlot)
    AFI->setTaggedBasePointerIndex(*BaseSlot);

  for (auto *I : ReTags) {
    int FI = I->getOperand(1).getIndex();
    int Tag = I->getOperand(4).getImm();
    Register Base = I->getOperand(3).getReg();
    if (Tag == 0 && FI == BaseSlot) {
      BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
              I->getOperand(0).getReg())
          .addReg(Base);
      I->eraseFromParent();
    }
  }

  return true;
}