//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandFormTuplePseudo(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             MachineBasicBlock::iterator &NextMBBI,
                             unsigned Size);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::ORRWrs:
    case AArch64::ORRXrs: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addReg(DstReg)
              .addReg(DstReg)
              .addImm(I->Op2));
    } break;
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg, RegState::Define |
                                             getDeadRegState(DstIsDead && LastItem) |
                                             RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

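  // Move the rest of the block (from the pseudo onwards) into DoneBB and give
  // it MBB's successors; MBB itself then falls through into the CAS loop.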
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
          Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
         "Unexpected opcode");
  unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
                        ? RegState::Define
                        : 0;
  unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
                      ? AArch64::zsub0
                      : AArch64::psub0;
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
                RState)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

// Create a call with the passed opcode and explicit operands, copying over all
// the implicit operands from *MBBI, starting at the regmask.
static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const AArch64InstrInfo *TII,
                                       unsigned Opcode,
                                       ArrayRef<MachineOperand> ExplicitOps,
                                       unsigned RegMaskStartIdx) {
  // Build the MI, with explicit operands first (including the call target).
  MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
                           .add(ExplicitOps)
                           .getInstr();

  // Register arguments are added during ISel, but cannot be added as explicit
  // operands of the branch as it expects to be B <target> which is only one
  // operand. Instead they are implicit operands used by the branch.
  while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
    const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    Call->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
    Call->addOperand(MO);

  return Call;
}

// Create a call to CallTarget, copying over all the operands from *MBBI,
// starting at the regmask.
static MachineInstr *createCall(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const AArch64InstrInfo *TII,
                                MachineOperand &CallTarget,
                                unsigned RegMaskStartIdx) {
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;

  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineOperand &RVTarget = MI.getOperand(0);
  assert(RVTarget.isGlobal() && "invalid operand for attached call");

  MachineInstr *OriginalCall = nullptr;

  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
    // ptrauth call.
    const MachineOperand &CallTarget = MI.getOperand(1);
    const MachineOperand &Key = MI.getOperand(2);
    const MachineOperand &IntDisc = MI.getOperand(3);
    const MachineOperand &AddrDisc = MI.getOperand(4);

    assert((Key.getImm() == AArch64PACKey::IA ||
            Key.getImm() == AArch64PACKey::IB) &&
           "Invalid auth call key");

    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};

    OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
                                     /*RegMaskStartIdx=*/5);
  } else {
    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
    OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
                              // Regmask starts after the RV and call targets.
                              /*RegMaskStartIdx=*/2);
  }

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
                                  // Regmask starts after the call target.
                                  /*RegMaskStartIdx=*/1);

  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that it is not just an optimisation, the code below expects
  // a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>  <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>  <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not the
  // expected value for the callee (0 for a normal callee and 1 for a streaming
  // callee).
  unsigned Opc;
  switch (MI.getOperand(2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
  //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
  //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOp, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

  auto ContiguousRange = ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOp;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandFormTuplePseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
  assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
  MachineInstr &MI = *MBBI;
  Register ReturnTuple = MI.getOperand(0).getReg();

  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  for (unsigned I = 0; I < Size; ++I) {
    Register FormTupleOpReg = MI.getOperand(I + 1).getReg();
    Register ReturnTupleSubReg =
        TRI->getSubReg(ReturnTuple, AArch64::zsub0 + I);
    // Add copies to ensure the subregisters remain in the correct order
    // for any contiguous operation they are used by.
    if (FormTupleOpReg != ReturnTupleSubReg)
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
          .addReg(ReturnTupleSubReg, RegState::Define)
          .addReg(FormTupleOpReg)
          .addReg(FormTupleOpReg);
  }

  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP is
      // only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                          AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
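  // Coalescer barriers only exist to keep the register coalescer from merging
  // values across them earlier in the pipeline; at this point they carry no
  // machine semantics, so expansion is simply deletion.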
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
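  // The four-vector (x4) forms below follow the same pattern as the x2 cases
  // above: expandMultiVecPseudo picks the contiguous or strided real opcode
  // depending on which of the two register classes the destination tuple was
  // allocated to, here using the ZPR4 classes.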
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
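  // FORM_TRANSPOSED_REG_TUPLE_X{2,4}_PSEUDO gather two or four SVE vectors
  // into a multi-vector register tuple; the last argument tells
  // expandFormTuplePseudo how many vectors make up the tuple.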
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
  case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
    return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}