//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H

#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/TypeSize.h"
#include <optional>

#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"

namespace llvm {

class AArch64Subtarget;

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;
static const MachineMemOperand::Flags MOStridedAccess =
    MachineMemOperand::MOTargetFlag2;

#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"

// AArch64 MachineCombiner patterns
enum AArch64MachineCombinerPattern : unsigned {
  // These are patterns used to reduce the length of dependence chain.
  SUBADD_OP1 = MachineCombinerPattern::TARGET_PATTERN_START,
  SUBADD_OP2,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  // The _OP1/_OP2 suffix indicates which operand of the add/subtract is the
  // multiply result.
  MULADDW_OP1,
  MULADDW_OP2,
  MULSUBW_OP1,
  MULSUBW_OP2,
  MULADDWI_OP1,
  MULSUBWI_OP1,
  MULADDX_OP1,
  MULADDX_OP2,
  MULSUBX_OP1,
  MULSUBX_OP2,
  MULADDXI_OP1,
  MULSUBXI_OP1,
  // NEON integer vectors
  MULADDv8i8_OP1,
  MULADDv8i8_OP2,
  MULADDv16i8_OP1,
  MULADDv16i8_OP2,
  MULADDv4i16_OP1,
  MULADDv4i16_OP2,
  MULADDv8i16_OP1,
  MULADDv8i16_OP2,
  MULADDv2i32_OP1,
  MULADDv2i32_OP2,
  MULADDv4i32_OP1,
  MULADDv4i32_OP2,

  MULSUBv8i8_OP1,
  MULSUBv8i8_OP2,
  MULSUBv16i8_OP1,
  MULSUBv16i8_OP2,
  MULSUBv4i16_OP1,
  MULSUBv4i16_OP2,
  MULSUBv8i16_OP1,
  MULSUBv8i16_OP2,
  MULSUBv2i32_OP1,
  MULSUBv2i32_OP2,
  MULSUBv4i32_OP1,
  MULSUBv4i32_OP2,

  MULADDv4i16_indexed_OP1,
  MULADDv4i16_indexed_OP2,
  MULADDv8i16_indexed_OP1,
  MULADDv8i16_indexed_OP2,
  MULADDv2i32_indexed_OP1,
  MULADDv2i32_indexed_OP2,
  MULADDv4i32_indexed_OP1,
  MULADDv4i32_indexed_OP2,

  MULSUBv4i16_indexed_OP1,
  MULSUBv4i16_indexed_OP2,
  MULSUBv8i16_indexed_OP1,
  MULSUBv8i16_indexed_OP2,
  MULSUBv2i32_indexed_OP1,
  MULSUBv2i32_indexed_OP2,
  MULSUBv4i32_indexed_OP1,
  MULSUBv4i32_indexed_OP2,

  // Floating Point
  FMULADDH_OP1,
  FMULADDH_OP2,
  FMULSUBH_OP1,
  FMULSUBH_OP2,
  FMULADDS_OP1,
  FMULADDS_OP2,
  FMULSUBS_OP1,
  FMULSUBS_OP2,
  FMULADDD_OP1,
  FMULADDD_OP2,
  FMULSUBD_OP1,
  FMULSUBD_OP2,
  FNMULSUBH_OP1,
  FNMULSUBS_OP1,
  FNMULSUBD_OP1,
  FMLAv1i32_indexed_OP1,
  FMLAv1i32_indexed_OP2,
  FMLAv1i64_indexed_OP1,
  FMLAv1i64_indexed_OP2,
  FMLAv4f16_OP1,
  FMLAv4f16_OP2,
  FMLAv8f16_OP1,
  FMLAv8f16_OP2,
  FMLAv2f32_OP2,
  FMLAv2f32_OP1,
  FMLAv2f64_OP1,
  FMLAv2f64_OP2,
  FMLAv4i16_indexed_OP1,
  FMLAv4i16_indexed_OP2,
  FMLAv8i16_indexed_OP1,
  FMLAv8i16_indexed_OP2,
  FMLAv2i32_indexed_OP1,
  FMLAv2i32_indexed_OP2,
  FMLAv2i64_indexed_OP1,
  FMLAv2i64_indexed_OP2,
  FMLAv4f32_OP1,
  FMLAv4f32_OP2,
  FMLAv4i32_indexed_OP1,
  FMLAv4i32_indexed_OP2,
  FMLSv1i32_indexed_OP2,
  FMLSv1i64_indexed_OP2,
  FMLSv4f16_OP1,
  FMLSv4f16_OP2,
  FMLSv8f16_OP1,
  FMLSv8f16_OP2,
  FMLSv2f32_OP1,
  FMLSv2f32_OP2,
  FMLSv2f64_OP1,
  FMLSv2f64_OP2,
  FMLSv4i16_indexed_OP1,
  FMLSv4i16_indexed_OP2,
  FMLSv8i16_indexed_OP1,
  FMLSv8i16_indexed_OP2,
  FMLSv2i32_indexed_OP1,
  FMLSv2i32_indexed_OP2,
  FMLSv2i64_indexed_OP1,
  FMLSv2i64_indexed_OP2,
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,

  FNMADD,
};

class AArch64InstrInfo final : public AArch64GenInstrInfo {
  const AArch64RegisterInfo RI;
  const AArch64Subtarget &Subtarget;

public:
  explicit AArch64InstrInfo(const AArch64Subtarget &STI);

  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
  /// such, whenever a client has an instance of instruction info, it should
  /// always be able to get register info as well (through this method).
  const AArch64RegisterInfo &getRegisterInfo() const { return RI; }

  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool isAsCheapAsAMove(const MachineInstr &MI) const override;

  bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
                             Register &DstReg, unsigned &SubIdx) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// Does this instruction set its full destination register to zero?
  static bool isGPRZero(const MachineInstr &MI);

  /// Does this instruction rename a GPR without modifying bits?
  static bool isGPRCopy(const MachineInstr &MI);

  /// Does this instruction rename an FPR without modifying bits?
  static bool isFPRCopy(const MachineInstr &MI);

  /// Return true if pairing the given load or store is hinted to be
  /// unprofitable.
  static bool isLdStPairSuppressed(const MachineInstr &MI);

  /// Return true if the given load or store is a strided memory access.
  static bool isStridedAccess(const MachineInstr &MI);

  /// Return true if it has an unscaled load/store offset.
  static bool hasUnscaledLdStOffset(unsigned Opc);
  static bool hasUnscaledLdStOffset(MachineInstr &MI) {
    return hasUnscaledLdStOffset(MI.getOpcode());
  }

  /// Returns the unscaled load/store for the scaled load/store opcode,
  /// if there is a corresponding unscaled variant available.
  static std::optional<unsigned> getUnscaledLdSt(unsigned Opc);

  /// Scaling factor for (scaled or unscaled) load or store.
  static int getMemScale(unsigned Opc);
  static int getMemScale(const MachineInstr &MI) {
    return getMemScale(MI.getOpcode());
  }

  /// Returns whether the instruction is a pre-indexed load.
  static bool isPreLd(const MachineInstr &MI);
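
  // Illustrative: a pre-indexed access such as "ldr x0, [x1, #8]!" writes the
  // updated address back to the base register before performing the access;
  // the isPre* helpers here classify these writeback forms.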

  /// Returns whether the instruction is a pre-indexed store.
  static bool isPreSt(const MachineInstr &MI);

  /// Returns whether the instruction is a pre-indexed load/store.
  static bool isPreLdSt(const MachineInstr &MI);

  /// Returns whether the instruction is a paired load/store.
  static bool isPairedLdSt(const MachineInstr &MI);

  /// Returns the base register operand of a load/store.
  static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);

  /// Returns the immediate offset operand of a load/store.
  static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);

  /// Returns whether the physical register is FP or NEON.
  static bool isFpOrNEON(Register Reg);

  /// Returns the shift amount operand of a load/store.
  static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);

  /// Returns whether the instruction is FP or NEON.
  static bool isFpOrNEON(const MachineInstr &MI);

  /// Returns whether the instruction is in H form (16 bit operands).
  static bool isHForm(const MachineInstr &MI);

  /// Returns whether the instruction is in Q form (128 bit operands).
  static bool isQForm(const MachineInstr &MI);

  /// Returns whether the instruction can be compatible with non-zero BTYPE.
  static bool hasBTISemantics(const MachineInstr &MI);

  /// Returns the index for the immediate for a given instruction.
  static unsigned getLoadStoreImmIdx(unsigned Opc);

  /// Return true if the given load or store may be paired with another.
  static bool isPairableLdStInst(const MachineInstr &MI);

  /// Returns true if MI is one of the TCRETURN* instructions.
  static bool isTailCallReturnInst(const MachineInstr &MI);

  /// Return the flag-setting equivalent of the given opcode. The caller is
  /// responsible for ensuring the opcode has a flag-setting equivalent.
  static unsigned convertToFlagSettingOpc(unsigned Opc);

  /// Return true if this is a load/store that can be potentially paired/merged.
  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;

  /// Hint that pairing the given load or store is unprofitable.
  static void suppressLdStPair(MachineInstr &MI);

  std::optional<ExtAddrMode>
  getAddrModeFromMemoryOp(const MachineInstr &MemI,
                          const TargetRegisterInfo *TRI) const override;

  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
                           const MachineInstr &AddrI,
                           ExtAddrMode &AM) const override;

  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
                                 const ExtAddrMode &AM) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const override;

  /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
  /// This is true for some SVE instructions like ldr/str that have a
  /// 'reg + imm' addressing mode where the immediate is an index to the
  /// scalable vector located at 'reg + imm * vscale x #bytes'.
  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
                                    const MachineOperand *&BaseOp,
                                    int64_t &Offset, bool &OffsetIsScalable,
                                    TypeSize &Width,
                                    const TargetRegisterInfo *TRI) const;
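
  // Illustrative reading of the scalable case: for an SVE fill such as
  // "ldr z0, [x0, #2, mul vl]" the effective address is
  // x0 + 2 * (vscale x 16) bytes, so when OffsetIsScalable is true the
  // returned offset must be interpreted as multiplied by vscale.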

  /// Return the immediate offset of the base register in a load/store \p LdSt.
  MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;

  /// Returns true if opcode \p Opc is a memory operation. If it is, set
  /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
  ///
  /// For unscaled instructions, \p Scale is set to 1. All values are in bytes.
  /// MinOffset/MaxOffset are the un-scaled limits of the immediate in the
  /// instruction; the actual offset limit is [MinOffset*Scale,
  /// MaxOffset*Scale]. For example, with Scale == 8 and an immediate range of
  /// [0, 4095], the addressable byte range is [0, 32760].
  static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
                           int64_t &MinOffset, int64_t &MaxOffset);

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, MCRegister DestReg,
                        MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices) const;
  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                       const DebugLoc &DL, MCRegister DestReg,
                       MCRegister SrcReg, bool KillSrc, unsigned Opcode,
                       unsigned ZeroReg, llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      Register SrcReg, bool isKill, int FrameIndex,
      const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
      Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
      Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  // This tells target independent code that it is okay to pass instructions
  // with subreg operands to foldMemoryOperandImpl.
  bool isSubregFoldable() const override { return true; }

  using TargetInstrInfo::foldMemoryOperandImpl;
  MachineInstr *
  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                        ArrayRef<unsigned> Ops,
                        MachineBasicBlock::iterator InsertPt, int FrameIndex,
                        LiveIntervals *LIS = nullptr,
                        VirtRegMap *VRM = nullptr) const override;
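
  // For reference (fixed by the branch encodings; the .td files are the
  // authoritative source): B reaches +/-128 MiB, B.cond/CBZ/CBNZ +/-1 MiB,
  // and TBZ/TBNZ +/-32 KiB.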

  /// \returns true if a branch from an instruction with opcode \p BranchOpc
  /// is capable of jumping to a position \p BrOffset bytes away.
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;
  bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                              MachineBranchPredicate &MBP,
                              bool AllowModify) const override;
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;

  bool
  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                       Register, Register, Register, int &, int &,
                       int &) const override;
  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, Register DstReg,
                    ArrayRef<MachineOperand> Cond, Register TrueReg,
                    Register FalseReg) const override;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  MCInst getNop() const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  /// analyzeCompare - For a comparison instruction, return the source registers
  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
  /// Return true if the comparison instruction can be analyzed.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;
  /// optimizeCompareInstr - Convert the instruction supplying the argument to
  /// the comparison into one that sets the zero bit in the flags register.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool optimizeCondBranch(MachineInstr &MI) const override;

  CombinerObjective getCombinerObjective(unsigned Pattern) const override;
  /// Return true when a code sequence can improve throughput. It
  /// should be called only for instructions in loops.
  /// \param Pattern - combiner pattern
  bool isThroughputPattern(unsigned Pattern) const override;
  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in ``Root``. All potential patterns are
  /// listed in the ``Patterns`` array.
  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<unsigned> &Patterns,
                                  bool DoRegPressureReduce) const override;
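
  // Illustrative combiner rewrite (the MULADDW_OP1 pattern):
  //   mul w8, w0, w1 ; add w9, w8, w2  ==>  madd w9, w0, w1, w2
  // assuming the intermediate multiply result has no other users.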

  /// Return true when Inst is associative and commutative so that it can be
  /// reassociated. If Invert is true, then the inverse of the Inst operation
  /// must be checked.
  bool isAssociativeAndCommutative(const MachineInstr &Inst,
                                   bool Invert) const override;
  /// When getMachineCombinerPatterns() finds patterns, this function generates
  /// the instructions that could replace the original code sequence.
  void genAlternativeCodeSequence(
      MachineInstr &Root, unsigned Pattern,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs,
      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
  /// AArch64 supports MachineCombiner.
  bool useMachineCombiner() const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableBitmaskMachineOperandTargetFlags() const override;
  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
                                   bool OutlineFromLinkOnceODRs) const override;
  std::optional<std::unique_ptr<outliner::OutlinedFunction>>
  getOutliningCandidateInfo(
      const MachineModuleInfo &MMI,
      std::vector<outliner::Candidate> &RepeatedSequenceLocs,
      unsigned MinRepeats) const override;
  void mergeOutliningCandidateAttributes(
      Function &F, std::vector<outliner::Candidate> &Candidates) const override;
  outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
                                           MachineBasicBlock::iterator &MIT,
                                           unsigned Flags) const override;
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
  getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
  void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
                          const outliner::OutlinedFunction &OF) const override;
  MachineBasicBlock::iterator
  insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator &It, MachineFunction &MF,
                     outliner::Candidate &C) const override;
  bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;

  void buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Iter, DebugLoc &DL,
                          bool AllowSideEffects = true) const override;

  /// Returns the vector element size (B, H, S or D) of an SVE opcode.
  uint64_t getElementSizeForOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE instruction that sets the
  /// condition codes as if its result had been fed to a PTEST instruction
  /// along with the same general predicate.
  bool isPTestLikeOpcode(unsigned Opc) const;
  /// Returns true if the opcode is for an SVE WHILE## instruction.
  bool isWhileOpcode(unsigned Opc) const;
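
  // Illustrative: an SVE while-loop predicate such as "whilelo p0.s, x0, x1"
  // already sets NZCV from its predicate result, so a subsequent PTEST of that
  // result is redundant and optimizePTestInstr (below) can remove it.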

  /// Returns true if the instruction has a shift by immediate that can be
  /// executed in one cycle less.
  static bool isFalkorShiftExtFast(const MachineInstr &MI);
  /// Return true if the instruction is a SEH instruction used for unwinding
  /// on Windows.
  static bool isSEHInstruction(const MachineInstr &MI);

  std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
                                           Register Reg) const override;

  bool isFunctionSafeToSplit(const MachineFunction &MF) const override;

  bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override;

  std::optional<ParamLoadedValue>
  describeLoadedValue(const MachineInstr &MI, Register Reg) const override;

  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;

  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
                                MachineRegisterInfo &MRI) const override;

  static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                  int64_t &NumBytes,
                                                  int64_t &NumPredicateVectors,
                                                  int64_t &NumDataVectors);
  static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
                                                  int64_t &ByteSized,
                                                  int64_t &VGSized);

  // Return true if an address of the form BaseReg + Scale * ScaledReg + Offset
  // can be used for a load/store of NumBytes. BaseReg is always present and
  // implicit.
  bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
                             unsigned Scale) const;

  // Decrement the SP, issuing probes along the way. `TargetReg` is the new top
  // of the stack. `FrameSetup` is true if the allocation is part of
  // constructing the activation frame of a function.
  MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI,
                                               Register TargetReg,
                                               bool FrameSetup) const;

#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"

protected:
  /// If the given machine instruction moves/copies a value from one register
  /// to another, return the destination and source registers as machine
  /// operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;
  std::optional<DestSourcePair>
  isCopyLikeInstrImpl(const MachineInstr &MI) const override;

private:
  unsigned getInstBundleLength(const MachineInstr &MI) const;

  /// Sets the offsets on outlined instructions in \p MBB which use SP
  /// so that they will be valid post-outlining.
  ///
  /// \param MBB A \p MachineBasicBlock in an outlined function.
  void fixupPostOutline(MachineBasicBlock &MBB) const;

  void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
                             MachineBasicBlock *TBB,
                             ArrayRef<MachineOperand> Cond) const;
  bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
                           const MachineRegisterInfo &MRI) const;
  bool removeCmpToZeroOrOne(MachineInstr &CmpInstr, unsigned SrcReg,
                            int CmpValue, const MachineRegisterInfo &MRI) const;

  /// Returns an unused general-purpose register which can be used for
  /// constructing an outlined call if one exists. Returns 0 otherwise.
  Register findRegisterToSaveLRTo(outliner::Candidate &C) const;

  /// Remove a ptest of a predicate-generating operation that already sets, or
  /// can be made to set, the condition codes in an identical manner.
  bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
                          unsigned PredReg,
                          const MachineRegisterInfo *MRI) const;
  std::optional<unsigned>
  canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
                      MachineInstr *Pred, const MachineRegisterInfo *MRI) const;

  /// verifyInstruction - Perform target specific instruction verification.
  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;
};

struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};

/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
/// \returns std::nullopt otherwise.
///
/// Collects the instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
                 const TargetRegisterInfo &TRI,
                 SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);

/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                     const MachineInstr &UseMI,
                                     const TargetRegisterInfo *TRI);

MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
                              unsigned Reg, const StackOffset &Offset,
                              bool LastAdjustmentWasScalable = true);
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg,
                                 const StackOffset &OffsetFromDefCFA);

/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                     StackOffset Offset, const TargetInstrInfo *TII,
                     MachineInstr::MIFlag = MachineInstr::NoFlags,
                     bool SetNZCV = false, bool NeedsWinCFI = false,
                     bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
                     StackOffset InitialOffset = {},
                     unsigned FrameReg = AArch64::SP);

/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
/// return the left-over portion by reference.
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                              unsigned FrameReg, StackOffset &Offset,
                              const AArch64InstrInfo *TII);

/// Used to report the frame offset status in isAArch64FrameOffsetLegal.
enum AArch64FrameOffsetStatus {
  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
};

/// Check if the @p Offset is a valid frame offset for @p MI.
/// The returned value reports the validity of the frame offset for @p MI.
/// It uses the values defined by AArch64FrameOffsetStatus for that.
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
/// turned into an unscaled operation, whose opcode is in @p OutUnscaledOp.
/// If set, @p EmittableOffset contains the amount that can be set in @p MI
/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
/// is a legal offset.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset,
                              bool *OutUseUnscaledOp = nullptr,
                              unsigned *OutUnscaledOp = nullptr,
                              int64_t *EmittableOffset = nullptr);
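
// Minimal usage sketch (hypothetical caller; variable names are illustrative):
//   bool UseUnscaledOp;
//   unsigned UnscaledOp;
//   int64_t Emittable;
//   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
//                                          &UnscaledOp, &Emittable);
//   if (Status & AArch64FrameOffsetCanUpdate)
//     ; // Fold Emittable into MI (switching to UnscaledOp if UseUnscaledOp),
//       // leaving any remainder in Offset for a separate address computation.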

static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }

static inline bool isCondBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::Bcc:
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    return true;
  default:
    return false;
  }
}

static inline bool isIndirectBranchOpcode(int Opc) {
  switch (Opc) {
  case AArch64::BR:
  case AArch64::BRAA:
  case AArch64::BRAB:
  case AArch64::BRAAZ:
  case AArch64::BRABZ:
    return true;
  }
  return false;
}

static inline bool isPTrueOpcode(unsigned Opc) {
  switch (Opc) {
  case AArch64::PTRUE_B:
  case AArch64::PTRUE_H:
  case AArch64::PTRUE_S:
  case AArch64::PTRUE_D:
    return true;
  default:
    return false;
  }
}

/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);

/// Return XPAC opcode to be used for a ptrauth strip using the given key.
static inline unsigned getXPACOpcodeForKey(AArch64PACKey::ID K) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: case IB: return AArch64::XPACI;
  case DA: case DB: return AArch64::XPACD;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return AUT opcode to be used for a ptrauth auth using the given key, or its
/// AUT*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getAUTOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::AUTIZA : AArch64::AUTIA;
  case IB: return Zero ? AArch64::AUTIZB : AArch64::AUTIB;
  case DA: return Zero ? AArch64::AUTDZA : AArch64::AUTDA;
  case DB: return Zero ? AArch64::AUTDZB : AArch64::AUTDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

/// Return PAC opcode to be used for a ptrauth sign using the given key, or its
/// PAC*Z variant that doesn't take a discriminator operand, using zero instead.
static inline unsigned getPACOpcodeForKey(AArch64PACKey::ID K, bool Zero) {
  using namespace AArch64PACKey;
  switch (K) {
  case IA: return Zero ? AArch64::PACIZA : AArch64::PACIA;
  case IB: return Zero ? AArch64::PACIZB : AArch64::PACIB;
  case DA: return Zero ? AArch64::PACDZA : AArch64::PACDA;
  case DB: return Zero ? AArch64::PACDZB : AArch64::PACDB;
  }
  llvm_unreachable("Unhandled AArch64PACKey::ID enum");
}

// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X)      (X)        // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3)  // 4-bits
#define TSFLAG_FALSE_LANE_TYPE(X)       ((X) << 7)  // 2-bits
#define TSFLAG_INSTR_FLAGS(X)           ((X) << 9)  // 2-bits
#define TSFLAG_SME_MATRIX_TYPE(X)       ((X) << 11) // 3-bits
// }
// These fields are encoded in MCInstrDesc::TSFlags; mask with the *Mask
// enumerators below to extract a field, e.g. (TSFlags & ElementSizeMask).

namespace AArch64 {

enum ElementSizeType {
  ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7),
  ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0),
  ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1),
  ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2),
  ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3),
  ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4),
};

enum DestructiveInstType {
  DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
  NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
  DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
  DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
  DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
  DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
  DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
  DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
  DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
  DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
  DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};

enum FalseLaneType {
  FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
  FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
  FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};

// NOTE: This is a bit field.
static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);

enum SMEMatrixType {
  SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7),
  SMEMatrixNone = TSFLAG_SME_MATRIX_TYPE(0x0),
  SMEMatrixTileB = TSFLAG_SME_MATRIX_TYPE(0x1),
  SMEMatrixTileH = TSFLAG_SME_MATRIX_TYPE(0x2),
  SMEMatrixTileS = TSFLAG_SME_MATRIX_TYPE(0x3),
  SMEMatrixTileD = TSFLAG_SME_MATRIX_TYPE(0x4),
  SMEMatrixTileQ = TSFLAG_SME_MATRIX_TYPE(0x5),
  SMEMatrixArray = TSFLAG_SME_MATRIX_TYPE(0x6),
};

#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
#undef TSFLAG_INSTR_FLAGS
#undef TSFLAG_SME_MATRIX_TYPE

int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
int getSVENonRevInstr(uint16_t Opcode);

int getSMEPseudoMap(uint16_t Opcode);

} // end namespace AArch64

} // end namespace llvm

#endif