Lines Matching +full:ras +full:- +full:to +full:- +full:cas
1 //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
21 def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
23 def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
25 def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
27 def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
29 def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
31 def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
33 def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
35 def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
37 def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
39 def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
41 def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
42 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
43 def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
45 def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
47 def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
49 def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
51 def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
52 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
54 def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
57 def HasEL3 : Predicate<"Subtarget->hasEL3()">,
60 def HasVH : Predicate<"Subtarget->hasVH()">,
63 def HasLOR : Predicate<"Subtarget->hasLOR()">,
66 def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
69 def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
70 AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
72 def HasJS : Predicate<"Subtarget->hasJS()">,
75 def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
78 def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
81 def HasNV : Predicate<"Subtarget->hasNV()">,
84 def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
87 def HasDIT : Predicate<"Subtarget->hasDIT()">,
90 def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
93 def HasAM : Predicate<"Subtarget->hasAM()">,
96 def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
99 def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
100 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
102 def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
105 def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
106 AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
108 def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110 def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
112 def HasSM4 : Predicate<"Subtarget->hasSM4()">,
114 def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
116 def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
118 def HasAES : Predicate<"Subtarget->hasAES()">,
120 def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
122 def HasCRC : Predicate<"Subtarget->hasCRC()">,
124 def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
126 def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
127 def HasLSE : Predicate<"Subtarget->hasLSE()">,
129 def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
130 def HasRAS : Predicate<"Subtarget->hasRAS()">,
131 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
132 def HasRDM : Predicate<"Subtarget->hasRDM()">,
134 def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
136 def HasNoFullFP16 : Predicate<"!Subtarget->hasFullFP16()">;
137 def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
139 def HasSPE : Predicate<"Subtarget->hasSPE()">,
141 def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
143 "fuse-aes">;
144 def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
146 def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
148 def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
150 def HasSVE2AES : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">,
151 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
152 def HasSVE2SM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
153 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
154 def HasSVE2SHA3 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
155 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
156 def HasSVE2BitPerm : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">,
157 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
158 def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
161 : Predicate<"Subtarget->hasSME()">,
163 def HasSME : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
165 def HasSMEF64F64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
166 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
167 def HasSMEF16F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
168 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
169 def HasSMEFA64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
170 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
171 def HasSMEI16I64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
172 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
174 : Predicate<"Subtarget->hasSME2()">,
176 def HasSME2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
178 def HasSME2p1 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
180 def HasFP8 : Predicate<"Subtarget->hasFP8()">,
182 def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
184 def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
186 def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
187 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
190 "ssve-fp8fma or (sve2 and fp8fma)">;
191 def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
193 def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
194 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
197 "ssve-fp8dot2 or (sve2 and fp8dot2)">;
198 def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
200 def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
201 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
204 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
205 def HasLUT : Predicate<"Subtarget->hasLUT()">,
207 def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
208 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
209 def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
210 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
211 def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
212 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
217 : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
221 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
225 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
229 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
232 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
235 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
239 : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
241 "sme-f16f16 or sme-f8f16">;
246 : Predicate<"Subtarget->hasNEON()">,
248 def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
250 def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
252 def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
254 def HasSB : Predicate<"Subtarget->hasSB()">,
256 def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
258 def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
260 def HasBTI : Predicate<"Subtarget->hasBTI()">,
262 def HasMTE : Predicate<"Subtarget->hasMTE()">,
264 def HasTME : Predicate<"Subtarget->hasTME()">,
266 def HasETE : Predicate<"Subtarget->hasETE()">,
268 def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
270 def HasBF16 : Predicate<"Subtarget->hasBF16()">,
272 def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">;
273 def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
275 def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
277 def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
279 def HasFPAC : Predicate<"Subtarget->hasFPAC()">,
281 def HasXS : Predicate<"Subtarget->hasXS()">,
283 def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
285 def HasLS64 : Predicate<"Subtarget->hasLS64()">,
287 def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
289 def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
290 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
291 def HasHBC : Predicate<"Subtarget->hasHBC()">,
293 def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
295 def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
297 def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
299 def HasITE : Predicate<"Subtarget->hasITE()">,
301 def HasTHE : Predicate<"Subtarget->hasTHE()">,
303 def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
305 def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
307 def HasD128 : Predicate<"Subtarget->hasD128()">,
309 def HasCHK : Predicate<"Subtarget->hasCHK()">,
311 def HasGCS : Predicate<"Subtarget->hasGCS()">,
313 def HasCPA : Predicate<"Subtarget->hasCPA()">,
// Endianness and target-OS predicates used to gate instruction-selection
// patterns (e.g. big-endian targets must lower some vector loads differently).
315 def IsLE : Predicate<"Subtarget->isLittleEndian()">;
316 def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
317 def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
319 : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
321 : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
// Subtarget tuning predicates. Note the double negation in UseSVEFPLD1R:
// it is true precisely when the subtarget does NOT report noSVEFPLD1R().
327 def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
329 def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
331 def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
338 //===----------------------------------------------------------------------===//
339 // AArch64-specific DAG Nodes.
342 // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
348 // SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
355 // SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
427 def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
460 // non-extending masked load fragment.
464 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
465 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
466 !cast<MaskedLoadSDNode>(N)->isNonTemporal();
472 return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
473 cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
474 cast<MaskedLoadSDNode>(N)->isUnindexed();
479 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
484 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
489 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
495 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
496 cast<MaskedLoadSDNode>(N)->isUnindexed();
501 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
506 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
511 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
517 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
518 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
519 cast<MaskedLoadSDNode>(N)->isNonTemporal();
522 // non-truncating masked store fragment.
526 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
527 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
528 !cast<MaskedStoreSDNode>(N)->isNonTemporal();
534 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
535 cast<MaskedStoreSDNode>(N)->isUnindexed();
540 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
545 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
550 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
556 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
557 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
558 cast<MaskedStoreSDNode>(N)->isNonTemporal();
567 bool Signed = MGS->isIndexSigned() ||
568 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
569 return Signed && MGS->isIndexScaled();
576 bool Signed = MGS->isIndexSigned() ||
577 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
578 return Signed && !MGS->isIndexScaled();
585 bool Signed = MGS->isIndexSigned() ||
586 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
587 return !Signed && MGS->isIndexScaled();
594 bool Signed = MGS->isIndexSigned() ||
595 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
596 return !Signed && !MGS->isIndexScaled();
613 // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
615 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
616 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
619 // top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
621 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
622 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
625 // topbitsallzero - Return true if all bits except the lowest bit are known zero
627 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
628 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
631 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
632 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
649 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
654 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
659 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
664 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
669 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
684 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
768 return N->getFlags().hasExact();
957 if (N->getOpcode() == ISD::ADD)
959 return CurDAG->isADDLike(SDValue(N,0));
962 // Only handle G_ADD for now. FIXME. build capability to compute whether
968 // Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
970 return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
971 CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
974 //===----------------------------------------------------------------------===//
976 //===----------------------------------------------------------------------===//
979 // We could compute these on a per-module basis but doing so requires accessing
981 // to that (see post-commit review comments for r301750).
// Allow register-offset STR of a Q register when the subtarget does not
// report it slow, or unconditionally when optimizing the function for size.
986 def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
988 // Register restrictions for indirect tail-calls:
989 // - If branch target enforcement is enabled, indirect calls must use x16 or
992 // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
// BTI on, PAuthLR off: indirect tail calls restricted to x16 or x17.
1000 def TailCallX16X17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
// BTI on, PAuthLR on: x17 only (x16 is used in the epilogue to hold an
// address when PAuthLR is enabled, so it is unavailable here).
1002 def TailCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1003 // BTI off, PAuthLR on: Any non-callee-saved register except x16
1004 def TailCallNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1005 // BTI off, PAuthLR off: Any non-callee-saved register
1006 def TailCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
// Selects between hardened and plain indirect-call forms depending on whether
// straight-line-speculation (SLS) BLR hardening is enabled on the subtarget.
1008 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1009 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1011 // optimizing. This allows us to selectively use patterns without impacting
1013 // FIXME: One day there will probably be a nicer way to check for this, but
1015 def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
1022 //===----------------------------------------------------------------------===//
1024 //===----------------------------------------------------------------------===//
1026 //===----------------------------------------------------------------------===//
1030 // We set Sched to empty list because we expect these instructions to simply get
1043 // stack-clash protection is enabled.
1051 // stack-clash protection is enabled.
1058 // when stack-clash protection is enabled.
1131 // In general these get lowered into a sequence of three 4-byte instructions.
1132 // 32-bit jump table destination is actually only 2 instructions since we can
1133 // use the table itself as a PC-relative base. But optimization occurs after
1148 // A hardened but more expensive version of jump-table dispatch.
1151 // a plain BR) in a single non-attackable sequence.
1153 // We take the final entry index as an operand to allow isel freedom. This does
1154 // mean that the index can be attacker-controlled. To address that, we also do
1156 // jump-table array. When it doesn't, this branches to the first entry.
1157 // We might want to trap instead.
1160 // to avoid signing jump-table entries and turning them into pointers.
1175 // Space-consuming pseudo to aid testing of placement and reachability
1177 // occupies; register operands can be used to enforce dependency and constrain
1194 // This gets lowered to a pair of 4-byte instructions.
1198 // This gets lowered to a 4-byte instruction.
1204 //===----------------------------------------------------------------------===//
1206 //===----------------------------------------------------------------------===//
1218 // In order to be able to write readable assembly, LLVM should accept assembly
1220 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1230 // As far as LLVM is concerned this writes to the system's exclusive monitors.
1253 let CRm{1-0} = 0b11;
1254 let Inst{9-8} = 0b10;
1263 // Branch Record Buffer two-word mnemonic instructions
1266 let Inst{31-8} = 0b110101010000100101110010;
1267 let Inst{7-5} = op2;
1280 // ARMv9.4-A Guarded Control Stack
1283 let Inst{20-8} = 0b0100001110111;
1284 let Inst{7-5} = op2;
1294 let Inst{20-19} = 0b01;
1295 let Inst{18-16} = op1;
1296 let Inst{15-8} = 0b01110111;
1297 let Inst{7-5} = op2;
1310 let Inst{20-19} = 0b01;
1311 let Inst{18-16} = op1;
1312 let Inst{15-8} = 0b01110111;
1313 let Inst{7-5} = op2;
1323 // FIXME: mayStore = 1 only needed to match the intrinsic definition
1327 def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1345 let Inst{31-15} = 0b11011001000111110;
1346 let Inst{14-12} = op;
1347 let Inst{11-10} = 0b11;
1348 let Inst{9-5} = Rn;
1349 let Inst{4-0} = Rt;
1355 // ARMv8.2-A Dot Product
1363 // ARMv8.6-A BFloat
1378 // Vector-scalar BFDOT:
1379 // The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1380 // register (the instruction uses a single 32-bit lane from it), so the pattern
1398 // Round FP32 to BF16.
1411 // The second operand is used in the dup operation to repeat the indexed
1436 // ARMv8.2-A FP16 Fused Multiply-Add Long
1448 // Armv8.2-A Crypto extensions
1565 // v8.3a complex add and multiply-accumulate. No predicate here, that is done
1672 // In order to be able to write readable assembly, LLVM should accept assembly
1674 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1767 // This directly manipulates x16/x17 to materialize the discriminator.
1781 let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1800 // guarantees are safe to use for sensitive operations.
1812 let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1828 // guarantees are safe to use for sensitive operations.
1840 // AUT and re-PAC a value, using different keys/data.
1842 // guarantees are safe to use for sensitive operations.
1889 // Size 16: 4 fixed + 8 variable, to compute discriminator.
1916 // v9.5-A pointer authentication extensions
1919 // disassembling if we don't have the pauth-lr feature.
1969 let Inst{20-5} = 0b0000001000000000;
1981 let Inst{18-16} = 0b000;
1982 let Inst{11-8} = 0b0000;
1983 let Unpredictable{11-8} = 0b1111;
1984 let Inst{7-5} = 0b001;
1988 let Inst{18-16} = 0b000;
1989 let Inst{11-8} = 0b0000;
1990 let Unpredictable{11-8} = 0b1111;
1991 let Inst{7-5} = 0b010;
1996 // Armv8.5-A speculation barrier
1998 let Inst{20-5} = 0b0001100110000111;
1999 let Unpredictable{11-8} = 0b1111;
2023 // This gets lowered into a 24-byte instruction sequence
2108 //===----------------------------------------------------------------------===//
2110 //===----------------------------------------------------------------------===//
2189 // directly to the real instructions and get rid of these pseudos.
2201 // If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
2202 // eventual expansion code fewer bits to worry about getting right. Marshalling
2215 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
2222 // The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2230 return CurDAG->getTargetConstant(
2231 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
2235 return CurDAG->getTargetConstant(
2236 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
2277 //===----------------------------------------------------------------------===//
2279 //===----------------------------------------------------------------------===//
2307 return N->getOpcode() == ISD::CopyFromReg &&
2308 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2311 // Use SUBS instead of SUB to enable CSE between SUBS and SUB.
2333 // Because of the immediate format for add/sub-imm instructions, the
2334 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2347 // Because of the immediate format for add/sub-imm instructions, the
2348 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2400 // Multiply-add
2528 // Multiply-high
2543 // v8.1 atomic CAS
2544 defm CAS : CompareAndSwap<0, 0, "">;
2602 // v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR"
2689 // Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
2702 // A variant of the above where $Rn2 is an independent register not tied to the input register $Rn.
2703 // Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back).
2717 //===----------------------------------------------------------------------===//
2719 //===----------------------------------------------------------------------===//
2728 // used). Actually, it seems to be working right now, but putting logical_immXX
2786 //===----------------------------------------------------------------------===//
2788 //===----------------------------------------------------------------------===//
2826 // Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2834 //===----------------------------------------------------------------------===//
2836 //===----------------------------------------------------------------------===//
2849 //===----------------------------------------------------------------------===//
2851 //===----------------------------------------------------------------------===//
2859 uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2860 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2864 uint64_t enc = 31 - N->getZExtValue();
2865 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2868 // min(7, 31 - shift_amt)
2870 uint64_t enc = 31 - N->getZExtValue();
2872 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2875 // min(15, 31 - shift_amt)
2877 uint64_t enc = 31 - N->getZExtValue();
2879 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2883 uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2884 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2888 uint64_t enc = 63 - N->getZExtValue();
2889 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2892 // min(7, 63 - shift_amt)
2894 uint64_t enc = 63 - N->getZExtValue();
2896 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2899 // min(15, 63 - shift_amt)
2901 uint64_t enc = 63 - N->getZExtValue();
2903 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2906 // min(31, 63 - shift_amt)
2908 uint64_t enc = 63 - N->getZExtValue();
2910 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2952 //===----------------------------------------------------------------------===//
2954 //===----------------------------------------------------------------------===//
2958 //===----------------------------------------------------------------------===//
2960 //===----------------------------------------------------------------------===//
2993 def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2995 def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2997 def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2999 def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
3001 def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
3003 def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
3053 //===----------------------------------------------------------------------===//
3054 // PC-relative instructions.
3055 //===----------------------------------------------------------------------===//
3075 //===----------------------------------------------------------------------===//
3077 //===----------------------------------------------------------------------===//
3085 // Default to the LR register.
3125 // Create a separate pseudo-instruction for codegen to use so that we don't
3135 // This is a directive-like pseudo-instruction. The purpose is to insert an
3143 // Pseudo instruction to tell the streamer to emit a 'B' character into the
3147 // Pseudo instruction to tell the streamer to emit a 'G' character into the
3151 // FIXME: maybe the scratch register used shouldn't be fixed to X1?
3153 // This gets lowered to an instruction sequence which takes 16 bytes
3163 //===----------------------------------------------------------------------===//
3165 //===----------------------------------------------------------------------===//
3168 // Armv8.8-A variant form which hints to the branch predictor that
3169 // this branch is very likely to go the same way nearly all the time
3173 //===----------------------------------------------------------------------===//
3174 // Compare-and-branch instructions.
3175 //===----------------------------------------------------------------------===//
3179 //===----------------------------------------------------------------------===//
3180 // Test-bit-and-branch instructions.
3181 //===----------------------------------------------------------------------===//
3185 //===----------------------------------------------------------------------===//
3187 //===----------------------------------------------------------------------===//
3197 //===----------------------------------------------------------------------===//
3199 //===----------------------------------------------------------------------===//
3213 // DCPSn defaults to an immediate operand of zero if unspecified.
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3235 // Pair (pre-indexed)
3246 // Pair (post-indexed)
3272 //---
3274 //---
3282 // Floating-point
3291 // Load sign-extended half-word
3295 // Load sign-extended byte
3299 // Load sign-extended word
3302 // Pre-fetch.
3306 // Thus, it is safe to directly map the vector loads with interesting
3308 // FIXME: We could do the same for bitconvert to floating point vectors.
3371 // We must do vector loads with LD1 in big-endian.
3385 // We must do vector loads with LD1 in big-endian.
3397 // zextload -> i64
3416 // zextloadi1 -> zextloadi8
3419 // extload -> zextload
3424 // extloadi1 -> zextloadi8
3429 // zextload -> i64
3441 // extload -> zextload
3446 // zextloadi1 -> zextloadi8
3450 //---
3452 //---
3482 // Thus, it is safe to directly map the vector loads with interesting
3484 // FIXME: We could do the same for bitconvert to floating point vectors.
3519 // We must use LD1 to perform vector loads in big-endian.
3540 // We must use LD1 to perform vector loads in big-endian.
3569 // zextload -> i64
3575 // zextloadi1 -> zextloadi8
3581 // extload -> zextload
3597 // load sign-extended half-word
3607 // load sign-extended byte
3617 // load sign-extended word
3623 // load zero-extended word
3627 // Pre-fetch.
3635 //---
3640 const DataLayout &DL = MF->getDataLayout();
3641 Align Align = G->getGlobal()->getPointerAlignment(DL);
3642 return Align >= 4 && G->getOffset() % 4 == 0;
3645 return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
3662 // load sign-extended word
3675 //---
3750 // anyext -> zext
3782 //---
3783 // LDR mnemonics fall back to LDUR for negative or unaligned offsets.
3785 // Define new assembler match classes as we want to only match these when
3788 // canonical form (the scaled operand) to take precedence.
3834 // zextload -> i64
3840 // load sign-extended half-word
3850 // load sign-extended byte
3860 // load sign-extended word
3866 // zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
3897 // Half-vector patterns
3933 // Pre-fetch.
3938 //---
3946 // load sign-extended half-word
3950 // load sign-extended byte
3954 // load sign-extended word
3957 //---
3958 // (immediate pre-indexed)
3969 // load sign-extended half-word
3973 // load sign-extended byte
3977 // load zero-extended byte
3981 // load sign-extended word
3984 //---
3985 // (immediate post-indexed)
3996 // load sign-extended half-word
4000 // load sign-extended byte
4004 // load zero-extended byte
4008 // load sign-extended word
4011 //===----------------------------------------------------------------------===//
4013 //===----------------------------------------------------------------------===//
4016 // FIXME: Use dedicated range-checked addressing mode operand here.
4025 // Pair (pre-indexed)
4034 // Pair (post-indexed)
4059 //---
4069 // Floating-point
4124 // We must use ST1 to store vectors in big-endian.
4138 // We must use ST1 to store vectors in big-endian.
4150 // Match stores from lane 0 to the appropriate subreg's store.
4177 //---
4226 // We must use ST1 to store vectors in big-endian.
4253 // We must use ST1 to store vectors in big-endian.
4292 // Match stores from lane 0 to the appropriate subreg's store.
4313 //---
4377 // We must use ST1 to store vectors in big-endian.
4403 // We must use ST1 to store vectors in big-endian.
4443 // Match stores from lane 0 to the appropriate subreg's store.
4460 //---
4461 // STR mnemonics fall back to STUR for negative or unaligned offsets.
4484 //---
4492 //---
4493 // (immediate pre-indexed)
4548 //---
4549 // (immediate post-indexed)
4611 //===----------------------------------------------------------------------===//
4613 //===----------------------------------------------------------------------===//
4636 Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn
4637 of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an
4639 LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
4641 case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4673 // v8.1a "Limited Order Region" extension load-acquire instructions
4679 // v8.1a "Limited Order Region" extension store-release instructions
4692 //===----------------------------------------------------------------------===//
4693 // Scaled floating point to integer conversion instructions.
4694 //===----------------------------------------------------------------------===//
4833 //===----------------------------------------------------------------------===//
4834 // Scaled integer to floating point conversion instructions.
4835 //===----------------------------------------------------------------------===//
4868 //===----------------------------------------------------------------------===//
4869 // Unscaled integer to floating point conversion instruction.
4870 //===----------------------------------------------------------------------===//
4905 //===----------------------------------------------------------------------===//
4907 //===----------------------------------------------------------------------===//
4910 // Helper to get bf16 into fp32.
4920 // Pattern for bf16 -> fp32.
4923 // Pattern for bf16 -> fp64.
4927 //===----------------------------------------------------------------------===//
4929 //===----------------------------------------------------------------------===//
4954 // Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
4989 //===----------------------------------------------------------------------===//
4991 //===----------------------------------------------------------------------===//
5052 //===----------------------------------------------------------------------===//
5054 //===----------------------------------------------------------------------===//
5070 // Here we handle first -(a + b*c) for FNMADD:
5082 // Now it's time for "(-a) + (-b)*c"
5094 //===----------------------------------------------------------------------===//
5096 //===----------------------------------------------------------------------===//
5101 //===----------------------------------------------------------------------===//
5103 //===----------------------------------------------------------------------===//
5108 //===----------------------------------------------------------------------===//
5110 //===----------------------------------------------------------------------===//
5118 // CSEL instructions providing f128 types need to be handled by a
5119 // pseudo-instruction since the eventual code will need to introduce basic
5132 //===----------------------------------------------------------------------===//
5134 //===----------------------------------------------------------------------===//
5159 //===----------------------------------------------------------------------===//
5176 //===----------------------------------------------------------------------===//
5178 //===----------------------------------------------------------------------===//
5189 //===----------------------------------------------------------------------===//
5191 //===----------------------------------------------------------------------===//
5195 // Match UABDL in log2-shuffle patterns.
5340 // Aliases for MVN -> NOT.
5390 // Patterns for vector long shift (by element width). These need to match all
5391 // three of zext, sext and anyext so it's easier to pull the patterns out of the
5420 // trunc(umin(X, 255)) -> UQXTRN v8i8
5423 // trunc(umin(X, 65535)) -> UQXTRN v4i16
5426 // trunc(smin(smax(X, -128), 128)) -> SQXTRN
5434 // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
5443 // concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
5448 // concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
5454 // concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
5467 // concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
5492 //===----------------------------------------------------------------------===//
5494 //===----------------------------------------------------------------------===//
5904 //===----------------------------------------------------------------------===//
5906 //===----------------------------------------------------------------------===//
5994 //===----------------------------------------------------------------------===//
5996 //===----------------------------------------------------------------------===//
6011 //===----------------------------------------------------------------------===//
6013 //===----------------------------------------------------------------------===//
6057 // Round FP64 to BF16.
6156 // Some float -> int -> float conversion patterns for which we want to keep the
6157 // int values in FP registers using the corresponding NEON instructions to
6158 // avoid more costly int <-> fp register transfers.
6176 // int -> float conversion of value in lane 0 of simd vector should use
6177 // correct cvtf variant to avoid costly fpr <-> gpr register transfers.
6190 // fp16: integer extraction from vector must be at least 32-bits to be legal.
6191 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
6197 // unsigned 32-bit extracted element is truncated to 16-bits using AND
6203 // If an integer is about to be converted to a floating point value,
6205 // Here are the patterns for 8 and 16-bits to float.
6206 // 8-bits -> float.
6236 // 16-bits -> float.
6247 // 32-bits are handled in target specific dag combine:
6249 // 64-bits integer to 32-bits floating point, not possible with
6253 // Here are the patterns for 8, 16, 32, and 64-bits to double.
6254 // 8-bits -> double.
6265 // 16-bits -> double.
6276 // 32-bits -> double.
6287 // 64-bits -> double are handled in target specific dag combine:
6291 //===----------------------------------------------------------------------===//
6292 // Advanced SIMD three different-sized vector instructions.
6293 //===----------------------------------------------------------------------===//
6463 //----------------------------------------------------------------------------
6465 //----------------------------------------------------------------------------
6470 return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
6477 // We use EXT to handle extract_subvector to copy the upper 64-bits of a
6478 // 128-bit vector.
6481 // A 64-bit EXT of two halves of the same 128-bit register can be done as a
6482 // single 128-bit EXT.
6487 // A 64-bit EXT of the high half of a 128-bit register can be done using a
6488 // 128-bit EXT of the whole register with an adjustment to the immediate. The
6508 //----------------------------------------------------------------------------
6510 //----------------------------------------------------------------------------
6524 // concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
6525 // concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
6526 // concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
6535 // trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
6536 // trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
6537 // trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
6566 //----------------------------------------------------------------------------
6568 //----------------------------------------------------------------------------
6585 //----------------------------------------------------------------------------
6587 //----------------------------------------------------------------------------
6593 //----------------------------------------------------------------------------
6595 //----------------------------------------------------------------------------
6599 //----------------------------------------------------------------------------
6601 //----------------------------------------------------------------------------
6611 // below, so the second operand does not matter. Re-use the first input
6612 // operand, so no additional dependencies need to be introduced.
6659 //----------------------------------------------------------------------------
6661 //----------------------------------------------------------------------------
6679 // DUP from a 64-bit register to a 64-bit register is just a copy
6732 // instruction even if the types don't match: we just have to remap the lane
6733 // carefully. N.b. this trick only applies to truncations.
6735 return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
6738 return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
6741 return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
6807 // Extracting i8 or i16 elements will have the zero-extend transformed to
6978 // FIXME refactor to a shared class/dev parameterized on vector type, vector
7045 // vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
7057 // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
7089 // All concat_vectors operations are canonicalised to act on i64 vectors for
7097 // If the high lanes are zero we can instead emit a d->d register mov, which
7116 //----------------------------------------------------------------------------
7118 //----------------------------------------------------------------------------
7215 // Patterns for across-vector intrinsics, that have a node equivalent, that
7221 // directly match the latter to the instruction.
7239 // If none did, fallback to the explicit patterns, consuming the vector_extract.
7344 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7349 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7370 // because GlobalISel allows us to specify the return register to be a FPR
7470 // The vaddlv_s32 intrinsic gets mapped to SADDLP.
7476 // The vaddlv_u32 intrinsic gets mapped to UADDLP.
7483 //------------------------------------------------------------------------------
7485 //------------------------------------------------------------------------------
7568 // Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
7589 // Using the MOVI to materialize fp constants.
7673 //----------------------------------------------------------------------------
7675 //----------------------------------------------------------------------------
7685 // On the other hand, there are quite a few valid combinatorial options due to
7686 // the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7702 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7722 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7743 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
7744 // (DUPLANE from 64-bit would be trivial).
7755 // 2 variants for 32-bit scalar version: extract from .2s or from .4s
7769 // 1 variant for 64-bit scalar version: extract from .1d or from .2d
7838 //----------------------------------------------------------------------------
7840 //----------------------------------------------------------------------------
7847 // Having the same base pattern for fp <--> int totally freaks it out.
7877 // Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
7959 //----------------------------------------------------------------------------
7961 //----------------------------------------------------------------------------
8143 // Vector bf16 -> fp32 is implemented morally as a zext + shift.
8259 // If an integer is about to be converted to a floating point value,
8263 // The sign extension has to be explicitly added and is only supported for
8264 // one step: byte-to-half, half-to-word, word-to-doubleword.
8265 // SCVTF GPR -> FPR is 9 cycles.
8266 // SCVTF FPR -> FPR is 4 cyclces.
8267 // (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
8268 // Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
8271 // 8-bits -> float. 2 sizes step-up.
8297 // 16-bits -> float. 1 size step-up.
8318 // 32-bits to 32-bits are handled in target specific dag combine:
8320 // 64-bits integer to 32-bits floating point, not possible with
8324 // Here are the patterns for 8, 16, 32, and 64-bits to double.
8325 // 8-bits -> double. 3 size step-up: give up.
8326 // 16-bits -> double. 2 size step.
8351 // 32-bits -> double. 1 size step-up.
8372 // 64-bits -> double are handled in target specific dag combine:
8376 //----------------------------------------------------------------------------
8377 // AdvSIMD Load-Store Structure
8378 //----------------------------------------------------------------------------
8414 //---
8415 // Single-element
8416 //---
8511 // In this case, the index must be adjusted to match LD1 type.
8531 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8534 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
8537 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8693 //----------------------------------------------------------------------------
8695 //----------------------------------------------------------------------------
8745 //----------------------------------------------------------------------------
8746 // Compiler-pseudos
8747 //----------------------------------------------------------------------------
8755 // When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8760 // To sign extend, we use a signed bitfield move instruction (SBFM) on the
8761 // containing super-reg.
8797 // AddedComplexity for the following patterns since we want to match sext + sra
8798 // patterns before we attempt to match a single sra node.
8801 // original value which is to be sign extended. E.g. we support shifts up to
8802 // bitwidth-1 bits.
8818 // To truncate, we can simply extract from a subregister.
8827 return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8880 // v1 = BITCAST v2i32 v0 to v4i16
8884 // STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8885 // is, they treat the vector as a sequence of elements to be byte-swapped.
8887 // to use LD1/ST1 only to simplify compiler implementation.
8893 // v2 = BITCAST v2i32 v1 to v4i16
8897 // But this is now broken - the value stored is different to the value loaded
8898 // due to lane reordering. To fix this, on every BITCAST we must perform two
8903 // v3 = BITCAST v2i32 v2 to v4i16
8912 // There is also no 128-bit REV instruction. This must be synthesized with an
8916 // a) Identity conversions - vNfX <-> vNiX
8917 // b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
9515 // A 64-bit subvector insert to the first 128-bit vector position
9539 // Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
9547 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
9557 // Prefer using the bottom lanes of addp Rn, Rn compared to
9576 // add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y)
9601 // Scalar 64-bit shifts in FPR64 registers.
9611 // Patterns for nontemporal/no-allocate stores.
9612 // We have to resort to tricks to turn a single-input store into a store pair,
9613 // because there is no single-input nontemporal store, only STNP.
9635 // FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
9650 // Tail call return handling. These are all compiler pseudo-instructions,
9657 // Indirect tail-call with any register allowed, used by MachineOutliner when
9659 // FIXME: If we have to add any more hacks like this, we should instead relax
9664 // Indirect tail-calls with reduced register classes, needed for BTI and
9698 // to reason about, so is preferred when it's possible to use it.
9749 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
9810 // FIXME: add patterns to generate vector by element dot product.
9811 // FIXME: add SVE dot-product patterns.
9814 // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
9815 // so that it can be used as input to inline asm, and vice versa.
9823 foreach i = 0-7 in {
9883 // MOPS operations always contain three 4-byte instructions
9905 //-----------------------------------------------------------------------------
9913 //-----------------------------------------------------------------------------
9925 //===----------------------------===//
9927 //===----------------------------===//
9934 //===----------------------------------------------------------------------===//
9936 //===----------------------------------------------------------------------===//
9943 //===----------------------------------------------------------------------===//
9944 // General Data-Processing Instructions (FEAT_V94_DP)
9945 //===----------------------------------------------------------------------===//
9962 let Inst{2-0} = Rt{2-0};
9963 let Inst{4-3} = 0b11;
9964 let Inst{9-5} = Rn;
9965 let Inst{11-10} = 0b10;
9966 let Inst{13-12} = Rt{4-3};
9969 let Inst{20-16} = Rm;
9970 let Inst{31-21} = 0b11111000101;
9975 // Fail, the decoder should attempt to decode RPRFM. This requires setting
9976 // the decoder namespace to "Fallback".
9980 //===----------------------------------------------------------------------===//
9981 // 128-bit Atomics (FEAT_LSE128)
9982 //===----------------------------------------------------------------------===//
9998 //===----------------------------------------------------------------------===//
10000 //===----------------------------------------------------------------------===//
10004 def STILPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
10005 def STILPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
10021 def STLRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
10022 def STLRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
10048 //===----------------------------------------------------------------------===//
10049 // 128-bit System Instructions (FEAT_SYSINSTR128)
10050 //===----------------------------------------------------------------------===//
10060 // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?)
10062 // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one
10064 // is based off of the asm template (maybe) and therefore wants to print 5 operands.
10065 // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would
10073 let Inst{20-19} = 0b01;
10074 let Inst{18-16} = op1;
10075 let Inst{15-12} = Cn;
10076 let Inst{11-8} = Cm;
10077 let Inst{7-5} = op2;
10078 let Inst{4-0} = 0b11111;
10085 //---
10086 // 128-bit System Registers (FEAT_SYSREG128)
10087 //---
10108 let Inst{20-5} = systemreg;
10116 let Inst{20-5} = systemreg;
10120 //===----------------------------===//
10122 //===----------------------------===//
10165 //===----------------------------------------------------------------------===//
10167 //===----------------------------------------------------------------------===//
10173 // Scalar multiply-add/subtract
10184 // Extract the LSB of the fp32 *truncated* to bf16.