Lines Matching +full:ras +full:- +full:to +full:- +full:cas
1 //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
21 def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
23 def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
25 def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
27 def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
29 def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
31 def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
33 def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
35 def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
37 def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
39 def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
41 def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
42 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
43 def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
45 def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
47 def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
49 def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
51 def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
52 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
54 def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
57 def HasEL3 : Predicate<"Subtarget->hasEL3()">,
60 def HasVH : Predicate<"Subtarget->hasVH()">,
63 def HasLOR : Predicate<"Subtarget->hasLOR()">,
66 def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
69 def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
70 AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
72 def HasJS : Predicate<"Subtarget->hasJS()">,
75 def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
78 def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
81 def HasNV : Predicate<"Subtarget->hasNV()">,
84 def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
87 def HasDIT : Predicate<"Subtarget->hasDIT()">,
90 def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
93 def HasAM : Predicate<"Subtarget->hasAM()">,
96 def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
99 def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
100 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
102 def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
105 def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
106 AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
108 def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110 def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
112 def HasSM4 : Predicate<"Subtarget->hasSM4()">,
114 def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
116 def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
118 def HasAES : Predicate<"Subtarget->hasAES()">,
120 def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
122 def HasCRC : Predicate<"Subtarget->hasCRC()">,
124 def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
126 def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
127 def HasLSE : Predicate<"Subtarget->hasLSE()">,
129 def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
130 def HasRAS : Predicate<"Subtarget->hasRAS()">,
131 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
132 def HasRDM : Predicate<"Subtarget->hasRDM()">,
134 def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
136 def HasNoFullFP16 : Predicate<"!Subtarget->hasFullFP16()">;
137 def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
139 def HasSPE : Predicate<"Subtarget->hasSPE()">,
141 def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
143 "fuse-aes">;
144 def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
146 def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
148 def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
150 def HasSVE2AES : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">,
151 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
152 def HasSVE2SM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
153 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
154 def HasSVE2SHA3 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
155 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
156 def HasSVE2BitPerm : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">,
157 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
158 def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
161 : Predicate<"Subtarget->hasSME()">,
163 def HasSME : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
165 def HasSMEF64F64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
166 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
167 def HasSMEF16F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
168 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
169 def HasSMEFA64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
170 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
171 def HasSMEI16I64 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
172 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
174 : Predicate<"Subtarget->hasSME2()">,
176 def HasSME2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
178 def HasSME2p1 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
180 def HasFP8 : Predicate<"Subtarget->hasFP8()">,
182 def HasFAMINMAX : Predicate<"Subtarget->hasFAMINMAX()">,
184 def HasFP8FMA : Predicate<"Subtarget->hasFP8FMA()">,
186 def HasSSVE_FP8FMA : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
187 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
190 "ssve-fp8fma or (sve2 and fp8fma)">;
191 def HasFP8DOT2 : Predicate<"Subtarget->hasFP8DOT2()">,
193 def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
194 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
197 "ssve-fp8dot2 or (sve2 and fp8dot2)">;
198 def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
200 def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
201 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
204 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
205 def HasLUT : Predicate<"Subtarget->hasLUT()">,
207 def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
208 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
209 def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
210 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
211 def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
212 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
217 : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
221 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
225 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
229 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
232 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
235 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
239 : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
241 "sme-f16f16 or sme-f8f16">;
246 : Predicate<"Subtarget->hasNEON()">,
248 def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
250 def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
252 def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
254 def HasSB : Predicate<"Subtarget->hasSB()">,
256 def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
258 def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
260 def HasBTI : Predicate<"Subtarget->hasBTI()">,
262 def HasMTE : Predicate<"Subtarget->hasMTE()">,
264 def HasTME : Predicate<"Subtarget->hasTME()">,
266 def HasETE : Predicate<"Subtarget->hasETE()">,
268 def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
270 def HasBF16 : Predicate<"Subtarget->hasBF16()">,
272 def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">;
273 def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
275 def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
277 def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
279 def HasFPAC : Predicate<"Subtarget->hasFPAC()">,
281 def HasXS : Predicate<"Subtarget->hasXS()">,
283 def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
285 def HasLS64 : Predicate<"Subtarget->hasLS64()">,
287 def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
289 def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
290 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
291 def HasHBC : Predicate<"Subtarget->hasHBC()">,
293 def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
295 def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
297 def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
299 def HasITE : Predicate<"Subtarget->hasITE()">,
301 def HasTHE : Predicate<"Subtarget->hasTHE()">,
303 def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
305 def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
307 def HasD128 : Predicate<"Subtarget->hasD128()">,
309 def HasCHK : Predicate<"Subtarget->hasCHK()">,
311 def HasGCS : Predicate<"Subtarget->hasGCS()">,
313 def HasCPA : Predicate<"Subtarget->hasCPA()">,
// Endianness and target-OS predicates used to gate instruction-selection
// patterns (e.g. big-endian targets must lower some vector loads differently).
315 def IsLE : Predicate<"Subtarget->isLittleEndian()">;
316 def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
317 def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
319 : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
321 : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
// Subtarget tuning predicates. Note the double negation in UseSVEFPLD1R:
// it is true precisely when the subtarget does NOT report noSVEFPLD1R().
327 def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
329 def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
331 def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
338 //===----------------------------------------------------------------------===//
339 // AArch64-specific DAG Nodes.
342 // SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
348 // SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
355 // SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
427 def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
460 // non-extending masked load fragment.
464 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
465 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
466 !cast<MaskedLoadSDNode>(N)->isNonTemporal();
472 return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
473 cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
474 cast<MaskedLoadSDNode>(N)->isUnindexed();
479 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
484 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
489 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
495 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
496 cast<MaskedLoadSDNode>(N)->isUnindexed();
501 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
506 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
511 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
517 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
518 cast<MaskedLoadSDNode>(N)->isUnindexed() &&
519 cast<MaskedLoadSDNode>(N)->isNonTemporal();
522 // non-truncating masked store fragment.
526 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
527 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
528 !cast<MaskedStoreSDNode>(N)->isNonTemporal();
534 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
535 cast<MaskedStoreSDNode>(N)->isUnindexed();
540 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
545 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
550 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
556 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
557 cast<MaskedStoreSDNode>(N)->isUnindexed() &&
558 cast<MaskedStoreSDNode>(N)->isNonTemporal();
567 bool Signed = MGS->isIndexSigned() ||
568 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
569 return Signed && MGS->isIndexScaled();
576 bool Signed = MGS->isIndexSigned() ||
577 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
578 return Signed && !MGS->isIndexScaled();
585 bool Signed = MGS->isIndexSigned() ||
586 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
587 return !Signed && MGS->isIndexScaled();
594 bool Signed = MGS->isIndexSigned() ||
595 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
596 return !Signed && !MGS->isIndexScaled();
613 // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
615 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
616 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
619 // top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
621 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
622 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
625 // topbitsallzero - Return true if all bits except the lowest bit are known zero
627 return SDValue(N,0)->getValueType(0) == MVT::i32 &&
628 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
631 return SDValue(N,0)->getValueType(0) == MVT::i64 &&
632 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
649 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
654 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
659 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
664 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
669 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
684 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
768 return N->getFlags().hasExact();
957 if (N->getOpcode() == ISD::ADD)
959 return CurDAG->isADDLike(SDValue(N,0));
962 // Only handle G_ADD for now. FIXME. build capability to compute whether
968 // Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
970 return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
971 CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
974 //===----------------------------------------------------------------------===//
976 //===----------------------------------------------------------------------===//
979 // We could compute these on a per-module basis but doing so requires accessing
981 // to that (see post-commit review comments for r301750).
// Allow register-offset STR of a Q register when the subtarget does not
// report it slow, or unconditionally when optimizing the function for size.
986 def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
988 // Register restrictions for indirect tail-calls:
989 // - If branch target enforcement is enabled, indirect calls must use x16 or
992 // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
// BTI on, PAuthLR off: indirect tail calls restricted to x16 or x17.
1000 def TailCallX16X17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
// BTI on, PAuthLR on: x17 only (x16 is used in the epilogue to hold an
// address when PAuthLR is enabled, so it is unavailable here).
1002 def TailCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1003 // BTI off, PAuthLR on: Any non-callee-saved register except x16
1004 def TailCallNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1005 // BTI off, PAuthLR off: Any non-callee-saved register
1006 def TailCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
// Selects between hardened and plain indirect-call forms depending on whether
// straight-line-speculation (SLS) BLR hardening is enabled on the subtarget.
1008 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1009 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1011 // optimizing. This allows us to selectively use patterns without impacting
1013 // FIXME: One day there will probably be a nicer way to check for this, but
1015 def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
1022 //===----------------------------------------------------------------------===//
1024 //===----------------------------------------------------------------------===//
1026 //===----------------------------------------------------------------------===//
1030 // We set Sched to empty list because we expect these instructions to simply get
1043 // stack-clash protection is enabled.
1051 // stack-clash protection is enabled.
1058 // when stack-clash protection is enabled.
1131 // In general these get lowered into a sequence of three 4-byte instructions.
1132 // 32-bit jump table destination is actually only 2 instructions since we can
1133 // use the table itself as a PC-relative base. But optimization occurs after
1148 // A hardened but more expensive version of jump-table dispatch.
1151 // a plain BR) in a single non-attackable sequence.
1153 // We take the final entry index as an operand to allow isel freedom. This does
1154 // mean that the index can be attacker-controlled. To address that, we also do
1156 // jump-table array. When it doesn't, this branches to the first entry.
1157 // We might want to trap instead.
1160 // to avoid signing jump-table entries and turning them into pointers.
1175 // Space-consuming pseudo to aid testing of placement and reachability
1177 // occupies; register operands can be used to enforce dependency and constrain
1194 // This gets lowered to a pair of 4-byte instructions.
1198 // This gets lowered to a 4-byte instruction.
1204 //===----------------------------------------------------------------------===//
1206 //===----------------------------------------------------------------------===//
1218 // In order to be able to write readable assembly, LLVM should accept assembly
1220 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1230 // As far as LLVM is concerned this writes to the system's exclusive monitors.
1253 let CRm{1-0} = 0b11;
1254 let Inst{9-8} = 0b10;
1263 // Branch Record Buffer two-word mnemonic instructions
1266 let Inst{31-8} = 0b110101010000100101110010;
1267 let Inst{7-5} = op2;
1280 // ARMv9.4-A Guarded Control Stack
1283 let Inst{20-8} = 0b0100001110111;
1284 let Inst{7-5} = op2;
1294 let Inst{20-19} = 0b01;
1295 let Inst{18-16} = op1;
1296 let Inst{15-8} = 0b01110111;
1297 let Inst{7-5} = op2;
1310 let Inst{20-19} = 0b01;
1311 let Inst{18-16} = op1;
1312 let Inst{15-8} = 0b01110111;
1313 let Inst{7-5} = op2;
1323 // FIXME: mayStore = 1 only needed to match the intrinsic definition
1327 def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1345 let Inst{31-15} = 0b11011001000111110;
1346 let Inst{14-12} = op;
1347 let Inst{11-10} = 0b11;
1348 let Inst{9-5} = Rn;
1349 let Inst{4-0} = Rt;
1355 // ARMv8.2-A Dot Product
1363 // ARMv8.6-A BFloat
1378 // Vector-scalar BFDOT:
1379 // The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1380 // register (the instruction uses a single 32-bit lane from it), so the pattern
1398 // Round FP32 to BF16.
1411 // The second operand is used in the dup operation to repeat the indexed
1436 // ARMv8.2-A FP16 Fused Multiply-Add Long
1448 // Armv8.2-A Crypto extensions
1565 // v8.3a complex add and multiply-accumulate. No predicate here, that is done
1672 // In order to be able to write readable assembly, LLVM should accept assembly
1674 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1767 // This directly manipulates x16/x17 to materialize the discriminator.
1781 let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1800 // guarantees are safe to use for sensitive operations.
1812 let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1828 // guarantees are safe to use for sensitive operations.
1840 // AUT and re-PAC a value, using different keys/data.
1842 // guarantees are safe to use for sensitive operations.
1889 // Size 16: 4 fixed + 8 variable, to compute discriminator.
1916 // v9.5-A pointer authentication extensions
1919 // disassembling if we don't have the pauth-lr feature.
1969 let Inst{20-5} = 0b0000001000000000;
1981 let Inst{18-16} = 0b000;
1982 let Inst{11-8} = 0b0000;
1983 let Unpredictable{11-8} = 0b1111;
1984 let Inst{7-5} = 0b001;
1988 let Inst{18-16} = 0b000;
1989 let Inst{11-8} = 0b0000;
1990 let Unpredictable{11-8} = 0b1111;
1991 let Inst{7-5} = 0b010;
1996 // Armv8.5-A speculation barrier
1998 let Inst{20-5} = 0b0001100110000111;
1999 let Unpredictable{11-8} = 0b1111;
2023 // This gets lowered into a 24-byte instruction sequence
2108 //===----------------------------------------------------------------------===//
2110 //===----------------------------------------------------------------------===//
2189 // directly to the real instructions and get rid of these pseudos.
2201 // If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
2202 // eventual expansion code fewer bits to worry about getting right. Marshalling
2215 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
2222 // The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2230 return CurDAG->getTargetConstant(
2231 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
2235 return CurDAG->getTargetConstant(
2236 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
2277 //===----------------------------------------------------------------------===//
2279 //===----------------------------------------------------------------------===//
2307 return N->getOpcode() == ISD::CopyFromReg &&
2308 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2311 // Use SUBS instead of SUB to enable CSE between SUBS and SUB.
2333 // Because of the immediate format for add/sub-imm instructions, the
2334 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2347 // Because of the immediate format for add/sub-imm instructions, the
2348 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2400 // Multiply-add
2528 // Multiply-high
2543 // v8.1 atomic CAS
2544 defm CAS : CompareAndSwap<0, 0, "">;
2602 // v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR"
2689 // Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
2702 // A variant of the above where $Rn2 is an independent register not tied to the input register $Rn.
2703 // Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back).
2717 //===----------------------------------------------------------------------===//
2719 //===----------------------------------------------------------------------===//
2728 // used). Actually, it seems to be working right now, but putting logical_immXX
2786 //===----------------------------------------------------------------------===//
2788 //===----------------------------------------------------------------------===//
2826 // Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2834 //===----------------------------------------------------------------------===//
2836 //===----------------------------------------------------------------------===//
2849 //===----------------------------------------------------------------------===//
2851 //===----------------------------------------------------------------------===//
2859 uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2860 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2864 uint64_t enc = 31 - N->getZExtValue();
2865 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2868 // min(7, 31 - shift_amt)
2870 uint64_t enc = 31 - N->getZExtValue();
2872 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2875 // min(15, 31 - shift_amt)
2877 uint64_t enc = 31 - N->getZExtValue();
2879 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2883 uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2884 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2888 uint64_t enc = 63 - N->getZExtValue();
2889 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2892 // min(7, 63 - shift_amt)
2894 uint64_t enc = 63 - N->getZExtValue();
2896 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2899 // min(15, 63 - shift_amt)
2901 uint64_t enc = 63 - N->getZExtValue();
2903 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2906 // min(31, 63 - shift_amt)
2908 uint64_t enc = 63 - N->getZExtValue();
2910 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2952 //===----------------------------------------------------------------------===//
2954 //===----------------------------------------------------------------------===//
2958 //===----------------------------------------------------------------------===//
2960 //===----------------------------------------------------------------------===//
2993 def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2995 def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2997 def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2999 def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
3001 def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
3003 def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
3053 //===----------------------------------------------------------------------===//
3054 // PC-relative instructions.
3055 //===----------------------------------------------------------------------===//
3075 //===----------------------------------------------------------------------===//
3077 //===----------------------------------------------------------------------===//
3085 // Default to the LR register.
3125 // Create a separate pseudo-instruction for codegen to use so that we don't
3135 // This is a directive-like pseudo-instruction. The purpose is to insert an
3143 // Pseudo instruction to tell the streamer to emit a 'B' character into the
3147 // Pseudo instruction to tell the streamer to emit a 'G' character into the
3151 // FIXME: maybe the scratch register used shouldn't be fixed to X1?
3153 // This gets lowered to an instruction sequence which takes 16 bytes
3163 //===----------------------------------------------------------------------===//
3165 //===----------------------------------------------------------------------===//
3168 // Armv8.8-A variant form which hints to the branch predictor that
3169 // this branch is very likely to go the same way nearly all the time
3173 //===----------------------------------------------------------------------===//
3174 // Compare-and-branch instructions.
3175 //===----------------------------------------------------------------------===//
3179 //===----------------------------------------------------------------------===//
3180 // Test-bit-and-branch instructions.
3181 //===----------------------------------------------------------------------===//
3185 //===----------------------------------------------------------------------===//
3187 //===----------------------------------------------------------------------===//
3197 //===----------------------------------------------------------------------===//
3199 //===----------------------------------------------------------------------===//
3213 // DCPSn defaults to an immediate operand of zero if unspecified.
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3235 // Pair (pre-indexed)
3246 // Pair (post-indexed)
3272 //---
3274 //---
3282 // Floating-point
3291 // Load sign-extended half-word
3295 // Load sign-extended byte
3299 // Load sign-extended word
3302 // Pre-fetch.
3306 // Thus, it is safe to directly map the vector loads with interesting
3308 // FIXME: We could do the same for bitconvert to floating point vectors.
3371 // We must do vector loads with LD1 in big-endian.
3385 // We must do vector loads with LD1 in big-endian.
3397 // zextload -> i64
3416 // zextloadi1 -> zextloadi8
3419 // extload -> zextload
3424 // extloadi1 -> zextloadi8
3429 // zextload -> i64
3441 // extload -> zextload
3446 // zextloadi1 -> zextloadi8
3450 //---
3452 //---
3482 // Thus, it is safe to directly map the vector loads with interesting
3484 // FIXME: We could do the same for bitconvert to floating point vectors.
3519 // We must use LD1 to perform vector loads in big-endian.
3540 // We must use LD1 to perform vector loads in big-endian.
3569 // zextload -> i64
3575 // zextloadi1 -> zextloadi8
3581 // extload -> zextload
3597 // load sign-extended half-word
3607 // load sign-extended byte
3617 // load sign-extended word
3623 // load zero-extended word
3627 // Pre-fetch.
3635 //---
3640 const DataLayout &DL = MF->getDataLayout();
3641 Align Align = G->getGlobal()->getPointerAlignment(DL);
3642 return Align >= 4 && G->getOffset() % 4 == 0;
3645 return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
3662 // load sign-extended word
3675 //---
3750 // anyext -> zext
3782 //---
3783 // LDR mnemonics fall back to LDUR for negative or unaligned offsets.
3785 // Define new assembler match classes as we want to only match these when
3788 // canonical form (the scaled operand) to take precedence.
3834 // zextload -> i64
3840 // load sign-extended half-word
3850 // load sign-extended byte
3860 // load sign-extended word
3866 // zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
3897 // Half-vector patterns
3933 // Pre-fetch.
3938 //---
3946 // load sign-extended half-word
3950 // load sign-extended byte
3954 // load sign-extended word
3957 //---
3958 // (immediate pre-indexed)
3969 // load sign-extended half-word
3973 // load sign-extended byte
3977 // load zero-extended byte
3981 // load sign-extended word
3984 //---
3985 // (immediate post-indexed)
3996 // load sign-extended half-word
4000 // load sign-extended byte
4004 // load zero-extended byte
4008 // load sign-extended word
4011 //===----------------------------------------------------------------------===//
4013 //===----------------------------------------------------------------------===//
4016 // FIXME: Use dedicated range-checked addressing mode operand here.
4025 // Pair (pre-indexed)
4034 // Pair (post-indexed)
4059 //---
4069 // Floating-point
4124 // We must use ST1 to store vectors in big-endian.
4138 // We must use ST1 to store vectors in big-endian.
4150 // Match stores from lane 0 to the appropriate subreg's store.
4177 //---
4226 // We must use ST1 to store vectors in big-endian.
4253 // We must use ST1 to store vectors in big-endian.
4292 // Match stores from lane 0 to the appropriate subreg's store.
4313 //---
4377 // We must use ST1 to store vectors in big-endian.
4403 // We must use ST1 to store vectors in big-endian.
4443 // Match stores from lane 0 to the appropriate subreg's store.
4460 //---
4461 // STR mnemonics fall back to STUR for negative or unaligned offsets.
4484 //---
4492 //---
4493 // (immediate pre-indexed)
4548 //---
4549 // (immediate post-indexed)
4611 //===----------------------------------------------------------------------===//
4613 //===----------------------------------------------------------------------===//
4636 Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn
4637 of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an
4639 LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
4641 case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4673 // v8.1a "Limited Order Region" extension load-acquire instructions
4679 // v8.1a "Limited Order Region" extension store-release instructions
4692 //===----------------------------------------------------------------------===//
4693 // Scaled floating point to integer conversion instructions.
4694 //===----------------------------------------------------------------------===//
4833 //===----------------------------------------------------------------------===//
4834 // Scaled integer to floating point conversion instructions.
4835 //===----------------------------------------------------------------------===//
4868 //===----------------------------------------------------------------------===//
4869 // Unscaled integer to floating point conversion instruction.
4870 //===----------------------------------------------------------------------===//
4905 //===----------------------------------------------------------------------===//
4907 //===----------------------------------------------------------------------===//
4910 // Helper to get bf16 into fp32.
4920 // Pattern for bf16 -> fp32.
4923 // Pattern for bf16 -> fp64.
4927 //===----------------------------------------------------------------------===//
4929 //===----------------------------------------------------------------------===//
4954 // Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
4989 //===----------------------------------------------------------------------===//
4991 //===----------------------------------------------------------------------===//
5052 //===----------------------------------------------------------------------===//
5054 //===----------------------------------------------------------------------===//
5070 // Here we handle first -(a + b*c) for FNMADD:
5082 // Now it's time for "(-a) + (-b)*c"
5094 //===----------------------------------------------------------------------===//
5096 //===----------------------------------------------------------------------===//
5101 //===----------------------------------------------------------------------===//
5103 //===----------------------------------------------------------------------===//
5108 //===----------------------------------------------------------------------===//
5110 //===----------------------------------------------------------------------===//
5118 // CSEL instructions providing f128 types need to be handled by a
5119 // pseudo-instruction since the eventual code will need to introduce basic
5132 //===----------------------------------------------------------------------===//
5134 //===----------------------------------------------------------------------===//
5159 //===----------------------------------------------------------------------===//
5176 //===----------------------------------------------------------------------===//
5178 //===----------------------------------------------------------------------===//
5189 //===----------------------------------------------------------------------===//
5191 //===----------------------------------------------------------------------===//
5195 // Match UABDL in log2-shuffle patterns.
5340 // Aliases for MVN -> NOT.
5390 // Patterns for vector long shift (by element width). These need to match all
5391 // three of zext, sext and anyext so it's easier to pull the patterns out of the
5420 // trunc(umin(X, 255)) -> UQXTRN v8i8
5423 // trunc(umin(X, 65535)) -> UQXTRN v4i16
5426 // trunc(smin(smax(X, -128), 128)) -> SQXTRN
5434 // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
5443 // concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
5448 // concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
5454 // concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
5467 // concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
5492 //===----------------------------------------------------------------------===//
5494 //===----------------------------------------------------------------------===//
5904 //===----------------------------------------------------------------------===//
5906 //===----------------------------------------------------------------------===//
5994 //===----------------------------------------------------------------------===//
5996 //===----------------------------------------------------------------------===//
6011 //===----------------------------------------------------------------------===//
6013 //===----------------------------------------------------------------------===//
6057 // Round FP64 to BF16.
6156 // Some float -> int -> float conversion patterns for which we want to keep the
6157 // int values in FP registers using the corresponding NEON instructions to
6158 // avoid more costly int <-> fp register transfers.
6176 // int -> float conversion of value in lane 0 of simd vector should use
6177 // correct cvtf variant to avoid costly fpr <-> gpr register transfers.
6190 // fp16: integer extraction from vector must be at least 32-bits to be legal.
6191 // Actual extraction result is then an in-reg sign-extension of lower 16-bits.
6197 // unsigned 32-bit extracted element is truncated to 16-bits using AND
6203 // If an integer is about to be converted to a floating point value,
6205 // Here are the patterns for 8 and 16-bits to float.
6206 // 8-bits -> float.
6236 // 16-bits -> float.
6247 // 32-bits are handled in target specific dag combine:
6249 // 64-bits integer to 32-bits floating point, not possible with
6253 // Here are the patterns for 8, 16, 32, and 64-bits to double.
6254 // 8-bits -> double.
6265 // 16-bits -> double.
6276 // 32-bits -> double.
6287 // 64-bits -> double are handled in target specific dag combine:
6291 //===----------------------------------------------------------------------===//
6292 // Advanced SIMD three different-sized vector instructions.
6293 //===----------------------------------------------------------------------===//
6463 //----------------------------------------------------------------------------
6465 //----------------------------------------------------------------------------
6470 return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
6477 // We use EXT to handle extract_subvector to copy the upper 64-bits of a
6478 // 128-bit vector.
6481 // A 64-bit EXT of two halves of the same 128-bit register can be done as a
6482 // single 128-bit EXT.
6487 // A 64-bit EXT of the high half of a 128-bit register can be done using a
6488 // 128-bit EXT of the whole register with an adjustment to the immediate. The
6508 //----------------------------------------------------------------------------
6510 //----------------------------------------------------------------------------
6524 // concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
6525 // concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
6526 // concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
6535 // trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
6536 // trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
6537 // trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
6566 //----------------------------------------------------------------------------
6568 //----------------------------------------------------------------------------
6585 //----------------------------------------------------------------------------
6587 //----------------------------------------------------------------------------
6593 //----------------------------------------------------------------------------
6595 //----------------------------------------------------------------------------
6599 //----------------------------------------------------------------------------
6601 //----------------------------------------------------------------------------
6611 // below, so the second operand does not matter. Re-use the first input
6612 // operand, so no additional dependencies need to be introduced.
6659 //----------------------------------------------------------------------------
6661 //----------------------------------------------------------------------------
6679 // DUP from a 64-bit register to a 64-bit register is just a copy
6732 // instruction even if the types don't match: we just have to remap the lane
6733 // carefully. N.b. this trick only applies to truncations.
6735 return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
6738 return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
6741 return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
6807 // Extracting i8 or i16 elements will have the zero-extend transformed to
6978 // FIXME refactor to a shared class/dev parameterized on vector type, vector
7045 // vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
7057 // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
7089 // All concat_vectors operations are canonicalised to act on i64 vectors for
7097 // If the high lanes are zero we can instead emit a d->d register mov, which
7116 //----------------------------------------------------------------------------
7118 //----------------------------------------------------------------------------
7215 // Patterns for across-vector intrinsics, that have a node equivalent, that
7221 // directly match the latter to the instruction.
7239 // If none did, fallback to the explicit patterns, consuming the vector_extract.
7344 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7349 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
7370 // because GlobalISel allows us to specify the return register to be a FPR
7470 // The vaddlv_s32 intrinsic gets mapped to SADDLP.
7476 // The vaddlv_u32 intrinsic gets mapped to UADDLP.
7483 //------------------------------------------------------------------------------
7485 //------------------------------------------------------------------------------
7568 // Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
7589 // Using the MOVI to materialize fp constants.
7673 //----------------------------------------------------------------------------
7675 //----------------------------------------------------------------------------
7685 // On the other hand, there are quite a few valid combinatorial options due to
7686 // the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7702 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7722 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7743 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
7744 // (DUPLANE from 64-bit would be trivial).
7755 // 2 variants for 32-bit scalar version: extract from .2s or from .4s
7769 // 1 variant for 64-bit scalar version: extract from .1d or from .2d
7838 //----------------------------------------------------------------------------
7840 //----------------------------------------------------------------------------
7847 // Having the same base pattern for fp <--> int totally freaks it out.
7877 // Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
7959 //----------------------------------------------------------------------------
7961 //----------------------------------------------------------------------------
8143 // Vector bf16 -> fp32 is implemented morally as a zext + shift.
8259 // If an integer is about to be converted to a floating point value,
8263 // The sign extension has to be explicitly added and is only supported for
8264 // one step: byte-to-half, half-to-word, word-to-doubleword.
8265 // SCVTF GPR -> FPR is 9 cycles.
8266 // SCVTF FPR -> FPR is 4 cyclces.
8267 // (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
8268 // Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
8271 // 8-bits -> float. 2 sizes step-up.
8297 // 16-bits -> float. 1 size step-up.
8318 // 32-bits to 32-bits are handled in target specific dag combine:
8320 // 64-bits integer to 32-bits floating point, not possible with
8324 // Here are the patterns for 8, 16, 32, and 64-bits to double.
8325 // 8-bits -> double. 3 size step-up: give up.
8326 // 16-bits -> double. 2 size step.
8351 // 32-bits -> double. 1 size step-up.
8372 // 64-bits -> double are handled in target specific dag combine:
8376 //----------------------------------------------------------------------------
8377 // AdvSIMD Load-Store Structure
8378 //----------------------------------------------------------------------------
8414 //---
8415 // Single-element
8416 //---
8511 // In this case, the index must be adjusted to match LD1 type.
8531 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8534 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
8537 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8693 //----------------------------------------------------------------------------
8695 //----------------------------------------------------------------------------
8745 //----------------------------------------------------------------------------
8746 // Compiler-pseudos
8747 //----------------------------------------------------------------------------
8755 // When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8760 // To sign extend, we use a signed bitfield move instruction (SBFM) on the
8761 // containing super-reg.
8797 // AddedComplexity for the following patterns since we want to match sext + sra
8798 // patterns before we attempt to match a single sra node.
8801 // original value which is to be sign extended. E.g. we support shifts up to
8802 // bitwidth-1 bits.
8818 // To truncate, we can simply extract from a subregister.
8827 return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8880 // v1 = BITCAST v2i32 v0 to v4i16
8884 // STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8885 // is, they treat the vector as a sequence of elements to be byte-swapped.
8887 // to use LD1/ST1 only to simplify compiler implementation.
8893 // v2 = BITCAST v2i32 v1 to v4i16
8897 // But this is now broken - the value stored is different to the value loaded
8898 // due to lane reordering. To fix this, on every BITCAST we must perform two
8903 // v3 = BITCAST v2i32 v2 to v4i16
8912 // There is also no 128-bit REV instruction. This must be synthesized with an
8916 // a) Identity conversions - vNfX <-> vNiX
8917 // b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
9515 // A 64-bit subvector insert to the first 128-bit vector position
9539 // Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
9547 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
9557 // Prefer using the bottom lanes of addp Rn, Rn compared to
9576 // add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y)
9601 // Scalar 64-bit shifts in FPR64 registers.
9611 // Patterns for nontemporal/no-allocate stores.
9612 // We have to resort to tricks to turn a single-input store into a store pair,
9613 // because there is no single-input nontemporal store, only STNP.
9635 // FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
9650 // Tail call return handling. These are all compiler pseudo-instructions,
9657 // Indirect tail-call with any register allowed, used by MachineOutliner when
9659 // FIXME: If we have to add any more hacks like this, we should instead relax
9664 // Indirect tail-calls with reduced register classes, needed for BTI and
9698 // to reason about, so is preferred when it's possible to use it.
9749 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
9810 // FIXME: add patterns to generate vector by element dot product.
9811 // FIXME: add SVE dot-product patterns.
9814 // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
9815 // so that it can be used as input to inline asm, and vice versa.
9823 foreach i = 0-7 in {
9883 // MOPS operations always contain three 4-byte instructions
9905 //-----------------------------------------------------------------------------
9913 //-----------------------------------------------------------------------------
9925 //===----------------------------===//
9927 //===----------------------------===//
9934 //===----------------------------------------------------------------------===//
9936 //===----------------------------------------------------------------------===//
9943 //===----------------------------------------------------------------------===//
9944 // General Data-Processing Instructions (FEAT_V94_DP)
9945 //===----------------------------------------------------------------------===//
9962 let Inst{2-0} = Rt{2-0};
9963 let Inst{4-3} = 0b11;
9964 let Inst{9-5} = Rn;
9965 let Inst{11-10} = 0b10;
9966 let Inst{13-12} = Rt{4-3};
9969 let Inst{20-16} = Rm;
9970 let Inst{31-21} = 0b11111000101;
9975 // Fail, the decoder should attempt to decode RPRFM. This requires setting
9976 // the decoder namespace to "Fallback".
9980 //===----------------------------------------------------------------------===//
9981 // 128-bit Atomics (FEAT_LSE128)
9982 //===----------------------------------------------------------------------===//
9998 //===----------------------------------------------------------------------===//
10000 //===----------------------------------------------------------------------===//
10004 def STILPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
10005 def STILPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
10021 def STLRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
10022 def STLRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
10048 //===----------------------------------------------------------------------===//
10049 // 128-bit System Instructions (FEAT_SYSINSTR128)
10050 //===----------------------------------------------------------------------===//
10060 // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?)
10062 // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one
10064 // is based off of the asm template (maybe) and therefore wants to print 5 operands.
10065 // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would
10073 let Inst{20-19} = 0b01;
10074 let Inst{18-16} = op1;
10075 let Inst{15-12} = Cn;
10076 let Inst{11-8} = Cm;
10077 let Inst{7-5} = op2;
10078 let Inst{4-0} = 0b11111;
10085 //---
10086 // 128-bit System Registers (FEAT_SYSREG128)
10087 //---
10108 let Inst{20-5} = systemreg;
10116 let Inst{20-5} = systemreg;
10120 //===----------------------------===//
10122 //===----------------------------===//
10165 //===----------------------------------------------------------------------===//
10167 //===----------------------------------------------------------------------===//
10173 // Scalar multiply-add/subtract
10184 // Extract the LSB of the fp32 *truncated* to bf16.