//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//

class AssemblerPredicateWithAll<dag cond, string name="">
    : AssemblerPredicate<(any_of FeatureAll, cond), name>;
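
// Editorial note (illustrative sketch, not a definition from this file): each
// predicate below pairs a codegen-time Subtarget check with an assembler
// feature, e.g.
//   def HasFoo : Predicate<"Subtarget->hasFoo()">,
//                AssemblerPredicateWithAll<(all_of FeatureFoo), "foo">;
// Wrapping the condition in (any_of FeatureAll, cond) lets the catch-all
// FeatureAll satisfy every such assembler predicate at once; HasFoo and
// FeatureFoo are placeholder names.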

def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
                                 AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3           : Predicate<"Subtarget->hasEL3()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR       : Predicate<"Subtarget->hasPAuthLR()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum      : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4         : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI          : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO      : Predicate<"Subtarget->hasRCPC_IMMO()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                               AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->isNeonAvailable()">,
                                 AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasNoFullFP16    : Predicate<"!Subtarget->hasFullFP16()">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                 "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->isSVEAvailable()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVEB16B16     : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEB16B16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVEB16B16), "sve-b16b16">;
def HasSVE2          : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1        : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVEAES       : Predicate<"Subtarget->hasSVEAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVEAES), "sve-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVEBitPerm   : Predicate<"Subtarget->hasSVEBitPerm()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVEBitPerm), "sve-bitperm">;
def HasSMEandIsNonStreamingSafe
                     : Predicate<"Subtarget->hasSME()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSME           : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64       : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSMEB16B16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEB16B16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEB16B16), "sme-b16b16">;
def HasSME2andIsNonStreamingSafe
                     : Predicate<"Subtarget->hasSME2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2          : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1        : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFP8           : Predicate<"Subtarget->hasFP8()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX      : Predicate<"Subtarget->hasFAMINMAX()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA        : Predicate<"Subtarget->hasFP8FMA()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
def HasSSVE_FP8FMA   : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
                                 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
                                                           (all_of FeatureSVE2, FeatureFP8FMA)),
                                                           "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2       : Predicate<"Subtarget->hasFP8DOT2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2  : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
                                 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
                                                           (all_of FeatureSVE2, FeatureFP8DOT2)),
                                "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4       : Predicate<"Subtarget->hasFP8DOT4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
                                 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                           (all_of FeatureSVE2, FeatureFP8DOT4)),
                                 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT          : Predicate<"Subtarget->hasLUT()">,
                                 AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2    : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
def HasSME_MOP4     : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME_MOP4), "sme-mop4">;
def HasSME_TMOP     : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME_TMOP), "sme-tmop">;

def HasCMPBR        : Predicate<"Subtarget->hasCMPBR()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">;
def HasF8F32MM      : Predicate<"Subtarget->hasF8F32MM()">,
                                 AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">;
def HasF8F16MM      : Predicate<"Subtarget->hasF8F16MM()">,
                                 AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">;
def HasFPRCVT       : Predicate<"Subtarget->hasFPRCVT()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">;
def HasLSFE         : Predicate<"Subtarget->hasLSFE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">;
def HasSME2p2       : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">;
def HasSVEAES2      : Predicate<"Subtarget->hasSVEAES2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">;
def HasSVEBFSCALE   : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
def HasPCDPHINT      : Predicate<"Subtarget->hasPCDPHINT()">,
                       AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
def HasLSUI          : Predicate<"Subtarget->hasLSUI()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
def HasOCCMO         : Predicate<"Subtarget->hasOCCMO()">,
                       AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// so they should be enabled if either feature has been specified.
def HasSVE_or_SME
    : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                "sve or sme">;
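
// Illustrative usage sketch (editorial, not from this file): instruction
// definitions guard themselves with these combined predicates, e.g.
//   let Predicates = [HasSVE_or_SME] in {
//     def FOO : ...;  // FOO is a placeholder for a real instruction def
//   }
// so one pattern serves both -mattr=+sve targets and streaming-mode +sme
// targets.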
def HasNonStreamingSVE_or_SME2p2
    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2),
                "sve or sme2p2">;
def HasSVE2_or_SME
    : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                "sve2 or sme">;
def HasSVE2_or_SME2
    : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                "sve2 or sme2">;
def HasNonStreamingSVE2_or_SSVE_AES
    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_AES), "sve2 or ssve-aes">;
def HasSVE2p1_or_SME
    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_SME2
    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_SME2p1
    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
def HasSVE2p2_or_SME2p2
    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2), "sme2p2 or sve2p2">;
def HasNonStreamingSVE2p1_or_SSVE_AES
    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()) ||"
                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2p1, FeatureSSVE_AES), "sve2p1 or ssve-aes">;
def HasSMEF16F16_or_SMEF8F16
    : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
                AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
                "sme-f16f16 or sme-f8f16">;
def HasNonStreamingSVE2p2_or_SME2p2
    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||"
                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2p2, FeatureSME2p2),
                "sme2p2 or sve2p2">;
def HasNonStreamingSVE2_or_SSVE_BitPerm
    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_BitPerm())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_BitPerm), "sve2 or ssve-bitperm">;

// A subset of NEON instructions are legal in Streaming SVE execution mode,
// so they do not need the additional check for 'isNeonAvailable'.
def HasNEONandIsStreamingSafe
    : Predicate<"Subtarget->hasNEON()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
def HasNEONandIsSME2p2StreamingSafe
    : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
    AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes      : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasNoBF16        : Predicate<"!Subtarget->hasBF16()">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasFPAC          : Predicate<"Subtarget->hasFPAC()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPAC), "fpac">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC           : Predicate<"Subtarget->hasHBC()">,
                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE           : Predicate<"Subtarget->hasITE()">,
                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE           : Predicate<"Subtarget->hasTHE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128          : Predicate<"Subtarget->hasD128()">,
                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK           : Predicate<"Subtarget->hasCHK()">,
                       AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS           : Predicate<"Subtarget->hasGCS()">,
                       AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA           : Predicate<"Subtarget->hasCPA()">,
                       AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                              [SDTCisSameAs<0, 2>,
                                               SDTCisSameAs<0, 3>,
                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                            [SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>,
                                             SDTCisInt<0>,
                                             SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                            [SDTCisSameAs<0, 2>,
                                             SDTCisSameAs<0, 3>,
                                             SDTCisInt<0>,
                                             SDTCisVT<1, i32>,
                                             SDTCisVT<4, i32>]>;
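
// Editorial note: in an SDTypeProfile the constraint indices count results
// first, then operands. For the <2, 2> profile of SDTBinaryArithWithFlagsOut
// above that means #0 = RES1, #1 = FLAGS (i32 NZCV), #2 = LHS, #3 = RHS, so
// SDTCisSameAs<0, 2> reads "the result has the same type as the LHS".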

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                      SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisInt<3>,
                                    SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
                                            SDTCisFP<1>,
                                            SDTCisSameAs<2, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                 SDTCisSameAs<0,1>,
                                                 SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>,
                                           SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                 SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
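
// Editorial inference (not a comment from the original file): unlike the i64
// profiles above, SDT_AArch64ldnp/SDT_AArch64stnp use v4i32 because the
// non-temporal pair nodes are formed for wide accesses split into two
// 128-bit (Q-register) halves, with v4i32 standing in for each half.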

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (The TPIDR_EL0 offset is returned directly in X0, hence no "result" here.)
// The node's single operand is the thread-local variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
                                          [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
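
// Illustrative only (hypothetical pattern, not one defined in this file):
// these refined fragments let selection patterns key directly on the
// in-memory element type, along the lines of
//   def : Pat<(nxv8i16 (azext_masked_load_i8 GPR64:$base, PPR:$pred, undef)),
//             (LD1B_H PPR:$pred, GPR64:$base, ...)>;
// where LD1B_H and the exact operand list are placeholders.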
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
          cast<MaskedLoadSDNode>(N)->isUnindexed() &&
          cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}

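// Each defm below expands the multiclass four ways via the NAME#
// concatenations above; e.g. "defm nonext_masked_gather" yields
// nonext_masked_gather_signed_scaled, nonext_masked_gather_signed_unscaled,
// nonext_masked_gather_unsigned_scaled and
// nonext_masked_gather_unsigned_unscaled.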
defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - return true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - return true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
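
// Editorial example: top16Zero matches an i32 whose high 16 bits are provably
// zero (e.g. the result of an AND with 0xffff or a 16-bit zero-extending
// load), as computed by MaskedValueIsZero; topbitsallzero32/64 likewise match
// values known to be 0 or 1, such as the result of a comparison.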

// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
                                      SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                       SDNPVariadic]>;

def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
                                                   SDTCisVT<1, i32>,
                                                   SDTCisVT<2, i64>,
                                                   SDTCisVT<3, i64>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64authtcret: SDNode<"AArch64ISD::AUTH_TC_RETURN",
                             SDTypeProfile<0, 5, [SDTCisPtrTy<0>,
                                                  SDTCisVT<2, i32>,
                                                  SDTCisVT<3, i64>,
                                                  SDTCisVT<4, i64>]>,
                             [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;

def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
                                                       SDTCisPtrTy<1>,
                                                       SDTCisVT<2, i32>,
                                                       SDTCisVT<3, i64>,
                                                       SDTCisVT<4, i64>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64cbnz           : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;
def AArch64tbnz           : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn >;
def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp         : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp     : PatFrags<(ops node:$lhs, node:$rhs),
                                   [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                    (AArch64fcmp node:$lhs, node:$rhs)]>;
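
// AArch64any_fcmp lets a single selection pattern cover both the strict
// (chain-carrying, FP-exception-aware) FCMP and the ordinary one: PatFrags
// tries each listed alternative in turn when matching.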

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16_scalar : SDNode<"AArch64ISD::REV16", SDTIntUnaryOp>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;

def AArch64vashr_exact : PatFrag<(ops          node:$lhs, node:$rhs),
                                 (AArch64vashr node:$lhs, node:$rhs), [{
  return N->getFlags().hasExact();
}]>;

def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                        (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
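
// CMTST sets each lane where (LHS & RHS) != 0; the fragment above encodes
// that as NOT(CMEQz(AND LHS, RHS)), i.e. "tested bits not all zero" built
// from the available compare-against-zero node.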

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
                               [(f32 (int_aarch64_sisd_fcvtxn (f64 node:$Rn))),
                                (f32 (AArch64fcvtxn_n (f64 node:$Rn)))]>;
def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
                                 [(int_aarch64_neon_fcvtxn node:$Rn),
                                  (AArch64fcvtxn_n node:$Rn)]>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                  [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                               [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;

def AArch64tlsdesc_auth_callseq : SDNode<"AArch64ISD::TLSDESC_AUTH_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPOutGlue, SDNPHasChain, SDNPVariadic]>;

def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                    SDTCisSameAs<1, 2>]>;
def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
def AArch64usdot    : SDNode<"AArch64ISD::USDOT", SDT_AArch64Dot>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv   : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv   : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;

def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abdu node:$lhs, node:$rhs),
                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abds node:$lhs, node:$rhs),
                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp     : PatFrags<(ops node:$Rn, node:$Rm),
                                [(AArch64addp_n node:$Rn, node:$Rm),
                                 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS),
                            [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
                             (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
def AArch64facge     : PatFrags<(ops node:$Rn, node:$Rm),
                                [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                                 (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt     : PatFrags<(ops node:$Rn, node:$Rm),
                                [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                                 (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
983
984def SDT_AArch64unpk : SDTypeProfile<1, 1, [
985    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
986]>;
987def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
988def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
989def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
990def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
991
992def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
993def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
994def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
995def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
996def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
997def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
998
999def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
1000
1001def AArch64probedalloca
1002    : SDNode<"AArch64ISD::PROBED_ALLOCA",
1003             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
1004             [SDNPHasChain, SDNPMayStore]>;
1005
1006def AArch64mrs : SDNode<"AArch64ISD::MRS",
1007                        SDTypeProfile<2, 1, [SDTCisVT<0, i64>,
1008                                             SDTCisVT<1, i32>,
1009                                             SDTCisVT<2, i32>]>,
1010                        [SDNPHasChain]>;
1011
1012def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
1013def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
1014def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
1015                            [(AArch64rshrnb node:$rs, node:$i),
1016                            (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;
1017
1018def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
1019                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
1020
1021// Match an add node, and also treat an 'or' node as an 'add' if the or'ed
1022// operands have no common bits set.
1023def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
1024                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
1025   if (N->getOpcode() == ISD::ADD)
1026     return true;
1027   return CurDAG->isADDLike(SDValue(N,0));
1028}]> {
1029  let GISelPredicateCode = [{
1030     // Only handle G_ADD for now. FIXME: build the capability to compute
1031     // whether the operands of a G_OR have common bits set or not.
1032     return MI.getOpcode() == TargetOpcode::G_ADD;
1033  }];
1034}
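
// Illustrative example (not from the source): when the low bits of a value
// are known to be clear, e.g.
//   %p = shl i64 %x, 4
//   %r = or i64 %p, 7
// the operands of the 'or' share no set bits, so %r is equivalent to
// (add %p, 7) and the add-based patterns apply.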
1035
1036// Match a mul with enough sign bits that it can be reduced to a smaller mul.
1037def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
1038  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
1039         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
1040}]>;
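
// For example (illustrative): in
//   %wa = sext i32 %a to i64
//   %wb = sext i32 %b to i64
//   %r  = mul i64 %wa, %wb
// both operands have at least 33 sign bits, so the multiply can be selected
// as an SMULL of the 32-bit halves.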
1041
1042// Match "nnan"-flagged calls to fminnum and fmaxnum. These are then
1043// semantically equivalent to fminimum/fmaximum.
1044def fmaxnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
1045                           (fmaxnum node:$Rn, node:$Rm), [{
1046  return N->getFlags().hasNoNaNs();
1047  }]>;
1048def fminnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
1049                           (fminnum node:$Rn, node:$Rm), [{
1050  return N->getFlags().hasNoNaNs();
1051  }]>;
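
// For example (illustrative): (fmaxnum nnan x, y) may be selected as the
// vector FMAX (fmaximum) instruction, since the two operations can only
// disagree on NaN inputs, which the nnan flag rules out.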
1052
1053//===----------------------------------------------------------------------===//
1054
1055//===----------------------------------------------------------------------===//
1056// AArch64 Instruction Predicate Definitions.
1057//===----------------------------------------------------------------------===//
1058// We could compute these on a per-module basis but doing so requires accessing
1059// the Function object through the <Target>Subtarget and objections were raised
1060// to that (see post-commit review comments for r301750).
1061let RecomputePerFunction = 1 in {
1062  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
1063  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
1064  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
1065  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
1066
1067  // Register restrictions for indirect tail-calls:
1068  // - If branch target enforcement is enabled, indirect calls must use x16 or
1069  //   x17, because these are the only registers which can target the BTI C
1070  //   instruction.
1071  // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
1072  //   of the signing instruction. This can't be changed because it is used by a
1073  //   HINT instruction which only accepts x16. We can't load anything from the
1074  //   stack after this because the authentication instruction checks that SP is
1075  //   the same as it was at function entry, so we can't have anything on the
1076  //   stack.
1077
1078  // BTI on, PAuthLR off: x16 or x17
1079  def TailCallX16X17 : Predicate<[{  MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1080  // BTI on, PAuthLR on: x17 only
1081  def TailCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1082  // BTI off, PAuthLR on: Any non-callee-saved register except x16
1083  def TailCallNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1084  // BTI off, PAuthLR off: Any non-callee-saved register
1085  def TailCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1086
1087  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1088  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1089  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
1090  // optimizing. This allows us to selectively use patterns without impacting
1091  // SelectionDAG's behaviour.
1092  // FIXME: One day there will probably be a nicer way to check for this, but
1093  // today is not that day.
1094  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
1095}
1096
1097include "AArch64InstrFormats.td"
1098include "SVEInstrFormats.td"
1099include "SMEInstrFormats.td"
1100
1101//===----------------------------------------------------------------------===//
1102
1103//===----------------------------------------------------------------------===//
1104// Miscellaneous instructions.
1105//===----------------------------------------------------------------------===//
1106
1107let hasSideEffects = 1, isCodeGenOnly = 1 in {
1108let Defs = [SP], Uses = [SP] in {
1109// We set Sched to the empty list because we expect these instructions to
1110// simply be removed in most cases.
1111def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
1112                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
1113                              Sched<[]>;
1114def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
1115                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
1116                            Sched<[]>;
1117
1118}
1119
1120let Defs = [SP, NZCV], Uses = [SP] in {
1121// Probed stack allocation of a constant size, used in function prologues when
1122// stack-clash protection is enabled.
1123def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
1124                               (ins i64imm:$stacksize, i64imm:$fixed_offset,
1125                                i64imm:$scalable_offset),
1126                               []>,
1127                               Sched<[]>;
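
// A rough sketch of the kind of probing loop this expands to (illustrative
// only; the real expansion is emitted by frame lowering and also accounts
// for the fixed and scalable offsets, with 4096 used here as an example
// probe interval):
//   .Lloop:
//     sub  sp, sp, #4096        ; step SP down by the probe interval
//     str  xzr, [sp]            ; touch the newly exposed page
//     cmp  sp, $scratch         ; $scratch holds the final SP value
//     b.ne .Lloop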
1128
1129// Probed stack allocation of a variable size, used in function prologues when
1130// stack-clash protection is enabled.
1131def PROBED_STACKALLOC_VAR : Pseudo<(outs),
1132                                   (ins GPR64sp:$target),
1133                                   []>,
1134                                   Sched<[]>;
1135
1136// Probed stack allocations of a variable size, used for allocas of unknown size
1137// when stack-clash protection is enabled.
1138let usesCustomInserter = 1 in
1139def PROBED_STACKALLOC_DYN : Pseudo<(outs),
1140                                   (ins GPR64common:$target),
1141                                   [(AArch64probedalloca GPR64common:$target)]>,
1142                                   Sched<[]>;
1143
1144} // Defs = [SP, NZCV], Uses = [SP] in
1145} // hasSideEffects = 1, isCodeGenOnly = 1
1146
1147let isReMaterializable = 1, isCodeGenOnly = 1 in {
1148// FIXME: The following pseudo instructions are only needed because remat
1149// cannot handle multiple instructions.  When that changes, they can be
1150// removed, along with the AArch64Wrapper node.
1151
1152let AddedComplexity = 10 in
1153def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
1154                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
1155              Sched<[WriteLDAdr]>;
1156
1157// The MOVaddr instruction should match only when the add is not folded
1158// into a load or store address.
1159def MOVaddr
1160    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1161             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
1162                                            tglobaladdr:$low))]>,
1163      Sched<[WriteAdrAdr]>;
1164def MOVaddrJT
1165    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1166             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
1167                                             tjumptable:$low))]>,
1168      Sched<[WriteAdrAdr]>;
1169def MOVaddrCP
1170    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1171             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
1172                                             tconstpool:$low))]>,
1173      Sched<[WriteAdrAdr]>;
1174def MOVaddrBA
1175    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1176             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
1177                                             tblockaddress:$low))]>,
1178      Sched<[WriteAdrAdr]>;
1179def MOVaddrTLS
1180    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1181             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
1182                                            tglobaltlsaddr:$low))]>,
1183      Sched<[WriteAdrAdr]>;
1184def MOVaddrEXT
1185    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1186             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
1187                                            texternalsym:$low))]>,
1188      Sched<[WriteAdrAdr]>;
1189// Normally AArch64addlow either gets folded into a following ldr/str,
1190// or together with an adrp into MOVaddr above. For cases with TLS, it
1191// might appear without either of them, so allow lowering it into a plain
1192// add.
1193def ADDlowTLS
1194    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
1195             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
1196                                            tglobaltlsaddr:$low))]>,
1197      Sched<[WriteAdr]>;
1198
1199} // isReMaterializable, isCodeGenOnly
1200
1201def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
1202          (LOADgot tglobaltlsaddr:$addr)>;
1203
1204def : Pat<(AArch64LOADgot texternalsym:$addr),
1205          (LOADgot texternalsym:$addr)>;
1206
1207def : Pat<(AArch64LOADgot tconstpool:$addr),
1208          (LOADgot tconstpool:$addr)>;
1209
1210// In general these get lowered into a sequence of three 4-byte instructions.
1211// A 32-bit jump-table destination actually needs only 2 instructions, since we
1212// can use the table itself as a PC-relative base. But that optimization occurs
1213// after branch relaxation, so be pessimistic here.
1214let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
1215    isNotDuplicable = 1 in {
1216def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1217                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1218                      Sched<[]>;
1219def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1220                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1221                      Sched<[]>;
1222def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1223                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1224                     Sched<[]>;
1225}
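
// A sketch of the 2-instruction 32-bit case described above (illustrative,
// not the exact AsmPrinter output; the final BR is a separate instruction):
//   ldrsw $scratch, [$table, $entry, lsl #2]  ; load the signed entry offset
//   add   $dst, $table, $scratch              ; table is the PC-relative base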
1226
1227// A hardened but more expensive version of jump-table dispatch.
1228// This combines the target address computation (otherwise done using the
1229// JumpTableDest pseudos above) with the branch itself (otherwise done using
1230// a plain BR) in a single non-attackable sequence.
1231//
1232// We take the final entry index as an operand to allow isel freedom. This does
1233// mean that the index can be attacker-controlled.  To address that, we also do
1234// limited checking of the offset, mainly ensuring it still points within the
1235// jump-table array.  When it doesn't, this branches to the first entry.
1236// We might want to trap instead.
1237//
1238// This is intended for use in conjunction with ptrauth for other code pointers,
1239// to avoid signing jump-table entries and turning them into pointers.
1240//
1241// Entry index is passed in x16.  Clobbers x16/x17/nzcv.
1242let isNotDuplicable = 1 in
1243def BR_JumpTable : Pseudo<(outs), (ins i32imm:$jti), []>, Sched<[]> {
1244  let isBranch = 1;
1245  let isTerminator = 1;
1246  let isIndirectBranch = 1;
1247  let isBarrier = 1;
1248  let isNotDuplicable = 1;
1249  let Defs = [X16,X17,NZCV];
1250  let Uses = [X16];
1251  let Size = 44; // 28 fixed + 16 variable, for table size materialization
1252}
1253
1254// Space-consuming pseudo to aid testing of placement and reachability
1255// algorithms. Immediate operand is the number of bytes this "instruction"
1256// occupies; register operands can be used to enforce dependency and constrain
1257// the scheduler.
1258let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
1259def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
1260                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
1261            Sched<[]>;
1262
1263let hasSideEffects = 1, isCodeGenOnly = 1 in {
1264  def SpeculationSafeValueX
1265      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
1266  def SpeculationSafeValueW
1267      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
1268}
1269
1270// SpeculationBarrierEndBB must only be used after an unconditional control-flow
1271// transfer, i.e. after a terminator for which isBarrier is True.
1272let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
1273  // This gets lowered to a pair of 4-byte instructions.
1274  let Size = 8 in
1275  def SpeculationBarrierISBDSBEndBB
1276      : Pseudo<(outs), (ins), []>, Sched<[]>;
1277  // This gets lowered to a 4-byte instruction.
1278  let Size = 4 in
1279  def SpeculationBarrierSBEndBB
1280      : Pseudo<(outs), (ins), []>, Sched<[]>;
1281}
1282
1283//===----------------------------------------------------------------------===//
1284// System instructions.
1285//===----------------------------------------------------------------------===//
1286
1287def HINT : HintI<"hint">;
1288def : InstAlias<"nop",  (HINT 0b000)>;
1289def : InstAlias<"yield",(HINT 0b001)>;
1290def : InstAlias<"wfe",  (HINT 0b010)>;
1291def : InstAlias<"wfi",  (HINT 0b011)>;
1292def : InstAlias<"sev",  (HINT 0b100)>;
1293def : InstAlias<"sevl", (HINT 0b101)>;
1294def : InstAlias<"dgh",  (HINT 0b110)>;
1295def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
1296def : InstAlias<"csdb", (HINT 20)>;
1297
1298let Predicates = [HasPCDPHINT] in {
1299    def STSHH: STSHHI;
1300}
1301
1302// In order to be able to write readable assembly, LLVM should accept assembly
1303// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
1304// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1305// should not emit these mnemonics unless BTI is enabled.
1306def : InstAlias<"bti",  (HINT 32), 0>;
1307def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
1308def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
1309def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
1310
1311// v8.2a Statistical Profiling extension
1312def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
1313
1314// As far as LLVM is concerned this writes to the system's exclusive monitors.
1315let mayLoad = 1, mayStore = 1 in
1316def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
1317
1318// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
1319// model patterns with sufficiently fine granularity.
1320let mayLoad = ?, mayStore = ? in {
1321def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
1322                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
1323
1324def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
1325                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
1326
1327def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
1328                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
1329
1330def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
1331  let CRm        = 0b0010;
1332  let Inst{12}   = 0;
1333  let Predicates = [HasTRACEV8_4];
1334}
1335
1336def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
1337  let CRm{1-0}   = 0b11;
1338  let Inst{9-8}  = 0b10;
1339  let Predicates = [HasXS];
1340}
1341
1342let Predicates = [HasWFxT] in {
1343def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
1344def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
1345}
1346
1347// Branch Record Buffer two-word mnemonic instructions
1348class BRBEI<bits<3> op2, string keyword>
1349    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
1350  let Inst{31-8} = 0b110101010000100101110010;
1351  let Inst{7-5} = op2;
1352  let Predicates = [HasBRBE];
1353}
1354def BRB_IALL: BRBEI<0b100, "\tiall">;
1355def BRB_INJ:  BRBEI<0b101, "\tinj">;
1356
1357}
1358
1359// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
1360def : TokenAlias<"INJ", "inj">;
1361def : TokenAlias<"IALL", "iall">;
1362
1363
1364// ARMv9.4-A Guarded Control Stack
1365class GCSNoOp<bits<3> op2, string mnemonic>
1366    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
1367  let Inst{20-8} = 0b0100001110111;
1368  let Inst{7-5} = op2;
1369  let Predicates = [HasGCS];
1370}
1371def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
1372def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
1373def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;
1374
1375class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
1376            list<dag> pattern = []>
1377    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
1378  let Inst{20-19} = 0b01;
1379  let Inst{18-16} = op1;
1380  let Inst{15-8} = 0b01110111;
1381  let Inst{7-5} = op2;
1382  let Predicates = [HasGCS];
1383  let hasSideEffects = 1;
1384}
1385
1386let mayStore = 1, mayLoad = 1 in
1387def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
1388let mayStore = 1 in
1389def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
1390
1391class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
1392            list<dag> pattern = []>
1393    : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> {
1394  let Inst{20-19} = 0b01;
1395  let Inst{18-16} = op1;
1396  let Inst{15-8} = 0b01110111;
1397  let Inst{7-5} = op2;
1398  let Predicates = [HasGCS];
1399  let hasSideEffects = 1;
1400  // The input register is unchanged when GCS is disabled, so we need it as
1401  // both an input and output operand.
1402  let Constraints = "$src = $Rt";
1403}
1404
1405let mayStore = 1, mayLoad = 1 in
1406def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
1407// FIXME: mayStore = 1 only needed to match the intrinsic definition
1408let mayStore = 1, mayLoad = 1 in
1409def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
1410                       [(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
1411def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1412
1413def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
1414def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1415
1416def : TokenAlias<"DSYNC", "dsync">;
1417
1418let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1419  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40",
1420                                   [(set X16, (int_aarch64_chkfeat X16))]>;
1421}
1422def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
1423def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
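
// Note (illustrative): on CPUs without the CHKFEAT mechanism, "hint #40"
// executes as a NOP, so x16 is returned unchanged and none of the queried
// features are reported as enabled.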
1424
1425class GCSSt<string mnemonic, bits<3> op>
1426    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, [$Rn]", "", []>, Sched<[]> {
1427  bits<5> Rt;
1428  bits<5> Rn;
1429  let Inst{31-15} = 0b11011001000111110;
1430  let Inst{14-12} = op;
1431  let Inst{11-10} = 0b11;
1432  let Inst{9-5} = Rn;
1433  let Inst{4-0} = Rt;
1434  let Predicates = [HasGCS];
1435}
1436def GCSSTR  : GCSSt<"gcsstr",  0b000>;
1437def GCSSTTR : GCSSt<"gcssttr", 0b001>;
1438
1439// ARMv8.2-A Dot Product
1440let Predicates = [HasDotProd] in {
1441defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
1442defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1443defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
1444defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1445}
1446
1447// ARMv8.6-A BFloat
1448let Predicates = [HasNEON, HasBF16] in {
1449defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
1450defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
1451def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
1452def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1453def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1454def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1455def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1456def BFCVTN       : SIMD_BFCVTN;
1457def BFCVTN2      : SIMD_BFCVTN2;
1458
1459def : Pat<(concat_vectors (v4bf16 V64:$Rd), (any_fpround (v4f32 V128:$Rn))),
1460          (BFCVTN2 (v8bf16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub)), V128:$Rn)>;
1461
1462// Vector-scalar BFDOT:
1463// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1464// register (the instruction uses a single 32-bit lane from it), so the pattern
1465// is a bit tricky.
1466def : Pat<(v2f32 (int_aarch64_neon_bfdot
1467                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1468                    (v4bf16 (bitconvert
1469                      (v2i32 (AArch64duplane32
1470                        (v4i32 (bitconvert
1471                          (v8bf16 (insert_subvector undef,
1472                            (v4bf16 V64:$Rm),
1473                            (i64 0))))),
1474                        VectorIndexS:$idx)))))),
1475          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1476                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
1477                             VectorIndexS:$idx)>;
1478}
1479
1480let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in {
1481def BFCVT : BF16ToSinglePrecision<"bfcvt">;
1482}
1483
1484// ARMv8.6A AArch64 matrix multiplication
1485let Predicates = [HasMatMulInt8] in {
1486def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
1487def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1488def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1489defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", AArch64usdot>;
1490defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", AArch64usdot>;
1491
1492// sudot lane has a pattern where usdot is expected (there is no sudot).
1493// The second operand is used in the dup operation to repeat the indexed
1494// element.
1495class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
1496                         string rhs_kind, RegisterOperand RegType,
1497                         ValueType AccumType, ValueType InputType>
1498      : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
1499                                        lhs_kind, rhs_kind, RegType, AccumType,
1500                                        InputType, VectorIndexS, null_frag> {
1501  let Pattern = [(set (AccumType RegType:$dst),
1502                      (AccumType (AArch64usdot (AccumType RegType:$Rd),
1503                                 (InputType (bitconvert (AccumType
1504                                    (AArch64duplane32 (v4i32 V128:$Rm),
1505                                        VectorIndexS:$idx)))),
1506                                 (InputType RegType:$Rn))))];
1507}
1508
1509multiclass SIMDSUDOTIndex {
1510  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
1511  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1512}
1513
1514defm SUDOTlane : SIMDSUDOTIndex;
1515
1516}
1517
1518// ARMv8.2-A FP16 Fused Multiply-Add Long
1519let Predicates = [HasNEON, HasFP16FML] in {
1520defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1521defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1522defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1523defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1524defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
1525defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
1526defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1527defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1528}
1529
1530// Armv8.2-A Crypto extensions
1531let Predicates = [HasSHA3] in {
1532def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
1533def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
1534def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
1535def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
1536def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
1537def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
1538def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
1539def XAR       : CryptoRRRi6<"xar">;
1540
1541class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
1542  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
1543        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
1544
1545def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1546          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1547
1548def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
1549def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
1550def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
1551
1552def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
1553def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
1554def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
1555def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
1556
1557class EOR3_pattern<ValueType VecTy>
1558  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1559        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
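
// (EOR3 computes Vd = Vn ^ Vm ^ Va in a single instruction.)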
1560
1561def : EOR3_pattern<v16i8>;
1562def : EOR3_pattern<v8i16>;
1563def : EOR3_pattern<v4i32>;
1564def : EOR3_pattern<v2i64>;
1565
1566class BCAX_pattern<ValueType VecTy>
1567  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
1568        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
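
// (BCAX computes Vd = Vn ^ (Vm & ~Va) in a single instruction.)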
1569
1570def : BCAX_pattern<v16i8>;
1571def : BCAX_pattern<v8i16>;
1572def : BCAX_pattern<v4i32>;
1573def : BCAX_pattern<v2i64>;
1574
1575def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
1576def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
1577def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
1578def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
1579
1580def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
1581def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
1582def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
1583def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
1584
1585def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
1586def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
1587def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
1588def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
1589
1590def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1591          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1592
1593def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
1594          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
1595
1596def : Pat<(xor  (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1597          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
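
// (RAX1 computes Vd = Vn ^ ROL64(Vm, #1); the pattern above recognizes the
// rotate written as an 'or' of a left shift by 1 and a logical right shift
// by 63.)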
1598
1599} // HasSHA3
1600
1601let Predicates = [HasSM4] in {
1602def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
1603def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
1604def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
1605def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
1606def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
1607def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
1608def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
1609def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
1610def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
1611
1612def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
1613          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
1614
1615class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
1616  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1617        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1618
1619class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
1620  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
1621        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
1622
1623class SM4_pattern<Instruction INST, Intrinsic OpNode>
1624  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1625        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1626
1627def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
1628def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
1629
1630def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
1631def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
1632def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
1633def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
1634
1635def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
1636def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
1637} // HasSM4
1638
1639let Predicates = [HasRCPC] in {
1640  // v8.3a Release Consistent processor consistent (RCpc) support, optional in v8.2a.
1641  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
1642  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
1643  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
1644  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
1645}
1646
1647// v8.3a complex add and multiply-accumulate. No predicate here; that is done
1648// inside the multiclass, as the FP16 versions need different predicates.
1649defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1650                                               "fcmla", null_frag>;
1651defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1652                                           "fcadd", null_frag>;
1653defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1654
1655let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1656  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1657            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
1658  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1659            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1660  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1661            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
1662  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1663            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1664}
1665
1666let Predicates = [HasComplxNum, HasNEON] in {
1667  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1668            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
1669  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1670            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1671  foreach Ty = [v4f32, v2f64] in {
1672    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
1673              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
1674    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
1675              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1676  }
1677}
1678
1679multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
1680  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1681            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
1682  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1683            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1684  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1685            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
1686  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1687            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
1688}
1689
1690multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
1691  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1692            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
1693  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1694            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1695  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1696            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
1697  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1698            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
1699}
1700
1701
1702let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1703  defm : FCMLA_PATS<v4f16, V64>;
1704  defm : FCMLA_PATS<v8f16, V128>;
1705
1706  defm : FCMLA_LANE_PATS<v4f16, V64,
1707                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
1708  defm : FCMLA_LANE_PATS<v8f16, V128,
1709                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
1710}
1711let Predicates = [HasComplxNum, HasNEON] in {
1712  defm : FCMLA_PATS<v2f32, V64>;
1713  defm : FCMLA_PATS<v4f32, V128>;
1714  defm : FCMLA_PATS<v2f64, V128>;
1715
1716  defm : FCMLA_LANE_PATS<v4f32, V128,
1717                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
1718}
1719
1720// v8.3a Pointer Authentication
1721// These instructions inhabit part of the hint space and so can be used for
1722// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
1723// important for compatibility with other assemblers (e.g. GAS) when building
1724// software that runs on CPUs both with and without PA.
1725let Uses = [LR], Defs = [LR] in {
1726  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
1727  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
1728  let isAuthenticated = 1 in {
1729    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
1730    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
1731  }
1732}
1733let Uses = [LR, SP], Defs = [LR] in {
1734  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
1735  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
1736  let isAuthenticated = 1 in {
1737    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
1738    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
1739  }
1740}
1741let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
1742  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
1743  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
1744  let isAuthenticated = 1 in {
1745    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
1746    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
1747  }
1748}
1749
1750let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
1751  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
1752}
1753
1754// In order to be able to write readable assembly, LLVM should accept assembly
1755// inputs that use pointer authentication mnemonics, even with PA disabled.
1756// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1757// should not emit these mnemonics unless PA is enabled.
1758def : InstAlias<"paciaz", (PACIAZ), 0>;
1759def : InstAlias<"pacibz", (PACIBZ), 0>;
1760def : InstAlias<"autiaz", (AUTIAZ), 0>;
1761def : InstAlias<"autibz", (AUTIBZ), 0>;
1762def : InstAlias<"paciasp", (PACIASP), 0>;
1763def : InstAlias<"pacibsp", (PACIBSP), 0>;
1764def : InstAlias<"autiasp", (AUTIASP), 0>;
1765def : InstAlias<"autibsp", (AUTIBSP), 0>;
1766def : InstAlias<"pacia1716", (PACIA1716), 0>;
1767def : InstAlias<"pacib1716", (PACIB1716), 0>;
1768def : InstAlias<"autia1716", (AUTIA1716), 0>;
1769def : InstAlias<"autib1716", (AUTIB1716), 0>;
1770def : InstAlias<"xpaclri", (XPACLRI), 0>;
1771
1772// Pseudos
1773
1774let Uses = [LR, SP], Defs = [LR] in {
1775// Insertion point of LR signing code.
1776def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]> {
1777  // When using PAuthLR, the address of one of the instructions this expands
1778  // into is used as an input to the signature calculation, so this must not be
1779  // duplicated.
1780  let isNotDuplicable = 1;
1781}
1782// Insertion point of LR authentication code.
1783// The RET terminator of the containing machine basic block may be replaced
1784// with a combined RETA(A|B) instruction when rewriting this Pseudo.
1785def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1786}
1787
1788def PAUTH_BLEND : Pseudo<(outs GPR64:$disc),
1789                         (ins GPR64:$addr_disc, i32imm:$int_disc), []>, Sched<[]>;
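
// A sketch of the conventional blend (illustrative): the 16-bit integer
// discriminator is inserted into the top bits of the address discriminator:
//   mov  $disc, $addr_disc
//   movk $disc, #int_disc, lsl #48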
1790
1791// These pointer authentication instructions require armv8.3a
1792let Predicates = [HasPAuth] in {
1793
1794  // When PA is enabled, a better mnemonic should be emitted.
1795  def : InstAlias<"paciaz", (PACIAZ), 1>;
1796  def : InstAlias<"pacibz", (PACIBZ), 1>;
1797  def : InstAlias<"autiaz", (AUTIAZ), 1>;
1798  def : InstAlias<"autibz", (AUTIBZ), 1>;
1799  def : InstAlias<"paciasp", (PACIASP), 1>;
1800  def : InstAlias<"pacibsp", (PACIBSP), 1>;
1801  def : InstAlias<"autiasp", (AUTIASP), 1>;
1802  def : InstAlias<"autibsp", (AUTIBSP), 1>;
1803  def : InstAlias<"pacia1716", (PACIA1716), 1>;
1804  def : InstAlias<"pacib1716", (PACIB1716), 1>;
1805  def : InstAlias<"autia1716", (AUTIA1716), 1>;
1806  def : InstAlias<"autib1716", (AUTIB1716), 1>;
1807  def : InstAlias<"xpaclri", (XPACLRI), 1>;
1808
1809  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
1810                      SDPatternOperator op> {
1811    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm,  "ia"), op>;
1812    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm,  "ib"), op>;
1813    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm,  "da"), op>;
1814    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm,  "db"), op>;
1815    def IZA  : SignAuthZero<prefix_z,  0b00, !strconcat(asm, "iza"), op>;
1816    def DZA  : SignAuthZero<prefix_z,  0b10, !strconcat(asm, "dza"), op>;
1817    def IZB  : SignAuthZero<prefix_z,  0b01, !strconcat(asm, "izb"), op>;
1818    def DZB  : SignAuthZero<prefix_z,  0b11, !strconcat(asm, "dzb"), op>;
1819  }
1820
1821  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
1822  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
1823
1824  def XPACI : ClearAuth<0, "xpaci">;
1825  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
1826  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1827
1828  def XPACD : ClearAuth<1, "xpacd">;
1829  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
1830  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;
1831
1832  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
1833
1834  // Combined Instructions
1835  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1836    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
1837    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
1838  }
1839  let isCall = 1, Defs = [LR], Uses = [SP] in {
1840    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
1841    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
1842  }
1843
1844  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1845    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
1846    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
1847  }
1848  let isCall = 1, Defs = [LR], Uses = [SP] in {
1849    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
1850    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
1851  }
1852
1853  // BLRA pseudo, a generalized version of BLRAA/BLRAB/Z.
1854  // This directly manipulates x16/x17 to materialize the discriminator.
1855  // x16/x17 are generally used as the safe registers for sensitive ptrauth
1856  // operations (such as raw address manipulation or discriminator
1857  // materialization here), in part because they're handled in a safer way by
1858  // the kernel, notably on Darwin.
1859  def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1860                                 GPR64:$AddrDisc),
1861                    [(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
1862                                      GPR64:$AddrDisc)]>, Sched<[]> {
1863    let isCodeGenOnly = 1;
1864    let hasSideEffects = 1;
1865    let mayStore = 0;
1866    let mayLoad = 0;
1867    let isCall = 1;
1868    let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1869    let Defs = [X16,X17,LR];
1870    let Uses = [SP];
1871  }
1872
1873  def BLRA_RVMARKER : Pseudo<
1874        (outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1875                     GPR64:$AddrDisc),
1876        [(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
1877                                   GPR64noip:$Rn, timm:$Key, timm:$Disc,
1878                                   GPR64:$AddrDisc)]>, Sched<[]> {
1879    let isCodeGenOnly = 1;
1880    let isCall = 1;
1881    let Defs = [X16,X17,LR];
1882    let Uses = [SP];
1883  }
1884
1885  // BRA pseudo, generalized version of BRAA/BRAB/Z.
1886  // This directly manipulates x16/x17, which are the only registers the OS
1887  // guarantees are safe to use for sensitive operations.
1888  def BRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1889                                GPR64noip:$AddrDisc), []>, Sched<[]> {
1890    let isCodeGenOnly = 1;
1891    let hasNoSchedulingInfo = 1;
1892    let hasSideEffects = 1;
1893    let mayStore = 0;
1894    let mayLoad = 0;
1895    let isBranch = 1;
1896    let isTerminator = 1;
1897    let isBarrier = 1;
1898    let isIndirectBranch = 1;
1899    let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1900    let Defs = [X17];
1901  }
1902
1903  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1904    def RETAA   : AuthReturn<0b010, 0, "retaa">;
1905    def RETAB   : AuthReturn<0b010, 1, "retab">;
1906    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
1907    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
1908  }
1909
1910  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
1911  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
1912
1913  // AUT pseudo.
1914  // This directly manipulates x16/x17, which are the only registers the OS
1915  // guarantees are safe to use for sensitive operations.
1916  def AUT : Pseudo<(outs), (ins i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc),
1917                   []>, Sched<[WriteI, ReadI]> {
1918    let isCodeGenOnly = 1;
1919    let hasSideEffects = 1;
1920    let mayStore = 0;
1921    let mayLoad = 0;
1922    let Size = 32;
1923    let Defs = [X16,X17,NZCV];
1924    let Uses = [X16];
1925  }
1926
1927  // AUT and re-PAC a value, using different keys/data.
1928  // This directly manipulates x16/x17, which are the only registers the OS
1929  // guarantees are safe to use for sensitive operations.
1930  def AUTPAC
1931      : Pseudo<(outs),
1932               (ins i32imm:$AUTKey, i64imm:$AUTDisc, GPR64noip:$AUTAddrDisc,
1933                    i32imm:$PACKey, i64imm:$PACDisc, GPR64noip:$PACAddrDisc),
1934               []>, Sched<[WriteI, ReadI]> {
1935    let isCodeGenOnly = 1;
1936    let hasSideEffects = 1;
1937    let mayStore = 0;
1938    let mayLoad = 0;
1939    let Size = 48;
1940    let Defs = [X16,X17,NZCV];
1941    let Uses = [X16];
1942  }
1943
1944  // Materialize a signed global address, with adrp+add and PAC.
1945  def MOVaddrPAC : Pseudo<(outs),
1946                          (ins i64imm:$Addr, i32imm:$Key,
1947                               GPR64noip:$AddrDisc, i64imm:$Disc), []>,
1948               Sched<[WriteI, ReadI]> {
1949    let isReMaterializable = 1;
1950    let isCodeGenOnly = 1;
1951    let Size = 40; // 12 fixed + 28 variable, for pointer offset and discriminator
1952    let Defs = [X16,X17];
1953  }
1954
1955  // Materialize a signed global address, using a GOT load and PAC.
1956  def LOADgotPAC : Pseudo<(outs),
1957                          (ins i64imm:$Addr, i32imm:$Key,
1958                               GPR64noip:$AddrDisc, i64imm:$Disc), []>,
1959               Sched<[WriteI, ReadI]> {
1960    let isReMaterializable = 1;
1961    let isCodeGenOnly = 1;
1962    let Size = 68; // 12 fixed + 56 variable, for pointer offset, discriminator and
1963                   // ELF signed-GOT pointer authentication (if no FPAC)
1964    let Defs = [X16,X17,NZCV];
1965  }
1966
1967  def LOADgotAUTH : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), []>,
1968               Sched<[WriteI, ReadI]> {
1969    let Defs = [X16,X17,NZCV];
1970    let Size = 44;
1971  }
1972
1973  // Load a signed global address from a special $auth_ptr$ stub slot.
1974  def LOADauthptrstatic : Pseudo<(outs GPR64:$dst),
1975                              (ins i64imm:$Addr, i32imm:$Key,
1976                                   i64imm:$Disc), []>,
1977               Sched<[WriteI, ReadI]> {
1978    let isReMaterializable = 1;
1979    let isCodeGenOnly = 1;
1980    let Size = 8;
1981  }
1982
1983  // Size 16: 4 fixed + 8 variable, to compute discriminator.
1984  // The size returned by getInstSizeInBytes() is incremented according
1985  // to the variant of LR check.
1986  // As the check requires either x16 or x17 as a scratch register and
1987  // authenticated tail call instructions have two register operands,
1988  // make sure at least one register is usable as a scratch one - for that
1989  // purpose, use tcGPRnotx16x17 register class for one of the operands.
1990  let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
1991      Defs = [X16,X17], Uses = [SP] in {
1992    def AUTH_TCRETURN
1993      : Pseudo<(outs), (ins tcGPRnotx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
1994                            i64imm:$Disc, tcGPR64:$AddrDisc),
1995               []>, Sched<[WriteBrReg]>;
1996    def AUTH_TCRETURN_BTI
1997      : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
1998                            i64imm:$Disc, tcGPRnotx16x17:$AddrDisc),
1999               []>, Sched<[WriteBrReg]>;
2000  }
2001
2002  let Predicates = [TailCallAny] in
2003    def : Pat<(AArch64authtcret tcGPRnotx16x17:$dst, (i32 timm:$FPDiff), (i32 timm:$Key),
2004                                (i64 timm:$Disc), tcGPR64:$AddrDisc),
2005              (AUTH_TCRETURN tcGPRnotx16x17:$dst, imm:$FPDiff, imm:$Key, imm:$Disc,
2006                             tcGPR64:$AddrDisc)>;
2007
2008  let Predicates = [TailCallX16X17] in
2009    def : Pat<(AArch64authtcret tcGPRx16x17:$dst, (i32 timm:$FPDiff),
2010                                (i32 timm:$Key), (i64 timm:$Disc),
2011                                tcGPRnotx16x17:$AddrDisc),
2012              (AUTH_TCRETURN_BTI tcGPRx16x17:$dst, imm:$FPDiff, imm:$Key,
2013                                 imm:$Disc, tcGPRnotx16x17:$AddrDisc)>;
2014}
2015
2016// v9.5-A pointer authentication extensions
2017
2018// Always accept "pacm" as an alias for "hint #39", but don't emit it when
2019// disassembling if we don't have the pauth-lr feature.
2020let CRm = 0b0100 in {
2021  def PACM : SystemNoOperands<0b111, "hint\t#39">;
2022}
2023def : InstAlias<"pacm", (PACM), 0>;
2024
2025let Predicates = [HasPAuthLR] in {
2026  let Defs = [LR], Uses = [LR, SP] in {
2027    //                                opcode2, opcode,   asm
2028    def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">;
2029    def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">;
2030    def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">;
2031    def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">;
2032    //                             opc,  asm
2033    def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">;
2034    def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">;
2035    //                              opcode2, opcode,   asm
2036    def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppcr">;
2037    def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppcr">;
2038  }
2039  let Defs = [X17], Uses = [X15, X16, X17] in {
2040    //                                  opcode2, opcode,   asm
2041    def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">;
2042    def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">;
2043    def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">;
2044    def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">;
2045  }
2046
2047  let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
2048    //                                   opc,   op2,     asm
2049    def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">;
2050    def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">;
2051    //                                 op3,      asm
2052    def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppcr">;
2053    def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppcr">;
2054  }
2055  def : InstAlias<"pacm", (PACM), 1>;
2056}
2057
2058
2059// v8.3a floating point conversion for javascript
2060let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
2061def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
2062                                      "fjcvtzs",
2063                                      [(set GPR32:$Rd,
2064                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
2065  let Inst{31} = 0;
2066} // HasJS, HasFPARMv8
2067
2068// v8.4 Flag manipulation instructions
2069let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
2070def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
2071  let Inst{20-5} = 0b0000001000000000;
2072}
2073def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
2074def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
2075def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
2076                        "{\t$Rn, $imm, $mask}">;
2077} // HasFlagM
2078
2079// v8.5 flag manipulation instructions
2080let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
2081
2082def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
2083  let Inst{18-16} = 0b000;
2084  let Inst{11-8} = 0b0000;
2085  let Unpredictable{11-8} = 0b1111;
2086  let Inst{7-5} = 0b001;
2087}
2088
2089def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
2090  let Inst{18-16} = 0b000;
2091  let Inst{11-8} = 0b0000;
2092  let Unpredictable{11-8} = 0b1111;
2093  let Inst{7-5} = 0b010;
2094}
2095} // HasAltNZCV
2096
2097
2098// Armv8.5-A speculation barrier
2099def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
2100  let Inst{20-5} = 0b0001100110000111;
2101  let Unpredictable{11-8} = 0b1111;
2102  let Predicates = [HasSB];
2103  let hasSideEffects = 1;
2104}
2105
2106def : InstAlias<"clrex", (CLREX 0xf)>;
2107def : InstAlias<"isb", (ISB 0xf)>;
2108def : InstAlias<"ssbb", (DSB 0)>;
2109def : InstAlias<"pssbb", (DSB 4)>;
2110def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
2111
2112def MRS    : MRSI;
2113def MSR    : MSRI;
2114def MSRpstateImm1 : MSRpstateImm0_1;
2115def MSRpstateImm4 : MSRpstateImm0_15;
2116
2117def : Pat<(AArch64mrs imm:$id),
2118          (MRS imm:$id)>;
2119
2120// The thread pointer (on Linux, at least, where this has been implemented) is
2121// TPIDR_EL0.
2122def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
2123                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
2124
2125// This gets lowered into a 24-byte instruction sequence
2126let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
2127def KCFI_CHECK : Pseudo<
2128  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
2129}
2130
2131let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
2132def HWASAN_CHECK_MEMACCESS : Pseudo<
2133  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
2134  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
2135  Sched<[]>;
2136}
2137
2138let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
2139def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
2140  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
2141  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
2142  Sched<[]>;
2143}
2144
2145let Defs = [ X16, X17, LR, NZCV ] in {
2146def HWASAN_CHECK_MEMACCESS_FIXEDSHADOW : Pseudo<
2147  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow),
2148  [(int_hwasan_check_memaccess_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>,
2149  Sched<[]>;
2150}
2151
2152let Defs = [ X16, X17, LR, NZCV ] in {
2153def HWASAN_CHECK_MEMACCESS_SHORTGRANULES_FIXEDSHADOW : Pseudo<
2154  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow),
2155  [(int_hwasan_check_memaccess_shortgranules_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>,
2156  Sched<[]>;
2157}
2158
2159// The virtual cycle counter register is CNTVCT_EL0.
2160def : Pat<(readcyclecounter), (MRS 0xdf02)>;
2161
2162// FPCR and FPSR registers.
2163let Uses = [FPCR] in
2164def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
2165                      [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
2166               PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
2167               Sched<[WriteSys]>;
2168let Defs = [FPCR] in
2169def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
2170                      [(int_aarch64_set_fpcr i64:$val)]>,
2171               PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
2172               Sched<[WriteSys]>;
2173
2174let Uses = [FPSR] in
2175def MRS_FPSR : Pseudo<(outs GPR64:$dst), (ins),
2176                      [(set GPR64:$dst, (int_aarch64_get_fpsr))]>,
2177               PseudoInstExpansion<(MRS GPR64:$dst, 0xda21)>,
2178               Sched<[WriteSys]>;
2179let Defs = [FPSR] in
2180def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val),
2181                      [(int_aarch64_set_fpsr i64:$val)]>,
2182               PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>,
2183               Sched<[WriteSys]>;
2184
2185let Defs = [FPMR] in
2186def MSR_FPMR : Pseudo<(outs), (ins GPR64:$val),
2187                      [(int_aarch64_set_fpmr i64:$val)]>,
2188               PseudoInstExpansion<(MSR 0xda22, GPR64:$val)>,
2189               Sched<[WriteSys]>;
2190
2191// Generic system instructions
2192def SYSxt  : SystemXtI<0, "sys">;
2193def SYSLxt : SystemLXtI<1, "sysl">;
2194
2195def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
2196                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
2197                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
2198
2199
2200let Predicates = [HasTME] in {
2201
2202def TSTART : TMSystemI<0b0000, "tstart",
2203                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;
2204
2205def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
2206
2207def TCANCEL : TMSystemException<0b011, "tcancel",
2208                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
2209
2210def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
2211  let mayLoad = 0;
2212  let mayStore = 0;
2213}
2214} // HasTME
2215
2216//===----------------------------------------------------------------------===//
2217// Move immediate instructions.
2218//===----------------------------------------------------------------------===//
2219
2220defm MOVK : InsertImmediate<0b11, "movk">;
2221defm MOVN : MoveImmediate<0b00, "movn">;
2222
2223let PostEncoderMethod = "fixMOVZ" in
2224defm MOVZ : MoveImmediate<0b10, "movz">;
2225
2226// First group of aliases covers an implicit "lsl #0".
2227def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
2228def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
2229def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
2230def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
2231def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
2232def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
2233
2234// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
2235def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
2236def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
2237def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
2238def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
2239
2240def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
2241def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
2242def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
2243def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
2244
2245def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
2246def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
2247def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
2248def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
2249
2250def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
2251def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
2252
2253def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
2254def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
2255
2256def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
2257def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
2258
2259// Final group of aliases covers true "mov $Rd, $imm" cases.
2260multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
2261                          int width, int shift> {
2262  def _asmoperand : AsmOperandClass {
2263    let Name = basename # width # "_lsl" # shift # "MovAlias";
2264    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
2265                               # shift # ">";
2266    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
2267  }
2268
2269  def _movimm : Operand<i32> {
2270    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
2271  }
2272
2273  def : InstAlias<"mov $Rd, $imm",
2274                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
2275}
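// e.g. "mov w0, #0x20000" matches the MOVZ alias below and is encoded as
// "movz w0, #2, lsl #16".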
2276
2277defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
2278defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
2279
2280defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
2281defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
2282defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
2283defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
2284
2285defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
2286defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
2287
2288defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
2289defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
2290defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
2291defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
2292
2293let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
2294    isAsCheapAsAMove = 1 in {
2295// FIXME: The following pseudo instructions are only needed because remat
2296// cannot handle multiple instructions.  When that changes, we can select
2297// directly to the real instructions and get rid of these pseudos.
2298
2299def MOVi32imm
2300    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
2301             [(set GPR32:$dst, imm:$src)]>,
2302      Sched<[WriteImm]>;
2303def MOVi64imm
2304    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
2305             [(set GPR64:$dst, imm:$src)]>,
2306      Sched<[WriteImm]>;
2307} // isReMaterializable, isCodeGenOnly, isMoveImm, isAsCheapAsAMove
2308
2309// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
2310// eventual expansion code fewer bits to worry about getting right. Marshalling
2311// the types is a little tricky though:
2312def i64imm_32bit : ImmLeaf<i64, [{
2313  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
2314}]>;
2315
2316def s64imm_32bit : ImmLeaf<i64, [{
2317  int64_t Imm64 = static_cast<int64_t>(Imm);
2318  return Imm64 >= std::numeric_limits<int32_t>::min() &&
2319         Imm64 <= std::numeric_limits<int32_t>::max();
2320}]>;
2321
2322def trunc_imm : SDNodeXForm<imm, [{
2323  return CurDAG->getTargetConstant((uint32_t)N->getZExtValue(), SDLoc(N), MVT::i32);
2324}]>;
2325
2326def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
2327  GISDNodeXFormEquiv<trunc_imm>;
2328
2329let Predicates = [OptimizedGISelOrOtherSelector] in {
2330// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2331// copies.
2332def : Pat<(i64 i64imm_32bit:$src),
2333          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
2334}
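// e.g. an i64 constant 0x00000000deadbeef is materialized with the 32-bit
// MOVi32imm expansion; the W-register write implicitly zeroes bits 63:32.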
2335
2336// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
2337def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
2338return CurDAG->getTargetConstant(
2339  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
2340}]>;
2341
2342def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
2343return CurDAG->getTargetConstant(
2344  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
2345}]>;
2346
2347
2348def : Pat<(f32 fpimm:$in),
2349  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
2350def : Pat<(f64 fpimm:$in),
2351  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
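// e.g. (f32 fpimm:1.0) becomes (MOVi32imm 0x3f800000) copied into an FPR32.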
2352
2353
2354// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
2355// sequences.
2356def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
2357                             tglobaladdr:$g1, tglobaladdr:$g0),
2358          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
2359                                  tglobaladdr:$g1, 16),
2360                          tglobaladdr:$g2, 32),
2361                  tglobaladdr:$g3, 48)>;
2362
2363def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
2364                             tblockaddress:$g1, tblockaddress:$g0),
2365          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
2366                                  tblockaddress:$g1, 16),
2367                          tblockaddress:$g2, 32),
2368                  tblockaddress:$g3, 48)>;
2369
2370def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
2371                             tconstpool:$g1, tconstpool:$g0),
2372          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
2373                                  tconstpool:$g1, 16),
2374                          tconstpool:$g2, 32),
2375                  tconstpool:$g3, 48)>;
2376
2377def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
2378                             tjumptable:$g1, tjumptable:$g0),
2379          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
2380                                  tjumptable:$g1, 16),
2381                          tjumptable:$g2, 32),
2382                  tjumptable:$g3, 48)>;
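// i.e. one MOVZ materializes bits 15:0 and three MOVKs patch the three
// higher halfwords of the symbol's address.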
2383
2384
2385//===----------------------------------------------------------------------===//
2386// Arithmetic instructions.
2387//===----------------------------------------------------------------------===//
2388
2389// Add/subtract with carry.
2390defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
2391defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
2392
2393def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
2394def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
2395def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
2396def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
2397
2398// Add/subtract
2399defm ADD : AddSub<0, "add", "sub", add>;
2400defm SUB : AddSub<1, "sub", "add">;
2401
2402def : InstAlias<"mov $dst, $src",
2403                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
2404def : InstAlias<"mov $dst, $src",
2405                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
2406def : InstAlias<"mov $dst, $src",
2407                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
2408def : InstAlias<"mov $dst, $src",
2409                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
2410
2411defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
2412defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
2413
2414def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
2415  return N->getOpcode() == ISD::CopyFromReg &&
2416         cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2417}]>;
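// Matches a value copied directly out of SP; the pattern below uses it to
// keep an SP-based subtract on the non-flag-setting SUBXrx64 form.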
2418
2419// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
2420def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
2421          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
2422def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
2423          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
2424def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
2425          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
2426def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
2427          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
2428def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
2429          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
2430def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
2431          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
2432let AddedComplexity = 1 in {
2433def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
2434          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
2435def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
2436          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
2437def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
2438          (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
2439}
2440
2441// Because of the immediate format for add/sub-imm instructions, the
2442// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2443// These patterns capture that transformation.
2444let AddedComplexity = 1 in {
2445def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2446          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2447def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2448          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2449def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2450          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2451def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2452          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2453}
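// e.g. (add w0, #-3) becomes "subs w0, w0, #3": the negated immediate fits
// the add/sub immediate encoding, and SUBS enables CSE as noted above.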
2454
2455// The same transformation applies to the flag-setting add/sub nodes.
2458let AddedComplexity = 1 in {
2459def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2460          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2461def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2462          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2463def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2464          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2465def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2466          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2467}
2468
2469def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2470def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2471def : InstAlias<"neg $dst, $src$shift",
2472                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2473def : InstAlias<"neg $dst, $src$shift",
2474                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2475
2476def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2477def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2478def : InstAlias<"negs $dst, $src$shift",
2479                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2480def : InstAlias<"negs $dst, $src$shift",
2481                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2482
2483
2484// Unsigned/Signed divide
2485defm UDIV : Div<0, "udiv", udiv>;
2486defm SDIV : Div<1, "sdiv", sdiv>;
2487
2488def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
2489def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
2490def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
2491def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
2492
2493// Variable shift
2494defm ASRV : Shift<0b10, "asr", sra>;
2495defm LSLV : Shift<0b00, "lsl", shl>;
2496defm LSRV : Shift<0b01, "lsr", srl>;
2497defm RORV : Shift<0b11, "ror", rotr>;
2498
2499def : ShiftAlias<"asrv", ASRVWr, GPR32>;
2500def : ShiftAlias<"asrv", ASRVXr, GPR64>;
2501def : ShiftAlias<"lslv", LSLVWr, GPR32>;
2502def : ShiftAlias<"lslv", LSLVXr, GPR64>;
2503def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
2504def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
2505def : ShiftAlias<"rorv", RORVWr, GPR32>;
2506def : ShiftAlias<"rorv", RORVXr, GPR64>;
2507
2508// Multiply-add
2509let AddedComplexity = 5 in {
2510defm MADD : MulAccum<0, "madd">;
2511defm MSUB : MulAccum<1, "msub">;
2512
2513def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
2514          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2515def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
2516          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2517
2518def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
2519          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2520def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
2521          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2522def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
2523          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2524def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
2525          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2526} // AddedComplexity = 5
2527
2528let AddedComplexity = 5 in {
2529def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
2530def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
2531def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
2532def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
2533
2534def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
2535          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2536def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
2537          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2538def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
2539          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2540def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
2541          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2542def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
2543          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2544def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
2545          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2546
2547def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
2548          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2549def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
2550          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2551
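// Widening multiply by a constant representable in 32 bits: materialize the
// constant in a W register with MOVi32imm, then use the widening
// multiply-accumulate forms below.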
2552def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
2553          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2554def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
2555          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2556def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
2557          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2558                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2559
2560def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2561          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2562def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2563          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2564def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
2565          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2566                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2567
2568def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
2569          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2570def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
2571          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2572def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
2573                    GPR64:$Ra)),
2574          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2575                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2576
2577def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2578          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2579def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2580          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2581def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
2582                                    (s64imm_32bit:$C)))),
2583          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2584                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2585
2586def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
2587          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2588def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
2589          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2590
2591def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
2592          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2593def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
2594          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2595
2596def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2597          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2598def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2599          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2600
2601def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2602          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2603def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2604          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2605
2606def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
2607          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2608def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
2609          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2610
2611def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
2612          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2613def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
2614          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2615
2616def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
2617          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2618def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2619          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2620
2621def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
2622          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2623def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2624          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2625} // AddedComplexity = 5
2626
2627def : MulAccumWAlias<"mul", MADDWrrr>;
2628def : MulAccumXAlias<"mul", MADDXrrr>;
2629def : MulAccumWAlias<"mneg", MSUBWrrr>;
2630def : MulAccumXAlias<"mneg", MSUBXrrr>;
2631def : WideMulAccumAlias<"smull", SMADDLrrr>;
2632def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
2633def : WideMulAccumAlias<"umull", UMADDLrrr>;
2634def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
2635
2636// Multiply-high
2637def SMULHrr : MulHi<0b010, "smulh", mulhs>;
2638def UMULHrr : MulHi<0b110, "umulh", mulhu>;
2639
2640// CRC32
2641def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
2642def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
2643def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
2644def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
2645
2646def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
2647def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
2648def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
2649def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
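// All CRC32 variants accumulate into and produce a 32-bit checksum in Wd;
// only the width of the data operand differs, up to 64 bits for crc32x.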
2650
2651// v8.1 atomic CAS
2652defm CAS   : CompareAndSwap<0, 0, "">;
2653defm CASA  : CompareAndSwap<1, 0, "a">;
2654defm CASL  : CompareAndSwap<0, 1, "l">;
2655defm CASAL : CompareAndSwap<1, 1, "al">;
2656
2657// v8.1 atomic CASP
2658defm CASP   : CompareAndSwapPair<0, 0, "">;
2659defm CASPA  : CompareAndSwapPair<1, 0, "a">;
2660defm CASPL  : CompareAndSwapPair<0, 1, "l">;
2661defm CASPAL : CompareAndSwapPair<1, 1, "al">;
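// CASP operates on a pair of consecutive registers starting at an even
// register number, e.g. "casp x0, x1, x2, x3, [x4]".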
2662
2663// v9.6-a atomic CAST
2664let Predicates = [HasLSUI] in {
2665defm CAST   : CompareAndSwapUnprivileged<0b11, 0, 0, "">;
2666defm CASLT  : CompareAndSwapUnprivileged<0b11, 0, 1, "l">;
2667defm CASAT  : CompareAndSwapUnprivileged<0b11, 1, 0, "a">;
2668defm CASALT : CompareAndSwapUnprivileged<0b11, 1, 1, "al">;
2669
2670def : MnemonicAlias<"cas", "cast">;
2671def : MnemonicAlias<"casl", "caslt">;
2672def : MnemonicAlias<"casa", "casat">;
2673def : MnemonicAlias<"casal", "casalt">;
2674
2675// v9.6-a atomic CASPT
2676defm CASPT   : CompareAndSwapPairUnprivileged<0b01, 0, 0, "">;
2677defm CASPLT  : CompareAndSwapPairUnprivileged<0b01, 0, 1, "l">;
2678defm CASPAT  : CompareAndSwapPairUnprivileged<0b01, 1, 0, "a">;
2679defm CASPALT : CompareAndSwapPairUnprivileged<0b01, 1, 1, "al">;
2680
2681def : MnemonicAlias<"casp", "caspt">;
2682def : MnemonicAlias<"caspl", "casplt">;
2683def : MnemonicAlias<"caspa", "caspat">;
2684def : MnemonicAlias<"caspal", "caspalt">;
2685}
2686
2687// v8.1 atomic SWP
2688defm SWP   : Swap<0, 0, "">;
2689defm SWPA  : Swap<1, 0, "a">;
2690defm SWPL  : Swap<0, 1, "l">;
2691defm SWPAL : Swap<1, 1, "al">;
2692
2693// v9.6-a atomic swap (FEAT_LSUI)
2694let Predicates = [HasLSUI] in {
2695  defm SWPT   : SwapLSUI<0, 0, "">;
2696  defm SWPTA  : SwapLSUI<1, 0, "a">;
2697  defm SWPTL  : SwapLSUI<0, 1, "l">;
2698  defm SWPTAL : SwapLSUI<1, 1, "al">;
2699
2700  def : MnemonicAlias<"swp", "swpt">;
2701  def : MnemonicAlias<"swpa", "swpta">;
2702  def : MnemonicAlias<"swpl", "swptl">;
2703  def : MnemonicAlias<"swpal", "swptal">;
2704}
2705
2706// v9.6-a unprivileged atomic LD<OP> (FEAT_LSUI)
2707let Predicates = [HasLSUI] in {
2708  defm LDTADD   : LDOPregisterLSUI<0b000, "add", 0, 0, "">;
2709  defm LDTADDA  : LDOPregisterLSUI<0b000, "add", 1, 0, "a">;
2710  defm LDTADDL  : LDOPregisterLSUI<0b000, "add", 0, 1, "l">;
2711  defm LDTADDAL : LDOPregisterLSUI<0b000, "add", 1, 1, "al">;
2712
2713  defm LDTCLR   : LDOPregisterLSUI<0b001, "clr", 0, 0, "">;
2714  defm LDTCLRA  : LDOPregisterLSUI<0b001, "clr", 1, 0, "a">;
2715  defm LDTCLRL  : LDOPregisterLSUI<0b001, "clr", 0, 1, "l">;
2716  defm LDTCLRAL : LDOPregisterLSUI<0b001, "clr", 1, 1, "al">;
2717
2718  defm LDTSET   : LDOPregisterLSUI<0b011, "set", 0, 0, "">;
2719  defm LDTSETA  : LDOPregisterLSUI<0b011, "set", 1, 0, "a">;
2720  defm LDTSETL  : LDOPregisterLSUI<0b011, "set", 0, 1, "l">;
2721  defm LDTSETAL : LDOPregisterLSUI<0b011, "set", 1, 1, "al">;
2722
2723  defm : STOPregisterLSUI<"sttadd","LDTADD">; // STTADDx
2724  defm : STOPregisterLSUI<"sttclr","LDTCLR">; // STTCLRx
2725  defm : STOPregisterLSUI<"sttset","LDTSET">; // STTSETx
2726}
2727
2728// v9.6-a FEAT_RME_GPC3
2729def APAS : APASI;
2730
2731// v8.1 atomic LD<OP>(register): load old value into Rt, store (old <OP> Rs) back
2732defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
2733defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
2734defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
2735defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
2736
2737defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
2738defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
2739defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
2740defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
2741
2742defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
2743defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
2744defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
2745defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
2746
2747defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
2748defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
2749defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
2750defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
2751
2752defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
2753defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
2754defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
2755defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
2756
2757defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
2758defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
2759defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
2760defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
2761
2762defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
2763defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
2764defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
2765defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
2766
2767defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
2768defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
2769defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
2770defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2771
2772// v8.1 atomic ST<OP>(register), as aliases for "LD<OP>(register)" with Rt = XZR
2773defm : STOPregister<"stadd","LDADD">; // STADDx
2774defm : STOPregister<"stclr","LDCLR">; // STCLRx
2775defm : STOPregister<"steor","LDEOR">; // STEORx
2776defm : STOPregister<"stset","LDSET">; // STSETx
2777defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
2778defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
2779defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
2780defm : STOPregister<"stumin","LDUMIN">;// STUMINx
2781
2782// v8.5 Memory Tagging Extension
2783let Predicates = [HasMTE] in {
2784
2785def IRG   : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
2786                                 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>;
2787
2788def GMI   : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
2789                                 int_aarch64_gmi, GPR64sp>, Sched<[]> {
2790  let isNotDuplicable = 1;
2791}
2792def ADDG  : AddSubG<0, "addg", null_frag>;
2793def SUBG  : AddSubG<1, "subg", null_frag>;
2794
2795def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
2796
2797def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
2798def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
2799  let Defs = [NZCV];
2800}
2801
2802def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
2803
2804def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
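// LDG loads the allocation tag for the given address and inserts it into
// bits 59:56 of $Rt, leaving the other bits of $Rt unchanged.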
2805
2806def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
2807          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
2808def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
2809          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2810
2811def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
2812
2813let mayLoad = 1 in
2814def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
2815                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
2816let mayStore = 1 in {
2817def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
2818                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
2819def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
2820                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
2821  let Inst{23} = 0;
2822}
2823} // mayStore = 1
2824
2825defm STG   : MemTagStore<0b00, "stg">;
2826defm STZG  : MemTagStore<0b01, "stzg">;
2827defm ST2G  : MemTagStore<0b10, "st2g">;
2828defm STZ2G : MemTagStore<0b11, "stz2g">;
2829
2830def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2831          (STGi $Rn, $Rm, $imm)>;
2832def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2833          (STZGi $Rn, $Rm, $imm)>;
2834def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2835          (ST2Gi $Rn, $Rm, $imm)>;
2836def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2837          (STZ2Gi $Rn, $Rm, $imm)>;
2838
2839defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
2840def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
2841def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
2842
2843def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
2844          (STGi GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2845
2846def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
2847          (STGPi $Rt, $Rt2, $Rn, $imm)>;
2848
2849def IRGstack
2850    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
2851      Sched<[]>;
2852def TAGPstack
2853    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
2854      Sched<[]>;
2855
2856// Explicit SP in the first operand prevents ShrinkWrap optimization
2857// from leaving this instruction out of the stack frame. When IRGstack
2858// is transformed into IRG, this operand is replaced with the actual
2859// register / expression for the tagged base pointer of the current function.
2860def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
2861
2862// Large STG to be expanded into a loop. $sz is the size, $Rn is the start
2863// address. $Rn_wback is one past the end of the range. $Rm is the loop counter.
2864let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
2865def STGloop_wback
2866    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2867             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2868      Sched<[WriteAdr, WriteST]>;
2869
2870def STZGloop_wback
2871    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2872             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2873      Sched<[WriteAdr, WriteST]>;
2874
2875// Variants of the above where $Rn2 is an independent register not tied to the input register $Rn.
2876// Their purpose is to allow a FrameIndex operand as $Rn (which of course cannot be written back).
2877def STGloop
2878    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2879             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2880      Sched<[WriteAdr, WriteST]>;
2881
2882def STZGloop
2883    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2884             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2885      Sched<[WriteAdr, WriteST]>;
2886}
2887
2888} // Predicates = [HasMTE]
2889
2890//===----------------------------------------------------------------------===//
2891// Logical instructions.
2892//===----------------------------------------------------------------------===//
2893
2894// (immediate)
2895defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
2896defm AND  : LogicalImm<0b00, "and", and, "bic">;
2897defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
2898defm ORR  : LogicalImm<0b01, "orr", or, "orn">;
2899
2900// FIXME: these aliases *are* canonical sometimes (when movz can't be
2901// used). It seems to work right now, but putting logical_immXX
2902// here is a bit dodgy on the AsmParser side too.
2903def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
2904                                          logical_imm32:$imm), 0>;
2905def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
2906                                          logical_imm64:$imm), 0>;
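// e.g. "mov w0, #0x00ff00ff" (a valid logical immediate) is accepted and
// encoded as "orr w0, wzr, #0x00ff00ff".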
2907
2908
2909// (register)
2910defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
2911defm BICS : LogicalRegS<0b11, 1, "bics",
2912                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
2913defm AND  : LogicalReg<0b00, 0, "and", and>;
2914defm BIC  : LogicalReg<0b00, 1, "bic",
2915                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
2916defm EON  : LogicalReg<0b10, 1, "eon",
2917                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
2918defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
2919defm ORN  : LogicalReg<0b01, 1, "orn",
2920                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
2921defm ORR  : LogicalReg<0b01, 0, "orr", or>;
2922
2923def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
2924def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
2925
2926def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
2927def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
2928
2929def : InstAlias<"mvn $Wd, $Wm$sh",
2930                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
2931def : InstAlias<"mvn $Xd, $Xm$sh",
2932                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
2933
2934def : InstAlias<"tst $src1, $src2",
2935                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
2936def : InstAlias<"tst $src1, $src2",
2937                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
2938
2939def : InstAlias<"tst $src1, $src2",
2940                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
2941def : InstAlias<"tst $src1, $src2",
2942                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
2943
2944def : InstAlias<"tst $src1, $src2$sh",
2945               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
2946def : InstAlias<"tst $src1, $src2$sh",
2947               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
2948
2949
2950def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
2951def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
2952
2953// Emit (and 0xFFFFFFFF) as an ORRWrr move which may be eliminated.
2954let AddedComplexity = 6 in
2955def : Pat<(i64 (and GPR64:$Rn, 0xffffffff)),
2956          (SUBREG_TO_REG (i64 0), (ORRWrr WZR, (EXTRACT_SUBREG GPR64:$Rn, sub_32)), sub_32)>;
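// e.g. (and x0, #0xffffffff) is emitted as a W-register mov of the low half;
// the 32-bit write implicitly zeroes bits 63:32, and the mov itself can often
// be coalesced away.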
2957
2958
2959//===----------------------------------------------------------------------===//
2960// One operand data processing instructions.
2961//===----------------------------------------------------------------------===//
2962
2963defm CLS    : OneOperandData<0b000101, "cls">;
2964defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
2965defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;
2966
2967def  REV16Wr : OneWRegData<0b000001, "rev16",
2968                                     UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
2969def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;
2970
2971def : Pat<(cttz GPR32:$Rn),
2972          (CLZWr (RBITWr GPR32:$Rn))>;
2973def : Pat<(cttz GPR64:$Rn),
2974          (CLZXr (RBITXr GPR64:$Rn))>;
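// There is no dedicated count-trailing-zeros instruction, so cttz(x) is
// computed as clz(rbit(x)).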
2975def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
2976                (i32 1))),
2977          (CLSWr GPR32:$Rn)>;
2978def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
2979                (i64 1))),
2980          (CLSXr GPR64:$Rn)>;
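// The two ctlz patterns above are the DAG form of cls: they count how many
// bits below the sign bit are copies of it.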
2981def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
2982def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
2983
2984// Unlike the other one operand instructions, the instructions with the "rev"
2985// mnemonic do *not* just different in the size bit, but actually use different
2986// opcode bits for the different sizes.
2987def REVWr   : OneWRegData<0b000010, "rev", bswap>;
2988def REVXr   : OneXRegData<0b000011, "rev", bswap>;
2989def REV32Xr : OneXRegData<0b000010, "rev32",
2990                                    UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
2991
2992def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
2993
2994// The bswap commutes with the rotr so we want a pattern for both possible
2995// orders.
2996def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
2997def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
2998
2999// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
3000def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
3001def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
3002
3003def : Pat<(AArch64rev16_scalar GPR32:$Rn), (REV16Wr GPR32:$Rn)>;
3004def : Pat<(AArch64rev16_scalar GPR64:$Rn), (REV16Xr GPR64:$Rn)>;
3005
3006def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
3007              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
3008          (REV16Xr GPR64:$Rn)>;
3009
3010//===----------------------------------------------------------------------===//
3011// Bitfield immediate extraction instruction.
3012//===----------------------------------------------------------------------===//
3013let hasSideEffects = 0 in
3014defm EXTR : ExtractImm<"extr">;
3015def : InstAlias<"ror $dst, $src, $shift",
3016            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
3017def : InstAlias<"ror $dst, $src, $shift",
3018            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
3019
3020def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
3021          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
3022def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
3023          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
3024
3025//===----------------------------------------------------------------------===//
3026// Other bitfield immediate instructions.
3027//===----------------------------------------------------------------------===//
3028let hasSideEffects = 0 in {
3029defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
3030defm SBFM : BitfieldImm<0b00, "sbfm">;
3031defm UBFM : BitfieldImm<0b10, "ubfm">;
3032}
3033
3034def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
3035  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
3036  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3037}]>;
3038
3039def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
3040  uint64_t enc = 31 - N->getZExtValue();
3041  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3042}]>;
3043
3044// min(7, 31 - shift_amt)
3045def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
3046  uint64_t enc = 31 - N->getZExtValue();
3047  enc = enc > 7 ? 7 : enc;
3048  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3049}]>;
3050
3051// min(15, 31 - shift_amt)
3052def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
3053  uint64_t enc = 31 - N->getZExtValue();
3054  enc = enc > 15 ? 15 : enc;
3055  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3056}]>;
3057
3058def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
3059  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
3060  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3061}]>;
3062
3063def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
3064  uint64_t enc = 63 - N->getZExtValue();
3065  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3066}]>;
3067
3068// min(7, 63 - shift_amt)
3069def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
3070  uint64_t enc = 63 - N->getZExtValue();
3071  enc = enc > 7 ? 7 : enc;
3072  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3073}]>;
3074
3075// min(15, 63 - shift_amt)
3076def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
3077  uint64_t enc = 63 - N->getZExtValue();
3078  enc = enc > 15 ? 15 : enc;
3079  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3080}]>;
3081
3082// min(31, 63 - shift_amt)
3083def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
3084  uint64_t enc = 63 - N->getZExtValue();
3085  enc = enc > 31 ? 31 : enc;
3086  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
3087}]>;
3088
3089def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
3090          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
3091                              (i64 (i32shift_b imm0_31:$imm)))>;
3092def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
3093          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
3094                              (i64 (i64shift_b imm0_63:$imm)))>;
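// e.g. a 32-bit "lsl #3" becomes "ubfm wd, wn, #29, #28":
// immr = (32 - 3) & 31 = 29 and imms = 31 - 3 = 28.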
3095
3096let AddedComplexity = 10 in {
3097def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
3098          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
3099def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
3100          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
3101}
3102
3103def : InstAlias<"asr $dst, $src, $shift",
3104                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
3105def : InstAlias<"asr $dst, $src, $shift",
3106                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
3107def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
3108def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
3109def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
3110def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
3111def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
3112
3113def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
3114          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
3115def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
3116          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
3117
3118def : InstAlias<"lsr $dst, $src, $shift",
3119                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
3120def : InstAlias<"lsr $dst, $src, $shift",
3121                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
3122def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
3123def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
3124def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
3125def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
3126def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
3127
3128//===----------------------------------------------------------------------===//
3129// Conditional comparison instructions.
3130//===----------------------------------------------------------------------===//
3131defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
3132defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
3133
3134//===----------------------------------------------------------------------===//
3135// Conditional select instructions.
3136//===----------------------------------------------------------------------===//
3137defm CSEL  : CondSelect<0, 0b00, "csel">;
3138
3139def inc : PatFrag<(ops node:$in), (add_and_or_is_add node:$in, 1)>;
3140defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
3141defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
3142defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
3143
3144def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
3145          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
3146def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
3147          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
3148def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
3149          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
3150def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
3151          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
3152def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
3153          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
3154def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
3155          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
3156
3157def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
3158          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
3159def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
3160          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
3161def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
3162          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
3163def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
3164          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
3165def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
3166          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
3167def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
3168          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
3169def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
3170          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
3171def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
3172          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
3173def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
3174          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
3175def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
3176          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
3177def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
3178          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
3179def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
3180          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
3181
3182def : Pat<(add_and_or_is_add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3183          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
3184def : Pat<(add_and_or_is_add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3185          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
3186
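// In the patterns below, topbitsallzero guarantees only bit 0 of $val can be
// set, so or'ing with a 0/1 csel folds to CSINC and and'ing folds to a CSEL
// against zero.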
3187def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3188          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
3189def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
3190          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
3191def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3192          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
3193
3194def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3195          (CSELWr WZR, GPR32:$val, imm:$cc)>;
3196def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
3197          (CSELXr XZR, GPR64:$val, imm:$cc)>;
3198def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3199          (CSELXr XZR, GPR64:$val, imm:$cc)>;
3200
3201// The aliased instruction uses the inverse of the condition code given to the
3202// alias. The parser already inverts the condition code for these aliases.
3204def : InstAlias<"cset $dst, $cc",
3205                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
3206def : InstAlias<"cset $dst, $cc",
3207                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
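// e.g. "cset w0, eq" is encoded as "csinc w0, wzr, wzr, ne".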
3208
3209def : InstAlias<"csetm $dst, $cc",
3210                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
3211def : InstAlias<"csetm $dst, $cc",
3212                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
3213
3214def : InstAlias<"cinc $dst, $src, $cc",
3215                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
3216def : InstAlias<"cinc $dst, $src, $cc",
3217                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
3218
3219def : InstAlias<"cinv $dst, $src, $cc",
3220                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
3221def : InstAlias<"cinv $dst, $src, $cc",
3222                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
3223
3224def : InstAlias<"cneg $dst, $src, $cc",
3225                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
3226def : InstAlias<"cneg $dst, $src, $cc",
3227                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
3228
3229//===----------------------------------------------------------------------===//
3230// PC-relative instructions.
3231//===----------------------------------------------------------------------===//
3232let isReMaterializable = 1 in {
3233let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
3234def ADR  : ADRI<0, "adr", adrlabel,
3235                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
3236} // hasSideEffects = 0, mayStore = 0, mayLoad = 0
3237
3238def ADRP : ADRI<1, "adrp", adrplabel,
3239                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
3240} // isReMaterializable = 1
3241
3242// Address (ADR) / page address (ADRP) of constant pool entries, block addresses, external symbols and jump tables.
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;
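// A bare "ret" therefore assembles identically to "ret x30".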

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
  let Uses = [X16, SP] in
  def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR X16)>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call_bti GPR64noip:$Rn),
          (BLR_BTI GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can hasSideEffects be dropped?
// This gets lowered to an instruction sequence that is 16 bytes long.
let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
let isCall = 1, Defs = [NZCV, LR, X0, X16], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_AUTH_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_auth_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;
def : Pat<(AArch64tlsdesc_auth_callseq texternalsym:$sym),
          (TLSDESC_AUTH_CALLSEQ texternalsym:$sym)>;
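// Illustrative expansion of TLSDESC_CALLSEQ (the actual sequence is emitted
// during lowering; shown here for orientation only):
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, :tlsdesc_lo12:var]
//   add  x0, x0, :tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1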

//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B  : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
                                [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt",
                                [(int_aarch64_hlt timm32_0_65535:$imm)]>;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;

def UDF : UDFType<0, "udf">;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
let Predicates = [HasFPARMv8] in {
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
}

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
let Predicates = [HasFPARMv8] in {
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
}

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// Match all 64-bit-wide loads whose type is compatible with FPR64
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}
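// The SUBREG_TO_REG above exploits the fact that a 32-bit load such as
// "ldrb w0, [x0, x1]" implicitly zeroes bits [63:32] of the X register, so
// the i64 zero-extension costs no extra instruction.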

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}


// extload and zextloadi1 -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                 [(set (f128 FPR128Op:$Rt),
                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
}

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                        (am_indexed64 GPR64sp:$Rn,
                                                      uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;
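// The LDR (literal) form encodes a 19-bit signed offset scaled by 4, so the
// target must be 4-byte aligned and within +/-1MiB of the PC; alignedglobal
// checks the alignment half of that requirement.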

def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
  [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
  [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
let Predicates = [HasFPARMv8] in {
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
  [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
  [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
}

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
  [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
        (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                    [(set GPR64z:$Rt,
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                    [(set GPR32z:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                    [(set FPR8Op:$Rt,
                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                    [(set (f16 FPR16Op:$Rt),
                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                    [(set (f32 FPR32Op:$Rt),
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                    [(set (f64 FPR64Op:$Rt),
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                    [(set (f128 FPR128Op:$Rt),
                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
}

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;


//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to match these only when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}
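// For example, "ldr x0, [x1, #1]" cannot use the scaled LDRXi encoding (its
// immediate must be a multiple of 8), so the aliases below make it assemble
// as the unscaled "ldur x0, [x1, #1]".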

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
let Predicates = [HasFPARMv8] in {
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
               (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
}

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
               [(set GPR32:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
              [(set GPR64:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                [(set GPR32:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                [(set GPR64:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// A LDR will implicitly zero the rest of the vector, so vector_insert(zeros, load, 0)
// can use a single load. Same for scalar_to_vector(load) or insert(undef, load, 0).
multiclass LoadInsertVTPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType ScalarVT,
                                Instruction LoadInst, Instruction UnscaledLoadInst,
                                Instruction ROWLoadInst, Instruction ROXLoadInst,
                                ROAddrMode ro, ComplexPattern Addr, ComplexPattern UnscaledAddr,
                                Operand AddrImm, SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
            (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
  // roW
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (ROWLoadInst GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), SubReg)>;
  // roX
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (ROXLoadInst GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), SubReg)>;

  // Undef equivalents of the patterns above.
  def : Pat <(VT (vec_ins_or_scal_vec
                (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))))),
            (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  def : Pat <(VT (vec_ins_or_scal_vec
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))))),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
  def : Pat <(VT (vec_ins_or_scal_vec
                 (ScalarVT (LoadOp (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))))),
             (SUBREG_TO_REG (i64 0), (ROWLoadInst GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), SubReg)>;
  def : Pat <(VT (vec_ins_or_scal_vec
                 (ScalarVT (LoadOp (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))))),
             (SUBREG_TO_REG (i64 0), (ROXLoadInst GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), SubReg)>;
}

multiclass LoadInsertPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                              ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                              Instruction ROWLoadInst, Instruction ROXLoadInst,
                              ROAddrMode ro, ComplexPattern Addr, ComplexPattern UnscaledAddr,
                              Operand AddrImm, SubRegIndex SubReg> {
  defm : LoadInsertVTPatterns<LoadOp, VT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
  defm : LoadInsertVTPatterns<LoadOp, HVT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
  defm : LoadInsertVTPatterns<LoadOp, SVT, ScalarVT, LoadInst, UnscaledLoadInst, ROWLoadInst,
                              ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
}

defm : LoadInsertPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,
                          LDRBui, LDURBi, LDRBroW, LDRBroX,
                          ro8, am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,
                          LDRHui, LDURHi, LDRHroW, LDRHroX,
                          ro16, am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,
                          LDRSui, LDURSi, LDRSroW, LDRSroX,
                          ro32, am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertPatterns<load,       v2i64,  isVoid, nxv2i64,  i64,
                          LDRDui, LDURDi, LDRDroW, LDRDroX,
                          ro64, am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,
                          LDRHui, LDURHi, LDRHroW, LDRHroX,
                          ro16, am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16,
                          LDRHui, LDURHi, LDRHroW, LDRHroX,
                          ro16, am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,
                          LDRSui, LDURSi, LDRSroW, LDRSroX,
                          ro32, am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertPatterns<load,       v2f64,  isVoid, nxv2f64,  f64,
                          LDRDui, LDURDi, LDRDroW, LDRDroX,
                          ro64, am_indexed64, am_unscaled64, uimm12s8, dsub>;
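// For example (illustrative), inserting a loaded i32 into lane 0 of a zero
// vector becomes a single "ldr s0, [x0]", since the S-register load already
// zeroes the rest of the Q register.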

// Extra patterns for v1i64 scalar_to_vector(load), which need to avoid the
// SUBREG_TO_REG used above.
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
           (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (ro64.Wpat GPR64sp:$Rn, GPR32:$Rm, ro64.Wext:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro64.Wext:$extend)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch timm:$Rt,
                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
let Predicates = [HasFPARMv8] in {
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
}

// Armv9.6-a Load/store pair (FEAT_LSUI)
let Predicates = [HasLSUI] in {
  defm LDTP    : LoadPairOffset<0b11, 0, GPR64z, simm7s8, "ldtp">;
  def LDTPpre  : LoadPairPreIdx<0b11, 0, GPR64z, simm7s8, "ldtp">;
  def LDTPpost : LoadPairPostIdx<0b11, 0, GPR64z, simm7s8, "ldtp">;

  defm STTNPX : StorePairNoAllocLSUI<0b11, 0, GPR64z, simm7s8, "sttnp">;
  defm LDTNPX : LoadPairNoAllocLSUI<0b11, 0, GPR64z, simm7s8, "ldtnp">;

  defm STTP    : StorePairOffset<0b11, 0, GPR64z, simm7s8, "sttp">;
  def STTPpre  : StorePairPreIdx<0b11, 0, GPR64z, simm7s8, "sttp">;
  def STTPpost : StorePairPostIdx<0b11, 0, GPR64z, simm7s8, "sttp">;
}

let Predicates = [HasLSUI, HasNEON] in {
  defm LDTPQ    : LoadPairOffset<0b11, 1, FPR128Op, simm7s16, "ldtp">;
  def LDTPQpre  : LoadPairPreIdx<0b11, 1, FPR128Op, simm7s16, "ldtp">;
  def LDTPQpost : LoadPairPostIdx<0b11, 1, FPR128Op, simm7s16, "ldtp">;

  defm STTNPQ : StorePairNoAllocLSUI<0b11, 1, FPR128Op, simm7s16, "sttnp">;
  defm LDTNPQ : LoadPairNoAllocLSUI<0b11, 1, FPR128Op, simm7s16, "ldtnp">;

  defm STTPQ    : StorePairOffset<0b11, 1, FPR128Op, simm7s16, "sttp">;
  def STTPQpre  : StorePairPreIdx<0b11, 1, FPR128Op, simm7s16, "sttp">;
  def STTPQpost : StorePairPostIdx<0b11, 1, FPR128Op, simm7s16, "sttp">;
}

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;


//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point
let Predicates = [HasFPARMv8] in {
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", i8, store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16,     store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32,     store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
}

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}
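// For example, a truncstorei8 of an i64 becomes "strb w1, [x0, x2]": the
// store reads the W subregister directly (via EXTRACT_SUBREG sub_32), so no
// explicit truncation instruction is needed.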

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              ValueType SubRegTy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16,         store, v8f16, f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4i32, i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4f32, f32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2i64, i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2f64, f64, i64, dsub, STRDroW, STRDroX>;
}
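// For example, storing lane 0 of a v4f32 becomes a plain "str s0, [x0, x1]"
// of the ssub subregister rather than an "st1 {v0.s}[0]" lane store.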

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                   [(store GPR64z:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                    [(store GPR32z:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
let Predicates = [HasFPARMv8] in {
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
}

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                     [(truncstorei16 GPR32z:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1,  "strb",
                     [(truncstorei8 GPR32z:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
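
// A truncating store of a GPR64 only needs the low bits, so it is selected
// as the equivalent narrow store of the sub_32 subregister; no separate
// truncation instruction is emitted.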

} // AddedComplexity = 10

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            ValueType SubRegTy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16,         store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                         [(store GPR64z:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                         [(store GPR32z:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
let Predicates = [HasFPARMv8] in {
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                         [(store FPR8Op:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                         [(store (f16 FPR16Op:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                         [(store (f32 FPR32Op:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                         [(store (f64 FPR64Op:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                         [(store (f128 FPR128Op:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
}
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                         [(truncstorei16 GPR32z:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                         [(truncstorei8 GPR32z:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;

// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX     : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB    : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH    : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR     : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW   : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX    : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}
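
// These forms pair acquire/release semantics with a signed 9-bit immediate
// offset, e.g. "ldapur w0, [x1, #-8]" or "stlur w0, [x1, #4]".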

// Match all 64-bit-wide stores whose type is compatible with FPR64.
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128.
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             ValueType SubRegTy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
}

let AddedComplexity = 19 in {
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v4i32, i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v2i64, i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, i64, dsub, STURDi>;
}

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
let Predicates = [HasFPARMv8] in {
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
}

def : InstAlias<"strb $Rt, [$Rn, $offset]",
                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
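
// For example, "str w0, [x8, #1]" is not encodable as STRWui (the scaled
// offset must be a multiple of 4), so these 0-priority aliases let the
// assembler fall back to the STUR encoding.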

//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;

//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
let Predicates = [HasFPARMv8] in {
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
}

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;

// bf16 pre-index store
def : Pat<(pre_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpre FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
           simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
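
// Pre-indexed stores write the updated address back to the base register,
// e.g. "str w1, [x0, #4]!" stores to x0+4 and leaves x0 incremented by 4.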

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,  "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,  "str", post_store, i64>;
let Predicates = [HasFPARMv8] in {
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
}

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;

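// bf16 post-index store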
def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//

def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

/*
Aliases for when offset=0. Note that in contrast to LoadAcquire, which has a
$Rn of type GPR64sp0, we deliberately make $Rn of type GPR64sp and add an
alias for the immediate-#0 case. This is because the new STLR versions (from
the LRCPC3 extension) can have a non-zero immediate, so GPR64sp0 is no longer
appropriate (it parses and discards the optional zero). This is not the case
for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed:
their immediate values sit outside the [] brackets and thus are not accepted
by the GPR64sp0 parser.
*/
def STLRW0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW   GPR32:$Rt, GPR64sp:$Rn)>;
def STLRX0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX   GPR64:$Rt, GPR64sp:$Rn)>;
def STLRB0  : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB   GPR32:$Rt, GPR64sp:$Rn)>;
def STLRH0  : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH   GPR32:$Rt, GPR64sp:$Rn)>;
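
// With the aliases above, "stlr w0, [x1]" and "stlr w0, [x1, #0]" both
// assemble to the same STLRW encoding.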

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

let Predicates = [HasLOR] in {
  // v8.1a "Limited Order Region" extension load-acquire instructions
  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // v8.1a "Limited Order Region" extension store-release instructions
  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;

  // Aliases for when offset=0
  def STLLRW0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRW   GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRX0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRX   GPR64:$Rt, GPR64sp:$Rn)>;
  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]",  (STLLRB   GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",  (STLLRH   GPR32:$Rt, GPR64sp:$Rn)>;
}

// Armv9.6-a unprivileged load/store operations
let Predicates = [HasLSUI] in {
defm LDTXRW : LoadUnprivilegedLSUI<0b10, GPR32, "ldtxr">;
defm LDTXRX : LoadUnprivilegedLSUI<0b11, GPR64, "ldtxr">;

def : MnemonicAlias<"ldxr", "ldtxr">;

def LDATXRW : LoadExclusiveLSUI <0b10, 1, 1, GPR32, "ldatxr">;
def LDATXRX : LoadExclusiveLSUI <0b11, 1, 1, GPR64, "ldatxr">;

def : MnemonicAlias<"ldaxr", "ldatxr">;

defm STTXRW : StoreUnprivilegedLSUI<0b10, GPR32, "sttxr">;
defm STTXRX : StoreUnprivilegedLSUI<0b11, GPR64, "sttxr">;

def : MnemonicAlias<"stxr", "sttxr">;

def STLTXRW : StoreExclusiveLSUI<0b10, 0, 1, GPR32, "stltxr">;
def STLTXRX : StoreExclusiveLSUI<0b11, 0, 1, GPR64, "stltxr">;

def : MnemonicAlias<"stlxr", "stltxr">;
}

//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

let Predicates = [HasNEON, HasFPRCVT] in {
  defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
  defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
  defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
  defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
  defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
  defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
  defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
  defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
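
// Because FCVTZS/FCVTZU already saturate, a saturating conversion such as
// llvm.fptosi.sat selects to the single instruction with no extra clamping.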

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
  }
  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
  }
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;

multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
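
// Each instantiation above folds the explicit rounding node into the
// conversion's own rounding mode, e.g. (fp_to_sint (fceil f)) becomes a
// single FCVTPS (convert with round toward +infinity).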

let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lround f16:$Rn)),
            (FCVTASUWHr f16:$Rn)>;
  def : Pat<(i64 (any_lround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
  def : Pat<(i64 (any_llround f16:$Rn)),
            (FCVTASUXHr f16:$Rn)>;
}
def : Pat<(i32 (any_lround f32:$Rn)),
          (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),
          (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)),
          (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)),
          (FCVTASUXDr f64:$Rn)>;
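
// lround/llround round to nearest with ties away from zero, which matches
// FCVTAS's rounding exactly, so no separate rounding instruction is needed.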

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;

let Predicates = [HasNEON, HasFPRCVT] in {
  defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
  defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
}

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
          (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
          (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
          (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;

def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
          (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
          (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
          (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
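
// The fdiv patterns above fold a divide by a power of two into the scaled
// convert itself, e.g. (fdiv (scvtf w0), 16.0) becomes "scvtf s0, w0, #4".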

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
    Predicates = [HasFPARMv8] in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
    Sched<[WriteF]>;
}

// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
    Requires<[HasFullFP16]>;
let Predicates = [HasFPARMv8] in {
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
}

def : Pat<(bf16 fpimm0),
          (FMOVH0)>;

// Pattern for FP16 and BF16 immediates
let Predicates = [HasFullFP16] in {
  def : Pat<(f16 fpimm:$in),
            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;

  def : Pat<(bf16 fpimm:$in),
            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
}

//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
}

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264

// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
          (FRINT32ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
          (FRINT64ZDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
          (FRINT32XDr FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
          (FRINT64XDr FPR64:$Rn)>;

// Emitting strict_lrint as two instructions is valid because any exception
// will occur in exactly one of them: if the input is not an integer, the
// inexact exception is raised by the FRINTX but not by the FCVTZS, since
// the output of FRINTX is already an integer.
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lrint f16:$Rn)),
            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_lrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_llrint f16:$Rn)),
            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
}
def : Pat<(i32 (any_lrint f32:$Rn)),
          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
def : Pat<(i32 (any_lrint f64:$Rn)),
          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_lrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_lrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_llrint f32:$Rn)),
          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_llrint f64:$Rn)),
          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;

multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f16_suffix)
              FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
  }
  let Predicates = preds in {
  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f32_suffix)
              FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
            (!cast<Instruction>(inst # inst_f64_suffix)
              FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}

defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
                                          any_fmul>;
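
// This folds, e.g., (fmul s0, (extractelt v4f32:$v, 0)) into a plain FMUL of
// the vector's ssub subregister, avoiding an explicit lane-to-scalar copy.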

// Match reassociated forms of FNMUL.
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>,
          Requires<[HasFullFP16]>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;
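
// This is sound because (fneg a) * b == fneg (a * b): negation only flips
// the sign bit, which commutes with the multiply.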

def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

def : Pat<(fminnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
          (FMINNMDrr FPR64:$a, FPR64:$b)>;
def : Pat<(fmaxnum_ieee (f64 FPR64:$a), (f64 FPR64:$b)),
          (FMAXNMDrr FPR64:$a, FPR64:$b)>;
def : Pat<(f64 (fcanonicalize f64:$a)),
          (FMINNMDrr f64:$a, f64:$a)>;
def : Pat<(fminnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
          (FMINNMSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmaxnum_ieee (f32 FPR32:$a), (f32 FPR32:$b)),
          (FMAXNMSrr FPR32:$a, FPR32:$b)>;
def : Pat<(f32 (fcanonicalize f32:$a)),
          (FMINNMSrr f32:$a, f32:$a)>;
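
// FMINNM(x, x) returns x while quieting a signaling NaN, which is exactly
// the behaviour fcanonicalize requires.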

let Predicates = [HasFullFP16] in {
def : Pat<(fminnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
          (FMINNMHrr FPR16:$a, FPR16:$b)>;
def : Pat<(fmaxnum_ieee (f16 FPR16:$a), (f16 FPR16:$b)),
          (FMAXNMHrr FPR16:$a, FPR16:$b)>;
def : Pat<(f16 (fcanonicalize f16:$a)),
          (FMINNMHrr f16:$a, f16:$a)>;
}

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.

// First handle a + (-b)*c, i.e. a - b*c, which selects FMSUB:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now it's time for "(-a) + (-b)*c"

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

let Predicates = [HasFullFP16] in
def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
let Predicates = [HasFPARMv8] in
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                       (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
  def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
  def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
}
5404
5405// Pseudo instructions for Windows EH
5406//===----------------------------------------------------------------------===//
5407let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
5408    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
5409   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret bb)]>, Sched<[]>;
5410   let usesCustomInserter = 1 in
5411     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
5412                    Sched<[]>;
5413}
5414
5415// Pseudo instructions for homogeneous prolog/epilog
5416let isPseudo = 1 in {
5417  // Save CSRs in order, {FPOffset}
5418  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
5419  // Restore CSRs in order
5420  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
5421}

//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

let Predicates = [HasFullFP16] in {
  def : Pat<(bf16 fpimmbf16:$in),
            (FMOVHi (fpimm16XForm bf16:$in))>;
}
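
// Note: FMOV only writes a 16-bit pattern here, so a bf16 immediate whose
// encoding fits the 8-bit FP immediate field can reuse the half-precision
// FMOV form; fpimm16XForm converts the constant to that encoding.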

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                          AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
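
// This is safe because both operands are zero-extended into the wider type,
// so the subtraction cannot wrap: abs(zext(a) - zext(b)) is exactly the
// unsigned absolute difference UABDL computes.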

defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;

def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
          (CMLTv8i8rz V64:$Rn)>;
def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
          (CMLTv4i16rz V64:$Rn)>;
def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
          (CMLTv2i32rz V64:$Rn)>;
def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
          (CMLTv16i8rz V128:$Rn)>;
def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
          (CMLTv8i16rz V128:$Rn)>;
def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
          (CMLTv4i32rz V128:$Rn)>;
def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
          (CMLTv2i64rz V128:$Rn)>;
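
// An arithmetic shift right by (element width - 1) smears the sign bit across
// the whole lane, producing the same all-ones/all-zeros mask as a
// compare-less-than-zero, so it selects to CMLT #0.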

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                                (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
          (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
          (FCVTLv4i32 V128:$Rn)>;
def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
          (FCVTLv8i16 V128:$Rn)>;
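
// The extract_high patterns select the FCVTL2 variants, which convert the top
// half of the source register in place and so avoid a separate instruction to
// extract the high lanes.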

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
          (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
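
// The concat_vectors patterns select FCVTN2, which only writes the upper half
// of the destination; the INSERT_SUBREG ties the existing narrow result $Rd
// into the low half so it is preserved.
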
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        AArch64fcvtxnv>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVTZS/FCVTZU instructions saturate on out-of-range inputs, so
// the saturating fp-to-int nodes map directly onto them.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
  let Predicates = [HasFullFP16] in {
  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;

  def : Pat<(v4i16 (to_int_sat_gi v4f16:$Rn)),
            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
  def : Pat<(v8i16 (to_int_sat_gi v8f16:$Rn)),
            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
  }
  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;

  def : Pat<(v2i32 (to_int_sat_gi v2f32:$Rn)),
            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
  def : Pat<(v4i32 (to_int_sat_gi v4f32:$Rn)),
            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
  def : Pat<(v2i64 (to_int_sat_gi v2f64:$Rn)),
            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
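
// Because FCVTZS/FCVTZU saturate, a saturating fp-to-int of matching width
// needs no extra clamping code. The *_gi fragments are the GlobalISel forms
// of these nodes, which do not carry the saturation-width type operand.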

def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;

def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;

defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;

let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264

defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
let Predicates = [HasNEON] in {
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;
}

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
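
// Bitwise NOT is independent of lane size, so all vector types reuse the
// byte-granularity NOTv8i8/NOTv16i8 encodings.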

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", truncssat_s>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", truncssat_u>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", truncusat_u>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext, so it's easier to pull the patterns out of
// the instruction definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
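
// SHLL shifts each element left by exactly the source element width, so a
// widening extend followed by such a shift (e.g. shl (zext v8i8), #8) matches
// a single SHLL no matter which extension produced the high bits.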

// Select BSWAP vector instructions into REV instructions
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),
          (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))),
          (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>;
def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))),
          (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>;
def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))),
          (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>;
def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))),
          (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
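
// A vector bswap is a byte reversal within each lane, which is exactly what
// REV16/REV32/REV64 perform when the register is viewed as bytes.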

//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
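
// CMTST sets a lane to all-ones when (Rn & Rm) is non-zero, so cmtst(x, x)
// tests x != 0, i.e. the complement of a compare-equal-with-zero.
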
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;

let Predicates = [HasNEON] in {
def : Pat<(v2f64 (fminnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
          (v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
def : Pat<(v2f64 (fmaxnum_ieee (v2f64 V128:$Rn), (v2f64 V128:$Rm))),
          (v2f64 (FMAXNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rm)))>;
def : Pat<(v2f64 (fcanonicalize (v2f64 V128:$Rn))),
          (v2f64 (FMINNMv2f64 (v2f64 V128:$Rn), (v2f64 V128:$Rn)))>;
def : Pat<(v4f32 (fminnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
          (v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
def : Pat<(v4f32 (fmaxnum_ieee (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
          (v4f32 (FMAXNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rm)))>;
def : Pat<(v4f32 (fcanonicalize (v4f32 V128:$Rn))),
          (v4f32 (FMINNMv4f32 (v4f32 V128:$Rn), (v4f32 V128:$Rn)))>;
def : Pat<(v2f32 (fminnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
def : Pat<(v2f32 (fmaxnum_ieee (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (v2f32 (FMAXNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm)))>;
def : Pat<(v2f32 (fcanonicalize (v2f32 V64:$Rn))),
          (v2f32 (FMINNMv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rn)))>;
}

let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(v8f16 (fminnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
          (v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
def : Pat<(v8f16 (fmaxnum_ieee (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
          (v8f16 (FMAXNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm)))>;
def : Pat<(v8f16 (fcanonicalize (v8f16 V128:$Rn))),
          (v8f16 (FMINNMv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rn)))>;
def : Pat<(v4f16 (fminnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
          (v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
def : Pat<(v4f16 (fmaxnum_ieee (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
          (v4f16 (FMAXNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm)))>;
def : Pat<(v4f16 (fcanonicalize (v4f16 V64:$Rn))),
          (v4f16 (FMINNMv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rn)))>;
}
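
// FMINNM/FMAXNM return the numeric operand when the other operand is a quiet
// NaN, matching the fminnum_ieee/fmaxnum_ieee semantics. fcanonicalize is
// selected as FMINNM(x, x), which returns x for numeric inputs and quiets
// NaNs.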

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
            TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
            TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;

// MLA and MLS are generated by the MachineCombiner.
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;

defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturation patterns, beyond the intrinsic matches above.
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit">;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
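
// Which of BSL/BIT/BIF the BSP pseudo becomes depends on which source operand
// the register allocator assigned to the destination; all three compute the
// same (LHS & MHS) | (~LHS & RHS) select with permuted operand roles.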

// The following SetCC patterns are used for GlobalISel only.
multiclass SelectSetCC<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rn), (v8i8 V64:$Rm)))>;
  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
            (v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rn), (v16i8 V128:$Rm)))>;
  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rn), (v4i16 V64:$Rm)))>;
  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
            (v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rn), (v8i16 V128:$Rm)))>;
  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rn), (v2i32 V64:$Rm)))>;
  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
            (v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rn), (v4i32 V128:$Rm)))>;
  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))),
            (v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rn), (v2i64 V128:$Rm)))>;
}

defm : SelectSetCC<seteq, "CMEQ">;
defm : SelectSetCC<setgt, "CMGT">;
defm : SelectSetCC<setge, "CMGE">;
defm : SelectSetCC<setugt, "CMHI">;
defm : SelectSetCC<setuge, "CMHS">;

multiclass SelectSetCCSwapOperands<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rm), (v8i8 V64:$Rn)))>;
  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
            (v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rm), (v16i8 V128:$Rn)))>;
  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rm), (v4i16 V64:$Rn)))>;
  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
            (v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rm), (v8i16 V128:$Rn)))>;
  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rm), (v2i32 V64:$Rn)))>;
  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
            (v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rm), (v4i32 V128:$Rn)))>;
  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))),
            (v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rm), (v2i64 V128:$Rn)))>;
}

defm : SelectSetCCSwapOperands<setlt, "CMGT">;
defm : SelectSetCCSwapOperands<setle, "CMGE">;
defm : SelectSetCCSwapOperands<setult, "CMHI">;
defm : SelectSetCCSwapOperands<setule, "CMHS">;
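
// There are no two-register less-than/less-or-equal vector compares; those
// setcc forms are handled by swapping the operands of the greater-than
// instructions above.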

multiclass SelectSetCCZeroRHS<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), immAllZerosV)),
            (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), immAllZerosV)),
            (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), immAllZerosV)),
            (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), immAllZerosV)),
            (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), immAllZerosV)),
            (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), immAllZerosV)),
            (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), immAllZerosV)),
            (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
}

defm : SelectSetCCZeroRHS<seteq, "CMEQ">;
defm : SelectSetCCZeroRHS<setgt, "CMGT">;
defm : SelectSetCCZeroRHS<setge, "CMGE">;
defm : SelectSetCCZeroRHS<setlt, "CMLT">;
defm : SelectSetCCZeroRHS<setle, "CMLE">;

multiclass SelectSetCCZeroLHS<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag immAllZerosV, (v8i8 V64:$Rn))),
            (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
  def : Pat<(v16i8 (InFrag immAllZerosV, (v16i8 V128:$Rn))),
            (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
  def : Pat<(v4i16 (InFrag immAllZerosV, (v4i16 V64:$Rn))),
            (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
  def : Pat<(v8i16 (InFrag immAllZerosV, (v8i16 V128:$Rn))),
            (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
  def : Pat<(v2i32 (InFrag immAllZerosV, (v2i32 V64:$Rn))),
            (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
  def : Pat<(v4i32 (InFrag immAllZerosV, (v4i32 V128:$Rn))),
            (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
  def : Pat<(v2i64 (InFrag immAllZerosV, (v2i64 V128:$Rn))),
            (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
}

defm : SelectSetCCZeroLHS<seteq, "CMEQ">;
defm : SelectSetCCZeroLHS<setgt, "CMLT">;
defm : SelectSetCCZeroLHS<setge, "CMLE">;
defm : SelectSetCCZeroLHS<setlt, "CMGT">;
defm : SelectSetCCZeroLHS<setle, "CMGE">;
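
// Comparisons against a zero operand fold to the compare-with-zero encodings;
// with zero on the LHS the comparison sense inverts, e.g. setgt(0, x) is
// x < 0 and selects CMLT.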

let Predicates = [HasNEON] in {
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
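
// The trailing integer is the InstAlias emit priority: the .8b/.16b forms
// (priority 1) are preferred when printing a register-to-register mov, while
// the other element arrangements are accepted but parse-only (priority 0).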

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
                                          int_aarch64_neon_fmulx,
                                          [HasNEONandIsStreamingSafe]>;

let Predicates = [HasNEON] in {
def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
}
let Predicates = [HasFPARMv8] in {
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
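
// A scalar saturating add/sub whose second operand is an SQDMULL product
// folds into the accumulating SQDMLAL/SQDMLSL forms.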

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                    int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

// Round FP64 to BF16.
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
          (BFCVT (FCVTXNv1i64 $Rn))>;
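
// FCVTXN narrows to single precision with round-to-odd, which prevents double
// rounding when BFCVT then narrows to bf16, giving a correctly rounded
// f64 -> bf16 conversion.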

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

6383// Some float -> int -> float conversion patterns for which we want to keep the
6384// int values in FP registers using the corresponding NEON instructions to
6385// avoid more costly int <-> fp register transfers.
6386let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
6387def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
6388          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
6389def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
6390          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
6391def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
6392          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
6393def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
6394          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
6395
6396let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
6397def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
6398          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
6399def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
6400          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
6401}
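
// For example (an illustrative sketch; register numbers are arbitrary), IR
// such as
//   %i = fptosi double %d to i64
//   %f = sitofp i64 %i to double
// selects the FP-register forms
//   fcvtzs d0, d0
//   scvtf  d0, d0
// instead of round-tripping through a GPR (fcvtzs x8, d0; scvtf d0, x8).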
6402
6403// An int -> float conversion of the value in lane 0 of a SIMD vector should
6404// use the correct CVTF variant to avoid costly FPR <-> GPR register transfers.
6405def : Pat<(f32 (sint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
6406          (SCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;
6407
6408def : Pat<(f32 (uint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
6409          (UCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;
6410
6411def : Pat<(f64 (sint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
6412          (SCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;
6413
6414def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
6415          (UCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;
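
// For example (illustrative), for
//   %e = extractelement <4 x i32> %v, i64 0
//   %f = sitofp i32 %e to float
// lane 0 already occupies the low 32 bits (ssub) of the Q register, so a
// single
//   scvtf s0, s0
// suffices, with no fmov to a GPR and back.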
6416
6417// fp16: an integer extraction from a vector must be at least 32 bits to be legal.
6418// The actual extraction result is then an in-reg sign-extension of the lower 16 bits.
6419let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
6420def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
6421                (v8i16 FPR128:$Rn), (i64 0))), i16)))),
6422          (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
6423
6424// An unsigned 32-bit extracted element is truncated to 16 bits using AND.
6425def : Pat<(f16 (uint_to_fp (i32 (and (i32 (vector_extract
6426                (v8i16 FPR128:$Rn), (i64 0))), (i32 65535))))),
6427          (UCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
6428}
6429
6430// If an integer is about to be converted to a floating point value,
6431// just load it on the floating point unit.
6432// Here are the patterns for 8- and 16-bit integers to float.
6433// 8-bit -> float.
6434multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
6435                             SDPatternOperator loadop, Instruction UCVTF,
6436                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
6437                             SubRegIndex sub> {
6438  def : Pat<(DstTy (uint_to_fp (SrcTy
6439                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
6440                                      ro.Wext:$extend))))),
6441           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
6442                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
6443                                 sub))>;
6444
6445  def : Pat<(DstTy (uint_to_fp (SrcTy
6446                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
6447                                      ro.Wext:$extend))))),
6448           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
6449                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
6450                                 sub))>;
6451}
6452
6453defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
6454                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
6455def : Pat <(f32 (uint_to_fp (i32
6456               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
6457           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
6458                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
6459def : Pat <(f32 (uint_to_fp (i32
6460                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
6461           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
6462                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
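
// For example (illustrative), for
//   %b = load i8, ptr %p
//   %f = uitofp i8 %b to float
// the byte is loaded straight into the FP/SIMD register file:
//   ldr   b0, [x0]
//   ucvtf s0, s0
// The FP load zeroes the upper bits of the register, so s0 already holds the
// zero-extended value.
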
6463// 16-bit -> float.
6464defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
6465                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
6466def : Pat <(f32 (uint_to_fp (i32
6467                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
6468           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
6469                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
6470def : Pat <(f32 (uint_to_fp (i32
6471                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
6472           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
6473                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
6474// 32-bit -> float is handled in the target-specific DAG combine
6475// performIntToFpCombine.
6476// 64-bit integer to 32-bit floating point is not possible with
6477// UCVTF on floating point registers (both source and destination
6478// must have the same size).
6479
6480// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
6481// 8-bit -> double.
6482defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
6483                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
6484def : Pat <(f64 (uint_to_fp (i32
6485                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
6486           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6487                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
6488def : Pat <(f64 (uint_to_fp (i32
6489                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
6490           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6491                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
6492// 16-bit -> double.
6493defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
6494                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
6495def : Pat <(f64 (uint_to_fp (i32
6496                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
6497           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6498                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
6499def : Pat <(f64 (uint_to_fp (i32
6500                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
6501           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6502                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
6503// 32-bit -> double.
6504defm : UIntToFPROLoadPat<f64, i32, load,
6505                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
6506def : Pat <(f64 (uint_to_fp (i32
6507                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
6508           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6509                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
6510def : Pat <(f64 (uint_to_fp (i32
6511                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
6512           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
6513                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
6514// 64-bit -> double is handled in the target-specific DAG combine
6515// performIntToFpCombine.
6516} // let Predicates = [HasNEONandIsSME2p2StreamingSafe]
6517
6518//===----------------------------------------------------------------------===//
6519// Advanced SIMD three different-sized vector instructions.
6520//===----------------------------------------------------------------------===//
6521
6522defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
6523defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
6524defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
6525defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
6526defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
6527defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
6528                                             AArch64sabd>;
6529defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
6530                                          AArch64sabd>;
6531defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
6532            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
6533defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
6534                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
6535defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
6536    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
6537defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
6538    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
6539defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
6540defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
6541                                               int_aarch64_neon_sqadd>;
6542defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
6543                                               int_aarch64_neon_sqsub>;
6544defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
6545                                     int_aarch64_neon_sqdmull>;
6546defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
6547                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
6548defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
6549                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
6550defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
6551                                              AArch64uabd>;
6552defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
6553                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
6554defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
6555                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
6556defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
6557    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
6558defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
6559    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
6560defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
6561defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
6562                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
6563defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
6564                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
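
// For example (an illustrative sketch; register numbers are arbitrary), the
// UMLAL TriOpFrag above matches add(%acc, umull(zext %a, zext %b)) on
// <8 x i16> and selects
//   umlal v0.8h, v1.8b, v2.8b
// i.e. a widening 8-bit multiply accumulated into 16-bit lanes.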
6565
6566// Additional patterns for [SU]ML[AS]L
6567multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
6568  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
6569  def : Pat<(v4i16 (opnode
6570                    V64:$Ra,
6571                    (v4i16 (extract_subvector
6572                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
6573                            (i64 0))))),
6574             (EXTRACT_SUBREG (v8i16 (INST8B
6575                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
6576                                     V64:$Rn, V64:$Rm)), dsub)>;
6577  def : Pat<(v2i32 (opnode
6578                    V64:$Ra,
6579                    (v2i32 (extract_subvector
6580                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
6581                            (i64 0))))),
6582             (EXTRACT_SUBREG (v4i32 (INST4H
6583                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
6584                                     V64:$Rn, V64:$Rm)), dsub)>;
6585  def : Pat<(v1i64 (opnode
6586                    V64:$Ra,
6587                    (v1i64 (extract_subvector
6588                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
6589                            (i64 0))))),
6590             (EXTRACT_SUBREG (v2i64 (INST2S
6591                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
6592                                     V64:$Rn, V64:$Rm)), dsub)>;
6593}
6594
6595defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
6596     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
6597defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
6598     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
6599defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
6600     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
6601defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
6602     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
6603
6604
6605multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
6606  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
6607                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
6608            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
6609  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
6610                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
6611            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
6612  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
6613                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
6614            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
6615
6616  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
6617                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
6618            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
6619  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
6620                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
6621            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
6622  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
6623                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
6624            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
6625}
6626
6627defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
6628defm : Neon_addl_extract_patterns<add, sext, "SADD">;
6629defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
6630defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
6631
6632// CodeGen patterns for addhn and subhn instructions, which can actually be
6633// written in LLVM IR without too much difficulty.
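
// For example (a sketch), addhn v0.8b, v1.8h, v2.8h corresponds to
//   %s = add <8 x i16> %a, %b
//   %h = lshr <8 x i16> %s, splat (i16 8)
//   %r = trunc <8 x i16> %h to <8 x i8>
// i.e. the high half of each 16-bit sum.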
6634
6635// Prioritize ADDHN and SUBHN over UZP2.
6636let AddedComplexity = 10 in {
6637
6638// ADDHN
6639def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
6640          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6641def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6642                                           (i32 16))))),
6643          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6644def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6645                                           (i32 32))))),
6646          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6647def : Pat<(concat_vectors (v8i8 V64:$Rd),
6648                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6649                                                    (i32 8))))),
6650          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6651                            V128:$Rn, V128:$Rm)>;
6652def : Pat<(concat_vectors (v4i16 V64:$Rd),
6653                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6654                                                    (i32 16))))),
6655          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6656                            V128:$Rn, V128:$Rm)>;
6657def : Pat<(concat_vectors (v2i32 V64:$Rd),
6658                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6659                                                    (i32 32))))),
6660          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6661                            V128:$Rn, V128:$Rm)>;
6662
6663// SUBHN
6664def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
6665          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6666def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6667                                           (i32 16))))),
6668          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6669def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6670                                           (i32 32))))),
6671          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6672def : Pat<(concat_vectors (v8i8 V64:$Rd),
6673                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6674                                                    (i32 8))))),
6675          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6676                            V128:$Rn, V128:$Rm)>;
6677def : Pat<(concat_vectors (v4i16 V64:$Rd),
6678                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6679                                                    (i32 16))))),
6680          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6681                            V128:$Rn, V128:$Rm)>;
6682def : Pat<(concat_vectors (v2i32 V64:$Rd),
6683                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6684                                                    (i32 32))))),
6685          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6686                            V128:$Rn, V128:$Rm)>;
6687
6688} // AddedComplexity = 10
6689
6690//----------------------------------------------------------------------------
6691// AdvSIMD bitwise extract from vector instruction.
6692//----------------------------------------------------------------------------
6693
6694defm EXT : SIMDBitwiseExtract<"ext">;
6695
6696def AdjustExtImm : SDNodeXForm<imm, [{
6697  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
6698}]>;
6699multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
6700  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
6701            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
6702  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
6703            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
6704  // We use EXT to handle extract_subvector copying the upper 64 bits of a
6705  // 128-bit vector.
6706  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
6707            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
6708  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
6709  // single 128-bit EXT.
6710  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
6711                              (extract_subvector V128:$Rn, (i64 N)),
6712                              (i32 imm:$imm))),
6713            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
6714  // A 64-bit EXT of the high half of a 128-bit register can be done using a
6715  // 128-bit EXT of the whole register with an adjustment to the immediate. The
6716  // top half of the other operand will be unset, but that doesn't matter as it
6717  // will not be used.
6718  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
6719                              V64:$Rm,
6720                              (i32 imm:$imm))),
6721            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
6722                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
6723                                      (AdjustExtImm imm:$imm)), dsub)>;
6724}
6725
6726defm : ExtPat<v8i8, v16i8, 8>;
6727defm : ExtPat<v4i16, v8i16, 4>;
6728defm : ExtPat<v4f16, v8f16, 4>;
6729defm : ExtPat<v4bf16, v8bf16, 4>;
6730defm : ExtPat<v2i32, v4i32, 2>;
6731defm : ExtPat<v2f32, v4f32, 2>;
6732defm : ExtPat<v1i64, v2i64, 1>;
6733defm : ExtPat<v1f64, v2f64, 1>;
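
// For example (illustrative), a 64-bit EXT whose first operand is the high
// half of v1 cannot be encoded directly, so the last pattern above emits
//   ext v0.16b, v1.16b, v2.16b, #11    // AdjustExtImm: 8 + original #3
// and only the low 64 bits (dsub) of the result are used.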
6734
6735//----------------------------------------------------------------------------
6736// AdvSIMD zip vector
6737//----------------------------------------------------------------------------
6738
6739defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
6740defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
6741defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
6742defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
6743defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
6744defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
6745
6746def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
6747                                         [(trunc node:$op0),
6748                                          (assertzext (trunc node:$op0)),
6749                                          (assertsext (trunc node:$op0))]>;
6750
6751// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
6752// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
6753// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
6754class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
6755  : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
6756                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
6757        (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
6758def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
6759def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
6760def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;
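
// For example (illustrative), concat(trunc(<8 x i16> %a), trunc(<8 x i16> %b))
// becomes a single
//   uzp1 v0.16b, v1.16b, v2.16b
// which keeps the even-numbered (low) byte of each halfword.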
6761
6762// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
6763// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
6764// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
6765class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
6766                                         ValueType Ty>
6767  : Pat<(Ty (trunc_optional_assert_ext
6768                    (ConcatTy (concat_vectors
6769                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
6770                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
6771        (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
6772def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
6773def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;
6774
6775def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
6776          (UZP1v8i8 V64:$Vn, V64:$Vm)>;
6777def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))),
6778          (UZP1v4i16 V64:$Vn, V64:$Vm)>;
6779
6780def : Pat<(v16i8 (concat_vectors
6781                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
6782                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
6783          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
6784def : Pat<(v8i16 (concat_vectors
6785                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
6786                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
6787          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
6788def : Pat<(v4i32 (concat_vectors
6789                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
6790                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
6791          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
6792
6793//----------------------------------------------------------------------------
6794// AdvSIMD TBL/TBX instructions
6795//----------------------------------------------------------------------------
6796
6797defm TBL : SIMDTableLookup<    0, "tbl">;
6798defm TBX : SIMDTableLookupTied<1, "tbx">;
6799
6800def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
6801          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
6802def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
6803          (TBLv16i8One V128:$Ri, V128:$Rn)>;
6804
6805def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
6806                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
6807          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
6808def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
6809                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
6810          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
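
// For example (illustrative),
//   tbl v0.8b, { v1.16b }, v2.8b
// looks up one table byte per index byte in v2; out-of-range indices produce
// 0 for TBL, whereas TBX leaves the corresponding destination byte unchanged.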
6811
6812//----------------------------------------------------------------------------
6813// AdvSIMD LUT instructions
6814//----------------------------------------------------------------------------
6815let Predicates = [HasLUT] in {
6816  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
6817  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
6818
6819  multiclass Luti2_patterns<Instruction Instr, ValueType VT64, ValueType VT128> {
6820    def : Pat<(VT128 (int_aarch64_neon_vluti2_lane VT64:$Rn,
6821                   v8i8:$Rm, i32:$idx)),
6822              (Instr (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub),
6823              (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),  VectorIndexS32b_timm:$idx)>;
6824    def : Pat<(VT128 (int_aarch64_neon_vluti2_laneq VT64:$Rn,
6825                   v16i8:$Rm, i32:$idx)),
6826              (Instr (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub),
6827              V128:$Rm,  VectorIndexS32b_timm:$idx)>;
6828    def : Pat<(VT128 (int_aarch64_neon_vluti2_lane VT128:$Rn,
6829                   v8i8:$Rm, i32:$idx)),
6830              (Instr V128:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
6831              VectorIndexS32b_timm:$idx)>;
6832    def : Pat<(VT128 (int_aarch64_neon_vluti2_laneq VT128:$Rn,
6833                   v16i8:$Rm, i32:$idx)),
6834              (Instr V128:$Rn, V128:$Rm,  VectorIndexS32b_timm:$idx)>;
6835  }
6836
6837  defm : Luti2_patterns<LUT2_B, v8i8, v16i8>;
6838  defm : Luti2_patterns<LUT2_H, v4i16, v8i16>;
6839  defm : Luti2_patterns<LUT2_H, v4f16, v8f16>;
6840  defm : Luti2_patterns<LUT2_H, v4bf16, v8bf16>;
6841
6842  def : Pat<(v16i8 (int_aarch64_neon_vluti4q_laneq v16i8:$Rn,
6843                    v16i8:$Rm, i32:$idx)),
6844            (LUT4_B VecListOne16b:$Rn, V128:$Rm,  VectorIndexD32b_timm:$idx)>;
6845  def : Pat<(v16i8 (int_aarch64_neon_vluti4q_lane v16i8:$Rn,
6846                    v8i8:$Rm, i32:$idx)),
6847            (LUT4_B VecListOne16b:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),  VectorIndexD32b_timm:$idx)>;
6848
6849  foreach VT = [v8i16, v8f16, v8bf16] in {
6850    def : Pat<(VT (int_aarch64_neon_vluti4q_laneq_x2 VT:$Rn1,
6851                   VT:$Rn2, v16i8:$Rm, i32:$idx)),
6852              (LUT4_H (REG_SEQUENCE QQ, VecListOne8h:$Rn1, qsub0, VecListOne8h:$Rn2, qsub1), V128:$Rm,  VectorIndexS32b_timm:$idx)>;
6853    def : Pat<(VT (int_aarch64_neon_vluti4q_lane_x2 VT:$Rn1,
6854                   VT:$Rn2, v8i8:$Rm, i32:$idx)),
6855              (LUT4_H (REG_SEQUENCE QQ, VecListOne8h:$Rn1, qsub0, VecListOne8h:$Rn2, qsub1),
6856                      (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),  VectorIndexS32b_timm:$idx)>;
6857  }
6858}
6859
6860//----------------------------------------------------------------------------
6861// AdvSIMD scalar DUP instruction
6862//----------------------------------------------------------------------------
6863
6864defm DUP : SIMDScalarDUP<"mov">;
6865
6866//----------------------------------------------------------------------------
6867// AdvSIMD scalar pairwise instructions
6868//----------------------------------------------------------------------------
6869
6870defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
6871defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
6872defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
6873defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
6874defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
6875defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
6876
6877// Only the lower half of the result of the inner FADDP is used in the patterns
6878// below, so the second operand does not matter. Re-use the first input
6879// operand, so no additional dependencies need to be introduced.
6880let Predicates = [HasFullFP16] in {
6881def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
6882            (FADDPv2i16p
6883              (EXTRACT_SUBREG
6884                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
6885               dsub))>;
6886def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
6887          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
6888}
6889def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
6890          (FADDPv2i32p
6891            (EXTRACT_SUBREG
6892              (FADDPv4f32 V128:$Rn, V128:$Rn),
6893             dsub))>;
6894def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
6895          (FADDPv2i32p V64:$Rn)>;
6896def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
6897          (FADDPv2i64p V128:$Rn)>;
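
// For example (illustrative), a fast-math v4f32 fadd reduction becomes
//   faddp v0.4s, v0.4s, v0.4s   // lanes 0/1 now hold a0+a1 and a2+a3
//   faddp s0, v0.2s             // s0 = (a0+a1) + (a2+a3)
// with the input reused as the second operand, as described above.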
6898
6899def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
6900          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
6901def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
6902          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
6903def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
6904          (FADDPv2i32p V64:$Rn)>;
6905def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
6906          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
6907def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
6908          (FADDPv2i64p V128:$Rn)>;
6909def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
6910          (FMAXNMPv2i32p V64:$Rn)>;
6911def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
6912          (FMAXNMPv2i64p V128:$Rn)>;
6913def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
6914          (FMAXPv2i32p V64:$Rn)>;
6915def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
6916          (FMAXPv2i64p V128:$Rn)>;
6917def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
6918          (FMINNMPv2i32p V64:$Rn)>;
6919def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
6920          (FMINNMPv2i64p V128:$Rn)>;
6921def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
6922          (FMINPv2i32p V64:$Rn)>;
6923def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
6924          (FMINPv2i64p V128:$Rn)>;
6925
6926//----------------------------------------------------------------------------
6927// AdvSIMD INS/DUP instructions
6928//----------------------------------------------------------------------------
6929
6930def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
6931def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
6932def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
6933def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
6934def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
6935def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
6936def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
6937
6938def DUPv2i64lane : SIMDDup64FromElement;
6939def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
6940def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
6941def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
6942def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
6943def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
6944def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
6945
6946// DUP from a 64-bit register to a 64-bit register is just a copy
6947def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
6948          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
6949def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
6950          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
6951
6952def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
6953          (v2f32 (DUPv2i32lane
6954            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
6955            (i64 0)))>;
6956def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
6957          (v4f32 (DUPv4i32lane
6958            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
6959            (i64 0)))>;
6960def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
6961          (v2f64 (DUPv2i64lane
6962            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
6963            (i64 0)))>;
6964def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
6965          (v4f16 (DUPv4i16lane
6966            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6967            (i64 0)))>;
6968def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
6969          (v4bf16 (DUPv4i16lane
6970            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6971            (i64 0)))>;
6972def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
6973          (v8f16 (DUPv8i16lane
6974            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6975            (i64 0)))>;
6976def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
6977          (v8bf16 (DUPv8i16lane
6978            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6979            (i64 0)))>;
6980
6981def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
6982          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
6983def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
6984          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
6985
6986def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
6987          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
6988def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
6989          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
6990
6991def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
6992          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
6993def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
6994         (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
6995def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
6996          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
6997
6998// Also covers DUP (truncate i64 to i32)
6999def : Pat<(v2i32 (AArch64dup (i32 (extractelt (v4i32 V128:$Rn), imm:$idx)))),
7000          (DUPv2i32lane V128:$Rn, imm:$idx)>;
7001def : Pat<(v4i32 (AArch64dup (i32 (extractelt (v4i32 V128:$Rn), imm:$idx)))),
7002          (DUPv4i32lane V128:$Rn, imm:$idx)>;
7003
7004// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
7005// instruction even if the types don't match: we just have to remap the lane
7006// carefully. N.b. this trick only applies to truncations.
7007def VecIndex_x2 : SDNodeXForm<imm, [{
7008  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
7009}]>;
7010def VecIndex_x4 : SDNodeXForm<imm, [{
7011  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
7012}]>;
7013def VecIndex_x8 : SDNodeXForm<imm, [{
7014  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
7015}]>;
7016
7017class DUPWithTruncPat<ValueType ResVT, ValueType SrcVT, ValueType ScalVT,
7018           Instruction DUP, SDNodeXForm IdxXFORM>
7019  : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (SrcVT V128:$Rn), imm:$idx)))),
7020        (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
7021
7022// DUP (truncate i16 to i8)
7023def : DUPWithTruncPat<v8i8,  v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
7024def : DUPWithTruncPat<v16i8, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
7025// DUP (truncate i32/i64 to i8)
7026def : DUPWithTruncPat<v8i8,  v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
7027def : DUPWithTruncPat<v16i8, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
7028// DUP (truncate i32/i64 to i16)
7029def : DUPWithTruncPat<v4i16, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
7030def : DUPWithTruncPat<v8i16, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
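
// For example (illustrative), duplicating the truncated 16-bit lane 3 of a
// v8i16 as bytes becomes
//   dup v0.8b, v1.b[6]    // VecIndex_x2: byte 6 is the low byte of h lane 3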
7031
7032// SMOV and UMOV definitions, with some extra patterns for convenience
7033defm SMOV : SMov;
7034defm UMOV : UMov;
7035
7036def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
7037          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
7038def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
7039          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
7040def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
7041          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
7042def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
7043          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
7046def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
7047          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
7048
7049def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
7050            VectorIndexB:$idx)))), i8),
7051          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
7052def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
7053            VectorIndexH:$idx)))), i16),
7054          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
7055
7056// Extracting i8 or i16 elements will have the zero-extend transformed to
7057// an 'and' mask by type legalization since neither i8 nor i16 are legal types
7058// for AArch64. Match these patterns here since UMOV already zeroes out the high
7059// bits of the destination register.
7060def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
7061               (i32 0xff)),
7062          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
7063def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
7064               (i32 0xffff)),
7065          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
7066
7067def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
7068            VectorIndexB:$idx)))), (i64 0xff))),
7069          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
7070def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
7071            VectorIndexH:$idx)))), (i64 0xffff))),
7072          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
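
// For example (illustrative),
//   umov w0, v0.h[2]
// already zero-extends halfword 2 into w0, so the 'and' with 0xffff that type
// legalization produced folds away entirely.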
7073
7074defm INS : SIMDIns;
7075
7076def : Pat<(v16i8 (vec_ins_or_scal_vec GPR32:$Rn)),
7077          (SUBREG_TO_REG (i32 0),
7078                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
7079def : Pat<(v8i8 (vec_ins_or_scal_vec GPR32:$Rn)),
7080          (SUBREG_TO_REG (i32 0),
7081                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
7082
7083// The top bits will be zero from the FMOVWSr
7084def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
7085          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
7086
7087def : Pat<(v8i16 (vec_ins_or_scal_vec GPR32:$Rn)),
7088          (SUBREG_TO_REG (i32 0),
7089                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
7090def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
7091          (SUBREG_TO_REG (i32 0),
7092                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
7093
7094def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
7095          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
7096def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
7097          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
7098
7099def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
7100          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
7101def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
7102          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
7103
7104def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
7105            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
7106                                  (i32 FPR32:$Rn), ssub))>;
7107def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
7108            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
7109                                  (i32 FPR32:$Rn), ssub))>;
7110def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 FPR64:$Rn))),
7111            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
7112                                  (i64 FPR64:$Rn), dsub))>;
7113
7124def : Pat<(v4f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
7125          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
7126def : Pat<(v2f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
7127          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
7128
7129def : Pat<(v2f64 (vec_ins_or_scal_vec (f64 FPR64:$Rn))),
7130          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
7131
7132def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
7133            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
7134          (EXTRACT_SUBREG
7135            (INSvi16lane
7136              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7137              VectorIndexS:$imm,
7138              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
7139              (i64 0)),
7140            dsub)>;
7141
7142def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
7143          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
7144def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
7145          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
7146def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
7147          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
7148def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
7149          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
7150def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
7151          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
7152
7153def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
7154            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
7155          (INSvi16lane
7156            V128:$Rn, VectorIndexH:$imm,
7157            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
7158            (i64 0))>;
7159
7160def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
7161            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
7162          (EXTRACT_SUBREG
7163            (INSvi16lane
7164              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7165              VectorIndexS:$imm,
7166              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
7167              (i64 0)),
7168            dsub)>;
7169
7170def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
7171            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
7172          (INSvi16lane
7173            V128:$Rn, VectorIndexH:$imm,
7174            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
7175            (i64 0))>;
7176
7177def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
7178            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
7179          (EXTRACT_SUBREG
7180            (INSvi32lane
7181              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7182              VectorIndexS:$imm,
7183              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
7184              (i64 0)),
7185            dsub)>;
7186def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
7187            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
7188          (INSvi32lane
7189            V128:$Rn, VectorIndexS:$imm,
7190            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
7191            (i64 0))>;
7192def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
7193            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
7194          (INSvi64lane
7195            V128:$Rn, VectorIndexD:$imm,
7196            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
7197            (i64 0))>;
7198
7199def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
7200          (EXTRACT_SUBREG
7201            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7202                        VectorIndexS:$imm, GPR32:$Rm),
7203            dsub)>;
7204def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
7205          (EXTRACT_SUBREG
7206            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7207                        VectorIndexH:$imm, GPR32:$Rm),
7208            dsub)>;
7209def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
7210          (EXTRACT_SUBREG
7211            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7212                       VectorIndexB:$imm, GPR32:$Rm),
7213            dsub)>;
7214
7215def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
7216          (EXTRACT_SUBREG
7217            (INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
7218                       VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)),
7219            dsub)>;
7220def : Pat<(v16i8 (vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
7221          (INSvi8lane V128:$Rn, VectorIndexB:$imm,
7222             (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>;
7223
7224// Copy an element at a constant index in one vector into a constant indexed
7225// element of another.
7226// FIXME: refactor to a shared class/def parameterized on vector type, vector
7227// index type and INS extension.
7228def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
7229                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
7230                   VectorIndexB:$idx2)),
7231          (v16i8 (INSvi8lane
7232                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
7233          )>;
7234def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
7235                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
7236                   VectorIndexH:$idx2)),
7237          (v8i16 (INSvi16lane
7238                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
7239          )>;
7240def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
7241                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
7242                   VectorIndexS:$idx2)),
7243          (v4i32 (INSvi32lane
7244                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
7245          )>;
7246def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
7247                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
7248                   VectorIndexD:$idx2)),
7249          (v2i64 (INSvi64lane
7250                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
7251          )>;
7252
7253// Move elements between vectors
7254multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE,
7255                                ValueType VTScal, Operand SVEIdxTy, Instruction INS> {
7256  // Extracting from the lowest 128 bits of an SVE vector
7257  def : Pat<(VT128 (vector_insert VT128:$Rn,
7258                      (VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
7259                      (i64 imm:$Immd))),
7260            (INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn)>;
7261
7262  def : Pat<(VT64 (vector_insert VT64:$Rn,
7263                      (VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
7264                      (i64 imm:$Immd))),
7265            (EXTRACT_SUBREG
7266                (INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
7267                     (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn),
7268                 dsub)>;
7269  // Extracting from another NEON vector
7270  def : Pat<(VT128 (vector_insert V128:$src,
7271                        (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
7272                        (i64 imm:$Immd))),
7273            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
7274
7275  def : Pat<(VT128 (vector_insert V128:$src,
7276                        (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
7277                        (i64 imm:$Immd))),
7278            (INS V128:$src, imm:$Immd,
7279                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
7280
7281  def : Pat<(VT64 (vector_insert V64:$src,
7282                        (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
7283                        (i64 imm:$Immd))),
7284            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
7285                                 imm:$Immd, V128:$Rn, imm:$Immn),
7286                            dsub)>;
7287
7288  def : Pat<(VT64 (vector_insert V64:$src,
7289                        (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
7290                        (i64 imm:$Immd))),
7291            (EXTRACT_SUBREG
7292                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
7293                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
7294                dsub)>;
7295}
7296
7297defm : Neon_INS_elt_pattern<v8f16,  v4f16,  nxv8f16,  f16,  VectorIndexH, INSvi16lane>;
7298defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
7299defm : Neon_INS_elt_pattern<v4f32,  v2f32,  nxv4f32,  f32,  VectorIndexS, INSvi32lane>;
7300defm : Neon_INS_elt_pattern<v2f64,  v1f64,  nxv2f64,  f64,  VectorIndexD, INSvi64lane>;
7301
7302defm : Neon_INS_elt_pattern<v16i8,  v8i8,   nxv16i8,  i32,  VectorIndexB, INSvi8lane>;
7303defm : Neon_INS_elt_pattern<v8i16,  v4i16,  nxv8i16,  i32,  VectorIndexH, INSvi16lane>;
7304defm : Neon_INS_elt_pattern<v4i32,  v2i32,  nxv4i32,  i32,  VectorIndexS, INSvi32lane>;
7305defm : Neon_INS_elt_pattern<v2i64,  v1i64,  nxv2i64,  i64,  VectorIndexD, INSvi64lane>;
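
// For example (illustrative), moving element 1 of v1.4s into element 2 of
// v0.4s is a single element copy:
//   mov v0.s[2], v1.s[1]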
7306
7307// Insert from bitcast
7308// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
7309def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
7310          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
7311def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
7312          (EXTRACT_SUBREG
7313            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
7314                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
7315            dsub)>;
7316def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
7317          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
7318
7319// bitcast of an extract
7320// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
7321def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
7322          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
7323def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
7324          (EXTRACT_SUBREG V128:$src, ssub)>;
7325def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
7326          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
7327def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
7328          (EXTRACT_SUBREG V128:$src, dsub)>;
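
// For example (illustrative), f32 bitcast(extractelement(<4 x i32> %v, 0))
// needs no instruction at all (lane 0 is already the ssub subregister), while
// a non-zero lane becomes a single mov v0.s[0], v1.s[imm].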
7329
7330// Floating point vector extractions are codegen'd as either a sequence of
7331// subregister extractions, or a MOV (aka DUP here) if the lane number is
7332// anything other than zero.
7333def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
7334          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
7335def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
7336          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
7337def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
7338          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
7339def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
7340          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
7341
7342
7343def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
7344          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
7345def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
7346          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
7347def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
7348          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
7349def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
7350          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
7351
7352// All concat_vectors operations are canonicalised to act on i64 vectors for
7353// AArch64. In the general case we need an instruction, which might as well be
7354// INS.
7355multiclass ConcatPat<ValueType DstTy, ValueType SrcTy,
7356                     ComplexPattern ExtractHigh> {
7357  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
7358            (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
7359                         (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
7360
7361  // If the high lanes are zero we can instead emit a d->d register mov, which
7362  // will implicitly clear the upper bits.
7363  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), immAllZerosV)),
7364            (SUBREG_TO_REG (i64 0), (FMOVDr V64:$Rn), dsub)>;
7365
7366  // If the high lanes are undef we can just ignore them:
7367  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
7368            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
7369
7370  // Concatenating the high halves of two vectors is an insert of the first
7371  // into the low half of the second.
7372  def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)),
7373                                   (ExtractHigh (DstTy V128:$Rm)))),
7374            (INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>;
7375}
7376
7377defm : ConcatPat<v2i64, v1i64, extract_high_v2i64>;
7378defm : ConcatPat<v2f64, v1f64, extract_high_v2f64>;
7379defm : ConcatPat<v4i32, v2i32, extract_high_v4i32>;
7380defm : ConcatPat<v4f32, v2f32, extract_high_v4f32>;
7381defm : ConcatPat<v8i16, v4i16, extract_high_v8i16>;
7382defm : ConcatPat<v8f16, v4f16, extract_high_v8f16>;
7383defm : ConcatPat<v8bf16, v4bf16, extract_high_v8bf16>;
7384defm : ConcatPat<v16i8, v8i8, extract_high_v16i8>;
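
// For example (illustrative), with %lo already in the low half of v0,
// concat_vectors(%lo, %hi) becomes a single
//   mov v0.d[1], v1.d[0]
// and concat_vectors(%lo, zeroinitializer) becomes an fmov d-to-d register
// move, whose write to the D register implicitly zeroes the top half.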
7385
7386//----------------------------------------------------------------------------
7387// AdvSIMD across lanes instructions
7388//----------------------------------------------------------------------------
7389
7390defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
7391defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
7392defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
7393defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
7394defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
7395defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
7396defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
7397defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
7398defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
7399defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
7400defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
7401
7402multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
7403  // Patterns for addv(addlp(x)) ==> addlv
7404  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
7405              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
7406              (i64 0))), (i64 0))),
7407            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
7408              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
7409  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
7410            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
7411              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
7412  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
7413            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
7414
  // Patterns for addp(addlp(x)) ==> addlv
7416  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
7417            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
7418  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
7419            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
7420}
7421
7422defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
7423defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
7424
// These patterns are used by GlobalISel.
7426multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
7427  // Patterns for addv(addlp(x)) ==> addlv
7428  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
7429            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
7430  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
7431            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
7432  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
7433            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;
7434
  // Patterns for addp(addlp(x)) ==> addlv
7436  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
7437            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
7438  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
7439            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
7440}
7441
7442defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
7443defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;
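
// For example (a sketch), a uaddlp from v16i8 to v8i16 followed by a full
// add reduction collapses to the single instruction
//   uaddlv h0, v0.16b
// instead of a uaddlp followed by an addv.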
7444
7445def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
7446          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;
7447
7448def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
7449          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;
7450
7451def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
7452          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;
7453
7454multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
7455  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
7456            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i8v") V64:$Rn), hsub))>;
7457
7458  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
7459            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i16v") V64:$Rn), ssub))>;
7460
7461  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
7462            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v16i8v") V128:$Rn), hsub))>;
7463
7464  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
7465            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i16v") V128:$Rn), ssub))>;
7466
7467  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
7468            (v2i64 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i32v") V128:$Rn), dsub))>;
7469}
7470
7471defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
7472defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;
7473
// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
7477multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
7478                                    SDPatternOperator opNode> {
7479// If a lane instruction caught the vector_extract around opNode, we can
7480// directly match the latter to the instruction.
7481def : Pat<(v8i8 (opNode V64:$Rn)),
7482          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
7483           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
7484def : Pat<(v16i8 (opNode V128:$Rn)),
7485          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7486           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
7487def : Pat<(v4i16 (opNode V64:$Rn)),
7488          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
7489           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
7490def : Pat<(v8i16 (opNode V128:$Rn)),
7491          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
7492           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
7493def : Pat<(v4i32 (opNode V128:$Rn)),
7494          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
7495           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
7496
7497
// If none did, fall back to the explicit patterns, consuming the
// vector_extract.
7499def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
7500            (i64 0)), (i64 0))),
7501          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
7502            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
7503            bsub), ssub)>;
7504def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
7505          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7506            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
7507            bsub), ssub)>;
7508def : Pat<(i32 (vector_extract (insert_subvector undef,
7509            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
7510          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
7511            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
7512            hsub), ssub)>;
7513def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
7514          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
7515            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
7516            hsub), ssub)>;
7517def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
7518          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
7519            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
7520            ssub), ssub)>;
7521
7522}
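
// Note that the across-lanes instructions write a scalar FP register (ADDV
// writes Bd/Hd/Sd), so the vector-typed patterns above re-view that scalar
// as a vector with IMPLICIT_DEF + INSERT_SUBREG rather than emitting a copy.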
7523
7524multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
7525                                          SDPatternOperator opNode>
7526    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// has already performed it.
7529def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
7530            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
7531          (i32 (SMOVvi8to32
7532            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7533              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
7534            (i64 0)))>;
7535def : Pat<(i32 (sext_inreg (i32 (vector_extract
7536            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
7537          (i32 (SMOVvi8to32
7538            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7539             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
7540            (i64 0)))>;
7541def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
7542            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
7543          (i32 (SMOVvi16to32
7544           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7545            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
7546           (i64 0)))>;
7547def : Pat<(i32 (sext_inreg (i32 (vector_extract
7548            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
7549          (i32 (SMOVvi16to32
7550            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7551             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
7552            (i64 0)))>;
7553}
7554
7555multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
7556                                            SDPatternOperator opNode>
7557    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has actually been
// generated, consume it.
7560def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
7561            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
7562      (i32 (EXTRACT_SUBREG
7563        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7564          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
7565        ssub))>;
7566def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
7567            maski8_or_more)),
7568        (i32 (EXTRACT_SUBREG
7569          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7570            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
7571          ssub))>;
7572def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
7573            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
7574          (i32 (EXTRACT_SUBREG
7575            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7576              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
7577            ssub))>;
7578def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
7579            maski16_or_more)),
7580        (i32 (EXTRACT_SUBREG
7581          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7582            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
7583          ssub))>;
7584}
7585
// For vecreduce_add, used by GlobalISel, not SDAG.
7587def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
7588          (i8 (ADDVv8i8v V64:$Rn))>;
7589def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
7590          (i8 (ADDVv16i8v V128:$Rn))>;
7591def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
7592          (i16 (ADDVv4i16v V64:$Rn))>;
7593def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
7594          (i16 (ADDVv8i16v V128:$Rn))>;
7595def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
7596          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7597def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
7598          (i32 (ADDVv4i32v V128:$Rn))>;
7599def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
7600          (i64 (ADDPv2i64p V128:$Rn))>;
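
// For example (illustrative), a full add reduction of a v16i8 selects to
//   addv b0, v0.16b
// while the v2i64 case uses the pairwise "addp d0, v0.2d", since addv has
// no 64-bit element form.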
7601
7602defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
// vaddv_[su]32 is special: it lowers to ADDP Vd.2S, Vn.2S, Vm.2S with
// Vn == Vm, and the result is read from Vd.s[0].
7604def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
7605          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
7606
7607defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special: it lowers to ADDP Vd.2S, Vn.2S, Vm.2S with
// Vn == Vm, and the result is read from Vd.s[0].
7609def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
7610          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
7611
7612defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
7613def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
7614          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
7615
7616defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
7617def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
7618          (SMINPv2i32 V64:$Rn, V64:$Rn)>;
7619
7620defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
7621def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
7622          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
7623
7624defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
7625def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
7626          (UMINPv2i32 V64:$Rn, V64:$Rn)>;
7627
// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment, because
// GlobalISel allows us to specify that the return register is an FPR.
7630multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
7631                                               SDPatternOperator opNode> {
7632def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
7633          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;
7634
7635def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
7636          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;
7637
7638def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
7639          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;
7640
7641def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
7642          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;
7643
7644def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
7645          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
7646}
7647
7648// For v2i32 source type, the pairwise instruction can be used instead
7649defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
7650def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
7651          (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7652
7653defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
7654def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
7655          (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7656
7657defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
7658def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
7659          (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7660
7661defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
7662def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
7663          (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
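
// As a sketch of the v2i32 special case: the reduction becomes
//   uminp v0.2s, v0.2s, v0.2s
// with the result read from lane 0, since the across-lanes min/max
// instructions have no 2s form.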
7664
7665// The SADDLV v2i32 gets mapped to SADDLP.
7666def : Pat<(v2i64 (AArch64saddlv (v2i32 V64:$Rn))),
7667          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (SADDLPv2i32_v1i64 V64:$Rn), dsub))>;
7668// The UADDLV v2i32 gets mapped to UADDLP.
7669def : Pat<(v2i64 (AArch64uaddlv (v2i32 V64:$Rn))),
7670          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLPv2i32_v1i64 V64:$Rn), dsub))>;
7671
7672//------------------------------------------------------------------------------
7673// AdvSIMD modified immediate instructions
7674//------------------------------------------------------------------------------
7675
7676// AdvSIMD BIC
7677defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
7678// AdvSIMD ORR
7679defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
7680
7681let Predicates = [HasNEON] in {
7682def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7683def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7684def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7685def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7686
7687def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7688def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7689def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7690def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7691
7692def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7693def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7694def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7695def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7696
7697def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7698def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7699def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7700def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7701}
7702
7703// AdvSIMD FMOV
7704def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
7705                                              "fmov", ".2d",
7706                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7707def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
7708                                              "fmov", ".2s",
7709                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7710def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
7711                                              "fmov", ".4s",
7712                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7713let Predicates = [HasNEON, HasFullFP16] in {
7714def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
7715                                              "fmov", ".4h",
7716                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7717def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
7718                                              "fmov", ".8h",
7719                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7720} // Predicates = [HasNEON, HasFullFP16]
7721
7722// AdvSIMD MOVI
7723
7724// EDIT byte mask: scalar
7725let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7726def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
7727                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
7728// The movi_edit node has the immediate value already encoded, so we use
7729// a plain imm0_255 here.
7730def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
7731          (MOVID imm0_255:$shift)>;
7732
7733// EDIT byte mask: 2d
7734
7735// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern.
7737let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7738def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
7739                                                simdimmtype10,
7740                                                "movi", ".2d",
7741                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
7742
7743def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7744def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7745def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7746def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7747def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7748def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7749def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7750def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7751
7752def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7753def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7754def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7755def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
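
// MOVI .2d replicates each bit of its 8-bit immediate across a full byte,
// so imm 0 gives all-zeros and imm 255 gives all-ones; e.g. (sketch) the
// all-ones case is written as
//   movi v0.2d, #0xffffffffffffffff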
7756
// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register, as
// the extract is free and this gives better MachineCSE results.
7759def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7760def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7761def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7762def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7763def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
7764def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
7765def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
7766def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;
7767
7768def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7769def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7770def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7771def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7772
7773// EDIT per word & halfword: 2s, 4h, 4s, & 8h
7774let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7775defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
7776
7777let Predicates = [HasNEON] in {
  // Use MOVI to materialize fp constants.
7779  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
7780            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
7781                                       (i32 24)),
7782                            ssub)>;
7783}
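
// For example (a sketch): f32 2.0 has the bit pattern 0x40000000, so it can
// be materialized as "movi v0.2s, #0x40, lsl #24" and read back from ssub.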
7784
7785let Predicates = [HasNEON] in {
7786def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7787def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7788def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7789def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7790
7791def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7792def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7793def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7794def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7795}
7796
7797def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7798          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
7799def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7800          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
7801def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7802          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
7803def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7804          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
7805
7806let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7807// EDIT per word: 2s & 4s with MSL shifter
7808def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
7809                      [(set (v2i32 V64:$Rd),
7810                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7811def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
7812                      [(set (v4i32 V128:$Rd),
7813                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7814
7815// Per byte: 8b & 16b
7816def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
7817                                                 "movi", ".8b",
7818                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
7819
7820def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
7821                                                 "movi", ".16b",
7822                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
7823}
7824
7825// AdvSIMD MVNI
7826
7827// EDIT per word & halfword: 2s, 4h, 4s, & 8h
7828let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7829defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
7830
7831let Predicates = [HasNEON] in {
7832def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7833def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7834def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7835def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7836
7837def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7838def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7839def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7840def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7841}
7842
7843def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7844          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
7845def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7846          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
7847def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7848          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
7849def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7850          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
7851
7852// EDIT per word: 2s & 4s with MSL shifter
7853let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7854def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
7855                      [(set (v2i32 V64:$Rd),
7856                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7857def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
7858                      [(set (v4i32 V128:$Rd),
7859                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7860}
7861
7862//----------------------------------------------------------------------------
7863// AdvSIMD indexed element
7864//----------------------------------------------------------------------------
7865
7866let hasSideEffects = 0 in {
7867  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
7868  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
7869}
7870
7871// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
7872// instruction expects the addend first, while the intrinsic expects it last.
7873
7874// On the other hand, there are quite a few valid combinatorial options due to
7875// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7876defm : SIMDFPIndexedTiedPatterns<"FMLA",
7877           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
7878defm : SIMDFPIndexedTiedPatterns<"FMLA",
7879           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
7880
7881defm : SIMDFPIndexedTiedPatterns<"FMLS",
7882           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
7883defm : SIMDFPIndexedTiedPatterns<"FMLS",
7884           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
7885defm : SIMDFPIndexedTiedPatterns<"FMLS",
7886           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
7887defm : SIMDFPIndexedTiedPatterns<"FMLS",
7888           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
7889
7890multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
7891  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7892  // and DUP scalar.
7893  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
7894                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
7895                                           VectorIndexS:$idx))),
7896            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
7897  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
7898                           (v2f32 (AArch64duplane32
7899                                      (v4f32 (insert_subvector undef,
7900                                                 (v2f32 (fneg V64:$Rm)),
7901                                                 (i64 0))),
7902                                      VectorIndexS:$idx)))),
7903            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
7904                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
7905                               VectorIndexS:$idx)>;
7906  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
7907                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
7908            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
7909                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
7910
7911  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7912  // and DUP scalar.
7913  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
7914                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
7915                                           VectorIndexS:$idx))),
7916            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
7917                               VectorIndexS:$idx)>;
7918  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
7919                           (v4f32 (AArch64duplane32
7920                                      (v4f32 (insert_subvector undef,
7921                                                 (v2f32 (fneg V64:$Rm)),
7922                                                 (i64 0))),
7923                                      VectorIndexS:$idx)))),
7924            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
7925                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
7926                               VectorIndexS:$idx)>;
7927  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
7928                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
7929            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
7930                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
7931
7932  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
7933  // (DUPLANE from 64-bit would be trivial).
7934  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
7935                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
7936                                           VectorIndexD:$idx))),
7937            (FMLSv2i64_indexed
7938                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
7939  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
7940                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
7941            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
7942                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
7943
7944  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
7945  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
7946                         (vector_extract (v4f32 (fneg V128:$Rm)),
7947                                         VectorIndexS:$idx))),
7948            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
7949                V128:$Rm, VectorIndexS:$idx)>;
7950  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
7951                         (vector_extract (v4f32 (insert_subvector undef,
7952                                                    (v2f32 (fneg V64:$Rm)),
7953                                                    (i64 0))),
7954                                         VectorIndexS:$idx))),
7955            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
7956                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
7957
  // 1 variant for the 64-bit scalar version: extract from .2d (extract
  // from .1d would be trivial).
7959  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
7960                         (vector_extract (v2f64 (fneg V128:$Rm)),
7961                                         VectorIndexS:$idx))),
7962            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
7963                V128:$Rm, VectorIndexS:$idx)>;
7964}
7965
7966defm : FMLSIndexedAfterNegPatterns<
7967           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
7968defm : FMLSIndexedAfterNegPatterns<
7969           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
7970
7971defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
7972defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
7973
7974def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
7975          (FMULv2i32_indexed V64:$Rn,
7976            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
7977            (i64 0))>;
7978def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
7979          (FMULv4i32_indexed V128:$Rn,
7980            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
7981            (i64 0))>;
7982def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
7983          (FMULv2i64_indexed V128:$Rn,
7984            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
7985            (i64 0))>;
7986
7987defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
7988defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
7989
7990defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
7991                                     int_aarch64_neon_sqdmulh_laneq>;
7992defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
7993                                      int_aarch64_neon_sqrdmulh_laneq>;
7994
// Generated by the MachineCombiner pass.
7996defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
7997defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
7998
7999defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
8000defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
8001    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
8002defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
8003    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
8004defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
8005defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
8006                                           int_aarch64_neon_sqadd>;
8007defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
8008                                           int_aarch64_neon_sqsub>;
8009defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
8010                                          int_aarch64_neon_sqrdmlah>;
8011defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
8012                                          int_aarch64_neon_sqrdmlsh>;
8013defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
8014defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
8015    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
8016defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
8017    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
8018defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
8019
8020// A scalar sqdmull with the second operand being a vector lane can be
8021// handled directly with the indexed instruction encoding.
8022def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
8023                                          (vector_extract (v4i32 V128:$Vm),
8024                                                           VectorIndexS:$idx)),
8025          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
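
// e.g. (illustrative) this selects "sqdmull d0, s1, v2.s[1]" directly,
// rather than first moving the lane out of v2 with a separate extract.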
8026
8027//----------------------------------------------------------------------------
8028// AdvSIMD scalar shift instructions
8029//----------------------------------------------------------------------------
8030defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
8031defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
8032defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
8033defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference cannot cope with sharing
// the same base pattern between the fp -> int and int -> fp directions.
8037def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
8038          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
8039def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
8040          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
8041def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
8042          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
8043def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
8044          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
8045def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
8046                                            vecshiftR64:$imm)),
8047          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
8048def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
8049                                            vecshiftR64:$imm)),
8050          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
8051def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
8052          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
8053def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
8054          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
8055def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
8056                                            vecshiftR64:$imm)),
8057          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
8058def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
8059          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
8060def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
8061                                            vecshiftR64:$imm)),
8062          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
8063def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
8064          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
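
// As a sketch of the fixed-point forms above (registers illustrative):
//   fcvtzs s0, s0, #4    // float to signed fixed point, 4 fractional bits
//   scvtf  s0, s0, #4    // the inverse conversion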
8065
// Patterns for FP16 intrinsics. These require subregister copies to/from
// FPR32/FPR64, as i16 is not a legal type.
8067
8068def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
8069          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
8070def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
8071          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
8072def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
8073          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
8074def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
8075            (and FPR32:$Rn, (i32 65535)),
8076            vecshiftR16:$imm)),
8077          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
8078def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
8079          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
8080def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
8081          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
8082def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
8083          (i32 (INSERT_SUBREG
8084            (i32 (IMPLICIT_DEF)),
8085            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
8086            hsub))>;
8087def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
8088          (i64 (INSERT_SUBREG
8089            (i64 (IMPLICIT_DEF)),
8090            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
8091            hsub))>;
8092def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
8093          (i32 (INSERT_SUBREG
8094            (i32 (IMPLICIT_DEF)),
8095            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
8096            hsub))>;
8097def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
8098          (i64 (INSERT_SUBREG
8099            (i64 (IMPLICIT_DEF)),
8100            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
8101            hsub))>;
8102def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
8103          (i32 (INSERT_SUBREG
8104            (i32 (IMPLICIT_DEF)),
8105            (FACGE16 FPR16:$Rn, FPR16:$Rm),
8106            hsub))>;
8107def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
8108          (i32 (INSERT_SUBREG
8109            (i32 (IMPLICIT_DEF)),
8110            (FACGT16 FPR16:$Rn, FPR16:$Rm),
8111            hsub))>;
8112
8113defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
8114defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
8115defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
8116                                     int_aarch64_neon_sqrshrn>;
8117defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
8118                                     int_aarch64_neon_sqrshrun>;
8119defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
8120defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
8121defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
8122                                     int_aarch64_neon_sqshrn>;
8123defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
8124                                     int_aarch64_neon_sqshrun>;
8125defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
8126defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
8127defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
8128    TriOpFrag<(add node:$LHS,
8129                   (AArch64srshri node:$MHS, node:$RHS))>>;
8130defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
8131defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
8132    TriOpFrag<(add_and_or_is_add node:$LHS,
8133                   (AArch64vashr node:$MHS, node:$RHS))>>;
8134defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
8135                                     int_aarch64_neon_uqrshrn>;
8136defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
8137defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
8138                                     int_aarch64_neon_uqshrn>;
8139defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
8140defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
8141    TriOpFrag<(add node:$LHS,
8142                   (AArch64urshri node:$MHS, node:$RHS))>>;
8143defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
8144defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
8145    TriOpFrag<(add_and_or_is_add node:$LHS,
8146                   (AArch64vlshr node:$MHS, node:$RHS))>>;
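
// e.g. (illustrative) the accumulating forms fold a shift and an add into
// one instruction: "usra d0, d1, #3" computes d0 += (d1 >> 3).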
8147
8148//----------------------------------------------------------------------------
8149// AdvSIMD vector shift instructions
8150//----------------------------------------------------------------------------
8151defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
8152defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
8153defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
8154                                   int_aarch64_neon_vcvtfxs2fp>;
8155defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
8156defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
8157
8158let Predicates = [HasNEON] in {
8159def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))),
8160          (SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>;
8161
8162def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))),
8163          (SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>;
8164
8165def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))),
8166          (SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>;
8167}
8168
8169let Predicates = [HasNEON, HasFullFP16] in {
8170def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))),
8171          (SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>;
8172
8173def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))),
8174          (SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
8175}
8176
8177// X << 1 ==> X + X
8178class SHLToADDPat<ValueType ty, RegisterClass regtype>
8179  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
8180            (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;
8181
8182def : SHLToADDPat<v16i8, FPR128>;
8183def : SHLToADDPat<v8i16, FPR128>;
8184def : SHLToADDPat<v4i32, FPR128>;
8185def : SHLToADDPat<v2i64, FPR128>;
8186def : SHLToADDPat<v8i8,  FPR64>;
8187def : SHLToADDPat<v4i16, FPR64>;
8188def : SHLToADDPat<v2i32, FPR64>;
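
// e.g. (sketch) "shl v0.4s, v0.4s, #1" is emitted as
//   add v0.4s, v0.4s, v0.4s
// which is typically at least as cheap as the immediate shift.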
8189
8190defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
8191                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
8192defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
8193def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
8194                                      (i32 vecshiftL64:$imm))),
8195          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
8196defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
8197                  BinOpFrag<(truncssat_s (AArch64srshri node:$LHS, node:$RHS))>>;
8198defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
8199                  BinOpFrag<(truncssat_u (AArch64srshri node:$LHS, node:$RHS))>>;
8200defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
8201defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
8202defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
8203                  BinOpFrag<(truncssat_s (AArch64vashr node:$LHS, node:$RHS))>>;
8204defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
8205                  BinOpFrag<(truncssat_u (AArch64vashr node:$LHS, node:$RHS))>>;
8206defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
8207def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
8208                                      (i32 vecshiftR64:$imm))),
8209          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
8210defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
8211defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
8212                 TriOpFrag<(add node:$LHS,
8213                                (AArch64srshri node:$MHS, node:$RHS))> >;
8214defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
8215                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
8216
8217defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
8218defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
8219                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
8220defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
8221                        int_aarch64_neon_vcvtfxu2fp>;
8222defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
8223                  BinOpFrag<(truncusat_u (AArch64urshri node:$LHS, node:$RHS))>>;
8224defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
8225defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
8226                  BinOpFrag<(truncusat_u (AArch64vlshr node:$LHS, node:$RHS))>>;
8227defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
8228defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
8229                TriOpFrag<(add node:$LHS,
8230                               (AArch64urshri node:$MHS, node:$RHS))> >;
8231defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
8232                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
8233defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
8234defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
8235                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
8236
8237def VImm0080:         PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
8238def VImm00008000:     PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
8239def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;
8240
8241// RADDHN patterns for when RSHRN shifts by half the size of the vector element
8242def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))),
8243          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
8244def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))),
8245          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
8246let AddedComplexity = 5 in
8247def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))),
8248          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
8249def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
8250          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
8251def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
8252          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
8253def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
8254          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
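
// These hold because raddhn adds its operands, rounds, and returns the high
// half of each element: with a zero second operand, e.g. (sketch)
//   raddhn v0.8b, v1.8h, v2.8h    // v2 == 0
// computes exactly a rounding right shift by 8 followed by a narrow.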
8255
8256// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
8257def : Pat<(v16i8 (concat_vectors
8258                 (v8i8 V64:$Vd),
8259                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))),
8260          (RADDHNv8i16_v16i8
8261                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8262                 (v8i16 (MOVIv2d_ns (i32 0))))>;
8263def : Pat<(v8i16 (concat_vectors
8264                 (v4i16 V64:$Vd),
8265                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))))),
8266          (RADDHNv4i32_v8i16
8267                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8268                 (v4i32 (MOVIv2d_ns (i32 0))))>;
8269let AddedComplexity = 5 in
8270def : Pat<(v4i32 (concat_vectors
8271                 (v2i32 V64:$Vd),
8272                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))),
8273          (RADDHNv2i64_v4i32
8274                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8275                 (v2i64 (MOVIv2d_ns (i32 0))))>;
8276def : Pat<(v16i8 (concat_vectors
8277                 (v8i8 V64:$Vd),
8278                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
8279          (RADDHNv8i16_v16i8
8280                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8281                 (v8i16 (MOVIv2d_ns (i32 0))))>;
8282def : Pat<(v8i16 (concat_vectors
8283                 (v4i16 V64:$Vd),
8284                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
8285          (RADDHNv4i32_v8i16
8286                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8287                 (v4i32 (MOVIv2d_ns (i32 0))))>;
8288def : Pat<(v4i32 (concat_vectors
8289                 (v2i32 V64:$Vd),
8290                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
8291          (RADDHNv2i64_v4i32
8292                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8293                 (v2i64 (MOVIv2d_ns (i32 0))))>;
8294
8295// SHRN patterns for when a logical right shift was used instead of arithmetic
8296// (the immediate guarantees no sign bits actually end up in the result so it
8297// doesn't matter).
8298def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
8299          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
8300def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
8301          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
8302def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
8303          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
8304
8305def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
8306                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
8307                                                    vecshiftR16Narrow:$imm)))),
8308          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
8309                           V128:$Rn, vecshiftR16Narrow:$imm)>;
8310def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
8311                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
8312                                                    vecshiftR32Narrow:$imm)))),
8313          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
8314                           V128:$Rn, vecshiftR32Narrow:$imm)>;
8315def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
8316                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
8317                                                    vecshiftR64Narrow:$imm)))),
8318          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
8320
8321def : Pat<(shl (v8i16 (zext (v8i8 V64:$Rm))), (v8i16 (AArch64dup (i32 imm32_0_7:$size)))),
8322          (USHLLv8i8_shift V64:$Rm, (i32 imm32_0_7:$size))>;
8323def : Pat<(shl (v4i32 (zext (v4i16 V64:$Rm))), (v4i32 (AArch64dup (i32 imm32_0_15:$size)))),
8324          (USHLLv4i16_shift V64:$Rm, (i32 imm32_0_15:$size))>;
8325def : Pat<(shl (v2i64 (zext (v2i32 V64:$Rm))), (v2i64 (AArch64dup (i64 imm0_31:$size)))),
8326          (USHLLv2i32_shift V64:$Rm, (trunc_imm imm0_31:$size))>;
8327
8328def : Pat<(shl (v8i16 (sext (v8i8 V64:$Rm))), (v8i16 (AArch64dup (i32 imm32_0_7:$size)))),
8329          (SSHLLv8i8_shift V64:$Rm, (i32 imm32_0_7:$size))>;
8330def : Pat<(shl (v4i32 (sext (v4i16 V64:$Rm))), (v4i32 (AArch64dup (i32 imm32_0_15:$size)))),
8331          (SSHLLv4i16_shift V64:$Rm, (i32 imm32_0_15:$size))>;
8332def : Pat<(shl (v2i64 (sext (v2i32 V64:$Rm))), (v2i64 (AArch64dup (i64 imm0_31:$size)))),
8333          (SSHLLv2i32_shift V64:$Rm, (trunc_imm imm0_31:$size))>;
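
// e.g. (illustrative) a left shift by 3 of a zero-extended v8i8 selects to
// the single widening shift "ushll v0.8h, v1.8b, #3" rather than a separate
// extend followed by a vector shift.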
8334
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
8336// Anyexts are implemented as zexts.
8337def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
8338def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
8339def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
8340def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
8341def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
8342def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
8343def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
8344def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
8345def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128-bit source register.
8347def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
8348          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
8349def : Pat<(v8i16 (zext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
8350          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
8351def : Pat<(v8i16 (sext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
8352          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
8353def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
8354          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
8355def : Pat<(v4i32 (zext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
8356          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
8357def : Pat<(v4i32 (sext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
8358          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
8359def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
8360          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
8361def : Pat<(v2i64 (zext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
8362          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
8363def : Pat<(v2i64 (sext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
8364          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
8365
8366let Predicates = [HasNEON] in {
8367// Vector shift sxtl aliases
8368def : InstAlias<"sxtl.8h $dst, $src1",
8369                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8370def : InstAlias<"sxtl $dst.8h, $src1.8b",
8371                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8372def : InstAlias<"sxtl.4s $dst, $src1",
8373                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8374def : InstAlias<"sxtl $dst.4s, $src1.4h",
8375                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8376def : InstAlias<"sxtl.2d $dst, $src1",
8377                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8378def : InstAlias<"sxtl $dst.2d, $src1.2s",
8379                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8380
8381// Vector shift sxtl2 aliases
8382def : InstAlias<"sxtl2.8h $dst, $src1",
8383                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8384def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
8385                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8386def : InstAlias<"sxtl2.4s $dst, $src1",
8387                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8388def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
8389                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8390def : InstAlias<"sxtl2.2d $dst, $src1",
8391                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8392def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
8393                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8394
8395// Vector shift uxtl aliases
8396def : InstAlias<"uxtl.8h $dst, $src1",
8397                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8398def : InstAlias<"uxtl $dst.8h, $src1.8b",
8399                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8400def : InstAlias<"uxtl.4s $dst, $src1",
8401                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8402def : InstAlias<"uxtl $dst.4s, $src1.4h",
8403                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8404def : InstAlias<"uxtl.2d $dst, $src1",
8405                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8406def : InstAlias<"uxtl $dst.2d, $src1.2s",
8407                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8408
8409// Vector shift uxtl2 aliases
8410def : InstAlias<"uxtl2.8h $dst, $src1",
8411                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8412def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
8413                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8414def : InstAlias<"uxtl2.4s $dst, $src1",
8415                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8416def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
8417                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8418def : InstAlias<"uxtl2.2d $dst, $src1",
8419                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8420def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
8421                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8422}
8423
8424def abs_f16 :
8425  OutPatFrag<(ops node:$Rn),
8426             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
8427               (i32 (ANDWri
8428                 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
8429                                         node:$Rn, hsub), GPR32)),
8430                 (i32 (logical_imm32_XFORM (i32 0x7fff))))),
8431               FPR32)), hsub)>;
8432
8433def : Pat<(f16 (fabs (f16 FPR16:$Rn))), (f16 (abs_f16 (f16 FPR16:$Rn)))>;
8434def : Pat<(bf16 (fabs (bf16 FPR16:$Rn))), (bf16 (abs_f16 (bf16 FPR16:$Rn)))>;
8435
8436def neg_f16 :
8437  OutPatFrag<(ops node:$Rn),
8438             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
8439               (i32 (EORWri
8440                 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
8441                                         node:$Rn, hsub), GPR32)),
8442                 (i32 (logical_imm32_XFORM (i32 0x8000))))),
8443               FPR32)), hsub)>;
8444
8445def : Pat<(f16 (fneg (f16 FPR16:$Rn))), (f16 (neg_f16 (f16 FPR16:$Rn)))>;
8446def : Pat<(bf16 (fneg (bf16 FPR16:$Rn))), (bf16 (neg_f16 (bf16 FPR16:$Rn)))>;
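// Illustrative sketch (not part of the build): abs_f16/neg_f16 above operate
// on the raw IEEE-754 bits through a GPR, clearing or flipping the sign bit.
// In C terms, viewing the half-precision value as its uint16_t bit pattern:
//
//   #include <stdint.h>
//   static inline uint16_t fabs_f16_bits(uint16_t h) {
//     return h & 0x7fff;   // the ANDWri with 0x7fff clears sign bit 15
//   }
//   static inline uint16_t fneg_f16_bits(uint16_t h) {
//     return h ^ 0x8000;   // the EORWri with 0x8000 flips sign bit 15
//   }
//
// bf16 uses the same masks because its sign bit is also bit 15.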
8447
8448let Predicates = [HasNEON] in {
8449def : Pat<(v4f16 (fabs (v4f16 V64:$Rn))), (v4f16 (BICv4i16 (v4f16 V64:$Rn), (i32 128), (i32 8)))>;
8450def : Pat<(v4bf16 (fabs (v4bf16 V64:$Rn))), (v4bf16 (BICv4i16 (v4bf16 V64:$Rn), (i32 128), (i32 8)))>;
8451def : Pat<(v8f16 (fabs (v8f16 V128:$Rn))), (v8f16 (BICv8i16 (v8f16 V128:$Rn), (i32 128), (i32 8)))>;
8452def : Pat<(v8bf16 (fabs (v8bf16 V128:$Rn))), (v8bf16 (BICv8i16 (v8bf16 V128:$Rn), (i32 128), (i32 8)))>;
8453
8454def : Pat<(v4f16 (fneg (v4f16 V64:$Rn))), (v4f16 (EORv8i8 (v4f16 V64:$Rn), (MOVIv4i16 (i32 128), (i32 8))))>;
8455def : Pat<(v4bf16 (fneg (v4bf16 V64:$Rn))), (v4bf16 (EORv8i8 (v4bf16 V64:$Rn), (v4i16 (MOVIv4i16 (i32 0x80), (i32 8)))))>;
8456def : Pat<(v8f16 (fneg (v8f16 V128:$Rn))), (v8f16 (EORv16i8 (v8f16 V128:$Rn), (MOVIv8i16 (i32 128), (i32 8))))>;
8457def : Pat<(v8bf16 (fneg (v8bf16 V128:$Rn))), (v8bf16 (EORv16i8 (v8bf16 V128:$Rn), (v8i16 (MOVIv8i16 (i32 0x80), (i32 8)))))>;
8458}
8459
8460// If an integer is about to be converted to a floating point value,
8461// just load it on the floating point unit.
8462// These patterns are more complex because floating point loads do not
8463// support sign extension.
8464// The sign extension has to be explicitly added and is only supported for
8465// one step: byte-to-half, half-to-word, word-to-doubleword.
8466// SCVTF GPR -> FPR is 9 cycles.
8467// SCVTF FPR -> FPR is 4 cycles.
8468// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
8469// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
8470// and still be faster.
8471// However, this is not good for code size.
8472// 8-bit -> float. 2 size step-ups.
8473class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
8474  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
8475        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
8476                            (SSHLLv4i16_shift
8477                              (f64
8478                                (EXTRACT_SUBREG
8479                                  (SSHLLv8i8_shift
8480                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8481                                        INST,
8482                                        bsub),
8483                                    0),
8484                                  dsub)),
8485                               0),
8486                             ssub)))>,
8487    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8488
8489def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
8490                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
8491def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
8492                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
8493def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
8494                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
8495def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
8496                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
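// Illustrative sketch (not part of the build): this class matches C source of
// roughly the following shape, assuming a signed byte load feeding a float
// conversion:
//
//   #include <stdint.h>
//   float byte_to_float(const int8_t *p) { return (float)*p; }
//
// Rather than LDRSB plus a GPR -> FPR SCVTF, the byte is loaded directly into
// an FPR and sign-extended there in two SSHLL #0 steps (i8 -> i16 -> i32)
// before an FPR -> FPR SCVTF, per the cycle counts in the comment above.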
8497
8498// 16-bit -> float. 1 size step-up.
8499class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
8500  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
8501        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
8502                            (SSHLLv4i16_shift
8503                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8504                                  INST,
8505                                  hsub),
8506                                0),
8507                            ssub)))>,
8508    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8509
8510def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
8511                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
8512def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
8513                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
8514def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
8515                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
8516def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
8517                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
8518
8519// 32-bit to 32-bit is handled in the target-specific dag combine
8520// performIntToFpCombine.
8521// 64-bit integer to 32-bit floating point is not possible with
8522// SCVTF on floating point registers (both source and destination
8523// must have the same size).
8524
8525// Here are the patterns for 8, 16, 32, and 64 bits to double.
8526// 8-bit -> double. 3 size step-ups: give up.
8527// 16-bit -> double. 2 size step-ups.
8528class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
8529  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
8530           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
8531                              (SSHLLv2i32_shift
8532                                 (f64
8533                                  (EXTRACT_SUBREG
8534                                    (SSHLLv4i16_shift
8535                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8536                                        INST,
8537                                        hsub),
8538                                     0),
8539                                   dsub)),
8540                               0),
8541                             dsub)))>,
8542    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8543
8544def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
8545                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
8546def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
8547                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
8548def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
8549                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
8550def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
8551                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
8552// 32-bit -> double. 1 size step-up.
8553class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
8554  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
8555           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
8556                              (SSHLLv2i32_shift
8557                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8558                                  INST,
8559                                  ssub),
8560                               0),
8561                             dsub)))>,
8562    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8563
8564def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
8565                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
8566def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
8567                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
8568def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
8569                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
8570def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
8571                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
8572
8573// 64-bit -> double is handled in the target-specific dag combine:
8574// performIntToFpCombine.
8575
8576
8577//----------------------------------------------------------------------------
8578// AdvSIMD Load-Store Structure
8579//----------------------------------------------------------------------------
8580defm LD1 : SIMDLd1Multiple<"ld1">;
8581defm LD2 : SIMDLd2Multiple<"ld2">;
8582defm LD3 : SIMDLd3Multiple<"ld3">;
8583defm LD4 : SIMDLd4Multiple<"ld4">;
8584
8585defm ST1 : SIMDSt1Multiple<"st1">;
8586defm ST2 : SIMDSt2Multiple<"st2">;
8587defm ST3 : SIMDSt3Multiple<"st3">;
8588defm ST4 : SIMDSt4Multiple<"st4">;
8589
8590class Ld1Pat<ValueType ty, Instruction INST>
8591  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
8592
8593def : Ld1Pat<v16i8, LD1Onev16b>;
8594def : Ld1Pat<v8i16, LD1Onev8h>;
8595def : Ld1Pat<v4i32, LD1Onev4s>;
8596def : Ld1Pat<v2i64, LD1Onev2d>;
8597def : Ld1Pat<v8i8,  LD1Onev8b>;
8598def : Ld1Pat<v4i16, LD1Onev4h>;
8599def : Ld1Pat<v2i32, LD1Onev2s>;
8600def : Ld1Pat<v1i64, LD1Onev1d>;
8601
8602class St1Pat<ValueType ty, Instruction INST>
8603  : Pat<(store ty:$Vt, GPR64sp:$Rn),
8604        (INST ty:$Vt, GPR64sp:$Rn)>;
8605
8606def : St1Pat<v16i8, ST1Onev16b>;
8607def : St1Pat<v8i16, ST1Onev8h>;
8608def : St1Pat<v4i32, ST1Onev4s>;
8609def : St1Pat<v2i64, ST1Onev2d>;
8610def : St1Pat<v8i8,  ST1Onev8b>;
8611def : St1Pat<v4i16, ST1Onev4h>;
8612def : St1Pat<v2i32, ST1Onev2s>;
8613def : St1Pat<v1i64, ST1Onev1d>;
8614
8615//---
8616// Single-element
8617//---
8618
8619defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
8620defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
8621defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
8622defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
8623let mayLoad = 1, hasSideEffects = 0 in {
8624defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
8625defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
8626defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
8627defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
8628defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
8629defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
8630defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
8631defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
8632defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
8633defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
8634defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
8635defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
8636defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
8637defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
8638defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
8639defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
8640}
8641
8642def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
8643          (LD1Rv8b GPR64sp:$Rn)>;
8644def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
8645          (LD1Rv16b GPR64sp:$Rn)>;
8646def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
8647          (LD1Rv4h GPR64sp:$Rn)>;
8648def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
8649          (LD1Rv8h GPR64sp:$Rn)>;
8650def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
8651          (LD1Rv2s GPR64sp:$Rn)>;
8652def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
8653          (LD1Rv4s GPR64sp:$Rn)>;
8654def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
8655          (LD1Rv2d GPR64sp:$Rn)>;
8656def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
8657          (LD1Rv1d GPR64sp:$Rn)>;
8658
8659def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
8660          (LD1Rv8b GPR64sp:$Rn)>;
8661def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
8662          (LD1Rv16b GPR64sp:$Rn)>;
8663def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
8664          (LD1Rv4h GPR64sp:$Rn)>;
8665def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
8666          (LD1Rv8h GPR64sp:$Rn)>;
8667def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
8668          (LD1Rv2s GPR64sp:$Rn)>;
8669def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
8670          (LD1Rv4s GPR64sp:$Rn)>;
8671def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
8672          (LD1Rv2d GPR64sp:$Rn)>;
8673
8674// Grab the floating point versions too.
8675def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
8676          (LD1Rv2s GPR64sp:$Rn)>;
8677def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
8678          (LD1Rv4s GPR64sp:$Rn)>;
8679def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
8680          (LD1Rv2d GPR64sp:$Rn)>;
8681def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
8682          (LD1Rv1d GPR64sp:$Rn)>;
8683def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
8684          (LD1Rv4h GPR64sp:$Rn)>;
8685def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
8686          (LD1Rv8h GPR64sp:$Rn)>;
8687def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
8688          (LD1Rv4h GPR64sp:$Rn)>;
8689def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
8690          (LD1Rv8h GPR64sp:$Rn)>;
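// Illustrative sketch (not part of the build): the LD1R patterns above match
// a scalar load broadcast to every lane. For the v4i32 case, in C terms:
//
//   #include <stdint.h>
//   void splat4(const int32_t *p, int32_t dst[4]) {
//     int32_t x = *p;              // the single scalar load
//     for (int i = 0; i < 4; ++i)  // AArch64dup: replicate into each lane
//       dst[i] = x;
//   }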
8691
8692class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
8693                    ValueType VTy, ValueType STy, Instruction LD1>
8694  : Pat<(vector_insert (VTy VecListOne128:$Rd),
8695           (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8696        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
8697
8698def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
8699def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
8700def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
8701def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
8702def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
8703def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
8704def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
8705def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
8706
8707// Generate LD1 for extload if memory type does not match the
8708// destination type, for example:
8709//
8710//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
8711//
8712// In this case, the index must be adjusted to match the LD1 lane type
8713// (see the worked example after these patterns).
8714class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
8715                         VecIndex, ValueType VTy, ValueType STy,
8716                         Instruction LD1, SDNodeXForm IdxOp>
8717  : Pat<(vector_insert (VTy VecListOne128:$Rd),
8718                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8719        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
8720
8721class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
8722                        ValueType VTy, ValueType STy, Instruction LD1,
8723                        SDNodeXForm IdxOp>
8724  : Pat<(vector_insert (VTy VecListOne64:$Rd),
8725                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8726        (EXTRACT_SUBREG
8727            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
8728                (IdxOp VecIndex:$idx), GPR64sp:$Rn),
8729            dsub)>;
8730
8731def VectorIndexStoH : SDNodeXForm<imm, [{
8732  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8733}]>;
8734def VectorIndexStoB : SDNodeXForm<imm, [{
8735  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
8736}]>;
8737def VectorIndexHtoB : SDNodeXForm<imm, [{
8738  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8739}]>;
8740
8741def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
8742def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
8743def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
8744
8745def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
8746def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
8747def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
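// Worked example for the index transforms above: inserting an extloadi8 into
// s-lane 2 of a v4i32 uses LD1i8, whose index counts byte lanes, so
// VectorIndexStoB rescales the index to 2 * 4 = 8. Byte lane 8 is the lowest
// byte of s-lane 2 (little-endian), and the remaining bytes of that lane are
// don't-care bits for the anyext, so leaving them untouched is fine.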
8748
8749// Same as above, but the first element is populated using
8750// scalar_to_vector + insert_subvector instead of insert_vector_elt.
8751let Predicates = [HasNEON] in {
8752  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
8753                          SDPatternOperator ExtLoad, Instruction LD1>
8754    : Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
8755            (ResultTy (EXTRACT_SUBREG
8756              (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
8757
8758  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
8759  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
8760  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
8761}
8762class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
8763                   ValueType VTy, ValueType STy, Instruction LD1>
8764  : Pat<(vector_insert (VTy VecListOne64:$Rd),
8765           (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8766        (EXTRACT_SUBREG
8767            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
8768                          VecIndex:$idx, GPR64sp:$Rn),
8769            dsub)>;
8770
8771def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
8772def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
8773def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
8774def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
8775def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
8776def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
8777
8778
8779defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
8780defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
8781defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
8782defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
8783
8784// Stores
8785defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
8786defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
8787defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
8788defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
8789
8790let AddedComplexity = 19 in
8791class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8792                    ValueType VTy, ValueType STy, Instruction ST1>
8793  : Pat<(scalar_store
8794             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8795             GPR64sp:$Rn),
8796        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
8797
8798def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
8799def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
8800def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
8801def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
8802def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
8803def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
8804def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
8805def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
8806
8807let AddedComplexity = 19 in
8808class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8809                   ValueType VTy, ValueType STy, Instruction ST1>
8810  : Pat<(scalar_store
8811             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8812             GPR64sp:$Rn),
8813        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8814             VecIndex:$idx, GPR64sp:$Rn)>;
8815
8816def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
8817def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
8818def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
8819def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
8820def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
8821def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
8822
8823multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8824                             ValueType VTy, ValueType STy, Instruction ST1,
8825                             int offset> {
8826  def : Pat<(scalar_store
8827              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8828              GPR64sp:$Rn, offset),
8829        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8830             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8831
8832  def : Pat<(scalar_store
8833              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8834              GPR64sp:$Rn, GPR64:$Rm),
8835        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8836             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8837}
8838
8839defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
8840defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
8841                        2>;
8842defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
8843defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
8844defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
8845defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
8846defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
8847defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
8848
8849multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8850                             ValueType VTy, ValueType STy, Instruction ST1,
8851                             int offset> {
8852  def : Pat<(scalar_store
8853              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8854              GPR64sp:$Rn, offset),
8855        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8856
8857  def : Pat<(scalar_store
8858              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8859              GPR64sp:$Rn, GPR64:$Rm),
8860        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8861}
8862
8863defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
8864                         1>;
8865defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
8866                         2>;
8867defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
8868defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
8869defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
8870defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
8871defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
8872defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
8873
8874let mayStore = 1, hasSideEffects = 0 in {
8875defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
8876defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
8877defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
8878defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
8879defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
8880defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
8881defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
8882defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
8883defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
8884defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
8885defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
8886defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
8887}
8888
8889defm ST1 : SIMDLdSt1SingleAliases<"st1">;
8890defm ST2 : SIMDLdSt2SingleAliases<"st2">;
8891defm ST3 : SIMDLdSt3SingleAliases<"st3">;
8892defm ST4 : SIMDLdSt4SingleAliases<"st4">;
8893
8894//----------------------------------------------------------------------------
8895// Crypto extensions
8896//----------------------------------------------------------------------------
8897
8898let Predicates = [HasAES] in {
8899let isCommutable = 1 in {
8900def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
8901def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
8902}
8903def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
8904def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
8905}
8906
8907// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
8908// for AES fusion on some CPUs.
8909let hasSideEffects = 0, mayStore = 0, mayLoad = 0, Predicates = [HasAES] in {
8910def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8911                        Sched<[WriteVq]>;
8912def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8913                         Sched<[WriteVq]>;
8914}
8915
8916// Only use constrained versions of AES(I)MC instructions if they are paired with
8917// AESE/AESD.
8918def : Pat<(v16i8 (int_aarch64_crypto_aesmc
8919            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
8920                                            (v16i8 V128:$src2))))),
8921          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
8922                                             (v16i8 V128:$src2)))))>,
8923          Requires<[HasFuseAES]>;
8924
8925def : Pat<(v16i8 (int_aarch64_crypto_aesimc
8926            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
8927                                            (v16i8 V128:$src2))))),
8928          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
8929                                              (v16i8 V128:$src2)))))>,
8930          Requires<[HasFuseAES]>;
8931
8932let Predicates = [HasSHA2] in {
8933def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
8934def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
8935def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
8936def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
8937def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
8938def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
8939def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;
8940
8941def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
8942def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
8943def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
8944}
8945
8946//----------------------------------------------------------------------------
8947// Compiler-pseudos
8948//----------------------------------------------------------------------------
8949// FIXME: Like for X86, these should go in their own separate .td file.
8950
8951// For an anyext, we don't care what the high bits are, so we can perform an
8952// INSERT_SUBREG into an IMPLICIT_DEF.
8953def : Pat<(i64 (anyext GPR32:$src)),
8954          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
8955
8956// When we need to explicitly zero-extend, we use a 32-bit MOV (writing a W
8957// register zeroes the upper 32 bits) and then assert the extension happened.
8958def : Pat<(i64 (zext GPR32:$src)),
8959          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
8960
8961// To sign extend, we use a signed bitfield move instruction (SBFM) on the
8962// containing super-reg.
8963def : Pat<(i64 (sext GPR32:$src)),
8964   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
8965def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
8966def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
8967def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
8968def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
8969def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
8970def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
8971def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
8972
8973def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
8974          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
8975                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
8976def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
8977          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
8978                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
8979
8980def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
8981          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
8982                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
8983def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
8984          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
8985                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
8986
8987def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
8988          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8989                   (i64 (i64shift_a        imm0_63:$imm)),
8990                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8991
8992def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
8993          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8994                   (i64 (i64shift_a        imm0_63:$imm)),
8995                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
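// Worked example (illustrative): for (shl (sext_inreg GPR32:$Rn, i8), (i64 3))
// the first pattern above effectively emits SBFMWri $Rn, #29, #7, i.e. the C
// expression
//
//   int32_t shifted_sext(int32_t x) { return (int32_t)(int8_t)x << 3; }
//
// becomes a single "sbfiz w0, w0, #3, #8" instead of a separate sign extend
// and shift.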
8996
8997// sra patterns have an AddedComplexity of 10, so make sure we have a higher
8998// AddedComplexity for the following patterns since we want to match sext + sra
8999// patterns before we attempt to match a single sra node.
9000let AddedComplexity = 20 in {
9001// We support all sext + sra combinations that preserve at least one bit of
9002// the original value being sign extended, i.e. we support shifts of up to
9003// bitwidth-1 bits.
9004def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
9005          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
9006def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
9007          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
9008
9009def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
9010          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
9011def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
9012          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
9013
9014def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
9015          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
9016                   (i64 imm0_31:$imm), 31)>;
9017} // AddedComplexity = 20
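// Worked example (illustrative): for (sra (sext_inreg GPR32:$Rn, i8), (i64 2))
// the first pattern emits SBFMWri $Rn, #2, #7, i.e. the C expression
//
//   int32_t shifted_sra(int32_t x) { return (int32_t)(int8_t)x >> 2; }
//
// becomes a single "sbfx w0, w0, #2, #6", which keeps bits [7:2] of the byte
// and so preserves at least one bit of the sign-extended value, as required.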
9018
9019// To truncate, we can simply extract from a subregister.
9020def : Pat<(i32 (trunc GPR64sp:$src)),
9021          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
9022
9023// __builtin_trap() uses the BRK instruction on AArch64.
9024def : Pat<(trap), (BRK 1)>;
9025def : Pat<(debugtrap), (BRK 0xF000)>;
9026
9027def ubsan_trap_xform : SDNodeXForm<timm, [{
9028  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
9029}]>;
9030
9031def gi_ubsan_trap_xform : GICustomOperandRenderer<"renderUbsanTrap">,
9032  GISDNodeXFormEquiv<ubsan_trap_xform>;
9033
9034def ubsan_trap_imm : TImmLeaf<i32, [{
9035  return isUInt<8>(Imm);
9036}], ubsan_trap_xform>;
9037
9038def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
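// Worked example (illustrative): ubsan_trap_xform ORs 'U' (0x55) into the
// second byte of the 8-bit trap kind, so (ubsantrap 1) lowers to
// BRK #0x5501 (0x01 | (0x55 << 8)), keeping the kind recoverable from the
// immediate's low byte.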
9039
9040// Multiply-high patterns which multiply the lower subvector using smull/umull
9041// and the upper subvector with smull2/umull2, then shuffle the high parts of
9042// both results together.
9043def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
9044          (UZP2v16i8
9045           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
9046                            (EXTRACT_SUBREG V128:$Rm, dsub)),
9047           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
9048def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
9049          (UZP2v8i16
9050           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
9051                             (EXTRACT_SUBREG V128:$Rm, dsub)),
9052           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
9053def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
9054          (UZP2v4i32
9055           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
9056                             (EXTRACT_SUBREG V128:$Rm, dsub)),
9057           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
9058
9059def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
9060          (UZP2v16i8
9061           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
9062                            (EXTRACT_SUBREG V128:$Rm, dsub)),
9063           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
9064def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
9065          (UZP2v8i16
9066           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
9067                             (EXTRACT_SUBREG V128:$Rm, dsub)),
9068           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
9069def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
9070          (UZP2v4i32
9071           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
9072                             (EXTRACT_SUBREG V128:$Rm, dsub)),
9073           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
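// Illustrative sketch (not part of the build): per lane, mulhs/mulhu keep only
// the high half of the double-width product. For the v8i16 signed case, in C:
//
//   #include <stdint.h>
//   static inline int16_t mulhs_lane(int16_t a, int16_t b) {
//     return (int16_t)(((int32_t)a * (int32_t)b) >> 16);  // high 16 bits
//   }
//
// SMULL/SMULL2 compute the full 32-bit products of the low and high source
// halves, and UZP2 gathers the odd (high) 16-bit halves of those products
// into a single vector.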
9074
9075// Conversions within AdvSIMD types in the same register size are free.
9076// But because we need a consistent lane ordering, in big endian many
9077// conversions require one or more REV instructions.
9078//
9079// Consider a simple memory load followed by a bitconvert then a store.
9080//   v0 = load v2i32
9081//   v1 = BITCAST v2i32 v0 to v4i16
9082//        store v4i16 v1
9083//
9084// In big endian mode every memory access has an implicit byte swap. LDR and
9085// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
9086// is, they treat the vector as a sequence of elements to be byte-swapped.
9087// The two pairs of instructions are fundamentally incompatible. We've decided
9088// to use LD1/ST1 only to simplify compiler implementation.
9089//
9090// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
9091// the original code sequence:
9092//   v0 = load v2i32
9093//   v1 = REV v2i32                  (implicit)
9094//   v2 = BITCAST v2i32 v1 to v4i16
9095//   v3 = REV v4i16 v2               (implicit)
9096//        store v4i16 v3
9097//
9098// But this is now broken - the value stored is different from the value loaded
9099// due to lane reordering. To fix this, on every BITCAST we must perform two
9100// other REVs:
9101//   v0 = load v2i32
9102//   v1 = REV v2i32                  (implicit)
9103//   v2 = REV v2i32
9104//   v3 = BITCAST v2i32 v2 to v4i16
9105//   v4 = REV v4i16
9106//   v5 = REV v4i16 v4               (implicit)
9107//        store v4i16 v5
9108//
9109// This means an extra two instructions, but actually in most cases the two REV
9110// instructions can be combined into one. For example:
9111//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
9112//
9113// There is also no 128-bit REV instruction. This must be synthesized with an
9114// EXT instruction.
9115//
9116// Most bitconverts require some sort of conversion. The only exceptions are:
9117//   a) Identity conversions - vNfX <-> vNiX
9118//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
9119//
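// Illustrative check of the REV-combining claim above,
// (REV64_2s (REV64_4h X)) === (REV32_4h X), on the 16-bit lane indices of a
// 64-bit register holding lanes [0,1,2,3]:
//
//   REV64_4h reverses h lanes within 64 bits:     [0,1,2,3] -> [3,2,1,0]
//   REV64_2s swaps the two s lanes, i.e. h pairs: [3,2,1,0] -> [1,0,3,2]
//   REV32_4h swaps h lanes within each 32 bits:   [0,1,2,3] -> [1,0,3,2]
//
// Both routes produce the same lane order, so the two REVs fold into one.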
9120
9121// Natural vector casts (64 bit)
9122foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
9123  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
9124    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
9125              (VT FPR64:$src)>;
9126
9127// Natural vector casts (128 bit)
9128foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
9129  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
9130    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
9131              (VT FPR128:$src)>;
9132
9133let Predicates = [IsLE] in {
9134def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9135def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9136def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9137def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9138def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9139def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9140
9141def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
9142          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9143def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
9144          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9145def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
9146          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9147def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
9148          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9149def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
9150          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9151def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
9152          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9153def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
9154          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9155}
9156let Predicates = [IsBE] in {
9157def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
9158                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
9159def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
9160                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
9161def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
9162                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
9163def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
9164                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
9165def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
9166                  (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
9167def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
9168                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
9169
9170def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
9171          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
9172def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
9173          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
9174def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
9175          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
9176def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
9177          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
9178def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
9179          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
9180def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
9181          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
9182}
9183def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9184def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9185def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
9186          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9187def : Pat<(v1i64 (vec_ins_or_scal_vec GPR64:$Xn)),
9188          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9189def : Pat<(v1f64 (vec_ins_or_scal_vec GPR64:$Xn)),
9190          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9191def : Pat<(v1f64 (vec_ins_or_scal_vec (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
9192
9193def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
9194          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
9195def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
9196          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
9197def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
9198          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
9199def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
9200          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
9201def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
9202          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9203
9204def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
9205def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
9206
9207let Predicates = [IsLE] in {
9208def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
9209def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
9210def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
9211def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
9212def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
9213def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
9214}
9215let Predicates = [IsBE] in {
9216def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
9217                             (v1i64 (REV64v2i32 FPR64:$src))>;
9218def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
9219                             (v1i64 (REV64v4i16 FPR64:$src))>;
9220def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
9221                             (v1i64 (REV64v8i8 FPR64:$src))>;
9222def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
9223                             (v1i64 (REV64v4i16 FPR64:$src))>;
9224def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
9225                             (v1i64 (REV64v4i16 FPR64:$src))>;
9226def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
9227                             (v1i64 (REV64v2i32 FPR64:$src))>;
9228}
9229def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
9230def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
9231
9232let Predicates = [IsLE] in {
9233def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
9234def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
9235def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
9236def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
9237def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
9238def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
9239def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
9240}
9241let Predicates = [IsBE] in {
9242def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
9243                             (v2i32 (REV64v2i32 FPR64:$src))>;
9244def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
9245                             (v2i32 (REV32v4i16 FPR64:$src))>;
9246def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
9247                             (v2i32 (REV32v8i8 FPR64:$src))>;
9248def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
9249                             (v2i32 (REV64v2i32 FPR64:$src))>;
9250def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
9251                             (v2i32 (REV64v2i32 FPR64:$src))>;
9252def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
9253                             (v2i32 (REV32v4i16 FPR64:$src))>;
9254def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
9255                             (v2i32 (REV32v4i16 FPR64:$src))>;
9256}
9257def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
9258
9259let Predicates = [IsLE] in {
9260def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
9261def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
9262def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
9263def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
9264def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
9265def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
9266}
9267let Predicates = [IsBE] in {
9268def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
9269                             (v4i16 (REV64v4i16 FPR64:$src))>;
9270def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
9271                             (v4i16 (REV32v4i16 FPR64:$src))>;
9272def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
9273                             (v4i16 (REV16v8i8 FPR64:$src))>;
9274def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
9275                             (v4i16 (REV64v4i16 FPR64:$src))>;
9276def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
9277                             (v4i16 (REV32v4i16 FPR64:$src))>;
9278def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
9279                             (v4i16 (REV64v4i16 FPR64:$src))>;
9280}
9281def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
9282def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
9283
9284let Predicates = [IsLE] in {
9285def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
9286def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
9287def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
9288def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
9289def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
9290def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
9291
9292def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
9293def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
9294def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))), (v4bf16 FPR64:$src)>;
9295def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))), (v4bf16 FPR64:$src)>;
9296def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
9297def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
9298}
9299let Predicates = [IsBE] in {
9300def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
9301                             (v4f16 (REV64v4i16 FPR64:$src))>;
9302def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
9303                             (v4f16 (REV32v4i16 FPR64:$src))>;
9304def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))),
9305                             (v4f16 (REV16v8i8 FPR64:$src))>;
9306def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))),
9307                             (v4f16 (REV64v4i16 FPR64:$src))>;
9308def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
9309                             (v4f16 (REV32v4i16 FPR64:$src))>;
9310def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
9311                             (v4f16 (REV64v4i16 FPR64:$src))>;
9312
9313def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
9314                             (v4bf16 (REV64v4i16 FPR64:$src))>;
9315def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
9316                             (v4bf16 (REV32v4i16 FPR64:$src))>;
9317def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))),
9318                             (v4bf16 (REV16v8i8 FPR64:$src))>;
9319def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))),
9320                             (v4bf16 (REV64v4i16 FPR64:$src))>;
9321def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
9322                             (v4bf16 (REV32v4i16 FPR64:$src))>;
9323def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
9324                             (v4bf16 (REV64v4i16 FPR64:$src))>;
9325}
9326def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
9327def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
9328
9329let Predicates = [IsLE] in {
9330def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
9331def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
9332def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
9333def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
9334def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
9335def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
9336def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))), (v8i8  FPR64:$src)>;
9337def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))), (v8i8  FPR64:$src)>;
9338}
9339let Predicates = [IsBE] in {
9340def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
9341                             (v8i8 (REV64v8i8 FPR64:$src))>;
9342def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
9343                             (v8i8 (REV32v8i8 FPR64:$src))>;
9344def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
9345                             (v8i8 (REV16v8i8 FPR64:$src))>;
9346def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
9347                             (v8i8 (REV64v8i8 FPR64:$src))>;
9348def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
9349                             (v8i8 (REV32v8i8 FPR64:$src))>;
9350def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
9351                             (v8i8 (REV64v8i8 FPR64:$src))>;
9352def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))),
9353                             (v8i8 (REV16v8i8 FPR64:$src))>;
9354def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))),
9355                             (v8i8 (REV16v8i8 FPR64:$src))>;
9356}
9357
9358let Predicates = [IsLE] in {
9359def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
9360def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
9361def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
9362def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
9363def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))), (f64   FPR64:$src)>;
9364def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))), (f64   FPR64:$src)>;
9365}
9366let Predicates = [IsBE] in {
9367def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
9368                             (f64 (REV64v2i32 FPR64:$src))>;
9369def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
9370                             (f64 (REV64v4i16 FPR64:$src))>;
9371def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
9372                             (f64 (REV64v2i32 FPR64:$src))>;
9373def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
9374                             (f64 (REV64v8i8 FPR64:$src))>;
9375def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))),
9376                             (f64 (REV64v4i16 FPR64:$src))>;
9377def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))),
9378                             (f64 (REV64v4i16 FPR64:$src))>;
9379}
9380def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
9381def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;
9382
9383let Predicates = [IsLE] in {
9384def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
9385def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
9386def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
9387def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
9388def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
9389def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
9390}
9391let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
                             (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
                             (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
                             (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
                             (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
                             (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
                             (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
                             (v2f64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
                             (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
                             (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
                             (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
                             (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
                             (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
                             (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
                             (v2i64 (EXTv16i8 FPR128:$src,
                                              FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
                             (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
                             (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
                             (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                                              (REV64v4i32 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
                             (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
                             (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
                             (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
                             (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;

def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
                             (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
                             (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
                             (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
                             (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
                             (v8f16 (REV32v8i16 FPR128:$src))>;

def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))),
                             (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
                             (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
                             (v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
                             (v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
                             (v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
                             (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                              (REV64v16i8 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
}

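// Extracting the low 64-bit half of a 128-bit vector is just a dsub
// subregister copy, so no instruction is needed.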
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;

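// Extracting the high half instead duplicates lane 1 of the v2i64 view of the
// source (DUP Vd.2D, Vn.D[1]) and then takes the D subregister.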
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Prefer using the bottom lanes of addp Rn, Rn compared to
// addp extractlow(Rn), extracthigh(Rn)
def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;

def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;

// add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y)
// (For two-element vectors, zip1/zip2 select the same lanes as uzp1/uzp2,
// which is why the two-element cases below match zip nodes.)
def : Pat<(v2i64 (add (AArch64zip1 (v2i64 FPR128:$Rn), (v2i64 FPR128:$Rm)),
                      (AArch64zip2 (v2i64 FPR128:$Rn), (v2i64 FPR128:$Rm)))),
          (v2i64 (ADDPv2i64 $Rn, $Rm))>;
def : Pat<(v4i32 (add (AArch64uzp1 (v4i32 FPR128:$Rn), (v4i32 FPR128:$Rm)),
                      (AArch64uzp2 (v4i32 FPR128:$Rn), (v4i32 FPR128:$Rm)))),
          (v4i32 (ADDPv4i32 $Rn, $Rm))>;
def : Pat<(v8i16 (add (AArch64uzp1 (v8i16 FPR128:$Rn), (v8i16 FPR128:$Rm)),
                      (AArch64uzp2 (v8i16 FPR128:$Rn), (v8i16 FPR128:$Rm)))),
          (v8i16 (ADDPv8i16 $Rn, $Rm))>;
def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
                      (AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
          (v16i8 (ADDPv16i8 $Rn, $Rm))>;

def : Pat<(v2i32 (add (AArch64zip1 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
                                   (extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
                      (AArch64zip2 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
                                   (extract_subvector (v4i32 FPR128:$Rn), (i64 2))))),
          (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub)>;
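// On little-endian, trunc(v4i32 -> v4i16) keeps the low 16 bits of each
// element, which matches the lanes uzp1 would select, so the pairs below are
// also the two halves of addp Rn, Rn.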
def : Pat<(v4i16 (add (trunc (v4i32 (bitconvert FPR128:$Rn))),
                      (extract_subvector (AArch64uzp2 (v8i16 FPR128:$Rn), undef), (i64 0)))),
          (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub)>;
def : Pat<(v8i8  (add (trunc (v8i16 (bitconvert FPR128:$Rn))),
                      (extract_subvector (AArch64uzp2 (v16i8 FPR128:$Rn), undef), (i64 0)))),
          (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub)>;

def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
                       (AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
          (v2f64 (FADDPv2f64 $Rn, $Rm))>;
def : Pat<(v4f32 (fadd (AArch64uzp1 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)),
                       (AArch64uzp2 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)))),
          (v4f32 (FADDPv4f32 $Rn, $Rm))>;
let Predicates = [HasFullFP16] in
def : Pat<(v8f16 (fadd (AArch64uzp1 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)),
                       (AArch64uzp2 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)))),
          (v8f16 (FADDPv8f16 $Rn, $Rm))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

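// Likewise, a nontemporal GPR64 store becomes an STNP of its two 32-bit
// halves: the low half via a sub_32 copy and the high half via LSR #32
// (UBFM Xd, Xn, #32, #63).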
def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so they carry no encoding information.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;

  // Indirect tail-calls with reduced register classes, needed for BTI and
  // PAuthLR.
  def TCRETURNrix16x17 : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  def TCRETURNrix17 : Pseudo<(outs), (ins tcGPRx17:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  def TCRETURNrinotx16 : Pseudo<(outs), (ins tcGPRnotx16:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[TailCallAny]>;
def : Pat<(AArch64tcret tcGPRx16x17:$dst, (i32 timm:$FPDiff)),
          (TCRETURNrix16x17 tcGPRx16x17:$dst, imm:$FPDiff)>,
      Requires<[TailCallX16X17]>;
def : Pat<(AArch64tcret tcGPRx17:$dst, (i32 timm:$FPDiff)),
          (TCRETURNrix17 tcGPRx17:$dst, imm:$FPDiff)>,
      Requires<[TailCallX17]>;
def : Pat<(AArch64tcret tcGPRnotx16:$dst, (i32 timm:$FPDiff)),
          (TCRETURNrinotx16 tcGPRnotx16:$dst, imm:$FPDiff)>,
      Requires<[TailCallNotX16]>;

def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

let Size = 8 in
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy propagation,
// to reason about, so it is preferred when possible.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

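// Matches the fully unrolled sum of four byte products loaded from Rn and Rm
// at offsets 0..3 and rewrites it as a single [SU]DOT with a zero accumulator.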
class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;

// dot_v8i8
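// Same idea for an in-register v8i8: the extend/mull/uaddv tree the midend
// produces for an 8-way dot product is rewritten as [SU]DOT into a v2i32
// accumulator, followed by a pairwise add of the two partial sums.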
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special: it lowers to ADDP Vd.2S, Vn.2S, Vm.2S with
// Vn == Vm, and the result is then read from Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
                    SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
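// v16i8 works the same way, with four 4-lane partial products and a v4i32
// DOT accumulator that is reduced with ADDV rather than ADDP.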
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  let mayLoad = 1 in
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  let mayStore = 1 in
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:   Store64BV<0b011, "st64bv">;
  def ST64BV0:  Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}

let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}

// MOPS operations always expand to three 4-byte instructions (prologue, main,
// and epilogue), hence the fixed Size = 12 on the pseudos below.
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo  : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                      [], "$Rd = $Rd_wb,$Rn = $Rn_wb,@earlyclobber $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}

//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

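// A blended discriminator puts the 16-bit integer discriminator in the top 16
// bits of the address; BFM Xd, Xn, #16, #15 is BFI Xd, Xn, #48, #16.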
def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
          (PAUTH_BLEND GPR64:$Rd, (trunc_imm imm64_0_65535:$imm))>;
def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;

//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb",  (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb",  (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW     : ReadCheckWriteCompareAndSwap;

defm RCWCLR  : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET  : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg): when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                              size   opc    opc2
  def STILPWpre:   BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:   BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:      BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:      BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                         size   opc
  def STLRWpre:   BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback),            (ins GPR32:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:   BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback),            (ins GPR64:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #4",   "$Rn = $wback">;
  def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #8",   "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size   opc regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                                L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt  : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed because tablegen interprets this instruction
    // as having four fields and autogenerates a decoder (decodeToMCInst) that
    // builds an MC representation with four operands, while printing expects
    // five (one extra for the XZR): AArch64InstPrinter::printInstruction in
    // AArch64GenAsmWriter.inc is derived from the asm template and therefore
    // wants to print five operands.
    // Adding a bits<5> xzr_pair field would avoid this, but without a way to
    // constrain it to 0b11111 here it would overlap with the main SYSP
    // instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS      1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR      1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc.).

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs),  (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}

//===----------------------------===//
// 2023 Architecture Extensions:
//===----------------------------===//

let Predicates = [HasFP8] in {
  defm F1CVTL  : SIMD_FP8_CVTL<0b00, "f1cvtl", v8f16, int_aarch64_neon_fp8_cvtl1>;
  defm F2CVTL  : SIMD_FP8_CVTL<0b01, "f2cvtl", v8f16, int_aarch64_neon_fp8_cvtl2>;
  defm BF1CVTL : SIMD_FP8_CVTL<0b10, "bf1cvtl", v8bf16, int_aarch64_neon_fp8_cvtl1>;
  defm BF2CVTL : SIMD_FP8_CVTL<0b11, "bf2cvtl", v8bf16, int_aarch64_neon_fp8_cvtl2>;
  defm FCVTN_F16 : SIMD_FP8_CVTN_F16<"fcvtn", int_aarch64_neon_fp8_fcvtn>;
  defm FCVTN_F32 : SIMD_FP8_CVTN_F32<"fcvtn", int_aarch64_neon_fp8_fcvtn>;
  defm FSCALE : SIMDThreeVectorFscale<0b1, 0b1, 0b111, "fscale", int_aarch64_neon_fp8_fscale>;
} // End let Predicates = [HasFP8]

// fminimum(abs(a), abs(b)) -> famin(a, b)
// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
                             [(int_aarch64_neon_famin node:$Rn, node:$Rm),
                              (fminimum (fabs node:$Rn), (fabs node:$Rm)),
                              (fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;

// fmaximum(abs(a), abs(b)) -> famax(a, b)
// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
                             [(int_aarch64_neon_famax node:$Rn, node:$Rm),
                              (fmaximum (fabs node:$Rn), (fabs node:$Rm)),
                              (fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;

let Predicates = [HasNEON, HasFAMINMAX] in {
 defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", AArch64famax>;
 defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", AArch64famin>;
} // End let Predicates = [HasNEON, HasFAMINMAX]

let Predicates = [HasFP8FMA] in {
 defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb", int_aarch64_neon_fp8_fmlalb_lane>;
 defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt", int_aarch64_neon_fp8_fmlalt_lane>;
 defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb", int_aarch64_neon_fp8_fmlallbb_lane>;
 defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt", int_aarch64_neon_fp8_fmlallbt_lane>;
 defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb", int_aarch64_neon_fp8_fmlalltb_lane>;
 defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt", int_aarch64_neon_fp8_fmlalltt_lane>;
}

let Predicates = [HasFP8FMA], Uses = [FPMR, FPCR], mayLoad = 1 in {
 defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb", int_aarch64_neon_fp8_fmlalb>;
 defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt", int_aarch64_neon_fp8_fmlalt>;
 defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb", int_aarch64_neon_fp8_fmlallbb>;
 defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt", int_aarch64_neon_fp8_fmlallbt>;
 defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb", int_aarch64_neon_fp8_fmlalltb>;
 defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt", int_aarch64_neon_fp8_fmlalltt>;
} // End let Predicates = [HasFP8FMA]

let Predicates = [HasFP8DOT2] in {
 defm FDOTlane : SIMD_FP8_Dot2_Index<"fdot", int_aarch64_neon_fp8_fdot2_lane>;
 defm FDOT : SIMD_FP8_Dot2<"fdot", int_aarch64_neon_fp8_fdot2>;
} // End let Predicates = [HasFP8DOT2]

let Predicates = [HasFP8DOT4] in {
 defm FDOTlane : SIMD_FP8_Dot4_Index<"fdot", int_aarch64_neon_fp8_fdot4_lane>;
 defm FDOT : SIMD_FP8_Dot4<"fdot", int_aarch64_neon_fp8_fdot4>;
} // End let Predicates = [HasFP8DOT4]

//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
let Predicates = [HasCPA] in {
  // Scalar add/subtract
  defm ADDPT : AddSubCPA<0, "addpt">;
  defm SUBPT : AddSubCPA<1, "subpt">;

  // Scalar multiply-add/subtract
  def MADDPT : MulAccumCPA<0, "maddpt">;
  def MSUBPT : MulAccumCPA<1, "msubpt">;
}

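// Software f32->bf16 rounding for when no BF16 conversion instruction is
// available: add the bias 0x7FFF plus the LSB of the truncated result
// (round-to-nearest-even), and quiet NaNs instead of rounding them.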
def round_v4fp32_to_v4bf16 :
  OutPatFrag<(ops node:$Rn),
             // NaN? Round : Quiet(NaN)
             (BSPv16i8 (FCMEQv4f32 $Rn, $Rn),
                       (ADDv4i32
                         (ADDv4i32 $Rn,
                           // Extract the LSB of the fp32 *truncated* to bf16.
                           (ANDv16i8 (USHRv4i32_shift V128:$Rn, (i32 16)),
                                     (MOVIv4i32 (i32 1), (i32 0)))),
                         // Bias which will help us break ties correctly.
                         (MOVIv4s_msl (i32 127), (i32 264))),
                       // Set the quiet bit in the NaN.
                       (ORRv4i32 $Rn, (i32 64), (i32 16)))>;

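// Without full FP16 (or BF16) support, 8-lane half/bfloat ops are promoted:
// each 64-bit half is widened to v4f32, the f32 instruction is run, and the
// two results are narrowed back into one 128-bit vector.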
multiclass PromoteUnaryv8f16Tov4f32<SDPatternOperator InOp, Instruction OutInst> {
  let Predicates = [HasNoFullFP16] in
  def : Pat<(InOp (v8f16 V128:$Rn)),
            (v8f16 (FCVTNv8i16
              (INSERT_SUBREG (IMPLICIT_DEF),
                             (v4f16 (FCVTNv4i16
                               (v4f32 (OutInst
                                 (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))),
               dsub),
              (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn))))))>;

  let Predicates = [HasBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn)),
            (v8bf16 (BFCVTN2
              (INSERT_SUBREG (IMPLICIT_DEF),
                (v4bf16 (BFCVTN
                  (v4f32 (OutInst
                    (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))),
                dsub),
              (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn))))))>;

  let Predicates = [HasNoBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn)),
            (UZP2v8i16
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub))))))),
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv8i16 V128:$Rn))))))>;
}
defm : PromoteUnaryv8f16Tov4f32<any_fceil,      FRINTPv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_ffloor,     FRINTMv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_fnearbyint, FRINTIv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_fround,     FRINTAv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_froundeven, FRINTNv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_frint,      FRINTXv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_ftrunc,     FRINTZv4f32>;

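// The two-operand form of the promotion above: both sources are widened
// half-by-half before applying the v4f32 instruction.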
multiclass PromoteBinaryv8f16Tov4f32<SDPatternOperator InOp, Instruction OutInst> {
  let Predicates = [HasNoFullFP16] in
  def : Pat<(InOp (v8f16 V128:$Rn), (v8f16 V128:$Rm)),
            (v8f16 (FCVTNv8i16
              (INSERT_SUBREG (IMPLICIT_DEF),
                             (v4f16 (FCVTNv4i16
                               (v4f32 (OutInst
                                 (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                                 (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))),
               dsub),
              (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn)),
                              (v4f32 (FCVTLv8i16 V128:$Rm))))))>;

  let Predicates = [HasBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)),
            (v8bf16 (BFCVTN2
              (INSERT_SUBREG (IMPLICIT_DEF),
                (v4bf16 (BFCVTN
                  (v4f32 (OutInst
                    (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                    (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))),
                dsub),
              (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn)),
                              (v4f32 (SHLLv8i16 V128:$Rm))))))>;

  let Predicates = [HasNoBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)),
            (UZP2v8i16
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub))))))),
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv8i16 V128:$Rn)),
                  (v4f32 (SHLLv8i16 V128:$Rm))))))>;
}
defm : PromoteBinaryv8f16Tov4f32<any_fadd, FADDv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fdiv, FDIVv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fmul, FMULv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fsub, FSUBv4f32>;

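// Compare-and-branch instructions (FEAT_CMPBR): a fused compare of two
// registers, or of a register with a 6-bit unsigned immediate, that branches
// on the named condition.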
let Predicates = [HasCMPBR] in {
  defm CBGT : CmpBranchRegister<0b000, "cbgt">;
  defm CBGE : CmpBranchRegister<0b001, "cbge">;
  defm CBHI : CmpBranchRegister<0b010, "cbhi">;
  defm CBHS : CmpBranchRegister<0b011, "cbhs">;
  defm CBEQ : CmpBranchRegister<0b110, "cbeq">;
  defm CBNE : CmpBranchRegister<0b111, "cbne">;

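  // cbh* compare the low halfword and cbb* the low byte of W registers.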
  def CBHGTWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b000, 0b11, "cbhgt">;
  def CBHGEWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b001, 0b11, "cbhge">;
  def CBHHIWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b010, 0b11, "cbhhi">;
  def CBHHSWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b011, 0b11, "cbhhs">;
  def CBHEQWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b110, 0b11, "cbheq">;
  def CBHNEWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b111, 0b11, "cbhne">;

  def CBBGTWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b000, 0b10, "cbbgt">;
  def CBBGEWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b001, 0b10, "cbbge">;
  def CBBHIWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b010, 0b10, "cbbhi">;
  def CBBHSWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b011, 0b10, "cbbhs">;
  def CBBEQWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b110, 0b10, "cbbeq">;
  def CBBNEWrr : BaseCmpBranchRegister<GPR32, 0b0, 0b111, 0b10, "cbbne">;

  defm CBGT : CmpBranchImmediate<0b000, "uimm6", "cbgt">;
  defm CBLT : CmpBranchImmediate<0b001, "uimm6", "cblt">;
  defm CBHI : CmpBranchImmediate<0b010, "uimm6", "cbhi">;
  defm CBLO : CmpBranchImmediate<0b011, "uimm6", "cblo">;
  defm CBEQ : CmpBranchImmediate<0b110, "uimm6", "cbeq">;
  defm CBNE : CmpBranchImmediate<0b111, "uimm6", "cbne">;

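  // The remaining conditions are assembler aliases of the encoded ones: the
  // uimm6p1/uimm6m1 operand types adjust the immediate by one so that, e.g.,
  // cbge can reuse the cbgt encoding, while the register-register aliases
  // swap the two source registers.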
  defm : CmpBranchImmediateAlias<"cbge", "CBGT", "uimm6p1">;
  defm : CmpBranchImmediateAlias<"cbhs", "CBHI", "uimm6p1">;
  defm : CmpBranchImmediateAlias<"cble", "CBLT", "uimm6m1">;
  defm : CmpBranchImmediateAlias<"cbls", "CBLO", "uimm6m1">;

  defm : CmpBranchRegisterAlias<"cble", "CBGE">;
  defm : CmpBranchRegisterAlias<"cblo", "CBHI">;
  defm : CmpBranchRegisterAlias<"cbls", "CBHS">;
  defm : CmpBranchRegisterAlias<"cblt", "CBGT">;

  defm : CmpBranchWRegisterAlias<"cbble", "CBBGE">;
  defm : CmpBranchWRegisterAlias<"cbblo", "CBBHI">;
  defm : CmpBranchWRegisterAlias<"cbbls", "CBBHS">;
  defm : CmpBranchWRegisterAlias<"cbblt", "CBBGT">;

  defm : CmpBranchWRegisterAlias<"cbhle", "CBHGE">;
  defm : CmpBranchWRegisterAlias<"cbhlo", "CBHHI">;
  defm : CmpBranchWRegisterAlias<"cbhls", "CBHHS">;
  defm : CmpBranchWRegisterAlias<"cbhlt", "CBHGT">;
} // HasCMPBR


//===----------------------------------------------------------------------===//
// Atomic floating-point in-memory instructions (FEAT_LSFE)
//===----------------------------------------------------------------------===//

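// Naming: no suffix = no ordering, 'A' = acquire, 'L' = release, and
// 'AL' = acquire+release semantics on the memory access.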
let Predicates = [HasLSFE] in {
  // Floating-point Atomic Load
  defm LDFADDA    : AtomicFPLoad<0b10, 0b000, "ldfadda">;
  defm LDFADDAL   : AtomicFPLoad<0b11, 0b000, "ldfaddal">;
  defm LDFADD     : AtomicFPLoad<0b00, 0b000, "ldfadd">;
  defm LDFADDL    : AtomicFPLoad<0b01, 0b000, "ldfaddl">;
  defm LDFMAXA    : AtomicFPLoad<0b10, 0b100, "ldfmaxa">;
  defm LDFMAXAL   : AtomicFPLoad<0b11, 0b100, "ldfmaxal">;
  defm LDFMAX     : AtomicFPLoad<0b00, 0b100, "ldfmax">;
  defm LDFMAXL    : AtomicFPLoad<0b01, 0b100, "ldfmaxl">;
  defm LDFMINA    : AtomicFPLoad<0b10, 0b101, "ldfmina">;
  defm LDFMINAL   : AtomicFPLoad<0b11, 0b101, "ldfminal">;
  defm LDFMIN     : AtomicFPLoad<0b00, 0b101, "ldfmin">;
  defm LDFMINL    : AtomicFPLoad<0b01, 0b101, "ldfminl">;
  defm LDFMAXNMA  : AtomicFPLoad<0b10, 0b110, "ldfmaxnma">;
  defm LDFMAXNMAL : AtomicFPLoad<0b11, 0b110, "ldfmaxnmal">;
  defm LDFMAXNM   : AtomicFPLoad<0b00, 0b110, "ldfmaxnm">;
  defm LDFMAXNML  : AtomicFPLoad<0b01, 0b110, "ldfmaxnml">;
  defm LDFMINNMA  : AtomicFPLoad<0b10, 0b111, "ldfminnma">;
  defm LDFMINNMAL : AtomicFPLoad<0b11, 0b111, "ldfminnmal">;
  defm LDFMINNM   : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
  defm LDFMINNML  : AtomicFPLoad<0b01, 0b111, "ldfminnml">;
  // BFloat16
  def LDBFADDA    : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b000, "ldbfadda">;
  def LDBFADDAL   : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b000, "ldbfaddal">;
  def LDBFADD     : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b000, "ldbfadd">;
  def LDBFADDL    : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b000, "ldbfaddl">;
  def LDBFMAXA    : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b100, "ldbfmaxa">;
  def LDBFMAXAL   : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b100, "ldbfmaxal">;
  def LDBFMAX     : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b100, "ldbfmax">;
  def LDBFMAXL    : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b100, "ldbfmaxl">;
  def LDBFMINA    : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b101, "ldbfmina">;
  def LDBFMINAL   : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b101, "ldbfminal">;
  def LDBFMIN     : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b101, "ldbfmin">;
  def LDBFMINL    : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b101, "ldbfminl">;
  def LDBFMAXNMA  : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b110, "ldbfmaxnma">;
  def LDBFMAXNMAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b110, "ldbfmaxnmal">;
  def LDBFMAXNM   : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b110, "ldbfmaxnm">;
  def LDBFMAXNML  : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b110, "ldbfmaxnml">;
  def LDBFMINNMA  : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b111, "ldbfminnma">;
  def LDBFMINNMAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b111, "ldbfminnmal">;
  def LDBFMINNM   : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b111, "ldbfminnm">;
  def LDBFMINNML  : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b111, "ldbfminnml">;

  // Floating-point Atomic Store
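  // Stores return no value, so unlike the loads they have only plain and
  // release ('L') variants; there are no acquire forms.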
  defm STFADD    : AtomicFPStore<0b0, 0b000, "stfadd">;
  defm STFADDL   : AtomicFPStore<0b1, 0b000, "stfaddl">;
  defm STFMAX    : AtomicFPStore<0b0, 0b100, "stfmax">;
  defm STFMAXL   : AtomicFPStore<0b1, 0b100, "stfmaxl">;
  defm STFMIN    : AtomicFPStore<0b0, 0b101, "stfmin">;
  defm STFMINL   : AtomicFPStore<0b1, 0b101, "stfminl">;
  defm STFMAXNM  : AtomicFPStore<0b0, 0b110, "stfmaxnm">;
  defm STFMAXNML : AtomicFPStore<0b1, 0b110, "stfmaxnml">;
  defm STFMINNM  : AtomicFPStore<0b0, 0b111, "stfminnm">;
  defm STFMINNML : AtomicFPStore<0b1, 0b111, "stfminnml">;
  // BFloat16
  def STBFADD    : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b000, "stbfadd">;
  def STBFADDL   : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b000, "stbfaddl">;
  def STBFMAX    : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b100, "stbfmax">;
  def STBFMAXL   : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b100, "stbfmaxl">;
  def STBFMIN    : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b101, "stbfmin">;
  def STBFMINL   : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b101, "stbfminl">;
  def STBFMAXNM  : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b110, "stbfmaxnm">;
  def STBFMAXNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b110, "stbfmaxnml">;
  def STBFMINNM  : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b111, "stbfminnm">;
  def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}

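// FP8 matrix multiply-accumulate. The instruction reads FPMR (the FP8 mode
// register) and FPCR to determine the FP8 operand formats and FP behaviour.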
let Uses = [FPMR, FPCR] in
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"