xref: /llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td (revision 69b47845403e78f61987945033e1fd72e9dc044a)
1//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse V1 processors.
10//
11// References:
12// - "Arm Neoverse V1 Software Optimization Guide"
13// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing"
14//   https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm
15// - "Neoverse V1"
16//   https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1
17
18//
19//===----------------------------------------------------------------------===//
20
21def NeoverseV1Model : SchedMachineModel {
22  let IssueWidth            =  15; // Maximum micro-ops dispatch rate.
23  let MicroOpBufferSize     = 256; // Micro-op re-order buffer.
24  let LoadLatency           =   4; // Optimistic load latency.
25  let MispredictPenalty     =  11; // Cycle cost of a mispredicted branch.
26  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
27  let CompleteModel         =   1; // NOTE(review): presumably requires scheduling info for every supported instruction - confirm against TargetSchedule.td.
28
  // Feature predicates this model declares unsupported: the SVE2 and SME
  // lists concatenated with the MTE, CPA and CSSC predicates.
29  list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
30                                                    SMEUnsupported.F,
31                                                    [HasMTE, HasCPA,
32                                                    HasCSSC]);
33}
34
35//===----------------------------------------------------------------------===//
36// Define each kind of processor resource and number available on Neoverse V1.
37// Instructions are first fetched and then decoded into internal macro-ops
38// (MOPs).  From there, the MOPs proceed through register renaming and dispatch
39// stages.  A MOP can be split into one or more micro-ops further down the
40// pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
41// operands and issue out-of-order to one of the issue pipelines.  Each issue
42// pipeline can accept one micro-op per cycle.
43
44let SchedModel = NeoverseV1Model in {
45
46// Define the issue ports.
// The ProcResource argument is the number of identical units of that kind; it
// matches the pipeline indices named in the trailing comments where given.
47def V1UnitB   : ProcResource<2>;  // Branch 0/1
48def V1UnitS   : ProcResource<2>;  // Integer single cycle 0/1
49def V1UnitM0  : ProcResource<1>;  // Integer multicycle 0
50def V1UnitM1  : ProcResource<1>;  // Integer multicycle 1
51def V1UnitL01 : ProcResource<2>;  // Load/Store 0/1
52def V1UnitL2  : ProcResource<1>;  // Load 2
53def V1UnitD   : ProcResource<2>;  // Store data 0/1
54def V1UnitV0  : ProcResource<1>;  // FP/ASIMD 0
55def V1UnitV1  : ProcResource<1>;  // FP/ASIMD 1
56def V1UnitV2  : ProcResource<1>;  // FP/ASIMD 2
57def V1UnitV3  : ProcResource<1>;  // FP/ASIMD 3
58def V1UnitFlg : ProcResource<3>;  // Flags
59
// Named sets of the issue ports defined above.  The write types later in this
// file reference a group where more than one port is acceptable.
// NOTE(review): presumably a micro-op bound to a group may issue on any one
// member - confirm against the ProcResGroup documentation.
60def V1UnitI   : ProcResGroup<[V1UnitS,
61                              V1UnitM0, V1UnitM1]>;   // Integer units
62def V1UnitM   : ProcResGroup<[V1UnitM0, V1UnitM1]>;   // Integer multicycle units
63def V1UnitL   : ProcResGroup<[V1UnitL01, V1UnitL2]>;  // Load units
64def V1UnitV   : ProcResGroup<[V1UnitV0, V1UnitV1,
65                              V1UnitV2, V1UnitV3]>;   // FP/ASIMD units
66def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>;   // FP/ASIMD 0/1 units
67def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>;   // FP/ASIMD 0/2 units
68def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>;   // FP/ASIMD 1/3 units
69
70// Define commonly used read types.
71
72// No generic forwarding is provided for these types.
// Every generic read type below is given a read advance of 0 cycles; the
// forwarding that this model does describe uses the dedicated V1Rd_* records.
73def : ReadAdvance<ReadI,       0>;
74def : ReadAdvance<ReadISReg,   0>;
75def : ReadAdvance<ReadIEReg,   0>;
76def : ReadAdvance<ReadIM,      0>;
77def : ReadAdvance<ReadIMA,     0>;
78def : ReadAdvance<ReadID,      0>;
79def : ReadAdvance<ReadExtrHi,  0>;
80def : ReadAdvance<ReadAdrBase, 0>;
81def : ReadAdvance<ReadST,      0>;
82def : ReadAdvance<ReadVLD,     0>;
83
// Generic write types that consume no processor resources (empty list):
// atomics are marked unsupported, barriers and hints get a latency of 1.
84def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
85def : WriteRes<WriteBarrier, []> { let Latency = 1; }
86def : WriteRes<WriteHint,    []> { let Latency = 1; }
87
88
89//===----------------------------------------------------------------------===//
90// Define generic 0 micro-op types
// V1Write_0c_0Z: zero latency, zero micro-ops and an empty resource list.
91
92let Latency = 0, NumMicroOps = 0 in
93def V1Write_0c_0Z : SchedWriteRes<[]>;
94
95
96//===----------------------------------------------------------------------===//
97// Define generic 1 micro-op types
//
// Naming used by the V1Write_* records, as can be read off the definitions:
//   V1Write_<latency>c[<release>]_<count><unit>[_<count><unit>...]
//     <latency>c    - the Latency value, in cycles.
//     <release>     - when present, the ReleaseAtCycles value.
//     <count><unit> - V1Unit<unit> appears <count> times in the resource list.
// The records in this section do not override NumMicroOps.
98
99def V1Write_1c_1B      : SchedWriteRes<[V1UnitB]>   { let Latency = 1; }
100def V1Write_1c_1I      : SchedWriteRes<[V1UnitI]>   { let Latency = 1; }
101def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]>   { let Latency = 1; }
102def V1Write_4c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 4; }
103def V1Write_6c_1L      : SchedWriteRes<[V1UnitL]>   { let Latency = 6; }
104def V1Write_1c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
105def V1Write_4c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
106def V1Write_6c_1L01    : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
107def V1Write_2c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 2; }
108def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]>   { let Latency = 2; }
109def V1Write_3c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 3; }
110def V1Write_4c_1M      : SchedWriteRes<[V1UnitM]>   { let Latency = 4; }
111def V1Write_1c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 1; }
112def V1Write_2c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 2; }
113def V1Write_3c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 3; }
114def V1Write_5c_1M0     : SchedWriteRes<[V1UnitM0]>  { let Latency = 5; }
115def V1Write_12c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 12;
116                                                      let ReleaseAtCycles = [5]; }
117def V1Write_20c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 20;
118                                                      let ReleaseAtCycles = [5]; }
119def V1Write_2c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 2; }
120def V1Write_3c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 3; }
121def V1Write_4c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 4; }
122def V1Write_5c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 5; }
123def V1Write_2c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 2; }
124def V1Write_3c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 3; }
125def V1Write_4c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 4; }
126def V1Write_6c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 6; }
127def V1Write_10c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 10;
128                                                      let ReleaseAtCycles = [7]; }
129def V1Write_12c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 12;
130                                                      let ReleaseAtCycles = [7]; }
131def V1Write_13c10_1V0  : SchedWriteRes<[V1UnitV0]>  { let Latency = 13;
132                                                      let ReleaseAtCycles = [10]; }
133def V1Write_15c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 15;
134                                                      let ReleaseAtCycles = [7]; }
135def V1Write_16c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 16;
136                                                      let ReleaseAtCycles = [7]; }
137def V1Write_20c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 20;
138                                                      let ReleaseAtCycles = [7]; }
139def V1Write_2c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
140def V1Write_3c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
141def V1Write_4c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
142def V1Write_5c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
143def V1Write_3c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
144def V1Write_4c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
145def V1Write_7c7_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
146                                                      let ReleaseAtCycles = [7]; }
147def V1Write_10c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
148                                                      let ReleaseAtCycles = [7]; }
149def V1Write_13c5_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
150                                                      let ReleaseAtCycles = [5]; }
151def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
152                                                      let ReleaseAtCycles = [11]; }
153def V1Write_15c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
154                                                      let ReleaseAtCycles = [7]; }
155def V1Write_16c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
156                                                      let ReleaseAtCycles = [7]; }
157def V1Write_2c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 2; }
158def V1Write_3c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 3; }
159def V1Write_4c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 4; }
160def V1Write_2c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
161def V1Write_4c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
162
163//===----------------------------------------------------------------------===//
164// Define generic 2 micro-op types
// Each record sets NumMicroOps = 2 and lists two resources; the Latency in the
// enclosing `let` matches the <latency>c component of the record name.
165
166let Latency = 1, NumMicroOps = 2 in
167def V1Write_1c_1B_1S     : SchedWriteRes<[V1UnitB, V1UnitS]>;
168let Latency = 6, NumMicroOps = 2 in
169def V1Write_6c_1B_1M0    : SchedWriteRes<[V1UnitB, V1UnitM0]>;
170let Latency = 3, NumMicroOps = 2 in
171def V1Write_3c_1I_1M     : SchedWriteRes<[V1UnitI, V1UnitM]>;
172let Latency = 5, NumMicroOps = 2 in
173def V1Write_5c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
174let Latency = 7, NumMicroOps = 2 in
175def V1Write_7c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
176let Latency = 6, NumMicroOps = 2 in
177def V1Write_6c_2L        : SchedWriteRes<[V1UnitL, V1UnitL]>;
178let Latency = 6, NumMicroOps = 2 in
179def V1Write_6c_1L_1M     : SchedWriteRes<[V1UnitL, V1UnitM]>;
180let Latency = 8, NumMicroOps = 2 in
181def V1Write_8c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
182let Latency = 9, NumMicroOps = 2 in
183def V1Write_9c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
184let Latency = 11, NumMicroOps = 2 in
185def V1Write_11c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
186let Latency = 1, NumMicroOps = 2 in
187def V1Write_1c_1L01_1D   : SchedWriteRes<[V1UnitL01, V1UnitD]>;
188let Latency = 6, NumMicroOps = 2 in
189def V1Write_6c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
190let Latency = 7, NumMicroOps = 2 in
191def V1Write_7c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
192let Latency = 2, NumMicroOps = 2 in
193def V1Write_2c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
194let Latency = 4, NumMicroOps = 2 in
195def V1Write_4c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
196let Latency = 6, NumMicroOps = 2 in
197def V1Write_6c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
198let Latency = 2, NumMicroOps = 2 in
199def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
200let Latency = 4, NumMicroOps = 2 in
201def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
202let Latency = 2, NumMicroOps = 2 in
203def V1Write_2c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
204let Latency = 3, NumMicroOps = 2 in
205def V1Write_3c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
206let Latency = 9, NumMicroOps = 2 in
207def V1Write_9c_1M0_1L    : SchedWriteRes<[V1UnitM0, V1UnitL]>;
208let Latency = 5, NumMicroOps = 2 in
209def V1Write_5c_1M0_1V    : SchedWriteRes<[V1UnitM0, V1UnitV]>;
210let Latency = 4, NumMicroOps = 2 in
211def V1Write_4c_1M0_1V0    : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
212let Latency = 7, NumMicroOps = 2 in  // One integer multicycle 0 micro-op plus one FP/ASIMD 0 micro-op.
213def V1Write_7c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV0]>;  // Second resource is V1UnitV0, as the `_1V0` suffix states; the `_1V1` variants are separate records.
214let Latency = 5, NumMicroOps = 2 in
215def V1Write_5c_1M0_1V01    : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
216let Latency = 6, NumMicroOps = 2 in
217def V1Write_6c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
218let Latency = 9, NumMicroOps = 2 in
219def V1Write_9c_1M0_1V1    : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
// The remaining 2 micro-op types use only FP/ASIMD ports or port groups.
220let Latency = 4, NumMicroOps = 2 in
221def V1Write_4c_2V        : SchedWriteRes<[V1UnitV, V1UnitV]>;
222let Latency = 8, NumMicroOps = 2 in
223def V1Write_8c_1V_1V01   : SchedWriteRes<[V1UnitV, V1UnitV01]>;
224let Latency = 4, NumMicroOps = 2 in
225def V1Write_4c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
226let Latency = 5, NumMicroOps = 2 in
227def V1Write_5c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
228let Latency = 2, NumMicroOps = 2 in
229def V1Write_2c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
230let Latency = 4, NumMicroOps = 2 in
231def V1Write_4c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
232let Latency = 4, NumMicroOps = 2 in
233def V1Write_4c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
234let Latency = 6, NumMicroOps = 2 in
235def V1Write_6c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
236let Latency = 4, NumMicroOps = 2 in
237def V1Write_4c_1V13_1V   : SchedWriteRes<[V1UnitV13, V1UnitV]>;
238let Latency = 4, NumMicroOps = 2 in
239def V1Write_4c_2V13      : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
240
241//===----------------------------------------------------------------------===//
242// Define generic 3 micro-op types
// Each record sets NumMicroOps = 3 and lists three resources, one per micro-op.
243
244let Latency = 2, NumMicroOps = 3 in
245def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
246let Latency = 7, NumMicroOps = 3 in
247def V1Write_7c_2M0_1V01     : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
248let Latency = 8, NumMicroOps = 3 in
249def V1Write_8c_1L_2V        : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
250let Latency = 6, NumMicroOps = 3 in
251def V1Write_6c_3L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
252let Latency = 2, NumMicroOps = 3 in
253def V1Write_2c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
254let Latency = 4, NumMicroOps = 3 in
255def V1Write_4c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
256let Latency = 2, NumMicroOps = 3 in
257def V1Write_2c_2L01_1V01    : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
258let Latency = 6, NumMicroOps = 3 in
259def V1Write_6c_3V           : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
260let Latency = 4, NumMicroOps = 3 in
261def V1Write_4c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
262let Latency = 6, NumMicroOps = 3 in
263def V1Write_6c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
264let Latency = 8, NumMicroOps = 3 in
265def V1Write_8c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
266
267//===----------------------------------------------------------------------===//
268// Define generic 4 micro-op types
// Each record sets NumMicroOps = 4 and lists four resources, one per micro-op.
269
270let Latency = 8, NumMicroOps = 4 in
271def V1Write_8c_2M0_2V0   : SchedWriteRes<[V1UnitM0, V1UnitM0,
272                                          V1UnitV0, V1UnitV0]>;
273let Latency = 7, NumMicroOps = 4 in
274def V1Write_7c_4L        : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
275let Latency = 8, NumMicroOps = 4 in
276def V1Write_8c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
277                                             V1UnitV, V1UnitV]>;
278let Latency = 9, NumMicroOps = 4 in
279def V1Write_9c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
280                                             V1UnitV, V1UnitV]>;
281let Latency = 11, NumMicroOps = 4 in
282def V1Write_11c_2L_2V       : SchedWriteRes<[V1UnitL, V1UnitL,
283                                             V1UnitV, V1UnitV]>;
284let Latency = 10, NumMicroOps = 4 in
285def V1Write_10c_2L01_2V     : SchedWriteRes<[V1UnitL01, V1UnitL01,
286                                             V1UnitV, V1UnitV]>;
287let Latency = 2, NumMicroOps = 4 in
288def V1Write_2c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
289                                             V1UnitV01, V1UnitV01]>;
290let Latency = 4, NumMicroOps = 4 in
291def V1Write_4c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
292                                             V1UnitV01, V1UnitV01]>;
293let Latency = 8, NumMicroOps = 4 in
294def V1Write_8c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
295                                             V1UnitV01, V1UnitV01]>;
296let Latency = 9, NumMicroOps = 4 in
297def V1Write_9c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
298                                             V1UnitV01, V1UnitV01]>;
299let Latency = 10, NumMicroOps = 4 in
300def V1Write_10c_2L01_2V01   : SchedWriteRes<[V1UnitL01, V1UnitL01,
301                                             V1UnitV01, V1UnitV01]>;
302let Latency = 10, NumMicroOps = 4 in
303def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
304                                             V1UnitV1, V1UnitV1]>;
305let Latency = 12, NumMicroOps = 4 in
306def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
307                                             V1UnitV1, V1UnitV1]>;
308let Latency = 6, NumMicroOps = 4 in
309def V1Write_6c_4V0          : SchedWriteRes<[V1UnitV0, V1UnitV0,
310                                             V1UnitV0, V1UnitV0]>;
311let Latency = 12, NumMicroOps = 4 in
312def V1Write_12c_4V01        : SchedWriteRes<[V1UnitV01, V1UnitV01,
313                                             V1UnitV01, V1UnitV01]>;
314let Latency = 6, NumMicroOps = 4 in
315def V1Write_6c_4V02         : SchedWriteRes<[V1UnitV02, V1UnitV02]>;  // NOTE(review): the name and NumMicroOps say 4 micro-ops, but only 2 V1UnitV02 entries are listed - confirm whether 4 were intended.
316
317//===----------------------------------------------------------------------===//
318// Define generic 5 micro-op types
// Each record sets NumMicroOps = 5 and lists five resources, one per micro-op.
319
320let Latency = 8, NumMicroOps = 5 in
321def V1Write_8c_2L_3V            : SchedWriteRes<[V1UnitL, V1UnitL,
322                                                 V1UnitV, V1UnitV, V1UnitV]>;
323let Latency = 14, NumMicroOps = 5 in
324def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
325                                                 V1UnitV0,
326                                                 V1UnitV1, V1UnitV1,
327                                                 V1UnitV13]>;
328let Latency = 9, NumMicroOps = 5 in
329def V1Write_9c_1V_4V01          : SchedWriteRes<[V1UnitV,
330                                                 V1UnitV01, V1UnitV01,
331                                                 V1UnitV01, V1UnitV01]>;
332let Latency = 6, NumMicroOps = 5 in
333def V1Write_6c_5V01             : SchedWriteRes<[V1UnitV01, V1UnitV01,
334                                                 V1UnitV01, V1UnitV01, V1UnitV01]>;
335
336//===----------------------------------------------------------------------===//
337// Define generic 6 micro-op types
// Each record sets NumMicroOps = 6 and lists six resources, one per micro-op.
338
339let Latency = 6, NumMicroOps = 6 in
340def V1Write_6c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
341                                           V1UnitV, V1UnitV, V1UnitV]>;
342let Latency = 8, NumMicroOps = 6 in
343def V1Write_8c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
344                                           V1UnitV, V1UnitV, V1UnitV]>;
345let Latency = 2, NumMicroOps = 6 in
346def V1Write_2c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
347                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
348let Latency = 5, NumMicroOps = 6 in
349def V1Write_5c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
350                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
351let Latency = 6, NumMicroOps = 6 in
352def V1Write_6c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
353                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
354let Latency = 11, NumMicroOps = 6 in
355def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
356                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
357let Latency = 11, NumMicroOps = 6 in
358def V1Write_11c_1V_5V01   : SchedWriteRes<[V1UnitV,
359                                           V1UnitV01, V1UnitV01,
360                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
361let Latency = 13, NumMicroOps = 6 in
362def V1Write_13c_6V01      : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
363                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
364
365//===----------------------------------------------------------------------===//
366// Define generic 7 micro-op types
// V1Write_8c_3L_4V: three load-unit micro-ops plus four FP/ASIMD micro-ops,
// with a latency of 8 cycles.
367
368let Latency = 8, NumMicroOps = 7 in
369def V1Write_8c_3L_4V         : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
370                                              V1UnitV, V1UnitV, V1UnitV, V1UnitV]>;
371let Latency = 13, NumMicroOps = 7 in  // Latency is 13 cycles, matching the `13c` in the record name.
372def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
373                                              V1UnitS,
374                                              V1UnitV01, V1UnitV01, V1UnitV01]>;
375
376//===----------------------------------------------------------------------===//
377// Define generic 8 micro-op types
// Each record sets NumMicroOps = 8: four load or load/store micro-ops plus
// four FP/ASIMD micro-ops.
378
379let Latency = 9, NumMicroOps = 8 in
380def V1Write_9c_4L_4V      : SchedWriteRes<[V1UnitL, V1UnitL,
381                                           V1UnitL, V1UnitL,
382                                           V1UnitV, V1UnitV,
383                                           V1UnitV, V1UnitV]>;
384let Latency = 2, NumMicroOps = 8 in
385def V1Write_2c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
386                                           V1UnitL01, V1UnitL01,
387                                           V1UnitV01, V1UnitV01,
388                                           V1UnitV01, V1UnitV01]>;
389let Latency = 4, NumMicroOps = 8 in
390def V1Write_4c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
391                                           V1UnitL01, V1UnitL01,
392                                           V1UnitV01, V1UnitV01,
393                                           V1UnitV01, V1UnitV01]>;
394let Latency = 12, NumMicroOps = 8 in
395def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
396                                           V1UnitL01, V1UnitL01,
397                                           V1UnitV01, V1UnitV01,
398                                           V1UnitV01, V1UnitV01]>;
399
400//===----------------------------------------------------------------------===//
401// Define generic 10 micro-op types
// Each record sets NumMicroOps = 10 and lists ten resources, one per micro-op.
402
403let Latency = 13, NumMicroOps = 10 in
404def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
405                                              V1UnitL01, V1UnitL01,
406                                              V1UnitS, V1UnitS,
407                                              V1UnitV01, V1UnitV01,
408                                              V1UnitV01, V1UnitV01]>;
409let Latency = 7, NumMicroOps = 10 in
410def V1Write_7c_5L01_5V       : SchedWriteRes<[V1UnitL01, V1UnitL01,
411                                              V1UnitL01, V1UnitL01, V1UnitL01,
412                                              V1UnitV, V1UnitV,
413                                              V1UnitV, V1UnitV, V1UnitV]>;
414let Latency = 11, NumMicroOps = 10 in
415def V1Write_11c_10V0         : SchedWriteRes<[V1UnitV0,
416                                              V1UnitV0, V1UnitV0, V1UnitV0,
417                                              V1UnitV0, V1UnitV0, V1UnitV0,
418                                              V1UnitV0, V1UnitV0, V1UnitV0]>;
419
420//===----------------------------------------------------------------------===//
421// Define generic 12 micro-op types
// V1Write_7c_6L01_6V01: six load/store 0/1 micro-ops plus six FP/ASIMD 0/1
// micro-ops, with a latency of 7 cycles.
422
423let Latency = 7, NumMicroOps = 12 in
424def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
425                                          V1UnitL01, V1UnitL01, V1UnitL01,
426                                          V1UnitV01, V1UnitV01, V1UnitV01,
427                                          V1UnitV01, V1UnitV01, V1UnitV01]>;
428
429//===----------------------------------------------------------------------===//
430// Define generic 15 micro-op types
// V1Write_7c_5L01_5S_5V: five micro-ops on each of the load/store 0/1,
// integer single cycle and FP/ASIMD units, with a latency of 7 cycles.
431
432let Latency = 7, NumMicroOps = 15 in
433def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
434                                           V1UnitL01, V1UnitL01, V1UnitL01,
435                                           V1UnitS, V1UnitS,
436                                           V1UnitS, V1UnitS, V1UnitS,
437                                           V1UnitV, V1UnitV,
438                                           V1UnitV, V1UnitV, V1UnitV]>;
439
440
441//===----------------------------------------------------------------------===//
442// Define generic 18 micro-op types
443
444let Latency = 11, NumMicroOps = 18 in  // Latency is 11 cycles, matching the `11c` in the record name.
445def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
446                                         V1UnitL01, V1UnitL01, V1UnitL01,
447                                         V1UnitL01, V1UnitL01, V1UnitL01,
448                                         V1UnitV, V1UnitV, V1UnitV,
449                                         V1UnitV, V1UnitV, V1UnitV,
450                                         V1UnitV, V1UnitV, V1UnitV]>;
451let Latency = 19, NumMicroOps = 18 in  // Eighteen micro-ops, all on FP/ASIMD 0.
452def V1Write_19c_18V0    : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,
453                                         V1UnitV0, V1UnitV0, V1UnitV0,
454                                         V1UnitV0, V1UnitV0, V1UnitV0,
455                                         V1UnitV0, V1UnitV0, V1UnitV0,
456                                         V1UnitV0, V1UnitV0, V1UnitV0,
457                                         V1UnitV0, V1UnitV0, V1UnitV0]>;
458
459//===----------------------------------------------------------------------===//
460// Define generic 27 micro-op types
// V1Write_11c_9L01_9S_9V: nine micro-ops on each of the load/store 0/1,
// integer single cycle and FP/ASIMD units, with a latency of 11 cycles.
461
462let Latency = 11, NumMicroOps = 27 in
463def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
464                                            V1UnitL01, V1UnitL01, V1UnitL01,
465                                            V1UnitL01, V1UnitL01, V1UnitL01,
466                                            V1UnitS, V1UnitS, V1UnitS,
467                                            V1UnitS, V1UnitS, V1UnitS,
468                                            V1UnitS, V1UnitS, V1UnitS,
469                                            V1UnitV, V1UnitV, V1UnitV,
470                                            V1UnitV, V1UnitV, V1UnitV,
471                                            V1UnitV, V1UnitV, V1UnitV]>;
472
473//===----------------------------------------------------------------------===//
474// Define forwarded types
// Each V1Wr_*/V1Rd_* pair below couples a write type with a SchedReadAdvance
// that names that write (and sometimes others) as its producer list.
// NOTE(review): presumably the advance shortens the effective latency seen by
// the reading operand by the given number of cycles - confirm against
// TargetSchedule.td.
475
476// NOTE: SOG, p. 20, n. 2: Accumulator forwarding is not supported for
477// consumers of 64 bit multiply high operations?
478def V1Wr_IM   : SchedWriteRes<[V1UnitM]>  { let Latency = 2; }
479def V1Wr_IMA  : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
// V1WriteIM resolves to V1Wr_IM when NeoverseMULIdiomPred holds and to
// V1Wr_IMA otherwise.
480def V1WriteIM : SchedWriteVariant<
481                  [SchedVar<NeoverseMULIdiomPred, [V1Wr_IM]>,
482                   SchedVar<NoSchedPred,          [V1Wr_IMA]>]>;
// Only V1Wr_IMA is listed as a producer for the 1-cycle advance.
483def V1Rd_IMA : SchedReadAdvance<1, [V1Wr_IMA]>;
484
// Single micro-op forwarded write types with their read advances.
// V1Rd_FMA also lists the generic WriteFMul as a producer, and V1Rd_FPMA
// lists both V1Wr_FPM and V1Wr_FPMA; every other advance names only its own
// write type.
485def V1Wr_FMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
486def V1Rd_FMA : SchedReadAdvance<2, [WriteFMul, V1Wr_FMA]>;
487
488def V1Wr_ADA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
489def V1Rd_ADA : SchedReadAdvance<3, [V1Wr_ADA]>;
490
491def V1Wr_VDOT : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
492def V1Rd_VDOT : SchedReadAdvance<2, [V1Wr_VDOT]>;
493
494def V1Wr_VMMA : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
495def V1Rd_VMMA : SchedReadAdvance<2, [V1Wr_VMMA]>;
496
497def V1Wr_VMA : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
498def V1Rd_VMA : SchedReadAdvance<3, [V1Wr_VMA]>;
499
500def V1Wr_VMAL : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
501def V1Rd_VMAL : SchedReadAdvance<3, [V1Wr_VMAL]>;
502
503def V1Wr_VSA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
504def V1Rd_VSA : SchedReadAdvance<3, [V1Wr_VSA]>;
505
506def V1Wr_FCMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
507def V1Rd_FCMA : SchedReadAdvance<2, [V1Wr_FCMA]>;
508
509def V1Wr_FPM : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
510def V1Wr_FPMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
511def V1Rd_FPMA : SchedReadAdvance<2, [V1Wr_FPM, V1Wr_FPMA]>;
512
513def V1Wr_FPMAL : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
514def V1Rd_FPMAL : SchedReadAdvance<3, [V1Wr_FPMAL]>;
515
516def V1Wr_BFD : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
517def V1Rd_BFD : SchedReadAdvance<2, [V1Wr_BFD]>;
518
519def V1Wr_BFMMA : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
520def V1Rd_BFMMA : SchedReadAdvance<2, [V1Wr_BFMMA]>;
521
522def V1Wr_BFMLA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
523def V1Rd_BFMLA : SchedReadAdvance<2, [V1Wr_BFMLA]>;
524
525def V1Wr_CRC : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
526def V1Rd_CRC : SchedReadAdvance<1, [V1Wr_CRC]>;
527
// Z-prefixed forwarded types.
// NOTE(review): the Z prefix presumably marks SVE instructions - confirm at
// the use sites.
528def V1Wr_ZDOTB : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
529def V1Rd_ZDOTB : SchedReadAdvance<2, [V1Wr_ZDOTB]>;
530
531def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
532def V1Rd_ZUDOTB : SchedReadAdvance<2, [V1Wr_ZUDOTB]>;
533
534def V1Wr_ZDOTH : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
535def V1Rd_ZDOTH : SchedReadAdvance<3, [V1Wr_ZDOTH]>;
536
537def V1Wr_ZMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
538def V1Rd_ZMMA : SchedReadAdvance<2, [V1Wr_ZMMA]>;
539
// V1Wr_ZMAD is the only forwarded type here with two micro-ops (both on V0).
540let Latency = 5, NumMicroOps = 2 in
541def V1Wr_ZMAD : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
542def V1Rd_ZMAD : SchedReadAdvance<3, [V1Wr_ZMAD]>;
543
544def V1Wr_ZFCMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
545def V1Rd_ZFCMA : SchedReadAdvance<3, [V1Wr_ZFCMA]>;
546
547def V1Wr_ZFMA : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
548def V1Rd_ZFMA : SchedReadAdvance<2, [V1Wr_ZFMA]>;
549
550def V1Wr_ZBFDOT : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
551def V1Rd_ZBFDOT : SchedReadAdvance<2, [V1Wr_ZBFDOT]>;
552def V1Wr_ZBFMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
553def V1Rd_ZBFMMA : SchedReadAdvance<2, [V1Wr_ZBFMMA]>;
554def V1Wr_ZBFMAL : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
555def V1Rd_ZBFMAL : SchedReadAdvance<3, [V1Wr_ZBFMAL]>;
556
557// Miscellaneous Instructions
558// -----------------------------------------------------------------------------
559
560// COPY
// Modelled as a single 1-cycle micro-op on the integer unit group.
561def : InstRW<[V1Write_1c_1I], (instrs COPY)>;
562
563// MSR
// WriteSys is given latency 1 and an empty resource list.
564def : WriteRes<WriteSys, []> { let Latency = 1; }
565
566
567// Branch Instructions
568// -----------------------------------------------------------------------------
569
570// Branch, immed
571// Compare and branch
572def : SchedAlias<WriteBr, V1Write_1c_1B>;
573
574// Branch, register
575def : SchedAlias<WriteBrReg, V1Write_1c_1B>;
576
577// Branch and link, immed
578// Branch and link, register
// Two micro-ops: one on a branch unit and one on an integer single cycle unit.
579def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>;
580
581// Compare and branch
// The regex matches the CBZ/CBNZ/TBZ/TBNZ opcodes with a W or X suffix and
// binds them explicitly to the same 1-cycle branch write type.
582def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
583
584
585// Arithmetic and Logical Instructions
586// -----------------------------------------------------------------------------
587
588// ALU, basic
589// Conditional compare
590// Conditional select
591// Logical, basic
592// Address generation
593// Count leading
594// Reverse bits/bytes
595// Move immediate
596def : SchedAlias<WriteI, V1Write_1c_1I>;
597
598// ALU, basic, flagset
599def : InstRW<[V1Write_1c_1I_1Flg],
600             (instregex "^(ADD|SUB)S[WX]r[ir]$",
601                        "^(ADC|SBC)S[WX]r$",
602                        "^ANDS[WX]ri$",
603                        "^(AND|BIC)S[WX]rr$")>;
604
605// ALU, extend and shift
606def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
607
608// Arithmetic, LSL shift, shift <= 4
609// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
610def V1WriteISReg : SchedWriteVariant<
611                     [SchedVar<IsCheapLSL,  [V1Write_1c_1I]>, // chosen when IsCheapLSL holds
612                      SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; // fallback for every other operand form
613def              : SchedAlias<WriteISReg, V1WriteISReg>; // WriteISReg resolves through the variant above
614
615// Arithmetic, flagset, LSL shift, shift <= 4
616// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
617def V1WriteISRegS : SchedWriteVariant<
618                      [SchedVar<IsCheapLSL,  [V1Write_1c_1I_1Flg]>, // chosen when IsCheapLSL holds
619                       SchedVar<NoSchedPred, [V1Write_2c_1M_1Flg]>]>; // fallback for every other operand form
620def               : InstRW<[V1WriteISRegS], // bound per-opcode (InstRW), not via a SchedAlias
621                           (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
622
623// Logical, shift, no flagset
624def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
625
626// Logical, shift, flagset
627def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
628
629// Flag manipulation instructions
630def : InstRW<[V1Write_1c_1I_1Flg], (instrs SETF8, SETF16, RMIF, CFINV)>;
631
632
633// Divide and multiply instructions
634// -----------------------------------------------------------------------------
635
636// Divide
637def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
638def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
639
640def : SchedAlias<WriteIM32, V1Write_2c_1M>;
641def : SchedAlias<WriteIM64, V1Write_2c_1M>;
642
643// Multiply
644// Multiply accumulate, W-form
645// Multiply accumulate, X-form
646def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
647             (instregex "^M(ADD|SUB)[WX]rrr$")>;
648
649// Multiply accumulate long
650// Multiply long
651def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
652             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
653// Multiply high
654def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
655
656
657// Pointer Authentication Instructions (v8.3 PAC)
658// -----------------------------------------------------------------------------
659
660// Authenticate data address
661// Authenticate instruction address
662// Compute pointer authentication code for data address
663// Compute pointer authentication code, using generic key
664// Compute pointer authentication code for instruction address
665def : InstRW<[V1Write_5c_1M0], (instregex "^AUT",
666                                          "^PAC")>;
667
668// Branch and link, register, with pointer authentication
669// Branch, register, with pointer authentication
670// Branch, return, with pointer authentication
671def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$",
672                                             "^E?RETA[AB]$")>;
673
674// Load register, with pointer authentication
675def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
676
677// Strip pointer authentication code
678def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
679
680
681// Miscellaneous data-processing instructions
682// -----------------------------------------------------------------------------
683
684// Bitfield extract, one reg
685// Bitfield extract, two regs
686def V1WriteExtr : SchedWriteVariant<
687                    [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>, // chosen when IsRORImmIdiomPred holds
688                     SchedVar<NoSchedPred,       [V1Write_3c_1I_1M]>]>; // fallback for every other form
689def : SchedAlias<WriteExtr, V1WriteExtr>; // WriteExtr resolves through the variant above
690
691// Bitfield move, basic
692// Variable shift
693def : SchedAlias<WriteIS, V1Write_1c_1I>;
694
695// Bitfield move, insert
696def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
697
698// Move immediate
699def : SchedAlias<WriteImm, V1Write_1c_1I>;
700
701
702// Load instructions
703// -----------------------------------------------------------------------------
704
705// Load register, immed offset
706def : SchedAlias<WriteLD, V1Write_4c_1L>;
707
708// Load register, immed offset, index
709def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
710def : SchedAlias<WriteAdr,   V1Write_1c_1I>;
711
712// Load pair, immed offset
713def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
714def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
715def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z],
716             (instrs LDPWpost, LDPWpre)>;
717
718// Load pair, signed immed offset, signed words
719def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
720
721// Load pair, immed post or pre-index, signed words
722def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
723             (instrs LDPSWpost, LDPSWpre)>;
724
725
726// Store instructions
727// -----------------------------------------------------------------------------
728
729// Store register, immed offset
730def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;
731
732// Store register, immed offset, index
733def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;
734
735// Store pair, immed offset
736def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;
737
738
739// FP data processing instructions
740// -----------------------------------------------------------------------------
741
742// FP absolute value
743// FP arithmetic
744// FP min/max
745// FP negate
746def : SchedAlias<WriteF, V1Write_2c_1V>;
747
748// FP compare
749def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;
750
751// FP divide
752// FP square root
753def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;
754
755// FP divide, H-form
756// FP square root, H-form
757def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;
758
759// FP divide, S-form
760// FP square root, S-form
761def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;
762
763// FP divide, D-form
764def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
765
766// FP square root, D-form
767def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
768
769// FP multiply: WriteFMul is bound directly to V1UnitV with latency 3 (WriteRes, not a SchedAlias)
770def : WriteRes<WriteFMul, [V1UnitV]> { let Latency = 3; }
771
772// FP multiply accumulate
773def : InstRW<[V1Wr_FMA, ReadDefault, ReadDefault, V1Rd_FMA],
774             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
775
776// FP round to integral
777def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
778                                           "^FRINT(32|64)[XZ][SD]r$")>;
779
780// FP select
781def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;
782
783
784// FP miscellaneous instructions
785// -----------------------------------------------------------------------------
786
787// FP convert, from gen to vec reg
788def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
789
790// FP convert, from vec to gen reg
791def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
792
793// FP convert, Javascript from vec to gen reg
794def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;
795
796// FP convert, from vec to vec reg
797def : SchedAlias<WriteFCvt, V1Write_3c_1V02>;
798
799// FP move, immed
800def : SchedAlias<WriteFImm, V1Write_2c_1V>;
801
802// FP move, register
803def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
804
805// FP transfer, from gen to low half of vec reg
806def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
807
808// FP transfer, from gen to high half of vec reg
809def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
810
811// FP transfer, from vec to gen reg
812def : SchedAlias<WriteFCopy, V1Write_2c_1V1>;
813
814
815// FP load instructions
816// -----------------------------------------------------------------------------
817
818// Load vector reg, literal, S/D/Q forms
819// Load vector reg, unscaled immed
820// Load vector reg, unsigned immed
821def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
822                                                      "^LDUR[BHSDQ]i$",
823                                                      "^LDR[BHSDQ]ui$")>;
824
825// Load vector reg, immed post-index
826// Load vector reg, immed pre-index
827def : InstRW<[WriteAdr, V1Write_6c_1L],
828             (instregex "^LDR[BHSDQ](post|pre)$")>;
829
830// Load vector reg, register offset, basic
831// Load vector reg, register offset, scale, S/D-form
832// Load vector reg, register offset, extend
833// Load vector reg, register offset, extend, scale, S/D-form
834def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
835
836// Load vector reg, register offset, scale, H/Q-form
837// Load vector reg, register offset, extend, scale, H/Q-form
838def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
839
840// Load vector pair, immed offset, S/D-form
841def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>;
842
843// Load vector pair, immed offset, Q-form
844def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
845
846// Load vector pair, immed post-index, S/D-form
847// Load vector pair, immed pre-index, S/D-form
848def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z],
849             (instregex "^LDP[SD](pre|post)$")>;
850
851// Load vector pair, immed post-index, Q-form
852// Load vector pair, immed pre-index, Q-form
853def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi],
854             (instrs LDPQpost, LDPQpre)>;
855
856
857// FP store instructions
858// -----------------------------------------------------------------------------
859
860// Store vector reg, unscaled immed, B/H/S/D/Q-form
861def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
862
863// Store vector reg, immed post-index, B/H/S/D/Q-form
864// Store vector reg, immed pre-index, B/H/S/D/Q-form
865def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
866             (instregex "^STR[BHSDQ](pre|post)$")>;
867
868// Store vector reg, unsigned immed, B/H/S/D/Q-form
869def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
870
871// Store vector reg, register offset, basic, B/S/D-form
872// Store vector reg, register offset, scale, B/S/D-form
873// Store vector reg, register offset, extend, B/S/D-form
874// Store vector reg, register offset, extend, scale, B/S/D-form
875def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
876             (instregex "^STR[BSD]ro[WX]$")>;
877
878// Store vector reg, register offset, basic, H/Q-form
879// Store vector reg, register offset, scale, H/Q-form
880// Store vector reg, register offset, extend, H/Q-form
881// Store vector reg, register offset, extend, scale, H/Q-form
882def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
883             (instregex "^STR[HQ]ro[WX]$")>;
884
885// Store vector pair, immed offset, S/D/Q-form
886def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
887
888// Store vector pair, immed post-index, S/D-form
889// Store vector pair, immed pre-index, S/D-form
890def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
891             (instregex "^STP[SD](pre|post)$")>;
892
893// Store vector pair, immed post-index, Q-form
894// Store vector pair, immed pre-index, Q-form
895def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>;
896
897
898// ASIMD integer instructions
899// -----------------------------------------------------------------------------
900
901// ASIMD absolute diff
902// ASIMD absolute diff long
903// ASIMD arith, basic
904// ASIMD arith, complex
905// ASIMD arith, pair-wise
906// ASIMD compare
907// ASIMD logical
908// ASIMD max/min, basic and pair-wise
909def : SchedAlias<WriteVd, V1Write_2c_1V>;
910def : SchedAlias<WriteVq, V1Write_2c_1V>;
911
912// ASIMD absolute diff accum
913// ASIMD absolute diff accum long
914// ASIMD pairwise add and accumulate long
915def : InstRW<[V1Wr_ADA, V1Rd_ADA], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
916
917// ASIMD arith, reduce, 4H/4S
918// ASIMD max/min, reduce, 4H/4S
919def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
920                                           "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
921
922// ASIMD arith, reduce, 8B/8H
923// ASIMD max/min, reduce, 8B/8H
924def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
925                                              "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
926
927// ASIMD arith, reduce, 16B
928// ASIMD max/min, reduce, 16B
929def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
930                                           "^[SU](MAX|MIN)Vv16i8v$")>; // both patterns anchored at the start, like the 4H/4S and 8B/8H entries
931
932// ASIMD dot product
933// ASIMD dot product using signed and unsigned integers
934def : InstRW<[V1Wr_VDOT, V1Rd_VDOT],
935             (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
936
937// ASIMD matrix multiply-accumulate
938def : InstRW<[V1Wr_VMMA, V1Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
939
940// ASIMD multiply
941def : InstRW<[V1Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
942
943// ASIMD multiply accumulate
944def : InstRW<[V1Wr_VMA, V1Rd_VMA], (instregex "^MLAv", "^MLSv")>;
945
946// ASIMD multiply accumulate long
947def : InstRW<[V1Wr_VMAL, V1Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
948
949// ASIMD multiply accumulate high
950def : InstRW<[V1Write_4c_1V02], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
951
952// ASIMD multiply accumulate saturating long
953def : InstRW<[V1Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>;
954
955// ASIMD multiply/multiply long (8x8) polynomial
956def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
957
958// ASIMD multiply long
959def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
960
961// ASIMD shift accumulate
962def : InstRW<[V1Wr_VSA, V1Rd_VSA], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
963
964// ASIMD shift by immed, complex
965// ASIMD shift by register, complex
966def : InstRW<[V1Write_4c_1V13],
967             (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
968                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
969                        "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
970                        "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
971
972// ASIMD shift by immed, basic
973// ASIMD shift by immed and insert, basic
974// ASIMD shift by register, basic
975def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
976                                          "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
977
978
979// ASIMD FP instructions
980// -----------------------------------------------------------------------------
981
982// ASIMD FP absolute value/difference
983// ASIMD FP arith, normal
984// ASIMD FP compare
985// ASIMD FP max/min, normal
986// ASIMD FP max/min, pairwise
987// ASIMD FP negate
988// Covered by "SchedAlias (WriteV[dq]...)" above
989
990// ASIMD FP complex add
991def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$")>;
992
993// ASIMD FP complex multiply add
994def : InstRW<[V1Wr_FCMA, V1Rd_FCMA], (instregex "^FCMLAv")>;
995
996// ASIMD FP multiply
997def : InstRW<[V1Wr_FPM], (instregex "^FMULX?v")>;
998
999// ASIMD FP multiply accumulate
1000def : InstRW<[V1Wr_FPMA, V1Rd_FPMA], (instregex "^FML[AS]v")>;
1001
1002// ASIMD FP multiply accumulate long
1003def : InstRW<[V1Wr_FPMAL, V1Rd_FPMAL], (instregex "^FML[AS]L2?v")>;
1004
1005// ASIMD FP convert, long (F16 to F32)
1006def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
1007
1008// ASIMD FP convert, long (F32 to F64)
1009def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;
1010
1011// ASIMD FP convert, narrow (F32 to F16)
1012def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>;
1013
1014// ASIMD FP convert, narrow (F64 to F32)
1015def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
1016                                           "^FCVTXN(v[24]f32|v1i64)$")>;
1017
1018// ASIMD FP convert, other, D-form F32 and Q-form F64
1019def : InstRW<[V1Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
1020                                           "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
1021                                           "^FCVT[AMNPZ][SU]v1i64$",
1022                                           "^FCVTZ[SU]d$",
1023                                           "^[SU]CVTFv2f(32|64)$",
1024                                           "^[SU]CVTFv2i(32|64)_shift$",
1025                                           "^[SU]CVTFv1i64$",
1026                                           "^[SU]CVTFd$")>;
1027
1028// ASIMD FP convert, other, D-form F16 and Q-form F32
1029def : InstRW<[V1Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
1030                                           "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
1031                                           "^FCVT[AMNPZ][SU]v1i32$",
1032                                           "^FCVTZ[SU]s$",
1033                                           "^[SU]CVTFv4f(16|32)$",
1034                                           "^[SU]CVTFv4i(16|32)_shift$",
1035                                           "^[SU]CVTFv1i32$",
1036                                           "^[SU]CVTFs$")>;
1037
1038// ASIMD FP convert, other, Q-form F16
1039def : InstRW<[V1Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
1040                                           "^FCVT[AMNPZ][SU]v8i16_shift$",
1041                                           "^FCVT[AMNPZ][SU]v1f16$",
1042                                           "^FCVTZ[SU]h$",
1043                                           "^[SU]CVTFv8f16$",
1044                                           "^[SU]CVTFv8i16_shift$",
1045                                           "^[SU]CVTFv1i16$",
1046                                           "^[SU]CVTFh$")>;
1047
1048// ASIMD FP divide, D-form, F16
1049// ASIMD FP square root, D-form, F16
1050def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>;
1051
1052// ASIMD FP divide, F32
1053// ASIMD FP square root, F32
1054def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32,
1055                                          FSQRTv2f32, FSQRTv4f32)>;
1056
1057// ASIMD FP divide, Q-form, F16
1058def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>;
1059
1060// ASIMD FP divide, Q-form, F64
1061def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>;
1062
1063// ASIMD FP square root, Q-form, F16
1064def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>;
1065
1066// ASIMD FP square root, Q-form, F64
1067def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>;
1068
1069// ASIMD FP max/min, reduce, F32 and D-form F16
1070def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
1071
1072// ASIMD FP max/min, reduce, Q-form F16
1073def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
1074
1075// ASIMD FP round, D-form F32 and Q-form F64
1076def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
1077
1078// ASIMD FP round, D-form F16 and Q-form F32
1079def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
1080
1081// ASIMD FP round, Q-form F16
1082def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1083
1084
1085// ASIMD BF instructions
1086// -----------------------------------------------------------------------------
1087
1088// ASIMD convert, F32 to BF16
1089def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
1090
1091// ASIMD dot product
1092def : InstRW<[V1Wr_BFD, V1Rd_BFD], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
1093
1094// ASIMD matrix multiply accumulate
1095def : InstRW<[V1Wr_BFMMA, V1Rd_BFMMA], (instrs BFMMLA)>;
1096
1097// ASIMD multiply accumulate long
1098def : InstRW<[V1Wr_BFMLA, V1Rd_BFMLA], (instregex "^BFMLAL[BT](Idx)?$")>;
1099
1100// Scalar convert, F32 to BF16
1101def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
1102
1103
1104// ASIMD miscellaneous instructions
1105// -----------------------------------------------------------------------------
1106
1107// ASIMD bit reverse
1108// ASIMD bitwise insert
1109// ASIMD count
1110// ASIMD duplicate, element
1111// ASIMD extract
1112// ASIMD extract narrow
1113// ASIMD insert, element to element
1114// ASIMD move, FP immed
1115// ASIMD move, integer immed
1116// ASIMD reverse
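1117// ASIMD table lookup, 1 or 2 table regs (also has an explicit InstRW further down)
1118// ASIMD table lookup extension, 1 table reg (also has an explicit InstRW further down)
1119// ASIMD transfer, element to gen reg (also has an explicit InstRW further down)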
1120// ASIMD transpose
1121// ASIMD unzip/zip
1122// Covered by "SchedAlias (WriteV[dq]...)" above
1123
1124// ASIMD duplicate, gen reg
1125def : InstRW<[V1Write_3c_1M0],
1126             (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
1127
1128// ASIMD extract narrow, saturating
1129def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1130
1131// ASIMD reciprocal and square root estimate, D-form U32
1132// ASIMD reciprocal and square root estimate, D-form F32 and F64
1133def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32,
1134                                        URSQRTEv2i32,
1135                                        FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
1136                                        FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;
1137
1138// ASIMD reciprocal and square root estimate, Q-form U32
1139// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
1140def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32,
1141                                        URSQRTEv4i32,
1142                                        FRECPEv1f16, FRECPEv4f16,
1143                                        FRECPEv4f32, FRECPEv2f64,
1144                                        FRSQRTEv1f16, FRSQRTEv4f16,
1145                                        FRSQRTEv4f32, FRSQRTEv2f64)>;
1146
1147// ASIMD reciprocal and square root estimate, Q-form F16
1148def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16,
1149                                        FRSQRTEv8f16)>;
1150
1151// ASIMD reciprocal exponent
1152def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;
1153
1154// ASIMD reciprocal step
1155def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
1156                                         "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
1157
1158// ASIMD table lookup, 1 or 2 table regs
1159// ASIMD table lookup extension, 1 table reg
1160def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$",
1161                                           "^TBXv(8|16)i8One$")>;
1162
1163// ASIMD table lookup, 3 table regs
1164// ASIMD table lookup extension, 2 table reg
1165def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three,
1166                                        TBXv8i8Two, TBXv16i8Two)>;
1167
1168// ASIMD table lookup, 4 table regs
1169def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
1170
1171// ASIMD table lookup extension, 3 table reg
1172def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
1173
1174// ASIMD table lookup extension, 4 table reg
1175def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
1176
1177// ASIMD transfer, element to gen reg
1178def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
1179                                         "^UMOVvi(8|16|32|64)$")>;
1180
1181// ASIMD transfer, gen reg to element
1182def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1183
1184
1185// ASIMD load instructions
1186// -----------------------------------------------------------------------------
1187
1188// ASIMD load, 1 element, multiple, 1 reg
1189def : InstRW<[V1Write_6c_1L],
1190             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1191def : InstRW<[WriteAdr, V1Write_6c_1L],
1192             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1193
1194// ASIMD load, 1 element, multiple, 2 reg
1195def : InstRW<[V1Write_6c_2L],
1196             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1197def : InstRW<[WriteAdr, V1Write_6c_2L],
1198             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1199
1200// ASIMD load, 1 element, multiple, 3 reg
1201def : InstRW<[V1Write_6c_3L],
1202             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1203def : InstRW<[WriteAdr, V1Write_6c_3L],
1204             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1205
1206// ASIMD load, 1 element, multiple, 4 reg, D-form
1207def : InstRW<[V1Write_6c_2L],
1208             (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1209def : InstRW<[WriteAdr, V1Write_6c_2L],
1210             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1211
1212// ASIMD load, 1 element, multiple, 4 reg, Q-form
1213def : InstRW<[V1Write_7c_4L],
1214             (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1215def : InstRW<[WriteAdr, V1Write_7c_4L],
1216             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1217
1218// ASIMD load, 1 element, one lane
1219// ASIMD load, 1 element, all lanes
1220def : InstRW<[V1Write_8c_1L_1V],
1221             (instregex "^LD1(i|Rv)(8|16|32|64)$",
1222                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1223def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
1224             (instregex "^LD1i(8|16|32|64)_POST$",
1225                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1226
1227// ASIMD load, 2 element, multiple, D-form
1228def : InstRW<[V1Write_8c_1L_2V],
1229             (instregex "^LD2Twov(8b|4h|2s)$")>;
1230def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
1231             (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
1232
1233// ASIMD load, 2 element, multiple, Q-form
1234def : InstRW<[V1Write_8c_2L_2V],
1235             (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
1236def : InstRW<[WriteAdr, V1Write_8c_2L_2V],
1237             (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
1238
1239// ASIMD load, 2 element, one lane
1240// ASIMD load, 2 element, all lanes
1241def : InstRW<[V1Write_8c_1L_2V],
1242             (instregex "^LD2i(8|16|32|64)$",
1243                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1244def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
1245             (instregex "^LD2i(8|16|32|64)_POST$",
1246                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1247
1248// ASIMD load, 3 element, multiple, D-form
1249// ASIMD load, 3 element, one lane
1250// ASIMD load, 3 element, all lanes
1251def : InstRW<[V1Write_8c_2L_3V],
1252             (instregex "^LD3Threev(8b|4h|2s)$",
1253                        "^LD3i(8|16|32|64)$",
1254                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1255def : InstRW<[WriteAdr, V1Write_8c_2L_3V],
1256             (instregex "^LD3Threev(8b|4h|2s)_POST$",
1257                        "^LD3i(8|16|32|64)_POST$",
1258                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1259
1260// ASIMD load, 3 element, multiple, Q-form
1261def : InstRW<[V1Write_8c_3L_3V],
1262             (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
1263def : InstRW<[WriteAdr, V1Write_8c_3L_3V],
1264             (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
1265
1266// ASIMD load, 4 element, multiple, D-form
1267// ASIMD load, 4 element, one lane
1268// ASIMD load, 4 element, all lanes
1269def : InstRW<[V1Write_8c_3L_4V],
1270             (instregex "^LD4Fourv(8b|4h|2s)$",
1271                        "^LD4i(8|16|32|64)$",
1272                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1273def : InstRW<[WriteAdr, V1Write_8c_3L_4V],
1274             (instregex "^LD4Fourv(8b|4h|2s)_POST$",
1275                        "^LD4i(8|16|32|64)_POST$",
1276                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1277
1278// ASIMD load, 4 element, multiple, Q-form
1279def : InstRW<[V1Write_9c_4L_4V],
1280             (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
1281def : InstRW<[WriteAdr, V1Write_9c_4L_4V],
1282             (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
1283
1284
1285// ASIMD store instructions
1286// -----------------------------------------------------------------------------
1287
1288// ASIMD store, 1 element, multiple, 1 reg
1289// ASIMD store, 1 element, multiple, 2 reg, D-form
1290def : InstRW<[V1Write_2c_1L01_1V01],
1291             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
1292                        "^ST1Twov(8b|4h|2s|1d)$")>;
1293def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
1294             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
1295                        "^ST1Twov(8b|4h|2s|1d)_POST$")>;
1296
1297// ASIMD store, 1 element, multiple, 2 reg, Q-form
1298// ASIMD store, 1 element, multiple, 3 reg, D-form
1299// ASIMD store, 1 element, multiple, 4 reg, D-form
1300def : InstRW<[V1Write_2c_2L01_2V01],
1301             (instregex "^ST1Twov(16b|8h|4s|2d)$",
1302                        "^ST1Threev(8b|4h|2s|1d)$",
1303                        "^ST1Fourv(8b|4h|2s|1d)$")>;
1304def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01],
1305             (instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
1306                        "^ST1Threev(8b|4h|2s|1d)_POST$",
1307                        "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
1308
1309// ASIMD store, 1 element, multiple, 3 reg, Q-form
1310def : InstRW<[V1Write_2c_3L01_3V01],
1311             (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
1312def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01],
1313             (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
1314
1315// ASIMD store, 1 element, multiple, 4 reg, Q-form
1316def : InstRW<[V1Write_2c_4L01_4V01],
1317             (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
1318def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01],
1319             (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
1320
1321// ASIMD store, 1 element, one lane
1322// ASIMD store, 2 element, multiple, D-form
1323// ASIMD store, 2 element, one lane
1324def : InstRW<[V1Write_4c_1L01_1V01],
1325             (instregex "^ST1i(8|16|32|64)$",
1326                        "^ST2Twov(8b|4h|2s)$",
1327                        "^ST2i(8|16|32|64)$")>;
1328def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01],
1329             (instregex "^ST1i(8|16|32|64)_POST$",
1330                        "^ST2Twov(8b|4h|2s)_POST$",
1331                        "^ST2i(8|16|32|64)_POST$")>;
1332
1333// ASIMD store, 2 element, multiple, Q-form
1334// ASIMD store, 3 element, multiple, D-form
1335// ASIMD store, 3 element, one lane
1336// ASIMD store, 4 element, one lane, D
1337def : InstRW<[V1Write_4c_2L01_2V01],
1338             (instregex "^ST2Twov(16b|8h|4s|2d)$",
1339                        "^ST3Threev(8b|4h|2s)$",
1340                        "^ST3i(8|16|32|64)$",
1341                        "^ST4i64$")>;
1342def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01],
1343             (instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
1344                        "^ST3Threev(8b|4h|2s)_POST$",
1345                        "^ST3i(8|16|32|64)_POST$",
1346                        "^ST4i64_POST$")>;
1347
1348// ASIMD store, 3 element, multiple, Q-form
1349def : InstRW<[V1Write_5c_3L01_3V01],
1350             (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
1351def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01],
1352             (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
1353
1354// ASIMD store, 4 element, multiple, D-form
1355def : InstRW<[V1Write_6c_3L01_3V01],
1356             (instregex "^ST4Fourv(8b|4h|2s)$")>;
1357def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
1358             (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
1359
1360// ASIMD store, 4 element, multiple, Q-form, B/H/S
1361def : InstRW<[V1Write_7c_6L01_6V01],
1362             (instregex "^ST4Fourv(16b|8h|4s)$")>;
1363def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01],
1364             (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
1365
1366// ASIMD store, 4 element, multiple, Q-form, D
1367def : InstRW<[V1Write_4c_4L01_4V01],
1368             (instrs ST4Fourv2d)>;
1369def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01],
1370             (instrs ST4Fourv2d_POST)>;
1371
1372// ASIMD store, 4 element, one lane, B/H/S
1373def : InstRW<[V1Write_6c_3L_3V], // NOTE(review): every other store in this section uses an *_L01_*V01 write - confirm this one is intended
1374             (instregex "^ST4i(8|16|32)$")>;
1375def : InstRW<[WriteAdr, V1Write_6c_3L_3V], // post-index form: WriteAdr is listed first, then the same store write
1376             (instregex "^ST4i(8|16|32)_POST$")>;
1377
1378
1379// Cryptography extensions
1380// -----------------------------------------------------------------------------
1381
1382// Crypto polynomial (64x64) multiply long
1383// Covered by "SchedAlias (WriteV[dq]...)" above
1384
// Crypto AES ops
// V1WriteVC wraps V1Write_2c_1V in a WriteSequence so that it has its own
// name; V1ReadVC is a SchedReadAdvance of 2 whose valid-writes list contains
// only V1WriteVC. AESD/AESE are written with V1WriteVC, and AESMC/AESIMC read
// their operand through V1ReadVC.
def V1WriteVC : WriteSequence<[V1Write_2c_1V]>;
def V1ReadVC : SchedReadAdvance<2, [V1WriteVC]>;
def : InstRW<[V1WriteVC],
  (instrs AESDrr, AESErr)>;
def : InstRW<[V1Write_2c_1V, V1ReadVC],
  (instrs AESMCrr, AESIMCrr)>;
1390
// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration ops
// Crypto SHA512 hash acceleration ops
// Crypto SM3 ops
//
// NOTE: The schedule acceleration records do not share an operand suffix:
// SHA1SU0 and SHA256SU1 are the three-register ("rrr") records, whereas SHA1H,
// SHA1SU1 and SHA256SU0 are the two-register ("rr") ones. Each name is spelled
// out so that none of them silently falls back to its default write.
// NOTE: The SM3 alternation lists PARTW1, PARTW2 and SS1 separately; the
// earlier spelling "PARTW(1|2SM3SS1)" could never match SM3PARTW2 or SM3SS1.
def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(Hrr|SU0rrr|SU1rr)$",
                                          "^SHA256SU(0rr|1rrr)$",
                                          "^SHA512(H2?|SU[01])$",
                                          "^SM3(PARTW[12]|SS1|TT[12][AB])$")>;
1400
// Categories sharing V1Write_4c_1V0:
//   - Crypto SHA1 hash acceleration ops
//   - Crypto SHA256 hash acceleration ops
//   - Crypto SM4 ops
def : InstRW<[V1Write_4c_1V0],
  (instregex
    "^SHA1[CMP]rrr$",
    "^SHA256H2?rrr$",
    "^SM4E(KEY)?$")>;

// Crypto SHA3 ops
def : InstRW<[V1Write_2c_1V0],
  (instrs BCAX, EOR3, RAX1, XAR)>;
1410
1411
1412// CRC instruction
1413// -----------------------------------------------------------------------------
1414
// CRC checksum ops
// Uses the dedicated V1Wr_CRC / V1Rd_CRC write/read pair.
def : InstRW<[V1Wr_CRC, V1Rd_CRC],
  (instregex "^CRC32C?[BHWX]rr$")>;
1417
1418
1419// SVE Predicate instructions
1420// -----------------------------------------------------------------------------
1421
// Loop control, based on predicate
// (BRKA/BRKB by pattern; BRKN/BRKPA/BRKPB by name)
def : InstRW<[V1Write_2c_1M0],
  (instregex "^BRK[AB]_PP[mz]P$")>;
def : InstRW<[V1Write_2c_1M0],
  (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[V1Write_3c_2M0],
  (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP, BRKPAS_PPzPP, BRKPBS_PPzPP)>;

// Loop control, based on GPR
def : InstRW<[V1Write_3c_2M0],
  (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;

// Loop terminate
def : InstRW<[V1Write_1c_1M0],
  (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1435
// Predicate counting scalar
// Predicate counting scalar, active predicate
// (named records first, then the pattern-matched element/predicate counts)
def : InstRW<[V1Write_2c_1M0],
  (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V1Write_2c_1M0],
  (instregex
    "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$",
    "^SQ(DEC|INC)[BHWD]_XPiWdI$",
    "^UQ(DEC|INC)[BHWD]_WPiI$",
    "^CNTP_XPP_[BHSD]$",
    "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$",
    "^UQ(DEC|INC)P_WP_[BHSD]$",
    "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;

// Predicate counting vector, active predicate
def : InstRW<[V1Write_7c_2M0_1V01],
  (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
1449
// Predicate logical
def : InstRW<[V1Write_1c_1M0],
  (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;

// Predicate logical, flag setting
def : InstRW<[V1Write_2c_2M0],
  (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;

// Categories sharing V1Write_2c_1M0 (pattern-matched records):
//   - Predicate reverse
//   - Predicate set/initialize/find next
//   - Predicate transpose
//   - Predicate unpack and widen
//   - Predicate zip/unzip
def : InstRW<[V1Write_2c_1M0],
  (instregex
    "^REV_PP_[BHSD]$",
    "^PFALSE$",
    "^PFIRST_B$",
    "^PNEXT_[BHSD]$",
    "^PTRUE_[BHSD]$",
    "^TRN[12]_PPP_[BHSDQ]$",
    "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;

// Predicate set/initialize/find next
// Predicate unpack and widen
// (records listed by name rather than by pattern)
def : InstRW<[V1Write_2c_1M0],
  (instrs PTEST_PP, PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate select
def : InstRW<[V1Write_1c_1M0],
  (instrs SEL_PPPP)>;

// Predicate set/initialize, set flags
def : InstRW<[V1Write_3c_2M0],
  (instregex "^PTRUES_[BHSD]$")>;
1479
1480
1481
1482// SVE integer instructions
1483// -----------------------------------------------------------------------------
1484
// Arithmetic, basic
// Logical
// Both categories share V1Write_2c_1V01. Patterns without a trailing "$" are
// prefix matches.
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
    "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]",
    "^ADR_[SU]XTW_ZZZ_D_[0123]$",
    "^ADR_LSL_ZZZ_[SD]_[0123]$",
    "^[SU]ABD_ZP[mZ]Z_[BHSD]",
    "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]",
    "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
    "^SUBR_Z(I|P[mZ]Z)_[BHSD]",
    "^(AND|EOR|ORR)_ZI$",
    "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ",
    "^EOR(BT|TB)_ZZZ_[BHSD]$",
    "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>;
1500
// Arithmetic, shift
def : InstRW<[V1Write_2c_1V1],
  (instregex
    "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
    "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
    "^(ASR|LSL|LSR)_ZZI_[BHSD]",
    "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
    "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift right for divide
def : InstRW<[V1Write_4c_1V1],
  (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Count/reverse bits
def : InstRW<[V1Write_2c_1V01],
  (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[V1Write_2c_1V01],
  (instrs DUPM_ZI)>;
1517
// Compare and set flags
// (vector/immediate compares, then the _WIDE compares)
def : InstRW<[V1Write_4c_1M0_1V0],
  (instregex
    "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
    "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;

// Conditional extract operations, scalar form
def : InstRW<[V1Write_9c_1M0_1V1],
  (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
// COMPACT and SPLICE are matched by the same record.
def : InstRW<[V1Write_3c_1V1],
  (instregex
    "^CLAST[AB]_[VZ]PZ_[BHSD]$",
    "^COMPACT_ZPZ_[SD]$",
    "^SPLICE_ZPZZ?_[BHSD]$")>;
1530
// Convert to floating point, 64b to float or convert to double
def : InstRW<[V1Write_3c_1V0],
  (instregex
    "^[SU]CVTF_ZPmZ_Dto[HSD]",
    "^[SU]CVTF_ZPmZ_StoD")>;

// Convert to floating point, 32b to single or half
def : InstRW<[V1Write_4c_2V0],
  (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;

// Convert to floating point, 16b to half
def : InstRW<[V1Write_6c_4V0],
  (instregex "^[SU]CVTF_ZPmZ_HtoH")>;

// Copy, scalar
def : InstRW<[V1Write_5c_1M0_1V01],
  (instregex "^CPY_ZPmR_[BHSD]$")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[V1Write_2c_1V01],
  (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
1546
// Divides, 32 bit
def : InstRW<[V1Write_12c7_1V0],
  (instregex
    "^[SU]DIVR?_ZPmZ_S",
    "^[SU]DIV_ZPZZ_S")>;

// Divides, 64 bit
def : InstRW<[V1Write_20c7_1V0],
  (instregex
    "^[SU]DIVR?_ZPmZ_D",
    "^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB],
  (instregex "^[SU]DOT_ZZZI?_S$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V1Wr_ZUDOTB, V1Rd_ZUDOTB],
  (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;

// Dot product, 16 bit
def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH],
  (instregex "^[SU]DOT_ZZZI?_D$")>;
1564
// Duplicate, immediate and indexed form
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^DUP_ZI_[BHSD]$",
    "^DUP_ZZI_[BHSDQ]$")>;

// Duplicate, scalar form
def : InstRW<[V1Write_3c_1M0],
  (instregex "^DUP_ZR_[BHSD]$")>;

// Extend, sign or zero
def : InstRW<[V1Write_2c_1V1],
  (instregex
    "^[SU]XTB_ZPmZ_[HSD]",
    "^[SU]XTH_ZPmZ_[SD]",
    "^[SU]XTW_ZPmZ_[D]")>;

// Extract
def : InstRW<[V1Write_2c_1V01],
  (instrs EXT_ZZI)>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[V1Write_3c_1V1],
  (instregex
    "^LAST[AB]_VPZ_[BHSD]$",
    "^INSR_ZV_[BHSD]$")>;

// Extract/insert operation, scalar
def : InstRW<[V1Write_6c_1M0_1V1],
  (instregex
    "^LAST[AB]_RPZ_[BHSD]$",
    "^INSR_ZR_[BHSD]$")>;
1587
// Horizontal operations, B, H, S form, imm, imm
def : InstRW<[V1Write_4c_1V0],
  (instregex "^INDEX_II_[BHS]$")>;

// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar
def : InstRW<[V1Write_7c_1M0_1V0],
  (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

// Horizontal operations, D form, imm, imm
def : InstRW<[V1Write_5c_2V0],
  (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, imm / scalar / imm, scalar
def : InstRW<[V1Write_8c_2M0_2V0],
  (instregex "^INDEX_(IR|RI|RR)_D$")>;

// Move prefix
// (predicated forms first, then the unpredicated MOVPRFX_ZZ)
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^MOVPRFX_ZP[mz]Z_[BHSD]$",
    "^MOVPRFX_ZZ$")>;
1603
// Matrix multiply-accumulate
def : InstRW<[V1Wr_ZMMA, V1Rd_ZMMA],
  (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Multiply, B, H, S element size
def : InstRW<[V1Write_4c_1V0],
  (instregex
    "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
    "^MUL_ZPZZ_[BHS]",
    "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
    "^[SU]MULH_ZPZZ_[BHS]")>;

// Multiply, D element size
def : InstRW<[V1Write_5c_2V0],
  (instregex
    "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
    "^MUL_ZPZZ_D",
    "^[SU]MULH_(ZPmZ|ZZZ)_D",
    "^[SU]MULH_ZPZZ_D")>;

// Multiply accumulate, D element size
// The ZPmZZ records place a ReadDefault between the write and V1Rd_ZMAD; the
// ZPZZZ records do not.
def : InstRW<[V1Wr_ZMAD, V1Rd_ZMAD],
  (instregex "^ML[AS]_ZPZZZ_D")>;
def : InstRW<[V1Wr_ZMAD, ReadDefault, V1Rd_ZMAD],
  (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;

// Multiply accumulate, B, H, S element size
// NOTE: This is not specified in the SOG.
def : InstRW<[V1Write_4c_1V0],
  (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;

// Predicate counting vector
def : InstRW<[V1Write_2c_1V0],
  (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
1631
// Reduction, arithmetic, B form
def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
  (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

// Reduction, arithmetic, H form
def : InstRW<[V1Write_12c_1V_1V01_2V1],
  (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

// Reduction, arithmetic, S form
def : InstRW<[V1Write_10c_1V_1V01_2V1],
  (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

// Reduction, arithmetic, D form
def : InstRW<[V1Write_8c_1V_1V01],
  (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

// Reduction, logical
def : InstRW<[V1Write_12c_4V01],
  (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
1650
// Reverse, vector
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^REV_ZZ_[BHSD]$",
    "^REVB_ZPmZ_[HSD]$",
    "^REVH_ZPmZ_[SD]$",
    "^REVW_ZPmZ_D$")>;

// Categories sharing V1Write_2c_1V01:
//   - Select, vector form
//   - Table lookup
//   - Table lookup extension
//   - Transpose, vector form
//   - Unpack and extend
//   - Zip/unzip
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^SEL_ZPZZ_[BHSD]$",
    "^TB[LX]_ZZZ_[BHSD]$",
    "^TRN[12]_ZZZ_[BHSDQ]$",
    "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
    "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1668
1669
1670// SVE floating-point instructions
1671// -----------------------------------------------------------------------------
1672
// Floating point absolute value/difference
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^FAB[SD]_ZPmZ_[HSD]",
    "^FABD_ZPZZ_[HSD]",
    "^FABS_ZPmZ_[HSD]")>;

// Floating point arithmetic
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
    "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
    "^FADDP_ZPmZZ_[HSD]",
    "^FNEG_ZPmZ_[HSD]",
    "^FSUBR_ZPm[IZ]_[HSD]",
    "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;

// Floating point associative add, F16
def : InstRW<[V1Write_19c_18V0],
  (instrs FADDA_VPZ_H)>;

// Floating point associative add, F32
def : InstRW<[V1Write_11c_10V0],
  (instrs FADDA_VPZ_S)>;

// Floating point associative add, F64
def : InstRW<[V1Write_8c_3V01],
  (instrs FADDA_VPZ_D)>;
1694
// Floating point compare
def : InstRW<[V1Write_2c_1V0],
  (instregex
    "^FAC(GE|GT)_PPzZZ_[HSD]$",
    "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
    "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;

// Floating point complex add
def : InstRW<[V1Write_3c_1V01],
  (instregex "^FCADD_ZPmZ_[HSD]$")>;

// Floating point complex multiply add
// The ZPmZZ records place a ReadDefault between the write and V1Rd_ZFCMA; the
// ZZZI records do not.
def : InstRW<[V1Wr_ZFCMA, ReadDefault, V1Rd_ZFCMA],
  (instregex "^FCMLA_ZPmZZ_[HSD]")>;
def : InstRW<[V1Wr_ZFCMA, V1Rd_ZFCMA],
  (instregex "^FCMLA_ZZZI_[HS]")>;

// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
// Floating point convert to integer, F32
def : InstRW<[V1Write_4c_2V0],
  (instregex
    "^FCVT_ZPmZ_(HtoS|StoH)",
    "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;

// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
// Floating point convert to integer, F64
def : InstRW<[V1Write_3c_1V0],
  (instregex
    "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
    "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;

// Floating point convert to integer, F16
def : InstRW<[V1Write_6c_4V0],
  (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;

// Floating point copy
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^FCPY_ZPmI_[HSD]$",
    "^FDUP_ZI_[HSD]$")>;
1723
// Floating point divide, F16
def : InstRW<[V1Write_13c10_1V0],
  (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;

// Floating point divide, F32
def : InstRW<[V1Write_10c7_1V0],
  (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;

// Floating point divide, F64
def : InstRW<[V1Write_15c7_1V0],
  (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;

// Floating point min/max
def : InstRW<[V1Write_2c_1V01],
  (instregex
    "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
    "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;

// Floating point multiply
def : InstRW<[V1Write_3c_1V01],
  (instregex
    "^(FSCALE|FMULX)_ZPmZ_[HSD]",
    "^FMULX_ZPZZ_[HSD]",
    "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
    "^FMUL_ZPZ[IZ]_[HSD]")>;

// Floating point multiply accumulate
// The ZPmZZ records place a ReadDefault between the write and V1Rd_ZFMA; the
// ZZZI and ZPZZZ records do not.
def : InstRW<[V1Wr_ZFMA, ReadDefault, V1Rd_ZFMA],
  (instregex
    "^FN?ML[AS]_ZPmZZ_[HSD]",
    "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
def : InstRW<[V1Wr_ZFMA, V1Rd_ZFMA],
  (instregex
    "^FML[AS]_ZZZI_[HSD]",
    "^FN?ML[AS]_ZPZZZ_[HSD]")>;
1750
// Floating point reciprocal step
def : InstRW<[V1Write_4c_1V01],
  (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;

// Floating point reciprocal estimate, F16
def : InstRW<[V1Write_6c_4V0],
  (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;

// Floating point reciprocal estimate, F32
def : InstRW<[V1Write_4c_2V0],
  (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;

// Floating point reciprocal estimate, F64
def : InstRW<[V1Write_3c_1V0],
  (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;

// Floating point reciprocal exponent
def : InstRW<[V1Write_3c_1V0],
  (instregex "^FRECPX_ZPmZ_[HSD]")>;

// Floating point reduction, F16
def : InstRW<[V1Write_13c_6V01],
  (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;

// Floating point reduction, F32
def : InstRW<[V1Write_11c_1V_5V01],
  (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;

// Floating point reduction, F64
def : InstRW<[V1Write_9c_1V_4V01],
  (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
1774
// Floating point round to integral, F16
def : InstRW<[V1Write_6c_1V0],
  (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;

// Floating point round to integral, F32
def : InstRW<[V1Write_4c_1V0],
  (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;

// Floating point round to integral, F64
def : InstRW<[V1Write_3c_1V0],
  (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;

// Floating point square root, F16
def : InstRW<[V1Write_13c10_1V0],
  (instregex "^FSQRT_ZPmZ_H")>;

// Floating point square root, F32
def : InstRW<[V1Write_10c7_1V0],
  (instregex "^FSQRT_ZPmZ_S")>;

// Floating point square root, F64
def : InstRW<[V1Write_16c7_1V0],
  (instregex "^FSQRT_ZPmZ_D")>;

// Floating point trigonometric
def : InstRW<[V1Write_3c_1V01],
  (instregex
    "^FEXPA_ZZ_[HSD]$",
    "^FTMAD_ZZI_[HSD]$",
    "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
1797
1798
1799// SVE BFloat16 (BF16) instructions
1800// -----------------------------------------------------------------------------
1801
// Convert, F32 to BF16
def : InstRW<[V1Write_4c_1V0],
  (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[V1Wr_ZBFDOT, V1Rd_ZBFDOT],
  (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[V1Wr_ZBFMMA, V1Rd_ZBFMMA],
  (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
def : InstRW<[V1Wr_ZBFMAL, V1Rd_ZBFMAL],
  (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1813
1814
1815// SVE Load instructions
1816// -----------------------------------------------------------------------------
1817
// Load vector
def : InstRW<[V1Write_6c_1L01],
  (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[V1Write_6c_1L_1M],
  (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
// Contiguous load, scalar + scalar
// Contiguous load broadcast, scalar + imm
// Contiguous load broadcast, scalar + scalar
// The records without the _IMM suffix whose name carries H (LD1H, LD1H_[SD],
// LD1SH_[SD], LD1RQ_H) are split out into the second def, which uses a write
// class with an extra S term.
def : InstRW<[V1Write_6c_1L01],
  (instregex
    "^LD1[BHWD]_IMM$",
    "^LD1S?B_[HSD]_IMM$",
    "^LD1S?H_[SD]_IMM$",
    "^LD1S?W_D_IMM$",
    "^LD1[BWD]$",
    "^LD1S?B_[HSD]$",
    "^LD1S?W_D$",
    "^LD1R[BHWD]_IMM$",
    "^LD1RSW_IMM$",
    "^LD1RS?B_[HSD]_IMM$",
    "^LD1RS?H_[SD]_IMM$",
    "^LD1RS?W_D_IMM$",
    "^LD1RQ_[BHWD]_IMM$",
    "^LD1RQ_[BWD]$")>;
def : InstRW<[V1Write_7c_1L01_1S],
  (instregex
    "^LD1H$",
    "^LD1S?H_[SD]$",
    "^LD1RQ_H$")>;
1845
// Non temporal load, scalar + imm
def : InstRW<[V1Write_6c_1L01],
  (instregex "^LDNT1[BHWD]_ZRI$")>;

// Non temporal load, scalar + scalar
// (the H record uses a different write class from B/W/D)
def : InstRW<[V1Write_7c_1L01_1S],
  (instrs LDNT1H_ZRR)>;
def : InstRW<[V1Write_6c_1L01_1S],
  (instregex "^LDNT1[BWD]_ZRR$")>;

// Contiguous first faulting load, scalar + scalar
// (the H records use a different write class from B/W/D)
def : InstRW<[V1Write_7c_1L01_1S],
  (instregex
    "^LDFF1H$",
    "^LDFF1S?H_[SD]$")>;
def : InstRW<[V1Write_6c_1L01_1S],
  (instregex
    "^LDFF1[BWD]$",
    "^LDFF1S?B_[HSD]$",
    "^LDFF1S?W_D$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[V1Write_6c_1L01],
  (instregex
    "^LDNF1[BHWD]_IMM$",
    "^LDNF1S?B_[HSD]_IMM$",
    "^LDNF1S?H_[SD]_IMM$",
    "^LDNF1S?W_D_IMM$")>;
1865
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V1Write_8c_2L01_2V01],
  (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + scalar
// (LD2H uses a different write class from LD2B/LD2W/LD2D)
def : InstRW<[V1Write_10c_2L01_2V01],
  (instrs LD2H)>;
def : InstRW<[V1Write_9c_2L01_2V01],
  (instregex "^LD2[BWD]$")>;

// Contiguous Load three structures to three vectors, scalar + imm
def : InstRW<[V1Write_11c_3L01_3V01],
  (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous Load three structures to three vectors, scalar + scalar
def : InstRW<[V1Write_13c_3L01_1S_3V01],
  (instregex "^LD3[BHWD]$")>;

// Contiguous Load four structures to four vectors, scalar + imm
def : InstRW<[V1Write_12c_4L01_4V01],
  (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous Load four structures to four vectors, scalar + scalar
def : InstRW<[V1Write_13c_4L01_2S_4V01],
  (instregex "^LD4[BHWD]$")>;
1884
// Gather load, vector + imm, 32-bit element size
def : InstRW<[V1Write_11c_1L_1V],
  (instregex
    "^GLD(FF)?1S?[BH]_S_IMM$",
    "^GLD(FF)?1W_IMM$")>;

// Gather load, vector + imm, 64-bit element size
// The same def also matches the non-_IMM 64-bit element records, with or
// without the _[SU]XTW and _SCALED suffixes.
def : InstRW<[V1Write_9c_2L_2V],
  (instregex
    "^GLD(FF)?1S?[BHW]_D_IMM$",
    "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
    "^GLD(FF)?1D_IMM$",
    "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[V1Write_11c_2L_2V],
  (instregex
    "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
    "^GLD(FF)?1W_[SU]XTW_SCALED")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V1Write_9c_1L_1V],
  (instregex
    "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
    "^GLD(FF)?1W_[SU]XTW$")>;

// Prefetch
// NOTE: This is not specified in the SOG.
def : InstRW<[V1Write_4c_1L01],
  (instregex "^PRF[BHWD]")>;
1909
1910
1911// SVE Store instructions
1912// -----------------------------------------------------------------------------
1913
// Store from predicate reg
def : InstRW<[V1Write_1c_1L01],
  (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[V1Write_2c_1L01_1V],
  (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
// Contiguous store, scalar + scalar
// The ST1H records without the _IMM suffix are split out into the second def,
// which uses a write class with an extra S term.
def : InstRW<[V1Write_2c_1L01_1V],
  (instregex
    "^ST1[BHWD]_IMM$",
    "^ST1B_[HSD]_IMM$",
    "^ST1H_[SD]_IMM$",
    "^ST1W_D_IMM$",
    "^ST1[BWD]$",
    "^ST1B_[HSD]$",
    "^ST1W_D$")>;
def : InstRW<[V1Write_2c_1L01_1S_1V],
  (instregex "^ST1H(_[SD])?$")>;
1930
// Contiguous store two structures from two vectors, scalar + imm
// Contiguous store two structures from two vectors, scalar + scalar
// (ST2H without _IMM is named separately and uses a different write class)
def : InstRW<[V1Write_4c_1L01_1V],
  (instregex
    "^ST2[BHWD]_IMM$",
    "^ST2[BWD]$")>;
def : InstRW<[V1Write_4c_1L01_1S_1V],
  (instrs ST2H)>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[V1Write_7c_5L01_5V],
  (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar
def : InstRW<[V1Write_7c_5L01_5S_5V],
  (instregex "^ST3[BHWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[V1Write_11c_9L01_9V],
  (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar
def : InstRW<[V1Write_11c_9L01_9S_9V],
  (instregex "^ST4[BHWD]$")>;

// Non temporal store, scalar + imm
// Non temporal store, scalar + scalar
// (STNT1H_ZRR is named separately and uses a different write class)
def : InstRW<[V1Write_2c_1L01_1V],
  (instregex
    "^STNT1[BHWD]_ZRI$",
    "^STNT1[BWD]_ZRR$")>;
def : InstRW<[V1Write_2c_1L01_1S_1V],
  (instrs STNT1H_ZRR)>;
1954
// Categories sharing V1Write_10c_2L01_2V:
//   - Scatter store vector + imm 32-bit element size
//   - Scatter store, 32-bit scaled offset
//   - Scatter store, 32-bit unscaled offset
def : InstRW<[V1Write_10c_2L01_2V],
  (instregex
    "^SST1[BH]_S_IMM$",
    "^SST1W_IMM$",
    "^SST1(H_S|W)_[SU]XTW_SCALED$",
    "^SST1[BH]_S_[SU]XTW$",
    "^SST1W_[SU]XTW$")>;

// Scatter store, 32-bit unpacked unscaled offset
// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[V1Write_6c_1L01_1V],
  (instregex
    "^SST1[BHW]_D_[SU]XTW$",
    "^SST1D_[SU]XTW$",
    "^SST1[HW]_D_[SU]XTW_SCALED$",
    "^SST1D_[SU]XTW_SCALED$")>;

// Categories sharing V1Write_6c_1L01_1V:
//   - Scatter store vector + imm 64-bit element size
//   - Scatter store, 64-bit scaled offset
//   - Scatter store, 64-bit unscaled offset
def : InstRW<[V1Write_6c_1L01_1V],
  (instregex
    "^SST1[BHW]_D_IMM$",
    "^SST1D_IMM$",
    "^SST1[HW]_D_SCALED$",
    "^SST1D_SCALED$",
    "^SST1[BHW]_D$",
    "^SST1D$")>;
1980
1981
1982// SVE Miscellaneous instructions
1983// -----------------------------------------------------------------------------
1984
// Categories sharing V1Write_2c_1M0:
//   - Read first fault register, unpredicated
//   - Set first fault register
//   - Write to first fault register
def : InstRW<[V1Write_2c_1M0],
  (instrs RDFFR_P, SETFFR, WRFFR)>;

// Read first fault register, predicated
def : InstRW<[V1Write_3c_2M0],
  (instrs RDFFR_PPz)>;

// Read first fault register and set flags
def : InstRW<[V1Write_4c_1M],
  (instrs RDFFRS_PPz)>;
1997
1998
1999}
2000