xref: /llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td (revision 865104a1042e824254b130c00c7f8ee0e0e0f6c5)
1//=-- SMEInstrFormats.td -  AArch64 SME Instruction classes -*- tablegen -*--=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
10//
11//===----------------------------------------------------------------------===//
12
// Immediate-to-register ComplexPatterns. Each one matches a single i32 operand
// through the C++ selector named in the string, ImmToReg<FirstReg, N>, with one
// pattern per ZA tile register family (ZAB/ZAH/ZAS/ZAD/ZAQ) plus ZT0.
// NOTE(review): N looks like the highest valid immediate for that register
// family - confirm against the ImmToReg implementation.
13def imm_to_tile8   : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAB0, 0>",  []>;
14def imm_to_tile16  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAH0, 1>",  []>;
15def imm_to_tile32  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAS0, 3>",  []>;
16def imm_to_tile64  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAD0, 7>",  []>;
17def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAQ0, 15>", []>;
18def imm_to_zt      : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZT0,  0>",  []>;
19
// Tile-slice ComplexPatterns. Each matches an i32 slice index and yields two
// operands, which the pattern helpers in this file bind as $base and $offset.
// Matching is done by the C++ selector SelectSMETileSlice<A, B>.
// NOTE(review): A is presumably the largest foldable offset and B the scale
// (1 for all single-slice forms here) - confirm against SelectSMETileSlice.
20def tileslice8   : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
21def tileslice16  : ComplexPattern<i32 , 2, "SelectSMETileSlice<7,  1>", []>;
22def tileslice32  : ComplexPattern<i32 , 2, "SelectSMETileSlice<3,  1>", []>;
23def tileslice64  : ComplexPattern<i32 , 2, "SelectSMETileSlice<1,  1>", []>;
24def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0,  1>", []>; // nop
25
// Tile-slice range ComplexPatterns with a second selector argument of 2 (the
// "s2" suffix). Like the tileslice* patterns they yield two operands.
// NOTE(review): the first argument (14/6/2/0) is presumably the largest
// foldable offset for the range form - confirm against SelectSMETileSlice.
26def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
27def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6,  2>", []>;
28def tileslicerange1s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<2,  2>", []>;
29def tileslicerange0s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<0,  2>", []>;
30
// Tile-slice range ComplexPatterns with a second selector argument of 4 (the
// "s4" suffix). Like the tileslice* patterns they yield two operands.
// NOTE(review): the first argument (12/4/0) is presumably the largest foldable
// offset for the range form - confirm against SelectSMETileSlice.
31def tileslicerange2s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
32def tileslicerange1s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<4,  4>", []>;
33def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0,  4>", []>;
34
// Address-mode ComplexPattern: matches a pointer (iPTR) and yields two
// operands via the C++ selector SelectAddrModeIndexedSVE<0, 15>. WantsRoot is
// set so the selector also receives the root node of the match.
// NOTE(review): <0, 15> is presumably the accepted immediate range - confirm.
35let WantsRoot = true in
36def am_sme_indexed_b4 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0, 15>">;
37
38// The FORM_TRANSPOSED_REG_TUPLE pseudos defined below are intended to
39// improve register allocation for intrinsics which use strided and contiguous
40// multi-vector registers, avoiding unnecessary copies.
41// If the operands of the pseudo are copies where the source register is in
42// the StridedOrContiguous class, the pseudo is used to provide a hint to the
43// register allocator suggesting a contiguous multi-vector register which
44// matches the subregister sequence used by the operands.
45// If the operands do not match this pattern, the pseudos are expanded
46// to a REG_SEQUENCE using the post-isel hook.
47
// Two-vector form: produces a ZPR2 tuple ($tup) from two ZPR inputs.
// It is declared free of side effects and requests the post-ISel hook.
48def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO :
49  Pseudo<(outs ZPR2:$tup),
50         (ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{
51  let hasSideEffects = 0;
52  let hasPostISelHook = 1;
53}
54
// Four-vector form: produces a ZPR4 tuple ($tup) from four ZPR inputs.
// It is declared free of side effects and requests the post-ISel hook.
55def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
56  Pseudo<(outs ZPR4:$tup),
57         (ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>, Sched<[]>{
58  let hasSideEffects = 0;
59  let hasPostISelHook = 1;
60}
61
// Type profile shared by the ZA load/store nodes: no results and three
// operands (operand 0 integer, operand 1 pointer, operand 2 integer).
// Both nodes carry a chain and are marked as having side effects; the LDR
// node may load and the STR node may store.
62def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
63def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
64                             [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
65def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
66                             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
67
68//===----------------------------------------------------------------------===//
69// SME Pseudo Classes
70//===----------------------------------------------------------------------===//
71
// Instruction mapping over all records deriving from SMEPseudo2Instr.
// Records sharing the same PseudoName form a row; the key column is the record
// with IsInstr = 0 (the pseudo) and the value column is the record with
// IsInstr = 1 (the real instruction), so the map goes from pseudo to instruction.
72def getSMEPseudoMap : InstrMapping {
73  let FilterClass = "SMEPseudo2Instr";
74  let RowFields = ["PseudoName"];
75  let ColFields = ["IsInstr"];
76  let KeyCol = ["0"];
77  let ValueCols = [["1"]];
78}
79
// Tag class consumed by getSMEPseudoMap.
//   name  - shared key linking a pseudo with its real instruction.
//   instr - 1 for the real instruction, 0 for the pseudo.
80class SMEPseudo2Instr<string name, bit instr> {
81  string PseudoName = name;
82  bit IsInstr = instr;
83}
84
// Pseudo for tile outer-product instructions. It has no outputs; the tile is
// passed as a plain i32 immediate ($tile) together with two predicates and two
// vectors of register class zpr_ty.
//   za_flag - recorded in SMEMatrixType for this pseudo.
85class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
86    : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
87                          zpr_ty:$zn, zpr_ty:$zm), []>,
88      Sched<[]> {
89  // Translated to the actual instructions in AArch64ISelLowering.cpp
90  let SMEMatrixType = za_flag;
91  let usesCustomInserter = 1;
92}
93
// Pseudo (IsInstr = 0 under key `name`) for ZA-array operations taking a
// multi-vector $Zn and a single vector $Zm. It has no outputs; the slice is
// addressed by base register $Rv plus immediate $imm3.
// usesCustomInserter = 1 requests expansion by the target's custom inserter.
94class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
95                                            ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
96    : SMEPseudo2Instr<name, 0>,
97      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> {
98  let SMEMatrixType = za_flag;
99  let usesCustomInserter = 1;
100}
101
// Pseudo (IsInstr = 0 under key `name`) for ZA-array operations whose $Zn and
// $Zm operands are both multi-vectors of the same class. It has no outputs;
// the slice is addressed by base register $Rv plus immediate $imm3.
102class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
103                                           SMEMatrixTypeEnum za_flag>
104    : SMEPseudo2Instr<name, 0>,
105      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> {
106  let SMEMatrixType = za_flag;
107  let usesCustomInserter = 1;
108}
109
// Pseudo (IsInstr = 0 under key `name`) for indexed ZA-array operations: a
// multi-vector $Zn, a single vector $Zm and an extra immediate $i of type
// imm_ty. It has no outputs; the slice is addressed by $Rv plus $imm3.
110class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
111                                           ZPRRegOp zpr_ty, Operand imm_ty, SMEMatrixTypeEnum za_flag>
112    : SMEPseudo2Instr<name, 0>,
113      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> {
114  let SMEMatrixType = za_flag;
115  let usesCustomInserter = 1;
116}
117
// Pseudo (IsInstr = 0 under key `name`) taking a multi-vector $Zn and a ZA
// slice addressed by base register $Rs (MatrixIndexGPR32Op8_11) plus immediate
// $imm. It has no outputs.
118class sme2_move_to_za_pseudo<string name, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
119    : SMEPseudo2Instr<name, 0>,
120      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
121  let SMEMatrixType = za_flag;
122  let usesCustomInserter = 1;
123}
124
// Pseudo (IsInstr = 0 under key `name`) taking a tile immediate $tile, a
// slice addressed by base register $Rs (MatrixIndexGPR32Op12_15) plus
// immediate $imm, and a multi-vector $Zn. It has no outputs.
125class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
126    : SMEPseudo2Instr<name, 0>,
127      Pseudo<(outs), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
128  let SMEMatrixType = za_flag;
129  let usesCustomInserter = 1;
130}
131
// Pseudo (IsInstr = 0 under key `name`) whose only operands are the slice
// base register $Rs and immediate $imm. It has no outputs.
// NOTE(review): the "sem2p1" prefix looks like a typo for "sme2p1", but the
// name is referenced from outside this class, so it is left unchanged.
132class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum za_flag>
133    : SMEPseudo2Instr<name, 0>,
134      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> {
135  let SMEMatrixType = za_flag;
136  let usesCustomInserter = 1;
137}
138
// Pseudo (IsInstr = 0 under key `name`) that produces a vector result $Zn
// from a tile immediate $tile and a slice addressed by base register $Rs
// (MatrixIndexGPR32Op12_15) plus immediate $imm.
139class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
140    : SMEPseudo2Instr<name, 0>,
141      Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
142  let SMEMatrixType = za_flag;
143  let usesCustomInserter = 1;
144}
145
// Pseudo (IsInstr = 0 under key `name`) that produces a multi-vector result
// $Zd from a ZA slice addressed by base register $Rv plus immediate $imm3.
146class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
147                                      SMEMatrixTypeEnum za_flag>
148    : SMEPseudo2Instr<name, 0>,
149      Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
150  let SMEMatrixType = za_flag;
151  let usesCustomInserter = 1;
152}
153
154//===----------------------------------------------------------------------===//
155// SME pattern match helpers.
156//===----------------------------------------------------------------------===//
157
// Selects `intrinsic` (slice, Zn, Zm) to the pseudo named `name`_PSEUDO.
// The i32 slice operand is split by `tileslice` into $base and $offset; the
// vectors are passed straight through, with $Zm constrained to zpr_ty.
158class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
159                                     ValueType vt, ComplexPattern tileslice>
160    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm),
161          (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>;
162
163
// Two-vector-group variant: selects `intrinsic` (slice, Zn1, Zn2, Zm) to
// `name`_PSEUDO. $Zn1/$Zn2 are combined into one tuple operand with
// FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO; $Zm stays a single zpr_ty vector.
164class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
165                                         ValueType vt, ComplexPattern tileslice>
166    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
167          (!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2),
168                                              zpr_ty:$Zm)>;
// Four-vector-group variant: selects `intrinsic` (slice, Zn1..Zn4, Zm) to
// `name`_PSEUDO. $Zn1..$Zn4 are combined into one tuple operand with
// FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO; $Zm stays a single zpr_ty vector.
169class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
170                                         ValueType vt, ComplexPattern tileslice>
171    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
172                     vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
173          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
174                                              (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
175                                              zpr_ty:$Zm)>;
176
// Selects `intrinsic` (slice, Zn1, Zn2, Zm1, Zm2) to `name`_PSEUDO. Each
// vector pair is assembled into a ZPR2Mul2 tuple with REG_SEQUENCE
// (zsub0, zsub1).
177class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
178    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
179          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
180                                              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
181                                              (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
182
// Selects `intrinsic` (slice, Zn1..Zn4, Zm1..Zm4) to `name`_PSEUDO. Each
// group of four vectors is assembled into a ZPR4Mul4 tuple with REG_SEQUENCE
// (zsub0..zsub3).
183class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
184    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
185                     vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
186          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
187                                              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
188                                              (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
189
// Indexed variant: selects `intrinsic` (slice, Zn, Zm, i) to `name`_PSEUDO.
// The slice is split by `tileslice` into $base/$offset and the i32 immediate
// $i (operand type imm_ty) is forwarded unchanged.
190class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
191                                    Operand imm_ty, ComplexPattern tileslice>
192   : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)),
193         (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>;
194
195
// Indexed two-vector-group variant: selects `intrinsic` (slice, Zn1, Zn2, Zm,
// i) to `name`_PSEUDO. $Zn1/$Zn2 are combined with
// FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO; $Zm and immediate $i pass through.
196class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
197                                        Operand imm_ty, ComplexPattern tileslice>
198    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
199          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
200                                              (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
201
// Indexed four-vector-group variant: selects `intrinsic` (slice, Zn1..Zn4,
// Zm, i) to `name`_PSEUDO. $Zn1..$Zn4 are combined with
// FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO; $Zm and immediate $i pass through.
202class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
203                                        Operand imm_ty, ComplexPattern tileslice>
204    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
205                     vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
206          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
207                                              (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
208                                              zpr_ty:$Zm, imm_ty:$i)>;
209
// Selects `intrinsic` (Zn1, Zn2, i), which returns out_vt, directly to the
// instruction `name` (no _PSEUDO suffix). The two inputs are assembled into a
// ZPR2Mul2 tuple with REG_SEQUENCE and the immediate $i is forwarded.
210class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
211    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
212                  (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
213
// Selects `intrinsic` (Zn1..Zn4, i), which returns out_vt, directly to the
// instruction `name` (no _PSEUDO suffix). The four inputs are assembled into a
// ZPR4Mul4 tuple with REG_SEQUENCE and the immediate $i is forwarded.
214class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
215    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
216                  (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
217                                            imm_ty:$i)>;
218
// Selects `intrinsic` (Zn1..Zn4), which returns out_vt, directly to the
// instruction `name`. The four inputs are assembled into a ZPR4Mul4 tuple
// with REG_SEQUENCE.
219class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
220    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
221                  (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
222
// Selects `intrinsic` (slice, Zn1, Zn2) to `name`_PSEUDO. The slice is split
// by `tileslice` into $base/$offset and the two vectors form a ZPR2Mul2 tuple.
223class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
224    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
225          (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
226
// Selects `intrinsic` (slice, Zn1..Zn4) to `name`_PSEUDO. The slice is split
// by `tileslice` into $base/$offset and the four vectors form a ZPR4Mul4 tuple.
227class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
228    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
229          (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
230
// Tile form: selects `intrinsic` (tile, slice, Zn1, Zn2) to `name`_PSEUDO.
// The slice base uses MatrixIndexGPR32Op12_15 (not Op8_11 as in the ZA-array
// helpers); the two vectors form a ZPR2Mul2 tuple.
231class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
232    : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
233          (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
234
// Tile form: selects `intrinsic` (tile, slice, Zn1..Zn4) to `name`_PSEUDO.
// The slice base uses MatrixIndexGPR32Op12_15; the four vectors form a
// ZPR4Mul4 tuple.
235class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
236    : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
237          (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
238
// Selects `intrinsic` (slice) to the instruction named exactly `name`.
// Unlike the neighbouring helpers, no "_PSEUDO" suffix is appended here, so
// the caller must pass the full record name of the target instruction.
239class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
240    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
241    (!cast<Instruction>(name) $base, $offset)>;
242
// Selects `intrinsic` (tile, slice), which returns out_vt, to `name`_PSEUDO.
// The slice base uses MatrixIndexGPR32Op12_15 and is split by `tileslice`
// into $base/$offset.
243class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, Operand tile_imm, Operand index_ty, ComplexPattern tileslice>
244    : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))),
245          (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset)>;
246
247
248//===----------------------------------------------------------------------===//
249// SME pattern match helpers.
250//===----------------------------------------------------------------------===//
251
// Selects `intrinsic` (tile, Pn, Pm, Zn, Zm) to `name`_PSEUDO, forwarding all
// five operands in order.
//   imm_ty - operand type of the tile immediate.
//   pg_ty  - value type of both predicates (register class PPR3bAny).
//   vt     - value type of both vector operands.
252class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic, Operand imm_ty, ValueType pg_ty, ValueType vt>
253    : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm),
254          (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;
255
256
257//===----------------------------------------------------------------------===//
258// SME smstart/smstop
259//===----------------------------------------------------------------------===//
260
261// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
262// both fields:
263//
264//   MSR SVCRSM, #<imm1>
265//   MSR SVCRZA, #<imm1>
266//   MSR SVCRSMZA, #<imm1>
267//
268// It's tricky to use the existing pstate operand defined in
269// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
270// when these fields are also encoded in CRm[3:1].
// MSR (immediate) write to an SVCR pstate field.
// Operands: $pstatefield (svcr_op, 3 bits, encoded in Inst{11-9}) and $imm
// (timm0_1, 1 bit, encoded in Inst{8}). op1 and op2 are fixed to 0b011.
// The instruction also requests the post-ISel hook.
271def MSRpstatesvcrImm1
272  : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
273                      "\t$pstatefield, $imm">,
274    Sched<[WriteSys]> {
275  bits<3> pstatefield;
276  bit imm;
277  let Inst{18-16} = 0b011; // op1
278  let Inst{11-9} = pstatefield;
279  let Inst{8} = imm;
280  let Inst{7-5} = 0b011; // op2
281  let hasPostISelHook = 1;
282}
283
// Assembly aliases for MSRpstatesvcrImm1. The first argument is the
// pstatefield value (0b001 for "sm", 0b010 for "za", 0b011 when neither is
// named); the second is the immediate: 1 for smstart, 0 for smstop.
284def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
285def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
286def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
287
288def : InstAlias<"smstop",     (MSRpstatesvcrImm1 0b011, 0b0)>;
289def : InstAlias<"smstop sm",  (MSRpstatesvcrImm1 0b001, 0b0)>;
290def : InstAlias<"smstop za",  (MSRpstatesvcrImm1 0b010, 0b0)>;
291
292
293//===----------------------------------------------------------------------===//
294// SME Outer Products
295//===----------------------------------------------------------------------===//
296
// Encoding class for predicated floating-point outer-product instructions
// that accumulate into a ZA tile.
//   S, sz, op - fixed opcode bits placed in Inst{4}, Inst{22-21} and
//               Inst{24}/Inst{3} respectively.
//   za_ty     - tile operand type; $ZAda is both read and written (the
//               input $_ZAda is tied to the output through Constraints).
//   zpr_ty    - register class of the $Zn/$Zm vector operands.
// Inst{2-0} is not assigned here; each instantiating multiclass fills it
// according to how many bits its tile number needs.
297class sme_fp_outer_product_inst<bit S, bits<2> sz, bits<2> op, MatrixTileOperand za_ty,
298                                ZPRRegOp zpr_ty, string mnemonic>
299    : I<(outs za_ty:$ZAda),
300      (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
301        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
302        "", []>,
303      Sched<[]> {
304  bits<5> Zm;
305  bits<3> Pm;
306  bits<3> Pn;
307  bits<5> Zn;
308  let Inst{31-25} = 0b1000000;
309  let Inst{24}    = op{1};
310  let Inst{23}    = 0b1;
311  let Inst{22-21} = sz;
312  let Inst{20-16} = Zm;
313  let Inst{15-13} = Pm;
314  let Inst{12-10} = Pn;
315  let Inst{9-5}   = Zn;
316  let Inst{4}     = S;
317  let Inst{3}     = op{0};
318
319  let Constraints = "$ZAda = $_ZAda";
320}
321
// Defines, for a 32-bit tile (TileOp32) FP outer product:
//   NAME         - the real instruction; 2-bit tile number in Inst{1-0},
//                  Inst{2} fixed to 0.
//   NAME_PSEUDO  - the matching pseudo (SMEMatrixTileS), linked to NAME
//                  through SMEPseudo2Instr.
//   a pattern selecting `op` (tile immediate timm32_0_3, nxv4i1 predicates,
//   nxv4f32 vectors) to NAME_PSEUDO.
322multiclass sme_outer_product_fp32<bit S, bits<2> sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> {
323  def NAME : sme_fp_outer_product_inst<S, sz, 0b00, TileOp32, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
324    bits<2> ZAda;
325    let Inst{1-0} = ZAda;
326    let Inst{2}   = 0b0;
327  }
328
329  def NAME # _PSEUDO : sme_outer_product_pseudo<zpr_ty, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
330
331  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
332}
333
// FP8 outer product into a 32-bit tile (TileOp32) with ZPR8 vector operands.
//   NAME         - real instruction; 2-bit tile number in Inst{1-0}, Inst{2}
//                  fixed to 0; declares FPMR and FPCR as implicit uses.
//   NAME_PSEUDO  - matching pseudo (SMEMatrixTileS), additionally marked
//                  mayStore and mayLoad.
//   a pattern selecting `intrinsic` (timm32_0_3, nxv16i1, nxv16i8) to
//   NAME_PSEUDO.
334multiclass sme2_fp8_fmopa_za32<string mnemonic, SDPatternOperator intrinsic> {
335    def NAME : sme_fp_outer_product_inst<0, 0b01, 0b00, TileOp32, ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
336      bits<2> ZAda;
337      let Inst{1-0} = ZAda;
338      let Inst{2}   = 0b0;
339
340      let Uses = [FPMR, FPCR];
341    }
342
343    let mayStore = 1, mayLoad = 1 in
344    def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
345
346    def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv16i1, nxv16i8>;
347}
348
// FP outer product into a 64-bit tile (TileOp64) with ZPR64 vector operands.
//   NAME         - real instruction; the 3-bit tile number fills Inst{2-0}.
//   NAME_PSEUDO  - matching pseudo (SMEMatrixTileD).
//   a pattern selecting `op` (timm32_0_7, nxv2i1, nxv2f64) to NAME_PSEUDO.
349multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
350  def NAME : sme_fp_outer_product_inst<S, 0b10, 0b00, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
351    bits<3> ZAda;
352    let Inst{2-0} = ZAda;
353  }
354
355  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
356
357  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
358}
359
// FP8 outer product into a 16-bit tile (TileOp16) with ZPR8 vector operands.
//   NAME         - real instruction; 1-bit tile number in Inst{0}, Inst{2-1}
//                  fixed to 0; declares FPMR and FPCR as implicit uses.
//   NAME_PSEUDO  - matching pseudo (SMEMatrixTileH), additionally marked
//                  mayStore and mayLoad.
//   a pattern selecting `intrinsic` (timm32_0_1, nxv16i1, nxv16i8) to
//   NAME_PSEUDO.
360multiclass sme2_fp8_fmopa_za16<string mnemonic, SDPatternOperator intrinsic> {
361  def NAME : sme_fp_outer_product_inst<0, {0, 0b1}, 0b01, TileOp16, ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
362    bits<1> ZAda;
363    let Inst{2-1} = 0b00;
364    let Inst{0}   = ZAda;
365
366    let Uses = [FPMR, FPCR];
367  }
368
369  let mayStore = 1, mayLoad = 1 in
370  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>;
371
372  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_1, nxv16i1, nxv16i8>;
373}
374
// Outer product into a 16-bit tile (TileOp16) with ZPR16 vector operands.
//   bf        - low bit of the 2-bit sz field ({0, bf}).
//   s         - passed as the S bit of the encoding.
//   vt        - vector value type used by the selection pattern.
//   intrinsic - defaults to null_frag.
// Defines NAME (1-bit tile number in Inst{0}, Inst{2-1} fixed to 0),
// NAME_PSEUDO (SMEMatrixTileH) and a pattern (timm32_0_1, nxv8i1, vt).
375multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, ValueType vt, SDPatternOperator intrinsic = null_frag> {
376  def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b11, TileOp16, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
377    bits<1> ZAda;
378    let Inst{2-1} = 0b00;
379    let Inst{0}   = ZAda;
380  }
381
382  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>;
383
384  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_1, nxv8i1, vt>;
385}
386
// Encoding class for predicated integer outer-product instructions that
// accumulate into a ZA tile.
//   opc  - 3 opcode bits scattered over Inst{24} (u0), Inst{21} (u1) and
//          Inst{4} (S), as annotated on the assignments below.
//   sz   - placed in Inst{22}.
//   sme2 - placed in Inst{3}.
// $ZAda is both read and written (tied through Constraints). Inst{2-0} is
// not assigned here; the instantiating multiclass fills in the tile number.
387class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
388                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
389                                 string mnemonic>
390    : I<(outs za_ty:$ZAda),
391        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
392        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
393        "", []>,
394      Sched<[]> {
395  bits<5> Zm;
396  bits<3> Pm;
397  bits<3> Pn;
398  bits<5> Zn;
399  let Inst{31-25} = 0b1010000;
400  let Inst{24}    = opc{2}; // u0
401  let Inst{23}    = 0b1;
402  let Inst{22}    = sz;
403  let Inst{21}    = opc{1}; // u1
404  let Inst{20-16} = Zm;
405  let Inst{15-13} = Pm;
406  let Inst{12-10} = Pn;
407  let Inst{9-5}   = Zn;
408  let Inst{4}     = opc{0};  //S;
409  let Inst{3}     = sme2;
410
411  let Constraints = "$ZAda = $_ZAda";
412}
413
// Integer outer product into a 32-bit tile (TileOp32) with ZPR8 operands.
//   NAME         - real instruction (sz = 0, sme2 = 0); 2-bit tile number in
//                  Inst{1-0}, Inst{2} fixed to 0.
//   NAME_PSEUDO  - matching pseudo (SMEMatrixTileS).
//   a pattern selecting `op` (timm32_0_3, nxv16i1, nxv16i8) to NAME_PSEUDO.
414multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
415                                     SDPatternOperator op> {
416  def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0,  TileOp32,
417                                        ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
418    bits<2> ZAda;
419    let Inst{1-0} = ZAda;
420    let Inst{2}   = 0b0;
421  }
422
423  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
424
425  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1, nxv16i8>;
426}
427
// Integer outer product into a 64-bit tile (TileOp64) with ZPR16 operands.
//   NAME         - real instruction (sz = 1, sme2 = 0); the 3-bit tile number
//                  fills Inst{2-0}.
//   NAME_PSEUDO  - matching pseudo (SMEMatrixTileD).
//   a pattern selecting `op` (timm32_0_7, nxv8i1, nxv8i16) to NAME_PSEUDO.
428multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
429                                     SDPatternOperator op> {
430  def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
431                                        ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
432    bits<3> ZAda;
433    let Inst{2-0} = ZAda;
434  }
435
436  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
437
438  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
439}
440
// Encoding class for unpredicated sparse outer products into a 32-bit tile.
// Operands: $Zn (4-bit field, Inst{9-6}), $Zm (5 bits), $Zk (ZK class, 3-bit
// field) and a 2-bit vector index $imm that is printed directly after $Zk.
//   opc - 5 opcode bits scattered over Inst{24}, Inst{21}, Inst{15},
//         Inst{13} and Inst{3}.
// $ZAda is both read and written (tied through Constraints); its 2-bit tile
// number occupies Inst{1-0}.
441class sme_int_sparse_outer_product_i32<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
442    : I<(outs TileOp32:$ZAda),
443        (ins  TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
444        mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
445        "", []>,
446      Sched<[]> {
447  bits<2> ZAda;
448  bits<4> Zn;
449  bits<5> Zm;
450  bits<3> Zk;
451  bits<2> imm;
452  let Inst{31-25} = 0b1000000;
453  let Inst{24}    = opc{4};
454  let Inst{23-22} = 0b01;
455  let Inst{21}    = opc{3};
456  let Inst{20-16} = Zm;
457  let Inst{15}    = opc{2};
458  let Inst{14}    = 0b0;
459  let Inst{13}    = opc{1};
460  let Inst{12-10} = Zk;
461  let Inst{9-6}   = Zn;
462  let Inst{5-4}   = imm;
463  let Inst{3}     = opc{0};
464  let Inst{2}     = 0b0;
465  let Inst{1-0}   = ZAda;
466
467  let Constraints = "$ZAda = $_ZAda";
468}
469
// Encoding class for predicated widening outer products into a 32-bit tile.
//   opc - opc{2} goes to Inst{3} and, inverted, to Inst{24}; opc{1} goes to
//         Inst{21}; opc{0} goes to Inst{4}.
// $ZAda is both read and written (tied through Constraints); its 2-bit tile
// number occupies Inst{1-0} and Inst{2} is fixed to 0.
470class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
471    : I<(outs TileOp32:$ZAda),
472        (ins  TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
473        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
474        "", []>,
475      Sched<[]> {
476  bits<5> Zm;
477  bits<3> Pm;
478  bits<3> Pn;
479  bits<5> Zn;
480  bits<2> ZAda;
481  let Inst{31-25} = 0b1000000;
482  let Inst{24}    = !if(opc{2}, 0, 1);
483  let Inst{23-22} = 0b10;
484  let Inst{21}    = opc{1};
485  let Inst{20-16} = Zm;
486  let Inst{15-13} = Pm;
487  let Inst{12-10} = Pn;
488  let Inst{9-5}   = Zn;
489  let Inst{4}     = opc{0};
490  let Inst{3}     = opc{2};
491  let Inst{2}     = 0b0;
492  let Inst{1-0}   = ZAda;
493
494  let Constraints = "$ZAda = $_ZAda";
495}
496
// Widening outer product with ZPR16 operands, bf16 flavour: defines NAME
// (real instruction), NAME_PSEUDO (SMEMatrixTileS) and a pattern selecting
// `op` (timm32_0_3, nxv8i1, nxv8bf16) to NAME_PSEUDO.
497multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
498  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;
499
500  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
501
502  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8bf16>;
503}
504
// Widening outer product with ZPR16 operands, f16 flavour: identical to
// sme_bf16_outer_product except that the pattern's vector type is nxv8f16.
505multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
506  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;
507
508  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
509
510  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
511}
512
// Encoding class for unpredicated quarter-tile outer products into a 64-bit
// tile (TileOp64).
//   zn_u_pair - bit 1 is encoded as u0 (Inst{24}), bit 0 as N (Inst{9}).
//   zm_u_pair - bit 1 is encoded as u1 (Inst{21}), bit 0 as M (Inst{20}).
//   subtr     - placed in Inst{4}.
// $Zn and $Zm are each encoded in 3 bits. $ZAda is both read and written
// (tied through Constraints); its 3-bit tile number fills Inst{2-0}.
513class sme_quarter_outer_product_i64<bits<2> zn_u_pair, bits<2> zm_u_pair, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
514    : I<(outs TileOp64:$ZAda),
515        (ins  TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
516        mnemonic, "\t$ZAda, $Zn, $Zm",
517        "", []>,
518      Sched<[]> {
519  bits<3> ZAda;
520  bits<3> Zn;
521  bits<3> Zm;
522  let Inst{31-25} = 0b1010000;
523  let Inst{24}    = zn_u_pair{1}; // u0
524  let Inst{23-22} = 0b11;
525  let Inst{21}    = zm_u_pair{1}; // u1
526  let Inst{20}    = zm_u_pair{0}; // M
527  let Inst{19-17} = Zm;
528  let Inst{16-10} = 0b0000000;
529  let Inst{9}     = zn_u_pair{0}; // N
530  let Inst{8-6}   = Zn;
531  let Inst{5}     = 0;
532  let Inst{4}     = subtr;
533  let Inst{3}     = 0b1;
534  let Inst{2-0}   = ZAda;
535
536  let Constraints = "$ZAda = $_ZAda";
537}
538
// Encoding class for unpredicated quarter-tile outer products into a 32-bit
// tile (TileOp32), 8-bit source flavour.
//   zn_u_pair - bit 1 is encoded as u0 (Inst{24}), bit 0 as N (Inst{9}).
//   zm_u_pair - bit 1 is encoded as u1 (Inst{21}), bit 0 as M (Inst{20}).
//   subtr     - placed in Inst{4}.
// $Zn and $Zm are each encoded in 3 bits. $ZAda is both read and written
// (tied through Constraints); its 2-bit tile number fills Inst{1-0}.
539class sme_quarter_outer_product_i8_i32<bits<2> zn_u_pair, bits<2> zm_u_pair, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
540    : I<(outs TileOp32:$ZAda),
541        (ins  TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
542        mnemonic, "\t$ZAda, $Zn, $Zm",
543        "", []>,
544      Sched<[]> {
545  bits<2> ZAda;
546  bits<3> Zn;
547  bits<3> Zm;
548  let Inst{31-25} = 0b1000000;
549  let Inst{24}    = zn_u_pair{1}; // u0
550  let Inst{23-22} = 0b00;
551  let Inst{21}    = zm_u_pair{1}; // u1
552  let Inst{20}    = zm_u_pair{0}; // M
553  let Inst{19-17} = Zm;
554  let Inst{16-10} = 0b0100000;
555  let Inst{9}     = zn_u_pair{0}; // N
556  let Inst{8-6}   = Zn;
557  let Inst{5}     = 0;
558  let Inst{4}     = subtr;
559  let Inst{3-2}   = 0b00;
560  let Inst{1-0}   = ZAda;
561
562  let Constraints = "$ZAda = $_ZAda";
563}
564
// Encoding class for unpredicated quarter-tile outer products into a 32-bit
// tile (TileOp32), 16-bit source flavour. Unlike the i8_i32 and i64 classes,
// the opcode bits arrive as separate parameters: u0 -> Inst{24},
// M -> Inst{20}, N -> Inst{9}, subtr -> Inst{4}; Inst{21} is fixed to 0 as
// part of Inst{23-21}.
// $Zn and $Zm are each encoded in 3 bits. $ZAda is both read and written
// (tied through Constraints); its 2-bit tile number fills Inst{1-0}.
565class sme_quarter_outer_product_i16_i32<bit u0, bit N, bit M, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
566    : I<(outs TileOp32:$ZAda),
567        (ins  TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
568        mnemonic, "\t$ZAda, $Zn, $Zm",
569        "", []>,
570      Sched<[]> {
571  bits<2> ZAda;
572  bits<3> Zn;
573  bits<3> Zm;
574  let Inst{31-25} = 0b1000000;
575  let Inst{24}    = u0;
576  let Inst{23-21} = 0b000;
577  let Inst{20}    = M;
578  let Inst{19-17} = Zm;
579  let Inst{16-10} = 0b0100000;
580  let Inst{9}     = N;
581  let Inst{8-6}   = Zn;
582  let Inst{5}     = 0;
583  let Inst{4}     = subtr;
584  let Inst{3-2}   = 0b10;
585  let Inst{1-0}   = ZAda;
586
587  let Constraints = "$ZAda = $_ZAda";
588}
589
// Instantiates the four operand-shape variants of the 8-bit -> 32-bit
// quarter-tile outer product. In each {x_u, b} pair the high bit is the
// zn_u/zm_u parameter and the low bit b is 1 exactly when that operand uses
// the ZZ_b_mul_r_* class instead of ZPR8Mul2_*. $Zn always takes a *_Lo class
// and $Zm a *_Hi class.
590multiclass sme_quarter_outer_product_i8_i32<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
591  def _MZZ_BToS   : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 0}, subtr,
592                                                        ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>;
593  def _M2ZZ_BToS  : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr,
594                                                         ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>;
595  def _MZ2Z_BToS  : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 1}, subtr,
596                                                         ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, mnemonic>;
597  def _M2Z2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 1}, subtr,
598                                                          ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, mnemonic>;
599}
600
// Instantiates the four operand-shape variants of the 16-bit -> 32-bit
// quarter-tile outer product. `unsigned` is passed as the u0 bit. The N
// (resp. M) bit is 1 exactly when $Zn (resp. $Zm) uses the ZZ_h_mul_r_*
// class instead of ZPR16Mul2_*.
601multiclass sme_quarter_outer_product_i16_i32<bit unsigned, bit subtr, string mnemonic>{
602  def _MZZ_HToS   : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b0, subtr,
603                                                        ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
604  def _M2ZZ_HToS  : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b0, subtr,
605                                                         ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
606  def _MZ2Z_HToS  : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b1, subtr,
607                                                         ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
608  def _M2Z2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b1, subtr,
609                                                          ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
610}
611
// Instantiates the four operand-shape variants of the 16-bit -> 64-bit
// quarter-tile outer product. In each {x_u, b} pair the high bit is the
// zn_u/zm_u parameter and the low bit b is 1 exactly when that operand uses
// the ZZ_h_mul_r_* class instead of ZPR16Mul2_*.
// Note the record suffix is spelled "HtoD" (lower-case "to"), unlike the
// "BToS"/"HToS" suffixes of the 32-bit multiclasses.
612multiclass sme_quarter_outer_product_i64<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
613  def _MZZ_HtoD   : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 0}, subtr,
614                                                        ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
615  def _M2ZZ_HtoD  : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr,
616                                                         ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
617  def _MZ2Z_HtoD  : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 1}, subtr,
618                                                         ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
619  def _M2Z2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 1}, subtr,
620                                                          ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
621}
622
623//===----------------------------------------------------------------------===//
624// SME Add Vector to Tile
625//===----------------------------------------------------------------------===//
626
// Encoding class for the predicated add-vector-to-tile instructions.
//   op - placed in Inst{22}.
//   V  - placed in Inst{16}.
// Operands: two predicates ($Pn, $Pm) and one vector ($Zn). $ZAda is both
// read and written (tied through Constraints). Inst{2-0} is not assigned
// here; the instantiating multiclass fills in the tile number.
627class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
628                                  ZPRRegOp zpr_ty, string mnemonic>
629    : I<(outs tile_ty:$ZAda),
630        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
631        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
632        "", []>, Sched<[]> {
633  bits<3> Pm;
634  bits<3> Pn;
635  bits<5> Zn;
636  let Inst{31-23} = 0b110000001;
637  let Inst{22}    = op;
638  let Inst{21-17} = 0b01000;
639  let Inst{16}    = V;
640  let Inst{15-13} = Pm;
641  let Inst{12-10} = Pn;
642  let Inst{9-5}   = Zn;
643  let Inst{4-3}   = 0b00;
644
645  let Constraints = "$ZAda = $_ZAda";
646}
647
// Pseudo for the add-vector-to-tile instructions. It has no outputs; the
// tile is passed as a plain i32 immediate ($tile) together with two
// predicates and one vector of register class zpr_ty.
//   za_flag - recorded in SMEMatrixType for this pseudo.
648class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
649    : Pseudo<(outs),
650             (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
651      Sched<[]> {
652  // Translated to the actual instructions in AArch64ISelLowering.cpp
653  let SMEMatrixType = za_flag;
654  let usesCustomInserter = 1;
655}
656
// Add-vector-to-tile for 32-bit tiles (TileOp32, ZPR32 vector).
//   NAME           - real instruction (op = 0); 2-bit tile number in
//                    Inst{1-0}, Inst{2} fixed to 0.
//   NAME_PSEUDO_S  - matching pseudo (SMEMatrixTileS). Note the suffix is
//                    "_PSEUDO_S", not plain "_PSEUDO".
//   a pattern selecting `op` (timm32_0_3 tile, nxv4i1 predicates, nxv4i32
//   vector) to NAME_PSEUDO_S.
657multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic, SDPatternOperator op> {
658    def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1> {
659  bits<2> ZAda;
660  let Inst{2}   = 0b0;
661  let Inst{1-0} = ZAda;
662  }
663
664  def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
665
666  def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
667            (nxv4i32 ZPR32:$zn)),
668          (!cast<Instruction>(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, $zn)>;
669}
670
// 64-bit element variant of "add vector to tile".
//   V        - opcode bit forwarded to sme_add_vector_to_tile_inst.
//   mnemonic - assembly mnemonic.
//   op       - SelectionDAG operator matched by the pattern below.
// Emits the real instruction (NAME), a pseudo (NAME_PSEUDO_D) and a pattern
// selecting the pseudo. Unlike the 32-bit variant, the pattern is guarded by
// the HasSMEI16I64 predicate and the tile number occupies all of Inst{2-0}.
671multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic, SDPatternOperator op> {
672    def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
  // 3-bit tile number.
673  bits<3> ZAda;
674  let Inst{2-0} = ZAda;
675  }
676
677  def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
678
  // Select the pseudo for `op` applied to a tile immediate (timm32_0_7),
  // two nxv2i1 predicates and an nxv2i64 vector.
679  let Predicates = [HasSMEI16I64] in {
680  def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
681                (nxv2i64 ZPR64:$zn)),
682            (!cast<Instruction>(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>;
683  }
684}
685
686//===----------------------------------------------------------------------===//
687// SME Contiguous Loads
688//===----------------------------------------------------------------------===//
689
// Common encoding for the SME contiguous (scalar + scalar) tile-slice loads.
//   Q, V, msz - opcode bits stored in Inst{24}, Inst{15} and Inst{23-22}.
//   outs, ins - operand lists supplied by the derived class.
//   mnemonic, argstr - assembly mnemonic and operand string.
// Inst{21} is fixed to 0 here (the store base class sets it to 1).
// Inst{3-0} is not assigned; derived records fill it with tile/slice bits.
690class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
691                         string mnemonic, string argstr>
692    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  // Encoded operand fields.
693  bits<5> Rm;
694  bits<2> Rv;
695  bits<3> Pg;
696  bits<5> Rn;
697  let Inst{31-25} = 0b1110000;
698  let Inst{24}    = Q;
699  let Inst{23-22} = msz;
700  let Inst{21}    = 0b0;
701  let Inst{20-16} = Rm;
702  let Inst{15}    = V;
703  let Inst{14-13} = Rv;
704  let Inst{12-10} = Pg;
705  let Inst{9-5}   = Rn;
706  let Inst{4}     = 0b0;
707
708  let mayLoad = 1;
709}
710
// Concrete operand layout for a contiguous tile-slice load.
//   Q, msz   - opcode bits forwarded to sme_mem_ld_ss_base.
//   mnemonic - assembly mnemonic.
//   tile_ty  - tile-vector operand class of the destination $ZAt.
//   is_col   - forwarded as the V bit of the base class.
//   imm_ty   - operand class of the slice index immediate $imm.
//   gpr_ty   - operand class of the offset register $Rm.
// The assembly form is "{ZAt[Rv, imm]}, Pg/z, [Rn, Rm]".
711class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
712                         MatrixTileVectorOperand tile_ty, bit is_col,
713                         Operand imm_ty, RegisterOperand gpr_ty>
714    : sme_mem_ld_ss_base<
715        Q, is_col, msz, (outs tile_ty:$ZAt),
716        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
717             gpr_ty:$Rm),
718        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
719
// Assembly aliases shared by the contiguous tile-slice loads and stores.
//   mnemonic  - full mnemonic including the element-size suffix.
//   inst      - instruction the aliases resolve to.
//   tile_ty, imm_ty, gpr_ty - operand classes of $ZAt, $imm and $Rm.
//   pg_suffix - text appended after $Pg (empty by default).
// Three aliases are produced:
//   1. the brace-less spelling with an explicit $Rm offset,
//   2. the braced spelling without an offset (XZR is used for $Rm),
//   3. the brace-less spelling without an offset (XZR is used for $Rm).
// The trailing integer is InstAlias's third argument; only alias 2 passes 1.
// NOTE(review): presumably this is the emit/print priority, making alias 2
// the only printed one -- confirm against the InstAlias definition.
720multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
721                                   MatrixTileVectorOperand tile_ty,
722                                   Operand imm_ty, RegisterOperand gpr_ty,
723                                   string pg_suffix=""> {
724  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
725                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
726  // Default XZR offset aliases
727  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
728                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
729  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
730                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
731}
732
// Instantiates sme_mem_ss_aliases_base once per element size.
//   mnemonic  - mnemonic stem; "b", "h", "w", "d" or "q" is appended.
//   inst      - name stem of the instruction records; the matching record is
//               looked up as inst # _B / _H / _S / _D / _Q.
//   is_col    - selects the vertical (TileVectorOpV*) operand classes when
//               set, the horizontal (TileVectorOpH*) ones otherwise.
//   pg_suffix - forwarded unchanged.
// Each size uses its own slice-index operand (sme_elm_idx0_15 down to
// sme_elm_idx0_0) and its own shifted offset register class.
733multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
734                              string pg_suffix=""> {
735  defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
736                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
737                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
738  defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
739                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
740                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
741  defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
742                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
743                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
744  defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
745                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
746                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
747  defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
748                                 !if(is_col, TileVectorOpV128, TileVectorOpH128),
749                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
750}
751
// Load flavour of the alias set: uses the "ld1" mnemonic stem and appends
// "/z" after the governing predicate.
//   inst   - name stem of the load instruction records.
//   is_col - vertical (1) or horizontal (0) tile slices.
752multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
753  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
754}
755
// Selection patterns for one contiguous tile-slice load.
//   Inst      - instruction to select (the callers in this file pass the
//               load pseudos).
//   Load      - SelectionDAG operator being matched.
//   tile_ty   - immediate operand class of the tile number.
//   offset_ty - immediate operand class of the slice offset.
//   addr      - ComplexPattern that splits an address into base + offset.
//   tileslice - ComplexPattern that splits the slice operand into an index
//               register and an immediate.
// Two patterns are emitted: a base-only form that uses XZR as the offset
// register, and a base + register form declared with AddedComplexity = 1.
756multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
757                                  Operand tile_ty, Operand offset_ty,
758                                  ComplexPattern addr,
759                                  ComplexPattern tileslice> {
760  // base, tileslice
761  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
762                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
763            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;
764
765  // reg + reg, tileslice
766  let AddedComplexity = 1 in {
767    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
768                    tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
769                                              offset_ty:$imm))),
770              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
771  }
772}
773
// Pseudo used when selecting the tile-slice load intrinsics. It has no
// outputs; the tile and the slice offset are plain i32 immediates, followed
// by the slice index register, predicate, base address and offset register.
// It is flagged as a load and as needing a custom inserter.
774class sme_load_pseudo
775    : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
776                          i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
777      Sched<[]> {
778  // Translated to the actual instructions in AArch64ISelLowering.cpp
779  let usesCustomInserter = 1;
780  let mayLoad = 1;
781}
782
// All contiguous tile-slice loads for one slice direction.
//   mnemonic - mnemonic stem; the element-size letter is appended per record.
//   is_col   - 1 for vertical slices (TileVectorOpV*), 0 for horizontal
//              slices (TileVectorOpH*).
// For each element size (_B, _H, _S, _D, _Q) this emits the real instruction,
// the assembly aliases, a pseudo and the patterns that select the pseudo.
783multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
  // Real instructions. Inst{3-0} is shared between the tile number (ZAt) and
  // the slice immediate (imm): wider elements use more bits for the tile and
  // fewer for the immediate. _B has only an immediate, _Q only a tile number.
784  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
785                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
786                              is_col, sme_elm_idx0_15, GPR64shifted8> {
787    bits<4> imm;
788    let Inst{3-0} = imm;
789  }
790  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
791                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
792                              is_col, sme_elm_idx0_7, GPR64shifted16> {
793    bits<1> ZAt;
794    bits<3> imm;
795    let Inst{3}   = ZAt;
796    let Inst{2-0} = imm;
797  }
798  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
799                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
800                              is_col, sme_elm_idx0_3, GPR64shifted32> {
801    bits<2> ZAt;
802    bits<2> imm;
803    let Inst{3-2} = ZAt;
804    let Inst{1-0} = imm;
805  }
806  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
807                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
808                              is_col, sme_elm_idx0_1, GPR64shifted64> {
809    bits<3> ZAt;
810    bits<1> imm;
811    let Inst{3-1} = ZAt;
812    let Inst{0}   = imm;
813  }
  // The 128-bit form is the only one that sets the Q bit.
814  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
815                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
816                              is_col, sme_elm_idx0_0, GPR64shifted128> {
817    bits<4> ZAt;
818    let Inst{3-0} = ZAt;
819  }
820
821  defm : sme_mem_ld_ss_aliases<NAME, is_col>;
822
823  // Pseudo instructions for lowering intrinsics, using immediates instead of
824  // tile registers.
825  def _PSEUDO_B : sme_load_pseudo;
826  def _PSEUDO_H : sme_load_pseudo;
827  def _PSEUDO_S : sme_load_pseudo;
828  def _PSEUDO_D : sme_load_pseudo;
829  def _PSEUDO_Q : sme_load_pseudo;
830
  // Patterns selecting the pseudos. The two immediate classes are the tile
  // number range followed by the slice offset range; their widths mirror the
  // ZAt/imm bit split of the real instructions above.
831  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
832                                !if(is_col, int_aarch64_sme_ld1b_vert,
833                                            int_aarch64_sme_ld1b_horiz),
834                                sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
835                                tileslice8>;
836  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
837                                !if(is_col, int_aarch64_sme_ld1h_vert,
838                                            int_aarch64_sme_ld1h_horiz),
839                                timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
840                                tileslice16>;
841  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
842                                !if(is_col, int_aarch64_sme_ld1w_vert,
843                                            int_aarch64_sme_ld1w_horiz),
844                                timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
845                                tileslice32>;
846  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
847                                !if(is_col, int_aarch64_sme_ld1d_vert,
848                                            int_aarch64_sme_ld1d_horiz),
849                                timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
850                                tileslice64>;
851  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
852                                !if(is_col, int_aarch64_sme_ld1q_vert,
853                                            int_aarch64_sme_ld1q_horiz),
854                                timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
855                                tileslice128>;
856}
857
// Top-level entry point for the contiguous loads: instantiates the
// horizontal (_H, is_col = 0) and vertical (_V, is_col = 1) families.
//   mnemonic - mnemonic stem passed to both families.
858multiclass sme_mem_ld_ss<string mnemonic> {
859  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
860  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
861}
862
863//===----------------------------------------------------------------------===//
864// SME Contiguous Stores
865//===----------------------------------------------------------------------===//
866
// Common encoding for the SME contiguous (scalar + scalar) tile-slice stores.
//   Q, V, msz - opcode bits stored in Inst{24}, Inst{15} and Inst{23-22}.
//   ins       - input operand list supplied by the derived class (stores
//               have no outputs).
//   mnemonic, argstr - assembly mnemonic and operand string.
// The layout matches the load base class except that Inst{21} is 1.
// Inst{3-0} is not assigned; derived records fill it with tile/slice bits.
867class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
868                         string mnemonic, string argstr>
869    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  // Encoded operand fields.
870  bits<5> Rm;
871  bits<2> Rv;
872  bits<3> Pg;
873  bits<5> Rn;
874  let Inst{31-25} = 0b1110000;
875  let Inst{24}    = Q;
876  let Inst{23-22} = msz;
877  let Inst{21}    = 0b1;
878  let Inst{20-16} = Rm;
879  let Inst{15}    = V;
880  let Inst{14-13} = Rv;
881  let Inst{12-10} = Pg;
882  let Inst{9-5}   = Rn;
883  let Inst{4}     = 0b0;
884
  // Besides mayStore, stores are also marked hasSideEffects (the load base
  // class sets only mayLoad).
885  let mayStore = 1;
886  let hasSideEffects = 1;
887}
888
// Concrete operand layout for a contiguous tile-slice store.
//   Q, msz   - opcode bits forwarded to sme_mem_st_ss_base.
//   mnemonic - assembly mnemonic.
//   tile_ty  - tile-vector operand class of the source $ZAt.
//   is_col   - forwarded as the V bit of the base class.
//   imm_ty   - operand class of the slice index immediate $imm.
//   gpr_ty   - operand class of the offset register $Rm.
// The tile is an input operand here. The assembly form is
// "{ZAt[Rv, imm]}, Pg, [Rn, Rm]" (no "/z" on the predicate).
889class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
890                         MatrixTileVectorOperand tile_ty, bit is_col,
891                         Operand imm_ty, RegisterOperand gpr_ty>
892    : sme_mem_st_ss_base<
893        Q, is_col, msz,
894        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
895             GPR64sp:$Rn, gpr_ty:$Rm),
896        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
897
// Store flavour of the alias set: uses the "st1" mnemonic stem and leaves
// the predicate suffix at its empty default.
//   inst   - name stem of the store instruction records.
//   is_col - vertical (1) or horizontal (0) tile slices.
898multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
899  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
900}
901
// Selection patterns for one contiguous tile-slice store.
//   Inst      - instruction to select.
//   Store     - SelectionDAG operator being matched.
//   offset_ty - immediate operand class of the slice offset.
//   imm2tile  - ComplexPattern applied to the tile operand; its result is
//               passed to Inst as $tile.
//   addr      - ComplexPattern that splits an address into base + offset.
//   tileslice - ComplexPattern that splits the slice operand into an index
//               register and an immediate.
// Two patterns are emitted: a base-only form that uses XZR as the offset
// register, and a base + register form declared with AddedComplexity = 1.
902multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
903                                  Operand offset_ty,
904                                  ComplexPattern imm2tile,
905                                  ComplexPattern addr,
906                                  ComplexPattern tileslice> {
907  // base, tileslice
908  def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
909                   (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
910            (Inst $tile, $idx, $imm, $pg, $base, XZR)>;
911
912  // reg + reg, tileslice
913  let AddedComplexity = 1 in {
914    def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
915                     (imm2tile untyped:$tile),
916                     (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
917              (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
918  }
919}
920
// All contiguous tile-slice stores for one slice direction.
//   mnemonic - mnemonic stem; the element-size letter is appended per record.
//   is_col   - 1 for vertical slices (TileVectorOpV*), 0 for horizontal
//              slices (TileVectorOpH*).
// For each element size (_B, _H, _S, _D, _Q) this emits the real instruction,
// the assembly aliases and the selection patterns. Unlike the loads, no
// pseudos are defined: the patterns select the real instructions directly.
921multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  // Real instructions. Inst{3-0} is shared between the tile number (ZAt) and
  // the slice immediate (imm): wider elements use more bits for the tile and
  // fewer for the immediate. _B has only an immediate, _Q only a tile number.
922  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
923                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
924                              is_col, sme_elm_idx0_15, GPR64shifted8> {
925    bits<4> imm;
926    let Inst{3-0} = imm;
927  }
928  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
929                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
930                              is_col, sme_elm_idx0_7, GPR64shifted16> {
931    bits<1> ZAt;
932    bits<3> imm;
933    let Inst{3}   = ZAt;
934    let Inst{2-0} = imm;
935  }
936  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
937                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
938                              is_col, sme_elm_idx0_3, GPR64shifted32> {
939    bits<2> ZAt;
940    bits<2> imm;
941    let Inst{3-2} = ZAt;
942    let Inst{1-0} = imm;
943  }
944  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
945                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
946                              is_col, sme_elm_idx0_1, GPR64shifted64> {
947    bits<3> ZAt;
948    bits<1> imm;
949    let Inst{3-1} = ZAt;
950    let Inst{0}   = imm;
951  }
  // The 128-bit form is the only one that sets the Q bit.
952  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
953                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
954                              is_col, sme_elm_idx0_0, GPR64shifted128> {
955    bits<4> ZAt;
956    let Inst{3-0} = ZAt;
957  }
958
959  defm : sme_mem_st_ss_aliases<NAME, is_col>;
960
  // Patterns selecting the real instructions. Each passes the slice offset
  // immediate class, the imm_to_tile* ComplexPattern for that element size,
  // the address ComplexPattern and the tile-slice ComplexPattern.
961  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
962                                !if(is_col, int_aarch64_sme_st1b_vert,
963                                            int_aarch64_sme_st1b_horiz),
964                                timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
965                                tileslice8>;
966  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
967                                !if(is_col, int_aarch64_sme_st1h_vert,
968                                            int_aarch64_sme_st1h_horiz),
969                                timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
970                                tileslice16>;
971  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
972                                !if(is_col, int_aarch64_sme_st1w_vert,
973                                            int_aarch64_sme_st1w_horiz),
974                                timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
975                                tileslice32>;
976  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
977                                !if(is_col, int_aarch64_sme_st1d_vert,
978                                            int_aarch64_sme_st1d_horiz),
979                                timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
980                                tileslice64>;
981  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
982                                !if(is_col, int_aarch64_sme_st1q_vert,
983                                            int_aarch64_sme_st1q_horiz),
984                                sme_elm_idx0_0, imm_to_tile128,
985                                am_sve_regreg_lsl4, tileslice128>;
986}
987
// Top-level entry point for the contiguous stores: instantiates the
// horizontal (_H, is_col = 0) and vertical (_V, is_col = 1) families.
//   mnemonic - mnemonic stem passed to both families.
988multiclass sme_mem_st_ss<string mnemonic> {
989  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
990  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
991}
992
993//===----------------------------------------------------------------------===//
994// SME Save and Restore Array
995//===----------------------------------------------------------------------===//
996
// Common encoding for the ZA array save (spill) and restore (fill)
// instructions.
//   isStore   - stored in Inst{21}; the spill class passes 1, the fill
//               class passes 0.
//   outs, ins - operand lists supplied by the derived class.
//   opcodestr - assembly mnemonic.
// The assembly string names both $imm4 and $offset, but only imm4 has an
// encoding field in this class (Inst{3-0}); no field named offset is
// declared here.
997class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
998    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
999        []>,
1000      Sched<[]> {
  // Encoded operand fields.
1001  bits<2> Rv;
1002  bits<5> Rn;
1003  bits<4> imm4;
1004  let Inst{31-22} = 0b1110000100;
1005  let Inst{21}    = isStore;
1006  let Inst{20-15} = 0b000000;
1007  let Inst{14-13} = Rv;
1008  let Inst{12-10} = 0b000;
1009  let Inst{9-5}   = Rn;
1010  let Inst{4}     = 0b0;
1011  let Inst{3-0}   = imm4;
1012}
1013
// ZA array spill (store) instruction class: isStore = 1, no outputs, and
// the ZA array ($ZAt) is an input operand. Marked mayStore.
//   opcodestr - assembly mnemonic.
1014let mayStore = 1 in
1015class sme_spill_inst<string opcodestr>
1016    : sme_spill_fill_base<0b1, (outs),
1017                          (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
1018                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
1019                               imm32_0_15:$offset),
1020                          opcodestr>;
// ZA array fill (load) instruction class: isStore = 0 and the ZA array
// ($ZAt) is the output operand. Marked mayLoad.
//   opcodestr - assembly mnemonic.
1021let mayLoad = 1 in
1022class sme_fill_inst<string opcodestr>
1023    : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
1024                          (ins MatrixIndexGPR32Op12_15:$Rv,
1025                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
1026                               imm32_0_15:$offset),
1027                          opcodestr>;
// Defines the ZA spill instruction together with its alias and pattern.
//   opcodestr - assembly mnemonic.
// Records emitted:
//   * NAME: the real instruction.
//   * an alias for the spelling without an address offset, which supplies 0
//     for $offset.
//   * a pattern mapping AArch64SMEStr(slice, base, imm) to NAME operating on
//     ZA. The same $imm value is used for both the slice immediate and the
//     address offset operands.
1028multiclass sme_spill<string opcodestr> {
1029  def NAME : sme_spill_inst<opcodestr>;
1030  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
1031                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
1032                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
1033
1034  def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)),
1035          (!cast<Instruction>(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>;
1036}
1037
// Defines the ZA fill instruction together with its alias, pseudo and
// pattern.
//   opcodestr - assembly mnemonic.
// Records emitted:
//   * NAME: the real instruction.
//   * an alias for the spelling without an address offset, which supplies 0
//     for $offset.
//   * NAME_PSEUDO: a pseudo with no outputs taking the slice index register,
//     slice immediate and base address; flagged mayLoad and
//     usesCustomInserter.
//   * a pattern mapping AArch64SMELdr(slice, base, imm) to the pseudo. Note
//     the operand order changes: the pseudo takes (slice, imm, base).
1038multiclass sme_fill<string opcodestr> {
1039  def NAME : sme_fill_inst<opcodestr>;
1040  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
1041                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
1042                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
1043  def NAME # _PSEUDO
1044      : Pseudo<(outs),
1045               (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4,
1046                    GPR64sp:$base), []>,
1047        Sched<[]> {
1048    // Translated to actual instruction in AArch64ISelLowering.cpp
1049    let usesCustomInserter = 1;
1050    let mayLoad = 1;
1051  }
1052  def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm),
1053          (!cast<Instruction>(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>;
1054}
1055
1056//===----------------------------------------------------------------------===//
1057// Move instructions
1058//===----------------------------------------------------------------------===//
1059
// Common encoding for the vector-to-tile-slice move instructions.
//   Q, V, sz  - opcode bits stored in Inst{16}, Inst{15} and Inst{23-22}.
//   outs, ins - operand lists supplied by the derived class.
//   mnemonic, argstr - assembly mnemonic and operand string.
// Inst{21-17} is fixed to 0b00000 here (the tile-to-vector base class uses
// 0b00001). Inst{3-0} is not assigned; derived records fill it with
// tile/slice bits.
1060class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
1061                              string mnemonic, string argstr>
1062    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  // Encoded operand fields.
1063  bits<2> Rv;
1064  bits<3> Pg;
1065  bits<5> Zn;
1066  let Inst{31-24} = 0b11000000;
1067  let Inst{23-22} = sz;
1068  let Inst{21-17} = 0b00000;
1069  let Inst{16}    = Q;
1070  let Inst{15}    = V;
1071  let Inst{14-13} = Rv;
1072  let Inst{12-10} = Pg;
1073  let Inst{9-5}   = Zn;
1074  let Inst{4}     = 0b0;
1075}
1076
// Concrete operand layout for a vector-to-tile-slice move.
//   Q, sz    - opcode bits forwarded to sme_vector_to_tile_base.
//   tile_ty  - tile-vector operand class of the destination.
//   is_col   - forwarded as the V bit of the base class.
//   imm_ty   - operand class of the slice index immediate $imm.
//   zpr_ty   - operand class of the source vector $Zn.
//   mnemonic - assembly mnemonic.
// The tile is read-modify-write: the $_ZAd input is tied to the $ZAd output.
// The assembly form is "ZAd[Rv, imm], Pg/m, Zn".
1077class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
1078                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
1079                              string mnemonic>
1080    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
1081        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
1082        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{
1083
1084  let Constraints = "$ZAd = $_ZAd";
1085}
1086
1087
// Adds a "mov" assembly alias for a vector-to-tile-slice move; the operands
// are passed through unchanged.
//   inst    - instruction the alias resolves to.
//   tile_ty, zpr_ty, imm_ty - operand classes of $ZAd, $Zn and $imm.
1088multiclass sme_vector_to_tile_aliases<Instruction inst,
1089                                      MatrixTileVectorOperand tile_ty,
1090                                      ZPRRegOp zpr_ty, Operand imm_ty> {
1091  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
1092                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
1093}
1094
// Selection pattern for one vector-to-tile-slice write.
//   inst      - instruction to select (the callers in this file pass the
//               insert pseudos).
//   zpr_vt    - value type of the source vector.
//   ppr_vt    - value type of the governing predicate.
//   imm_ty    - immediate operand class of the tile number.
//   offset_ty - immediate operand class of the slice offset.
//   op        - SelectionDAG operator being matched.
//   tileslice - ComplexPattern that splits the slice operand into an index
//               register and an immediate.
// The matched operand order is (tile, slice, predicate, vector); the
// selected instruction receives (tile, index, imm, predicate, vector).
1095multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
1096                                       ValueType ppr_vt, Operand imm_ty,
1097                                       Operand offset_ty,
1098                                       SDPatternOperator op,
1099                                       ComplexPattern tileslice> {
1100  def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
1101                                              offset_ty:$imm)),
1102                (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
1103            (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
1104}
1105
// Pseudo used when selecting the vector-to-tile write intrinsics. It has no
// outputs; the tile and slice offset are plain i32 immediates, followed by
// the slice index register, predicate and source vector (any ZPR type).
//   za_flag - value recorded in the SMEMatrixType field.
1106class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
1107    : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
1108                          i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
1109      Sched<[]> {
1110  // Translated to the actual instructions in AArch64ISelLowering.cpp
1111  let SMEMatrixType = za_flag;
1112  let usesCustomInserter = 1;
1113}
1114
// All vector-to-tile-slice moves for one slice direction.
//   mnemonic - assembly mnemonic shared by every element size.
//   is_col   - 1 for vertical slices (TileVectorOpV*), 0 for horizontal
//              slices (TileVectorOpH*).
// For each element size (_B, _H, _S, _D, _Q) this emits the real instruction,
// a pseudo, a "mov" alias and the patterns that select the pseudo. Real
// instructions and pseudos carry SMEPseudo2Instr with the same key (flag 1
// on the real instruction, 0 on the pseudo).
1115multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
  // Real instructions. Inst{3-0} is shared between the tile number (ZAd) and
  // the slice immediate (imm): wider elements use more bits for the tile and
  // fewer for the immediate. _B has only an immediate.
1116  def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
1117                                                          TileVectorOpH8),
1118                                   is_col, sme_elm_idx0_15, ZPR8, mnemonic>,
1119                                   SMEPseudo2Instr<NAME # _B, 1> {
1120    bits<4> imm;
1121    let Inst{3-0} = imm;
1122  }
1123  def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
1124                                                          TileVectorOpH16),
1125                                   is_col, sme_elm_idx0_7, ZPR16, mnemonic>,
1126                                   SMEPseudo2Instr<NAME # _H, 1> {
1127    bits<1> ZAd;
1128    bits<3> imm;
1129    let Inst{3}   = ZAd;
1130    let Inst{2-0} = imm;
1131  }
1132  def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
1133                                                          TileVectorOpH32),
1134                                   is_col, sme_elm_idx0_3, ZPR32, mnemonic>,
1135                                   SMEPseudo2Instr<NAME # _S, 1> {
1136    bits<2> ZAd;
1137    bits<2> imm;
1138    let Inst{3-2} = ZAd;
1139    let Inst{1-0} = imm;
1140  }
1141  def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
1142                                                          TileVectorOpH64),
1143                                   is_col, sme_elm_idx0_1, ZPR64, mnemonic>,
1144                                   SMEPseudo2Instr<NAME # _D, 1> {
1145    bits<3> ZAd;
1146    bits<1> imm;
1147    let Inst{3-1} = ZAd;
1148    let Inst{0}   = imm;
1149  }
  // The 128-bit form is the only one that sets the Q bit; all of Inst{3-0}
  // holds the tile number.
  // NOTE(review): `imm` is declared below but no Inst bit in this def refers
  // to it; the _Q records of the load/store multiclasses declare no such
  // field. Looks unused -- confirm before removing.
1150  def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
1151                                                          TileVectorOpH128),
1152                                   is_col, sme_elm_idx0_0, ZPR128, mnemonic>,
1153                                   SMEPseudo2Instr<NAME # _Q, 1> {
1154    bits<4> ZAd;
1155    bits<1> imm;
1156    let Inst{3-0} = ZAd;
1157  }
1158
1159  // Pseudo instructions for lowering intrinsics, using immediates instead of
1160  // tile registers.
1161  def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>, SMEPseudo2Instr<NAME # _B, 0>;
1162  def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>, SMEPseudo2Instr<NAME # _H, 0>;
1163  def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>, SMEPseudo2Instr<NAME # _S, 0>;
1164  def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>, SMEPseudo2Instr<NAME # _D, 0>;
1165  def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>, SMEPseudo2Instr<NAME # _Q, 0>;
1166
  // "mov" aliases for the real instructions, one per element size.
1167  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
1168                                    !if(is_col, TileVectorOpV8,
1169                                                TileVectorOpH8),
1170                                    ZPR8, sme_elm_idx0_15>;
1171  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
1172                                    !if(is_col, TileVectorOpV16,
1173                                                TileVectorOpH16),
1174                                    ZPR16, sme_elm_idx0_7>;
1175  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
1176                                    !if(is_col, TileVectorOpV32,
1177                                                TileVectorOpH32),
1178                                    ZPR32, sme_elm_idx0_3>;
1179  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
1180                                    !if(is_col, TileVectorOpV64,
1181                                                TileVectorOpH64),
1182                                    ZPR64, sme_elm_idx0_1>;
1183  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
1184                                    !if(is_col, TileVectorOpV128,
1185                                                TileVectorOpH128),
1186                                    ZPR128, sme_elm_idx0_0>;
1187
  // Operator for the B/H/S/D writes, chosen by slice direction.
1188  defvar op = !if(is_col, int_aarch64_sme_write_vert,
1189                          int_aarch64_sme_write_horiz);
1190
  // Patterns for the B/H/S/D pseudos: one per supported vector value type.
  // The two immediate classes are the tile number range followed by the
  // slice offset range.
1191  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
1192                                     nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15,
1193                                     op, tileslice8>;
1194  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
1195                                     nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
1196                                     op, tileslice16>;
1197  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
1198                                     nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
1199                                     op, tileslice16>;
1200  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
1201                                     nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
1202                                     op, tileslice16>;
1203  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
1204                                     nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
1205                                     op, tileslice32>;
1206  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
1207                                     nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
1208                                     op, tileslice32>;
1209  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
1210                                     nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
1211                                     op, tileslice64>;
1212  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
1213                                     nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
1214                                     op, tileslice64>;
1215
  // The 128-bit writes use a separate operator, again chosen by direction.
1216  defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
1217                           int_aarch64_sme_writeq_horiz);
1218
  // Patterns for the Q pseudo: every vector value type listed above maps to
  // the same pseudo, each paired with the predicate type of matching lane
  // count.
1219  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1220                                     nxv16i8, nxv16i1, sme_elm_idx0_15,
1221                                     sme_elm_idx0_0, opq, tileslice128>;
1222  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1223                                     nxv8i16, nxv8i1, sme_elm_idx0_15,
1224                                     sme_elm_idx0_0, opq, tileslice128>;
1225  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1226                                     nxv8f16, nxv8i1, sme_elm_idx0_15,
1227                                     sme_elm_idx0_0, opq, tileslice128>;
1228  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1229                                     nxv8bf16, nxv8i1, sme_elm_idx0_15,
1230                                     sme_elm_idx0_0, opq, tileslice128>;
1231  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1232                                     nxv4i32, nxv4i1, sme_elm_idx0_15,
1233                                     sme_elm_idx0_0, opq, tileslice128>;
1234  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1235                                     nxv4f32, nxv4i1, sme_elm_idx0_15,
1236                                     sme_elm_idx0_0, opq, tileslice128>;
1237  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1238                                     nxv2i64, nxv2i1, sme_elm_idx0_15,
1239                                     sme_elm_idx0_0, opq, tileslice128>;
1240  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1241                                     nxv2f64, nxv2i1, sme_elm_idx0_15,
1242                                     sme_elm_idx0_0, opq, tileslice128>;
1243}
1244
// Top-level entry point for the vector-to-tile moves: instantiates the
// horizontal (_H, is_col = 0) and vertical (_V, is_col = 1) families.
//   mnemonic - assembly mnemonic passed to both families.
1245multiclass sme_vector_to_tile<string mnemonic> {
1246  defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
1247  defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
1248}
1249
// Common encoding for the tile-slice-to-vector move instructions.
//   Q, V, sz  - opcode bits stored in Inst{16}, Inst{15} and Inst{23-22}.
//   outs, ins - operand lists supplied by the derived class.
//   mnemonic, argstr - assembly mnemonic and operand string.
// Inst{21-17} is fixed to 0b00001 here (the vector-to-tile base class uses
// 0b00000). The vector register sits in Inst{4-0}. Inst{8-5} is not
// assigned; derived records fill it with tile/slice bits.
1250class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
1251                              string mnemonic, string argstr>
1252    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  // Encoded operand fields.
1253  bits<2> Rv;
1254  bits<3> Pg;
1255  bits<5> Zd;
1256  let Inst{31-24} = 0b11000000;
1257  let Inst{23-22} = sz;
1258  let Inst{21-17} = 0b00001;
1259  let Inst{16}    = Q;
1260  let Inst{15}    = V;
1261  let Inst{14-13} = Rv;
1262  let Inst{12-10} = Pg;
1263  let Inst{9}     = 0b0;
1264  let Inst{4-0}   = Zd;
1265}
1266
// Concrete operand layout for a tile-slice-to-vector move.
//   Q, sz    - opcode bits forwarded to sme_tile_to_vector_base.
//   zpr_ty   - operand class of the destination vector.
//   tile_ty  - tile-vector operand class of the source $ZAn.
//   is_col   - forwarded as the V bit of the base class.
//   imm_ty   - operand class of the slice index immediate $imm.
//   mnemonic - assembly mnemonic.
// The destination vector is read-modify-write: the $_Zd input is tied to
// the $Zd output. The assembly form is "Zd, Pg/m, ZAn[Rv, imm]".
1267class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
1268                              MatrixTileVectorOperand tile_ty,
1269                              bit is_col, Operand imm_ty, string mnemonic>
1270    : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
1271        (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
1272        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
1273
1274  let Constraints = "$Zd = $_Zd";
1275}
1276
// Adds a "mov" assembly alias for a tile-slice-to-vector move; the operands
// are passed through unchanged.
//   inst    - instruction the alias resolves to.
//   zpr_ty, tile_ty, imm_ty - operand classes of $Zd, $ZAn and $imm.
1277multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
1278                                      MatrixTileVectorOperand tile_ty,
1279                                      Operand imm_ty > {
1280  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
1281                  (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
1282}
1283
// Selection patterns for one tile-slice-to-vector read.
//   inst      - instruction to select.
//   zpr_vt    - value type of the result and pass-through vector.
//   ppr_vt    - value type of the governing predicate.
//   offset_ty - immediate operand class of the slice offset.
//   imm2tile  - ComplexPattern applied to the tile operand; its result is
//               passed to inst as $tile.
//   tileslice - ComplexPattern that splits the slice operand into an index
//               register and an immediate.
//   op        - SelectionDAG operator being matched.
// Two patterns are emitted: one that takes the slice index register as-is
// and uses 0 for the immediate, and one declared with AddedComplexity = 1
// that goes through `tileslice` to obtain a register + immediate pair.
1284multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
1285                                       ValueType ppr_vt, Operand offset_ty,
1286                                       ComplexPattern imm2tile,
1287                                       ComplexPattern tileslice,
1288                                       SDPatternOperator op> {
1289  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
1290                        (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)),
1291            (inst $passthru, $pg, $tile, $idx, 0)>;
1292  let AddedComplexity = 1 in {
1293    def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
1294                          (imm2tile untyped:$tile),
1295                          (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
1296                                          offset_ty:$imm)))),
1297              (inst $passthru, $pg, $tile, $idx, $imm)>;
1298  }
1299}
1300
// Tile-slice-to-vector instructions for one slice direction.
//   mnemonic - assembly mnemonic shared by every element size
//   is_col   - 1 selects the TileVectorOpV* operands and the *_vert
//              intrinsics, 0 the TileVectorOpH* operands and the *_horiz ones
// For each element size (_B, _H, _S, _D, _Q) this defines the instruction,
// its "mov" alias and its selection patterns. Inst{8-5} is split between the
// tile number (ZAn) and the slice offset (imm); the split differs per size.
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
  // Intrinsics matched below: `op` for the B/H/S/D forms, `opq` for Q.
  defvar op  = !if(is_col, int_aarch64_sme_read_vert,
                           int_aarch64_sme_read_horiz);
  defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
                           int_aarch64_sme_readq_horiz);

  // ---- 8-bit elements: all of Inst{8-5} is the slice offset. ----
  def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8,
               !if(is_col, TileVectorOpV8, TileVectorOpH8),
               is_col, sme_elm_idx0_15, mnemonic> {
    bits<4> imm;
    let Inst{8-5} = imm;
  }
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
             !if(is_col, TileVectorOpV8, TileVectorOpH8), sme_elm_idx0_15>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
             nxv16i8, nxv16i1, sme_elm_idx0_15,
             imm_to_tile8, tileslice8, op>;

  // ---- 16-bit elements: 1 tile bit, 3 offset bits. ----
  def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16,
               !if(is_col, TileVectorOpV16, TileVectorOpH16),
               is_col, sme_elm_idx0_7, mnemonic> {
    bits<1> ZAn;
    bits<3> imm;
    let Inst{8}   = ZAn;
    let Inst{7-5} = imm;
  }
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
             !if(is_col, TileVectorOpV16, TileVectorOpH16), sme_elm_idx0_7>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
             nxv8i16, nxv8i1, sme_elm_idx0_7,
             imm_to_tile16, tileslice16, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
             nxv8f16, nxv8i1, sme_elm_idx0_7,
             imm_to_tile16, tileslice16, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
             nxv8bf16, nxv8i1, sme_elm_idx0_7,
             imm_to_tile16, tileslice16, op>;

  // ---- 32-bit elements: 2 tile bits, 2 offset bits. ----
  def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32,
               !if(is_col, TileVectorOpV32, TileVectorOpH32),
               is_col, sme_elm_idx0_3, mnemonic> {
    bits<2> ZAn;
    bits<2> imm;
    let Inst{8-7} = ZAn;
    let Inst{6-5} = imm;
  }
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
             !if(is_col, TileVectorOpV32, TileVectorOpH32), sme_elm_idx0_3>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
             nxv4i32, nxv4i1, sme_elm_idx0_3,
             imm_to_tile32, tileslice32, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
             nxv4f32, nxv4i1, sme_elm_idx0_3,
             imm_to_tile32, tileslice32, op>;

  // ---- 64-bit elements: 3 tile bits, 1 offset bit. ----
  def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64,
               !if(is_col, TileVectorOpV64, TileVectorOpH64),
               is_col, sme_elm_idx0_1, mnemonic> {
    bits<3> ZAn;
    bits<1> imm;
    let Inst{8-6} = ZAn;
    let Inst{5}   = imm;
  }
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
             !if(is_col, TileVectorOpV64, TileVectorOpH64), sme_elm_idx0_1>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
             nxv2i64, nxv2i1, sme_elm_idx0_1,
             imm_to_tile64, tileslice64, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
             nxv2f64, nxv2i1, sme_elm_idx0_1,
             imm_to_tile64, tileslice64, op>;

  // ---- 128-bit elements: all of Inst{8-5} is the tile number; the only
  // offset operand class is sme_elm_idx0_0. Every vector type is matched
  // through `opq`. ----
  def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128,
               !if(is_col, TileVectorOpV128, TileVectorOpH128),
               is_col, sme_elm_idx0_0, mnemonic> {
    bits<4> ZAn;
    let Inst{8-5} = ZAn;
  }
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
             !if(is_col, TileVectorOpV128, TileVectorOpH128), sme_elm_idx0_0>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv16i8, nxv16i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv8i16, nxv8i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv8f16, nxv8i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv8bf16, nxv8i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv4i32, nxv4i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv4f32, nxv4i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv2i64, nxv2i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
             nxv2f64, nxv2i1, sme_elm_idx0_0,
             imm_to_tile128, tileslice128, opq>;
}
1411
// Instantiates sme_tile_to_vector_v twice under the same mnemonic: the _H
// records with is_col = 0 and the _V records with is_col = 1.
multiclass sme_tile_to_vector<string mnemonic> {
  defm _H : sme_tile_to_vector_v<mnemonic,
                                 /*is_col=*/0b0>;
  defm _V : sme_tile_to_vector_v<mnemonic,
                                 /*is_col=*/0b1>;
}
1416
1417//===----------------------------------------------------------------------===//
1418// SME Zero
1419//===----------------------------------------------------------------------===//
1420
// Instruction taking a single MatrixTileList operand ($imm), encoded as an
// 8-bit mask in Inst{7-0}.
//
// NOTE: The operand lists are not strictly accurate: the tile registers that
// get zeroed are really outputs, but (outs) is empty. A custom inserter fixes
// this up by marking the affected registers as implicitly defined.
class sme_zero_inst<string mnemonic>
    : I<(outs), (ins MatrixTileList:$imm),
        mnemonic, "\t$imm", "", []>,
      Sched<[]> {
  bits<8> imm;

  // Tile-list mask.
  let Inst{7-0}   = imm;
  // Fixed opcode bits Inst{31-8}, written one byte at a time.
  let Inst{15-8}  = 0b00000000;
  let Inst{23-16} = 0b00001000;
  let Inst{31-24} = 0b11000000;
}
1431
// Defines the tile-list instruction itself, a pseudo used for intrinsic
// selection, and "zero {...}" aliases for named tile lists.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  // Pseudo carrying the tile list as a plain i32 immediate. It is translated
  // to the actual instructions in AArch64ISelLowering.cpp.
  def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
      Sched<[]> {
    let usesCustomInserter = 1;
  }

  // The intrinsic selects the pseudo, not the real instruction.
  def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;

  // Whole array.
  def : InstAlias<"zero\t\\{za\\}",
                  (!cast<Instruction>(NAME) 0b11111111), 1>;

  // Single .h tiles.
  def : InstAlias<"zero\t\\{za0.h\\}",
                  (!cast<Instruction>(NAME) 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}",
                  (!cast<Instruction>(NAME) 0b10101010), 1>;

  // Single .s tiles.
  def : InstAlias<"zero\t\\{za0.s\\}",
                  (!cast<Instruction>(NAME) 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}",
                  (!cast<Instruction>(NAME) 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10001000), 1>;

  // Pairs of .s tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}",
                  (!cast<Instruction>(NAME) 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11001100), 1>;

  // Triples of .s tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11101110), 1>;
}
1460
1461//===----------------------------------------------------------------------===//
1462// SVE2 Instructions
1463//===----------------------------------------------------------------------===//
1464
// Predicated instruction on ZPR128 operands, printed as
// "<asm> $Zd, $Pg/m, $Zn". The destination $Zd is tied to the extra input
// $_Zd.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd),
        (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  let Constraints = "$Zd = $_Zd";

  // Encoded operands.
  bits<5> Zd;
  bits<5> Zn;
  bits<3> Pg;

  // Instruction word, least-significant field first.
  let Inst{4-0}   = Zd;
  let Inst{9-5}   = Zn;
  let Inst{12-10} = Pg;
  let Inst{21-13} = 0b101110100;
  let Inst{23-22} = 0b00; // size
  let Inst{31-24} = 0b00000101;
}
1481
// Defines the instruction under NAME and selects `op` to it for every
// integer and floating-point vector type listed below, each paired with the
// predicate type of matching element count.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
  def NAME : sve2_int_perm_revd<asm>;

  // Integer element types.
  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64,
                              !cast<Instruction>(NAME)>;

  // Floating-point element types.
  def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64,
                              !cast<Instruction>(NAME)>;
}
1496
// Destructive three-register instruction, printed as "<asm> $Zd, $Zn, $Zm".
//   sz     - written to Inst{23-22}
//   U      - written to Inst{10}
//   zpr_ty - register operand class of all three vectors; also supplies
//            ElementSize
// $Zd is tied to the extra input $_Zd.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd),
        (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  let Constraints         = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize         = zpr_ty.ElementSize;

  // Encoded operands.
  bits<5> Zd;
  bits<5> Zn;
  bits<5> Zm;

  // Instruction word, least-significant field first.
  let Inst{4-0}   = Zd;
  let Inst{9-5}   = Zn;
  let Inst{10}    = U;
  let Inst{15-11} = 0b11000;
  let Inst{20-16} = Zm;
  let Inst{21}    = 0b0;
  let Inst{23-22} = sz;
  let Inst{31-24} = 0b01000100;
}
1517
// One instruction per element size (_B/_H/_S/_D, sz = 0b00..0b11) plus a
// three-operand selection pattern from `op` for the matching integer vector
// type.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8,
                     !cast<Instruction>(NAME # _B)>;
  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16,
                     !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32,
                     !cast<Instruction>(NAME # _S)>;
  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64,
                     !cast<Instruction>(NAME # _D)>;
}
1529
// Predicate instruction printed as "<asm> $Pd, $Pn, $Pm[$Rv, $imm]".
//   ppr_ty - register operand class of the indexed predicate $Pm
//   imm_ty - operand class of the immediate index offset
// Inst{23-22} and Inst{20-18} are not assigned here; the element-size
// specific defs fill them in with the immediate and size bits.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRorPNRAny:$Pd),
        (ins PPRorPNRAny:$Pn, ppr_ty:$Pm,
             MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  // Encoded operands.
  bits<4> Pd;
  bits<4> Pm;
  bits<4> Pn;
  bits<2> Rv;

  // Instruction word, least-significant field first.
  let Inst{3-0}   = Pd;
  let Inst{4}     = 0b0;
  let Inst{8-5}   = Pm;
  let Inst{9}     = 0b0;
  let Inst{13-10} = Pn;
  let Inst{15-14} = 0b01;
  let Inst{17-16} = Rv;
  let Inst{21}    = 0b1;
  let Inst{31-24} = 0b00100101;
}
1549
// One instruction per element size of the indexed predicate $Pm, each with
// two selection patterns from `op`:
//   - index given as a bare register (immediate offset 0), and
//   - index split by the matching tileslice ComplexPattern into register +
//     immediate, preferred through AddedComplexity = 1.
// The result and $Pn are always nxv16i1; only the type of $Pm varies.
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
  // ---- 8-bit elements: 4-bit immediate in Inst{23-22} and Inst{20-19}. ----
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
                         (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx,
                                          sme_elm_idx0_15:$imm)))),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;

  // ---- 16-bit elements: 3-bit immediate in Inst{23-22} and Inst{20}. ----
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
                         (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx,
                                           sme_elm_idx0_7:$imm)))),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;

  // ---- 32-bit elements: 2-bit immediate in Inst{23-22}. ----
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
                         (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx,
                                           sme_elm_idx0_3:$imm)))),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;

  // ---- 64-bit elements: 1-bit immediate in Inst{23}. ----
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
                         (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx,
                                           sme_elm_idx0_1:$imm)))),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
}
1603
1604//===----------------------------------------------------------------------===//
1605// SME2 Instructions
1606//===----------------------------------------------------------------------===//
1607
1608//===----------------------------------------------------------------------===//
1609// SME2 single-multi ternary int/fp, two/four registers
1610
// ZA-array instruction with a multi-vector $Zn and a single vector $Zm,
// printed as "<mnemonic> $ZAd[$Rv, $imm3, vgx2|vgx4], $Zn, $Zm".
//   op - 7 opcode bits scattered over the encoding: op{6} -> Inst{22} (sz),
//        op{5} -> Inst{20} (also picks "vgx4" over "vgx2" in the asm string),
//        op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}
// $ZAd is tied to the extra input $_ZAd.
class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op,
                                         MatrixOperand matrix_ty,
                                         RegisterOperand multi_vector_ty,
                                         ZPRRegOp zpr_ty,
                                         string mnemonic>
   : I<(outs matrix_ty:$ZAd),
       (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
            sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
       mnemonic,
       "\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm",
       "", []>,
     Sched<[]> {
  let Constraints = "$ZAd = $_ZAd";

  // Encoded operands.
  bits<3> imm3;
  bits<5> Zn;
  bits<2> Rv;
  bits<4> Zm;

  // Instruction word, least-significant field first.
  let Inst{2-0}   = imm3;
  let Inst{4-3}   = op{1-0};
  let Inst{9-5}   = Zn;
  let Inst{12-10} = op{4-2};
  let Inst{14-13} = Rv;
  let Inst{15}    = 0b0;
  let Inst{19-16} = Zm;
  let Inst{20}    = op{5}; // vgx4
  let Inst{21}    = 0b1;
  let Inst{22}    = op{6}; // sz
  let Inst{31-23} = 0b110000010;
}
1638
// Instruction-only variant (no pseudo, no selection pattern): defines the
// instruction under NAME and an alias that omits the vgx2/vgx4 suffix.
multiclass sme2_dot_mla_add_sub_array_vg24_single<string mnemonic, bits<7> op,
                                              MatrixOperand matrix_ty,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp zpr_ty>{
  def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                    multi_vector_ty, zpr_ty,
                                                    mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  // Emit priority 0 is passed for this suffix-less spelling.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            multi_vector_ty:$Zn,
                                            zpr_ty:$Zm),
                  0>;
}
1648
// Instruction, pseudo, VG2 multi/single selection pattern for `intrinsic`,
// and a suffix-less assembly alias.
//   vty - value type handed to the selection pattern
multiclass sme2_dot_mla_add_sub_array_vg2_single<string mnemonic, bits<7> op,
                                              MatrixOperand matrix_ty,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
  def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                    multi_vector_ty, zpr_ty,
                                                    mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7,
                                                      multi_vector_ty, zpr_ty,
                                                      SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                           zpr_ty, vty, tileslice16>;

  // Same operands without the "vgx2" suffix; emit priority 0.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            multi_vector_ty:$Zn,
                                            zpr_ty:$Zm),
                  0>;
}
1662
// Instruction, pseudo, VG4 multi/single selection pattern for `intrinsic`,
// and a suffix-less assembly alias.
//   vty - value type handed to the selection pattern
multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic, bits<7> op,
                                              MatrixOperand matrix_ty,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
  def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                    multi_vector_ty, zpr_ty,
                                                    mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7,
                                                      multi_vector_ty, zpr_ty,
                                                      SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                           zpr_ty, vty, tileslice16>;

  // Same operands without the "vgx4" suffix; emit priority 0.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            multi_vector_ty:$Zn,
                                            zpr_ty:$Zm),
                  0>;
}
1676
1677//===----------------------------------------------------------------------===//
1678// SME2 multiple vectors ternary INT/FP, two and four registers
// ZA-array instruction whose $Zn and $Zm are both multi-vector operands,
// printed as "<mnemonic> $ZAd[$Rv, $imm3, vgx2], $Zn, $Zm".
//   op - 7 opcode bits: op{6} -> Inst{22} (sz), op{5-3} -> Inst{12-10},
//        op{2-0} -> Inst{5-3}
// $Zn and $Zm are each encoded in 4 bits. $ZAd is tied to $_ZAd.
class sme2_dot_mla_add_sub_array_vg2_multi<bits<7> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand multi_vector_ty,
                                       string mnemonic>
   : I<(outs matrix_ty:$ZAd),
       (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
            sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
       mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm",
       "", []>,
     Sched<[]> {
  let Constraints = "$ZAd = $_ZAd";

  // Encoded operands.
  bits<3> imm3;
  bits<4> Zn;
  bits<2> Rv;
  bits<4> Zm;

  // Instruction word, least-significant field first.
  let Inst{2-0}   = imm3;
  let Inst{5-3}   = op{2-0};
  let Inst{9-6}   = Zn;
  let Inst{12-10} = op{5-3};
  let Inst{14-13} = Rv;
  let Inst{16-15} = 0b00;
  let Inst{20-17} = Zm;
  let Inst{21}    = 0b1;
  let Inst{22}    = op{6}; // sz
  let Inst{31-23} = 0b110000011;
}
1704
// Instruction, suffix-less alias, pseudo and VG2 multi/multi selection
// pattern for `intrinsic`.
//   zpr_ty - despite the name this is a ValueType; it is forwarded to the
//            selection pattern
multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<7> op,
                                            MatrixOperand  matrix_ty,
                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty,
                                                  multi_vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  // Same operands without the "vgx2" suffix; emit priority 0.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            multi_vector_ty:$Zn,
                                            multi_vector_ty:$Zm),
                  0>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          zpr_ty, tileslice16>;
}
1718
// ZA-array instruction whose $Zn and $Zm are both multi-vector operands,
// printed as "<mnemonic> $ZAd[$Rv, $imm3, vgx4], $Zn, $Zm".
//   op - 7 opcode bits: op{6} -> Inst{22} (sz), op{5-3} -> Inst{12-10},
//        op{2-0} -> Inst{5-3}
// $Zn and $Zm are each encoded in 3 bits. $ZAd is tied to $_ZAd.
class sme2_dot_mla_add_sub_array_vg4_multi<bits<7> op,
                                            MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty,
                                            string mnemonic>
   : I<(outs matrix_ty:$ZAd),
       (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
            sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
       mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm",
       "", []>,
     Sched<[]> {
  let Constraints = "$ZAd = $_ZAd";

  // Encoded operands.
  bits<3> imm3;
  bits<3> Zn;
  bits<2> Rv;
  bits<3> Zm;

  // Instruction word, least-significant field first.
  let Inst{2-0}   = imm3;
  let Inst{5-3}   = op{2-0};
  let Inst{6}     = 0b0;
  let Inst{9-7}   = Zn;
  let Inst{12-10} = op{5-3};
  let Inst{14-13} = Rv;
  let Inst{17-15} = 0b010;
  let Inst{20-18} = Zm;
  let Inst{21}    = 0b1;
  let Inst{22}    = op{6}; // sz
  let Inst{31-23} = 0b110000011;
}
1745
// Instruction, suffix-less alias, pseudo and VG4 multi/multi selection
// pattern for `intrinsic`.
//   zpr_ty - despite the name this is a ValueType; it is forwarded to the
//            selection pattern
multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<7> op,
                                            MatrixOperand  matrix_ty,
                                            RegisterOperand multi_vector_ty,
                                            ValueType zpr_ty, SDPatternOperator intrinsic>{
  def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty,
                                                  multi_vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  // Same operands without the "vgx4" suffix; emit priority 0.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            multi_vector_ty:$Zn,
                                            multi_vector_ty:$Zm),
                  0>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          zpr_ty, tileslice16>;
}
1759
1760//===----------------------------------------------------------------------===//
1761// SME2 multiple vectors binary, two or four registers
1762
// Common base of the ZA accumulate add/sub instructions, printed as
// "<mnemonic> $ZAdn[$Rv, $imm3, vgx2|vgx4], $Zm".
//   sz  - written to Inst{22}
//   vg4 - written to Inst{16}; also picks "vgx4" over "vgx2" in the asm string
//   op  - op{2} -> Inst{18}, op{1-0} -> Inst{4-3}
// Inst{9-6} ($Zm) is left for the vg2/vg4 subclasses. $ZAdn is tied to
// $_ZAdn.
class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4, bits<3> op,
                                  MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty>
    : I<(outs matrix_ty:$ZAdn),
        (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, vector_ty:$Zm),
        mnemonic,
        "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm",
        "", []>,
      Sched<[]> {
  let Constraints = "$ZAdn = $_ZAdn";

  // Encoded operands.
  bits<3> imm3;
  bits<2> Rv;

  // Instruction word, least-significant field first.
  let Inst{2-0}   = imm3;
  let Inst{4-3}   = op{1-0};
  let Inst{5}     = 0b0;
  let Inst{12-10} = 0b111;
  let Inst{14-13} = Rv;
  let Inst{15}    = 0b0;
  let Inst{16}    = vg4;
  let Inst{17}    = 0b0;
  let Inst{18}    = op{2};
  let Inst{21-19} = 0b100;
  let Inst{22}    = sz;
  let Inst{31-23} = 0b110000011;
}
1787
// vg2 form of sme2_multivec_accum_add_sub: passes vg4 = 0 to the base class
// and encodes $Zm in the four bits Inst{9-6}.
class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
                                      MatrixOperand matrix_ty,
                                      RegisterOperand vector_ty>
    : sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op,
                                  matrix_ty, vector_ty> {
  bits<4> Zm;

  let Inst{9-6} = Zm;
}
1795
1796
// Instruction, pseudo, VG1x2 selection pattern for `intrinsic`, and a
// suffix-less assembly alias.
//   op - 4 bits: op{3} is passed on as sz, op{2-0} as the 3-bit opcode
multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand vector_ty,
                                           ValueType vty,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0},
                                             matrix_ty, vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty,
                                       SMEMatrixArray>;

  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7,
                                tileslice16>;

  // Same operands without the "vgx2" suffix; emit priority 0.
  def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAdn,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            vector_ty:$Zm),
                  0>;
}
1810
// vg4 form of sme2_multivec_accum_add_sub: passes vg4 = 1 to the base class
// and encodes $Zm in the three bits Inst{9-7}; Inst{6} is fixed to 0.
class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
                                      MatrixOperand matrix_ty,
                                      RegisterOperand vector_ty>
    : sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op,
                                  matrix_ty, vector_ty> {
  bits<3> Zm;

  let Inst{6}   = 0b0;
  let Inst{9-7} = Zm;
}
1819
// Instruction, pseudo, VG1x4 selection pattern for `intrinsic`, and a
// suffix-less assembly alias.
//   op - 4 bits: op{3} is passed on as sz, op{2-0} as the 3-bit opcode
multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand vector_ty,
                                           ValueType vty,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0},
                                             matrix_ty, vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty,
                                       SMEMatrixArray>;

  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7,
                                tileslice16>;

  // Same operands without the "vgx4" suffix; emit priority 0.
  def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAdn,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            sme_elm_idx0_7:$imm3,
                                            vector_ty:$Zm),
                  0>;
}
1833
1834//===----------------------------------------------------------------------===//
1835// SME2 Multi-vector - Multiple and Single SVE Destructive
1836// Two and Four registers
1837
// Destructive multi-vector instruction with a single-vector second source,
// printed as "<mnemonic> $Zdn, $_Zdn, $Zm".
//   sz - written to Inst{23-22}
//   op - 7 opcode bits: op{6-1} -> Inst{10-5}, op{0} -> Inst{0}
// $Zdn is encoded in 4 bits (Inst{4-1}) and tied to the input $_Zdn.
class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op,
                                             RegisterOperand vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs vector_ty:$Zdn),
        (ins vector_ty:$_Zdn, zpr_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>,
      Sched<[]> {
  let Constraints = "$Zdn = $_Zdn";

  // Encoded operands.
  bits<4> Zdn;
  bits<4> Zm;

  // Instruction word, least-significant field first.
  let Inst{0}     = op{0};
  let Inst{4-1}   = Zdn;
  let Inst{10-5}  = op{6-1};
  let Inst{15-11} = 0b10100;
  let Inst{19-16} = Zm;
  let Inst{21-20} = 0b10;
  let Inst{23-22} = sz;
  let Inst{31-24} = 0b11000001;
}
1858
// _H/_S/_D instances (sz = 0b01/0b10/0b11) of the vg2 multi/single
// destructive format; there is no _B instance in this multiclass.
multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r,
                                                  ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r,
                                                  ZPR4b64, mnemonic>;
}
1864
// _B/_H/_S/_D instances (sz = 0b00..0b11) of the vg2 multi/single
// destructive format.
multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r,
                                                  ZPR4b8, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r,
                                                  ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r,
                                                  ZPR4b64, mnemonic>;
}
1871
// SME2.1 fmax/fmin instructions.
// Single _H instance of the vg2 multi/single destructive format on halfword
// register operands, with sz = 0b00.
// NOTE(review): the "bf" in the name presumably means BFloat16 - confirm.
multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
}
1877
// Destructive multi-vector instruction with a single-vector second source,
// printed as "<mnemonic> $Zdn, $_Zdn, $Zm".
//   sz - written to Inst{23-22}
//   op - 7 opcode bits: op{6-1} -> Inst{10-5}, op{0} -> Inst{0}
// $Zdn is encoded in 3 bits (Inst{4-2}) with Inst{1} fixed to 0, and is tied
// to the input $_Zdn.
class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op,
                                             RegisterOperand vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs vector_ty:$Zdn),
        (ins vector_ty:$_Zdn, zpr_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>,
      Sched<[]> {
  let Constraints = "$Zdn = $_Zdn";

  // Encoded operands.
  bits<3> Zdn;
  bits<4> Zm;

  // Instruction word, least-significant field first.
  let Inst{0}     = op{0};
  let Inst{1}     = 0b0;
  let Inst{4-2}   = Zdn;
  let Inst{10-5}  = op{6-1};
  let Inst{15-11} = 0b10101;
  let Inst{19-16} = Zm;
  let Inst{21-20} = 0b10;
  let Inst{23-22} = sz;
  let Inst{31-24} = 0b11000001;
}
1899
// _H/_S/_D instances (sz = 0b01/0b10/0b11) of the vg4 multi/single
// destructive format; there is no _B instance in this multiclass.
multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r,
                                                  ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r,
                                                  ZPR4b64, mnemonic>;
}
1905
// _B/_H/_S/_D instances (sz = 0b00..0b11) of the vg4 multi/single
// destructive format.
multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r,
                                                  ZPR4b8, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r,
                                                  ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r,
                                                  ZPR4b64, mnemonic>;
}
1912
// SME2.1 fmax/fmin instructions.
// Single _H instance of the vg4 multi/single destructive format on halfword
// register operands, with sz = 0b00.
// NOTE(review): the "bf" in the name presumably means BFloat16 - confirm.
multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
}
1918
// Destructive multi-vector instruction whose second source is also a
// multi-vector operand, printed as "<mnemonic> $Zdn, $_Zdn, $Zm".
//   sz - written to Inst{23-22}
//   op - 7 opcode bits: op{6-1} -> Inst{10-5}, op{0} -> Inst{0}
// $Zdn and $Zm are each encoded in 4 bits. $Zdn is tied to the input $_Zdn.
class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn),
        (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>,
      Sched<[]> {
  let Constraints = "$Zdn = $_Zdn";

  // Encoded operands.
  bits<4> Zdn;
  bits<4> Zm;

  // Instruction word, least-significant field first.
  let Inst{0}     = op{0};
  let Inst{4-1}   = Zdn;
  let Inst{10-5}  = op{6-1};
  let Inst{16-11} = 0b010110;
  let Inst{20-17} = Zm;
  let Inst{21}    = 0b1;
  let Inst{23-22} = sz;
  let Inst{31-24} = 0b11000001;
}
1938
// _H/_S/_D instances (sz = 0b01/0b10/0b11) of the vg2 multi/multi
// destructive format; there is no _B instance in this multiclass.
multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r,
                                                 mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r,
                                                 mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r,
                                                 mnemonic>;
}
1944
// Integer variants of the vg2 destructive multi-vector class: _B, _H, _S and
// _D records with sz = 0b00, 0b01, 0b10 and 0b11 respectively.
multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic,
                                                     bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r,
                                                 mnemonic>;
  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r,
                                                 mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r,
                                                 mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r,
                                                 mnemonic>;
}
1951
// SME2.1 fmax/fmin instructions.
// Only an _H record is defined; it uses sz = 0b00 with the halfword list
// operand.
multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r, mnemonic>;
}
1957
// Encoding class for the "vg4" destructive multi-vector form.
//   sz        - 2-bit value placed in Inst{23-22}.
//   op        - 7-bit opcode, split across Inst{10-5} and Inst{0}.
//   vector_ty - register-list operand used for the result and both inputs.
// Register fields are three bits wide here (Zm in Inst{20-18}, Zdn in
// Inst{4-2}); the bit directly below Zdn is fixed to zero.
1958class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
1959                                            RegisterOperand vector_ty,
1960                                            string mnemonic>
1961    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
1962        mnemonic, "\t$Zdn, $_Zdn, $Zm",
1963        "", []>, Sched<[]> {
1964  bits<3> Zm;
1965  bits<3> Zdn;
1966  let Inst{31-24} = 0b11000001;
1967  let Inst{23-22} = sz;
1968  let Inst{21}    = 0b1;
1969  let Inst{20-18} = Zm;
1970  let Inst{17-11} = 0b0010111;
1971  let Inst{10-5}  = op{6-1};  // upper six opcode bits
1972  let Inst{4-2}   = Zdn;
1973  let Inst{1}     = 0b0;
1974  let Inst{0}     = op{0};    // lowest opcode bit
1975
  // Destructive form: the output register list is the first input.
1976  let Constraints = "$Zdn = $_Zdn";
1977}
1978
// Floating-point variants of the vg4 destructive multi-vector class: _H, _S
// and _D records with sz = 0b01, 0b10 and 0b11 respectively (no _B record).
multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic,
                                                    bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r,
                                                 mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r,
                                                 mnemonic>;
}
1984
// Integer variants of the vg4 destructive multi-vector class: _B, _H, _S and
// _D records with sz = 0b00, 0b01, 0b10 and 0b11 respectively.
multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic,
                                                     bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r,
                                                 mnemonic>;
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r,
                                                 mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r,
                                                 mnemonic>;
}
1991
// SME2.1 fmax/fmin instructions.
// Only an _H record is defined; it uses sz = 0b00 with the halfword list
// operand.
multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r, mnemonic>;
}
1997
1998//===----------------------------------------------------------------------===//
1999// SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources
2000
// Shared base for the indexed "long" multiply-accumulate forms that write a
// MatrixOp32 ZA array operand.
//   op0, op         - 2-bit fields placed in Inst{23-22} and Inst{4-3}.
//   index_ty        - operand type of the $imm slice offset.
//   multi_vector_ty - operand type of $Zn.
//   vg_acronym      - when non-empty it is appended inside the brackets of
//                     the asm string (", <vg_acronym>") and Inst{20} is set.
// Inst{15}, Inst{11-5} and Inst{2-0} are not assigned here; the derived
// records in this file fill them in.
2001class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
2002                                     RegisterOperand multi_vector_ty,
2003                                     string mnemonic, string vg_acronym="">
2004    : I<(outs MatrixOp32:$ZAda),
2005        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
2006        mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3",
2007        "", []>, Sched<[]> {
2008  bits<4> Zm;
2009  bits<2> Rv;
2010  let Inst{31-24} = 0b11000001;
2011  let Inst{23-22} = op0;
2012  let Inst{21}    = 0b0;
2013  let Inst{20}    = !if(!eq(vg_acronym, ""), 0, 1);  // 0 without a vg acronym, 1 with one
2014  let Inst{19-16} = Zm;
2015  let Inst{14-13} = Rv;
2016  let Inst{12}    = 0b1;
2017  let Inst{4-3}   = op;
2018
  // The ZA operand is read and written in place.
2019  let Constraints = "$ZAda = $_ZAda";
2020}
2021
// Indexed long MLA with a single ZPR16 $Zn source (no vg acronym).
// Defines the _HtoS instruction, its _HtoS_PSEUDO counterpart and an anonymous
// selection pattern that binds `intrinsic` (with value type zpr_ty) to it.
2022multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
2023  def _HtoS : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
2024                                          mnemonic>, SMEPseudo2Instr<NAME # _HtoS, 1> {
2025    bits<3> i3;
2026    bits<5> Zn;
2027    bits<3> imm;
    // The 3-bit lane index is split: its top bit goes to Inst{15}, the low two
    // bits to Inst{11-10}.
2028    let Inst{15}    = i3{2};
2029    let Inst{11-10} = i3{1-0};
2030    let Inst{9-5}   = Zn;
2031    let Inst{2-0}   = imm;
2032  }
2033
2034  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2035
2036  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange3s2>;
2037}
2038
// "vgx2" specialisation of the indexed long MLA base: $Zn is a ZZ_h_mul_r
// list encoded in four bits, and $imm is a 2-bit uimm2s2range value.
2039class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
2040    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
2041                                     mnemonic, "vgx2"> {
2042  bits<3> i3;
2043  bits<4> Zn;
2044  bits<2> imm;
2045  let Inst{15}    = 0b0;
  // The 3-bit lane index is split: high two bits in Inst{11-10}, low bit in
  // Inst{2}.
2046  let Inst{11-10} = i3{2-1};
2047  let Inst{9-6}   = Zn;
2048  let Inst{5}     = 0b0;
2049  let Inst{2}     = i3{0};
2050  let Inst{1-0}   = imm;
2051}
2052
// Floating-point vgx2 indexed long MLA (op0 = 0b10).
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx2" suffix.
2053multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
2054  def _HtoS : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
2055
2056  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2057
2058  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
2059
  // NOTE(review): the trailing 0 is presumably InstAlias's emit priority
  // (accepted by the assembler, not used for printing) - confirm in Target.td.
2060  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
2061                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
2062}
2063
// Integer vgx2 indexed long MLA (op0 = 0b11); the pattern uses nxv8i16.
// Note that the records here are suffixed _S, whereas the vg4 integer
// counterpart in this file uses _HtoS.
// Defines the instruction, its pseudo, a selection pattern for `intrinsic`,
// and an alias whose asm string omits the ", vgx2" suffix.
2064multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2065  def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;
2066
2067  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2068
2069  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
2070
2071  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
2072                 (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
2073}
2074
// "vgx4" specialisation of the indexed long MLA base: $Zn is a ZZZZ_h_mul_r
// list encoded in three bits, and Inst{15} is fixed to 1 (the vgx2 form in
// this file fixes it to 0).
2075class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
2076    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
2077                                      mnemonic, "vgx4"> {
2078  bits<3> i3;
2079  bits<3> Zn;
2080  bits<2> imm;
2081  let Inst{15}    = 0b1;
  // The 3-bit lane index is split: high two bits in Inst{11-10}, low bit in
  // Inst{2}.
2082  let Inst{11-10} = i3{2-1};
2083  let Inst{9-7}   = Zn;
2084  let Inst{6-5}   = 0b00;
2085  let Inst{2}     = i3{0};
2086  let Inst{1-0}   = imm;
2087}
2088
// Floating-point vgx4 indexed long MLA (op0 = 0b10).
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx4" suffix.
2089multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
2090  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
2091
2092  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2093
2094  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
2095
2096  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
2097                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
2098}
2099
// Integer vgx4 indexed long MLA (op0 = 0b11); the pattern uses nxv8i16.
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx4" suffix.
2100multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2101  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
2102
2103  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2104
2105  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
2106
2107  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
2108                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
2109}
2110
// Shared base for the non-indexed "long" multiply-accumulate forms.
//   op0, op          - 2-bit fields placed in Inst{23-22} and Inst{4-3}.
//   matrix_ty        - ZA operand type (tied input and output).
//   index_ty         - operand type of the $imm slice offset.
//   first_vector_ty  - operand type of $Zn.
//   second_vector_ty - operand type of $Zm.
//   vg_acronym       - when non-empty it is appended inside the brackets of
//                      the asm string; Inst{10} is 1 only when it is empty.
// Inst{20-16}, Inst{9-5} and Inst{2-0} are not assigned here; the derived
// records in this file fill them in.
2111class sme2_mla_long_array<bits<2>op0, bits<2> op,
2112                          MatrixOperand matrix_ty,
2113                          Operand index_ty,
2114                          RegisterOperand first_vector_ty,
2115                          RegisterOperand second_vector_ty,
2116                          string mnemonic, string vg_acronym="">
2117   : I<(outs matrix_ty:$ZAda),
2118       (ins  matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
2119       index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm),
2120       mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm",
2121       "", []> , Sched<[]> {
2122  bits<2> Rv;
2123  let Inst{31-24} = 0b11000001;
2124  let Inst{23-22} = op0;
2125  let Inst{21}    = 0b1;
2126  let Inst{15}    = 0b0;
2127  let Inst{14-13} = Rv;
2128  let Inst{12-11} = 0b01;
2129  let Inst{10}    = !if(!eq(vg_acronym, ""), 1, 0);  // 1 without a vg acronym, 0 with one
2130  let Inst{4-3}   = op;
2131
  // The ZA operand is read and written in place.
2132  let Constraints = "$ZAda = $_ZAda";
2133}
2134
// Non-indexed long MLA with single-vector sources (ZPR16 $Zn, ZPR4b16 $Zm)
// accumulating into MatrixOp32, with no vg acronym.
// Defines the _HtoS instruction, its pseudo and a selection pattern that
// binds `intrinsic` (with value type zpr_ty) to it.
2135multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
2136  def _HtoS : sme2_mla_long_array<op0, op, MatrixOp32, uimm3s2range, ZPR16, ZPR4b16,
2137                               mnemonic> , SMEPseudo2Instr<NAME # _HtoS, 1>{
2138    bits<4> Zm;
2139    bits<5> Zn;
2140    bits<3> imm;
2141    let Inst{20}    = 0b0;
2142    let Inst{19-16} = Zm;
2143    let Inst{9-5}   = Zn;
2144    let Inst{2-0}   = imm;
2145  }
2146
2147  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, SMEMatrixArray>;
2148
2149  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
2150}
2151
// Single-vector long MLA accumulating into MatrixOp16 from byte-element
// sources (ZPR8 $Zn, ZPR4b8 $Zm), with op0 = op = 0b00 and Inst{20} = 1.
// The instruction is declared to read FPMR and FPCR.
2152class sme2_mla_long_array_single_16b<string mnemonic>
2153    : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8,  mnemonic> {
2154    bits<4> Zm;
2155    bits<5> Zn;
2156    bits<3> imm;
2157    let Inst{20}    = 0b1;
2158    let Inst{19-16} = Zm;
2159    let Inst{9-5}   = Zn;
2160    let Inst{2-0}   = imm;
2161    let Uses = [FPMR, FPCR];
2162}
2163
// Wraps sme2_mla_long_array_single_16b: defines the NAME instruction, its
// _PSEUDO counterpart and a selection pattern binding `intrinsic` with
// nxv16i8 operands.
2164multiclass sme2_fp8_fmlal_single_za16<string mnemonic, SDPatternOperator intrinsic> {
2165  def NAME : sme2_mla_long_array_single_16b<mnemonic>, SMEPseudo2Instr<NAME, 1>;
2166
2167  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm3s2range, ZPR8, ZPR4b8, SMEMatrixArray>;
2168
2169  def: SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm3s2range, ZPR4b8, nxv16i8, tileslicerange3s2>;
2170}
2171
// Common encoding for the vgx2/vgx4 "multi x single" long MLA forms.
//   vg4 - placed in Inst{20}; the multiclasses in this file pass 0 for vgx2
//         and 1 for vgx4.
//   o2  - extra opcode bit placed in Inst{2}.
// $Zn is a register list (multi_vector_ty) encoded in five bits; $Zm is a
// single vector (zpr_ty) encoded in four bits.
2172class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
2173                                      MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
2174                                      ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
2175    : sme2_mla_long_array<op0, op, matrix_ty, uimm2s2range, multi_vector_ty, zpr_ty,
2176                          mnemonic, vg_acronym> {
2177  bits<4> Zm;
2178  bits<5> Zn;
2179  bits<2> imm;
2180  let Inst{20}    = vg4;
2181  let Inst{19-16} = Zm;
2182  let Inst{9-5}   = Zn;
2183  let Inst{2}     = o2;
2184  let Inst{1-0}   = imm;
2185}
2186
// Floating-point vgx2 "multi x single" long MLA (op0 = 0b00, vg4 = 0).
// The 3-bit `op` is split: op{2-1} becomes the base class's `op`, op{0} its
// `o2` bit. `uses` (default empty) is installed as the instruction's Uses.
// Defines the NAME instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx2" suffix.
2187multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
2188                                             RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
2189                                             ValueType zpr_ty, SDPatternOperator intrinsic, list<Register> uses=[]> {
2190  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty,  multi_vector_ty,
2191                                           vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1> {
2192    let Uses = uses;
2193  }
2194
2195  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty,
2196                                                        vector_ty, SMEMatrixArray>;
2197
2198  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
2199                                           tileslicerange2s2>;
2200
2201  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2202                 (!cast<Instruction>(NAME) matrix_ty:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
2203                  uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
2204}
2205
// Integer vgx2 "multi x single" long MLA (op0 = 0b01, vg4 = 0, o2 = 0) with
// ZZ_h / ZPR4b16 sources accumulating into MatrixOp32.
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic` (nxv8i16), and an alias without the ", vgx2" suffix.
2206multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2207  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic,
2208                                             "vgx2">, SMEPseudo2Instr<NAME # _HtoS, 1>;
2209
2210  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h, ZPR4b16, SMEMatrixArray>;
2211
2212  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
2213
2214  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2215                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
2216}
2217
// Floating-point vgx4 "multi x single" long MLA (op0 = 0b00, vg4 = 1).
// The 3-bit `op` is split: op{2-1} becomes the base class's `op`, op{0} its
// `o2` bit. `uses` (default empty) is installed as the instruction's Uses.
// Defines the NAME instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx4" suffix.
2218multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
2219                                             RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
2220                                             ValueType zpr_ty, SDPatternOperator intrinsic, list<Register> uses=[]> {
2221  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
2222                                             vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
2223    let Uses = uses;
2224  }
2225
2226  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
2227                                                      SMEMatrixArray>;
2228
2229  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
2230                                           tileslicerange2s2>;
2231
2232  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2233                 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2234                  uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
2235}
2236
// Integer vgx4 "multi x single" long MLA (op0 = 0b01, vg4 = 1, o2 = 0) with
// ZZZZ_h / ZPR4b16 sources accumulating into MatrixOp32.
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic` (nxv8i16), and an alias without the ", vgx4" suffix.
2237multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2238  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16,  mnemonic,
2239                                           "vgx4">, SMEPseudo2Instr<NAME # _HtoS, 1>;
2240
2241  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h, ZPR4b16, SMEMatrixArray>;
2242
2243  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
2244
2245  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2246                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
2247}
2248
// vgx2 "multi x multi" long MLA encoding: both $Zn and $Zm are register lists
// of the same multi_vector_ty, each encoded in four bits.
// The 3-bit `op` is split: op{1-0} is forwarded to the base class, op{2} is
// placed in Inst{5}.
2249class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<3> op,
2250                                    MatrixOperand matrix_ty, RegisterOperand multi_vector_ty>
2251   : sme2_mla_long_array<op0, op{1-0},  matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
2252                        mnemonic, "vgx2"> {
2253  bits<4> Zm;
2254  bits<4> Zn;
2255  bits<2> imm;
2256  let Inst{20-17} = Zm;
2257  let Inst{16}    = 0b0;
2258  let Inst{9-6}   = Zn;
2259  let Inst{5}     = op{2};  // fp8
2260  let Inst{2}     = 0b0;
2261  let Inst{1-0}   = imm;
2262}
2263
// Floating-point vgx2 "multi x multi" long MLA (op0 = 0b10).
// `uses` (default empty) is installed as the instruction's Uses list.
// Defines the NAME instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx2" suffix.
2264multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
2265                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
2266                                            SDPatternOperator intrinsic, list<Register> uses=[]> {
2267  def NAME : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
2268                                           SMEPseudo2Instr<NAME, 1> {
2269    let Uses = uses;
2270  }
2271
2272  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;
2273
2274  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
2275
2276  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2277                  (!cast<Instruction>(NAME) matrix_ty:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
2278                  uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
2279}
2280
// Integer vgx2 "multi x multi" long MLA (op0 = 0b11). The 2-bit `op` is
// widened to three bits with a leading zero, so Inst{5} is always 0 here.
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic` (nxv8i16), and an alias without the ", vgx2" suffix. The alias
// names its offset operand $imm2; the name is used consistently within the
// alias's asm string and result dag.
2281multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2282  def _HtoS : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZ_h_mul_r>,
2283                                         SMEPseudo2Instr<NAME # _HtoS, 1>;
2284
2285  def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, SMEMatrixArray>;
2286
2287  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
2288
2289  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
2290                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
2291}
2292
// vgx4 "multi x multi" long MLA encoding: both $Zn and $Zm are register lists
// of the same multi_vector_ty, each encoded in three bits.
// The 3-bit `op` is split: op{1-0} is forwarded to the base class, op{2} is
// placed in Inst{5}. Inst{17-16} is fixed to 0b01.
2293class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<3> op,
2294                                    MatrixOperand matrix_ty,
2295                                    RegisterOperand multi_vector_ty>
2296   : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
2297                         mnemonic, "vgx4"> {
2298  bits<3> Zm;
2299  bits<3> Zn;
2300  bits<2> imm;
2301  let Inst{20-18} = Zm;
2302  let Inst{17}    = 0b0;
2303  let Inst{16}    = 0b1;
2304  let Inst{9-7}   = Zn;
2305  let Inst{6}     = 0b0;
2306  let Inst{5}     = op{2};  //fp8
2307  let Inst{2}     = 0b0;
2308  let Inst{1-0}   = imm;
2309}
2310
// Floating-point vgx4 "multi x multi" long MLA (op0 = 0b10).
// `uses` (default empty) is installed as the instruction's Uses list.
// Defines the NAME instruction, its pseudo, a selection pattern for
// `intrinsic`, and an alias whose asm string omits the ", vgx4" suffix.
2311multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
2312                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
2313                                            SDPatternOperator intrinsic, list<Register> uses=[]> {
2314  def NAME : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
2315                                           SMEPseudo2Instr<NAME, 1> {
2316    let Uses = uses;
2317  }
2318
2319  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;
2320
2321  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
2322
2323  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2324                 (!cast<Instruction>(NAME) matrix_ty:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
2325                  uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
2326}
2327
// Integer vgx4 "multi x multi" long MLA (op0 = 0b11). The 2-bit `op` is
// widened to three bits with a leading zero, so Inst{5} is always 0 here.
// Defines the _HtoS instruction, its pseudo, a selection pattern for
// `intrinsic` (nxv8i16), and an alias without the ", vgx4" suffix. The alias
// names its offset operand $imm2; the name is used consistently within the
// alias's asm string and result dag.
2328multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2329  def _HtoS : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZZZ_h_mul_r>,
2330                                            SMEPseudo2Instr<NAME # _HtoS, 1>;
2331
2332  def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, SMEMatrixArray>;
2333
2334  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
2335
2336  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
2337                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
2338}
2339
2340//===----------------------------------------------------------------------===//
// Two-operand (non-destructive) vg2 encoding shared by the FRINT and convert
// multiclasses below: $Zd <- f($Zn), both register lists encoded in four bits.
//   sz - 2-bit value placed in Inst{23-22}.
//   op - 5-bit opcode: op{4-1} in Inst{19-16}, op{0} in Inst{5}.
2341class sme2_frint_cvt_vg2_multi<bits<2>sz, bits<5>op, RegisterOperand first_ty,
2342                               RegisterOperand second_ty, string mnemonic>
2343    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2344        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2345  bits<4> Zn;
2346  bits<4> Zd;
2347  let Inst{31-24} = 0b11000001;
2348  let Inst{23-22} = sz;
2349  let Inst{21-20} = 0b10;
2350  let Inst{19-16} = op{4-1};
2351  let Inst{15-10} = 0b111000;
2352  let Inst{9-6}   = Zn;
2353  let Inst{5}     = op{0};
2354  let Inst{4-1}   = Zd;
2355  let Inst{0}     = 0b0;
2356}
2357
// SME2 multi-vec FP to int convert two registers
// SME2 multi-vec int to FP two registers
// Both directions share this multiclass: a single NAME record with sz = 0b00
// and ZZ_s_mul_r for the destination as well as the source.
multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
  def NAME : sme2_frint_cvt_vg2_multi<0b00, op,
                                      ZZ_s_mul_r, ZZ_s_mul_r,
                                      mnemonic>;
}
2363
// SME2 multi-vec FRINT two registers
// A single _S record with sz = 0b10 and ZZ_s_mul_r for both operands.
multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
  def _S : sme2_frint_cvt_vg2_multi<0b10, op,
                                    ZZ_s_mul_r, ZZ_s_mul_r,
                                    mnemonic>;
}
2368
// Two-operand (non-destructive) vg4 encoding shared by the FRINT, ZIP and
// convert multiclasses below: $Zd <- f($Zn), both register lists encoded in
// three bits.
//   sz - 2-bit value placed in Inst{23-22}.
//   op - 7-bit opcode scattered over three fields: op{6-3} in Inst{19-16},
//        op{2-1} in Inst{6-5}, op{0} in Inst{1}.
2369class sme2_frint_zip_cvt_vg4_multi<bits<2>sz, bits<7>op, RegisterOperand first_ty,
2370                                   RegisterOperand second_ty, string mnemonic>
2371    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2372        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2373  bits<3> Zn;
2374  bits<3> Zd;
2375  let Inst{31-24} = 0b11000001;
2376  let Inst{23-22} = sz;
2377  let Inst{21-20} = 0b11;
2378  let Inst{19-16} = op{6-3};
2379  let Inst{15-10} = 0b111000;
2380  let Inst{9-7}   = Zn;
2381  let Inst{6-5}   = op{2-1};
2382  let Inst{4-2}   = Zd;
2383  let Inst{1}     = op{0};
2384  let Inst{0}     = 0b0;
2385}
2386
// SME2 multi-vec FP to int convert four registers
// SME2 multi-vec int to FP four registers
// Both directions share this multiclass: a single NAME record with sz = 0b00
// and ZZZZ_s_mul_r for the destination as well as the source.
multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op,
                                          ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                          mnemonic>;
}
2392
// SME2 multi-vec ZIP four registers
// Byte, halfword, word and doubleword element variants (sz = 0b00..0b11); the
// quadword variant is defined separately by sme2_zip_vector_vg4_Q.
multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
  def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r, mnemonic>;
}
2404
// SME2 multi-vec quadwords ZIP four registers
// A single NAME record using the ZZZZ_q_mul_r list operand with sz = 0b00.
multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r, mnemonic>;
}
2410
// SME2 multi-vec FRINT four registers
// A single _S record with sz = 0b10 and ZZZZ_s_mul_r for both operands.
multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
}
2416
// Encoding for converts whose destination field is five bits wide and whose
// source field is four bits wide: $Zd <- f($Zn).
//   op - 5-bit opcode scattered over three fields: op{4} in Inst{22},
//        op{3-1} in Inst{18-16}, op{0} in Inst{5}.
2417class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
2418                           RegisterOperand first_ty, RegisterOperand second_ty>
2419    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2420        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2421  bits<4> Zn;
2422  bits<5> Zd;
2423  let Inst{31-23} = 0b110000010;
2424  let Inst{22}    = op{4};
2425  let Inst{21-19} = 0b100;
2426  let Inst{18-16} = op{3-1};
2427  let Inst{15-10} = 0b111000;
2428  let Inst{9-6}   = Zn;
2429  let Inst{5}     = op{0};
2430  let Inst{4-0}   = Zd;
2431}
2432
2433// SME2 multi-vec FP down convert two registers
2434// SME2 multi-vec int down convert two registers
// Instantiates the class of the same name with a ZPR16 destination and a
// ZZ_s_mul_r source, and attaches an SVE2p1_Cvt_VG2_Pat selection pattern
// binding `intrinsic` with result type out_vt and operand type in_vt.
2435multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
2436                               ValueType in_vt, SDPatternOperator intrinsic> {
2437  def NAME :  sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
2438  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
2439}
2440
2441// SME2 multi-vec FP8 down convert two registers
// FP8 down-convert from a ZZ_h_mul_r pair to a single ZPR8 register.
// The 5-bit opcode is built as {op, 0b1000}. The instruction is declared to
// read FPMR and FPCR.
// NOTE(review): mayLoad = 1 is set although no memory operand is visible here;
// presumably it is a conservative side-effect model - confirm the intent.
2442multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op, ValueType in_vt, SDPatternOperator intrinsic> {
2443  def NAME :  sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>{
2444    let mayLoad = 1;
2445    let mayStore = 0;
2446    let Uses = [FPMR, FPCR];
2447  }
  // The two intrinsic operands are packed into a ZPR2Mul2 register sequence
  // (zsub0, zsub1) to form the instruction's list operand.
2448  def : Pat<(nxv16i8 (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
2449            (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
2450}
2451
// Encoding shared by the vg2 unpack and up-convert multiclasses below: the
// source $Zn is encoded in five bits and the destination $Zd in four bits.
//   sz - 2-bit value placed in Inst{23-22}.
//   op - 3-bit opcode placed in Inst{18-16}.
//   u  - single bit placed in Inst{0}.
2452class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
2453                           RegisterOperand second_ty, string mnemonic>
2454    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2455        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2456  bits<5> Zn;
2457  bits<4> Zd;
2458  let Inst{31-24} = 0b11000001;
2459  let Inst{23-22} = sz;
2460  let Inst{21-19} = 0b100;
2461  let Inst{18-16} = op;
2462  let Inst{15-10} = 0b111000;
2463  let Inst{9-5}   = Zn;
2464  let Inst{4-1}   = Zd;
2465  let Inst{0}     = u;
2466}
2467
// SME2 multi-vec unpack two registers
// _H, _S and _D records (sz = 0b01, 0b10, 0b11), all with op = 0b101. Each
// pairs a ZZ_*_mul_r destination list with a single source vector of the
// next-narrower element size.
multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
  def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u,
                                    ZZ_h_mul_r, ZPR8, mnemonic>;
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u,
                                    ZZ_s_mul_r, ZPR16, mnemonic>;
  def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u,
                                    ZZ_d_mul_r, ZPR32, mnemonic>;
}
2474
// SME2.1 multi-vec convert two registers
// A single _S record: sz = 0b10, op = 0b000, with `l` supplying the low
// encoding bit; ZPR16 source, ZZ_s_mul_r destination.
multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l,
                                    ZZ_s_mul_r, ZPR16, mnemonic>;
}
2479
// SME2 multi-vec FP8 up convert two registers
// A single NAME record: op = 0b110, with `opc` as the size field and `L` as
// the low encoding bit; ZPR8 source, ZZ_h_mul_r destination. The instruction
// is declared to read FPMR and FPCR.
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc,
                                            bit L> {
  let Uses = [FPMR, FPCR] in
  def NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8,
                                      mnemonic>;
}
2486
2487
// Encoding for converts whose source field is three bits wide and whose
// destination field is five bits wide.
//   sz  - single bit placed in Inst{23}.
//   op  - 3-bit opcode: op{2} in Inst{22}, op{1-0} in Inst{6-5}.
//   op2 - 4-bit opcode placed in Inst{19-16}.
2488class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2,  RegisterOperand first_ty,
2489                          RegisterOperand second_ty, string mnemonic>
2490    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2491        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2492  bits<3> Zn;
2493  bits<5> Zd;
2494  let Inst{31-24} = 0b11000001;
2495  let Inst{23}    = sz;
2496  let Inst{22}    = op{2};
2497  let Inst{21-20} = 0b11;
2498  let Inst{19-16} = op2;
2499  let Inst{15-10} = 0b111000;
2500  let Inst{9-7}   = Zn;
2501  let Inst{6-5}   = op{1-0};
2502  let Inst{4-0}   = Zd;
2503}
2504
2505// SME2 multi-vec int down convert four registers
// Two records, both with op2 = 0b0011:
//   _StoB - sz = 0, ZZZZ_s_mul_r source, ZPR8 destination  (nxv4i32 -> nxv16i8)
//   _DtoH - sz = 1, ZZZZ_d_mul_r source, ZPR16 destination (nxv2i64 -> nxv8i16)
// plus one SME2_Cvt_VG4_Pat per record binding the same `intrinsic`.
2506multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
2507  def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
2508  def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
2509
2510  def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
2511  def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
2512}
2513
2514//SME2 multi-vec FP8 down convert four registers
// FP8 down-convert from a ZZZZ_s_mul_r list to a single ZPR8 register
// (nxv4f32 -> nxv16i8 in the pattern). The 3-bit opcode is {0b00, N} and
// op2 is 0b0100. The instruction is declared to read FPMR and FPCR.
// NOTE(review): mayLoad = 1 is set although no memory operand is visible here;
// presumably it is a conservative side-effect model - confirm the intent.
2515multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N, SDPatternOperator intrinsic> {
2516 def NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic> {
2517    let mayLoad = 1;
2518    let mayStore = 0;
2519    let Uses = [FPMR, FPCR];
2520 }
2521 def : SME2_Cvt_VG4_Pat<NAME, intrinsic, nxv16i8, nxv4f32>;
2522}
2523
// Encoding for the vg4 unpack form: the source $Zn is encoded in four bits
// and the destination $Zd in three bits.
//   sz - 2-bit value placed in Inst{23-22}.
//   u  - single bit placed in Inst{0}.
2524class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
2525                           RegisterOperand second_ty, string mnemonic>
2526    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2527        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2528  bits<4> Zn;
2529  bits<3> Zd;
2530  let Inst{31-24} = 0b11000001;
2531  let Inst{23-22} = sz;
2532  let Inst{21-10} = 0b110101111000;
2533  let Inst{9-6}   = Zn;
2534  let Inst{5}     = 0b0;
2535  let Inst{4-2}   = Zd;
2536  let Inst{1}     = 0b0;
2537  let Inst{0}     = u;
2538}
2539
// SME2 multi-vec UNPK four registers
// _H, _S and _D records (sz = 0b01, 0b10, 0b11). Each pairs a ZZZZ_*_mul_r
// destination list with a ZZ_*_mul_r source list of the next-narrower element
// size.
multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
  def _H : sme2_unpk_vector_vg4<0b01, u,
                                ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>;
  def _S : sme2_unpk_vector_vg4<0b10, u,
                                ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>;
  def _D : sme2_unpk_vector_vg4<0b11, u,
                                ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>;
}
2546
2547//===----------------------------------------------------------------------===//
2548// SME2 multi-vec CLAMP registers
2549
// Common base for the vg2/vg4 CLAMP encodings.
//   sz  - 2-bit value placed in Inst{23-22}.
//   op1 - 3-bit opcode placed in Inst{12-10}.
//   u   - single bit placed in Inst{0}.
// $Zd is a register list that is both input (tied $_Zd) and output; $Zn and
// $Zm are single vectors of vector_ty. The Zd field itself (Inst{4-1}) is not
// assigned here; the vg2/vg4 subclasses in this file supply it.
2550class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
2551                                   RegisterOperand multi_vector_ty,
2552                                   ZPRRegOp vector_ty, string mnemonic>
2553    : I<(outs multi_vector_ty:$Zd),
2554        (ins  multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
2555        mnemonic, "\t$Zd, $Zn, $Zm",
2556        "", []>, Sched<[]>{
2557  bits<5> Zm;
2558  bits<5> Zn;
2559  let Inst{31-24} = 0b11000001;
2560  let Inst{23-22} = sz;
2561  let Inst{21}    = 0b1;
2562  let Inst{20-16} = Zm;
2563  let Inst{15-13} = 0b110;
2564  let Inst{12-10} = op1;
2565  let Inst{9-5}   = Zn;
2566  let Inst{0}     = u;
2567
  // Destructive form: the output register list is also an input.
2568  let Constraints = "$Zd = $_Zd";
2569}
2570
// vg2 CLAMP: completes the shared base by encoding a 4-bit Zd in Inst{4-1}.
2571class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
2572                                  RegisterOperand multi_vector_ty,
2573                                  ZPRRegOp vector_ty, string mnemonic>
2574    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
2575                                   mnemonic>{
2576  bits<4> Zd;
2577  let Inst{4-1} = Zd;
2578}
2579
// Floating-point vg2 CLAMP: _H, _S and _D records (sz = 0b01, 0b10, 0b11),
// all with op1 = 0b000 and u = 0.
multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0,
                                       ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0,
                                       ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0,
                                       ZZ_d_mul_r, ZPR64, mnemonic>;
}
2585
// Integer vg2 CLAMP: _B, _H, _S and _D records (sz = 0b00..0b11), all with
// op1 = 0b001; `u` is forwarded unchanged to every record.
multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u> {
  def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u,
                                       ZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u,
                                       ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u,
                                       ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u,
                                       ZZ_d_mul_r, ZPR64, mnemonic>;
}
2592
// SME2.1 multi-vec FCLAMP two registers
// Only an _H record is defined; it uses sz = 0b00 with the halfword operands
// (op1 = 0b000, u = 0).
multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>;
}
2598
// vg4 CLAMP: completes the shared base by encoding a 3-bit Zd in Inst{4-2}
// and fixing Inst{1} to zero.
2599class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
2600                                  RegisterOperand multi_vector_ty,
2601                                  ZPRRegOp vector_ty, string mnemonic>
2602    : sme2_clamp_vector_vg24_multi<sz, op1, u,  multi_vector_ty, vector_ty,
2603                                   mnemonic>{
2604  bits<3> Zd;
2605  let Inst{4-2} = Zd;
2606  let Inst{1}   = 0b0;
2607}
2608
// Floating-point vg4 CLAMP: _H, _S and _D records (sz = 0b01, 0b10, 0b11),
// all with op1 = 0b010 and u = 0.
multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0,
                                       ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0,
                                       ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0,
                                       ZZZZ_d_mul_r, ZPR64, mnemonic>;
}
2614
// Four-register clamp, integer flavour: emits _B/_H/_S/_D records with
// op1 = 0b011; the `u` bit is forwarded unchanged to every record.
2615multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u>{
2616  def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>;
2617  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>;
2618  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>;
2619  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>;
2620}
2621
2622// SME2.1 multi-vec clamp (bfclamp multiclass), four registers: a single _H record using sz = 0b00 with 16-bit operands
2623multiclass sme2p1_bfclamp_vector_vg4_multi<string mnemonic> {
2624  def _H : sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
2625                                       mnemonic>;
2626}
2627
2628// SME2 multi-vec ZIP two registers: one multi-vector result $Zd built from two single-vector sources $Zn and $Zm
2629class sme2_zip_vector_vg2<bits<2> sz, bit q, bit u,
2630                         RegisterOperand multi_vector_ty,
2631                         ZPRRegOp vector_ty, string mnemonic>
2632    : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
2633        mnemonic, "\t$Zd, $Zn, $Zm",
2634        "", []>, Sched<[]>{
2635  bits<4> Zd;  // destination uses a 4-bit field (Inst{4-1})
2636  bits<5> Zm;
2637  bits<5> Zn;
2638  let Inst{31-24} = 0b11000001;
2639  let Inst{23-22} = sz;
2640  let Inst{21}    = 0b1;
2641  let Inst{20-16} = Zm;
2642  let Inst{15-11} = 0b11010;
2643  let Inst{10}    = q;  // set only by the _Q record of the multiclass
2644  let Inst{9-5}   = Zn;
2645  let Inst{4-1}   = Zd;
2646  let Inst{0}     = u;  // lowest bit carries the `u` class argument
2647}
2648
// Emits the five element-size records of the two-register ZIP-format class.
// `op` is passed as the class's `u` bit; _Q reuses sz = 0b00 and sets q = 1.
2649multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
2650  def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>;
2651  def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>;
2652  def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>;
2653  def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>;
2654  def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>;
2655}
2656
2657//===----------------------------------------------------------------------===//
2658// SME2 Dot Products and MLA
// Base format for indexed two-register (vgx2) ZA-array instructions.
// The 6-bit `op` is split across the encoding: op{5-3} -> Inst{12-10} and
// op{2-0} -> Inst{5-3}. Derived records pass `?` for the op bits they later
// override with their own index field `i`; this class does not encode `i`.
2659class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
2660                                     RegisterOperand multi_vector_ty,
2661                                     ZPRRegOp vector_ty, Operand index_ty,
2662                                     string mnemonic>
2663    : I<(outs matrix_ty:$ZAda),
2664        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2665         multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
2666         mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i",
2667        "", []>, Sched<[]> {
2668  bits<4> Zm;
2669  bits<2> Rv;
2670  bits<4> Zn;
2671  bits<3> imm3;
2672  let Inst{31-24} = 0b11000001;
2673  let Inst{23-22} = sz;
2674  let Inst{21-20} = 0b01;
2675  let Inst{19-16} = Zm;
2676  let Inst{15}    = 0b0;
2677  let Inst{14-13} = Rv;
2678  let Inst{12-10} = op{5-3};
2679  let Inst{9-6}   = Zn;
2680  let Inst{5-3}   = op{2-0};
2681  let Inst{2-0}   = imm3;
2682
2683  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
2684}
2685
2686// SME2 multi-vec ternary indexed two registers 32-bit
// Produces the real instruction (NAME), a _PSEUDO record, a selection pattern
// for `intrinsic`, and an alias that accepts the syntax without "vgx2".
2687multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<4> op,
2688                                              RegisterOperand multi_vector_ty,
2689                                              ZPRRegOp vector_ty, ValueType vt,
2690                                              SDPatternOperator intrinsic> {
2691  def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty,
2692                                             VectorIndexS32b_timm,  mnemonic>, SMEPseudo2Instr<NAME, 1> {
2693    bits<2> i;
2694    let Inst{11-10} = i;  // fills the two `?` op bits left open above
2695  }
2696  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
2697
2698  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
2699
2700  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2701        (!cast<Instruction>(NAME) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2702        multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>;
2703}
2704
2705// SME2.1 multi-vec ternary indexed two registers 16-bit
// Produces the real instruction (NAME), a _PSEUDO record, a selection pattern
// for `intrinsic`, and an alias that accepts the syntax without "vgx2".
// The pseudo's index operand is VectorIndexH32b_timm so that it agrees with
// the selection pattern below and with the four-register 16-bit multiclass.
2706multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
2707                                                RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
2708                                                ValueType vt, SDPatternOperator intrinsic> {
2709  def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
2710                                            multi_vector_ty, vector_ty,
2711                                            VectorIndexH, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2712    bits<3> i;
2713    let Inst{11-10} = i{2-1};  // upper index bits fill the two middle `?` op bits
2714    let Inst{3}     = i{0};    // lowest index bit fills the trailing `?` op bit
2715  }
2716
2717  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexH32b_timm, SMEMatrixArray>;
2718
2719  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexH32b_timm, tileslice16>;
2720
2721  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2722        (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2723        multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexH:$i), 0>;
2724}
2725
2726// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
// Defines only the instruction and its "vgx2"-less alias; no pseudo or
// selection pattern is created here.
2727multiclass sme2p1_multi_vec_array_vg2_index_f8f16<string mnemonic, bits<2> sz, bits<3> op,
2728                                                  RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
2729  def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
2730                                            multi_vector_ty, zpr_ty,
2731                                            VectorIndexH, mnemonic> {
2732    bits<3> i;
2733    let Inst{11-10} = i{2-1};  // upper index bits fill the two middle `?` op bits
2734    let Inst{3}     = i{0};    // lowest index bit fills the trailing `?` op bit
2735  }
2736
2737  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2738        (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2739        multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
2740}
2741
2742// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
2743// two registers. Reuses the vgx2 index format (two-register ZZ_b_mul_r source) but
// overrides AsmString so the array suffix is spelled "vgx4".
2744class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
2745   : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
2746                                    ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {
2747
2748  bits<2> i;
2749  let Inst{10} = i{1};  // fills the first `?` of the op argument
2750  let Inst{3}  = i{0};  // fills the last `?` of the op argument
2751  let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
2752  let Uses = [FPMR, FPCR];  // implicit register reads
2753}
2754
2755// SME2 multi-vec ternary indexed two registers 64-bit
2756
// Stand-alone format (not derived from the generic vgx2 index class): the
// index is a single bit `i1` placed in Inst{10}.
2757class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
2758                                         RegisterOperand multi_vector_ty,
2759                                         ZPRRegOp vector_ty,
2760                                         string mnemonic>
2761    : I<(outs MatrixOp64:$ZAda),
2762        (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2763         multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
2764        mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1",
2765        "", []>, Sched<[]> {
2766  bits<4> Zm;
2767  bits<2> Rv;
2768  bits<1> i1;
2769  bits<4> Zn;
2770  bits<3> imm3;
2771  let Inst{31-20} = 0b110000011101;
2772  let Inst{19-16} = Zm;
2773  let Inst{15}    = 0b0;
2774  let Inst{14-13} = Rv;
2775  let Inst{12-11} = 0b00;
2776  let Inst{10}    = i1;
2777  let Inst{9-6}   = Zn;
2778  let Inst{5}     = 0b0;
2779  let Inst{4-3}   = op;
2780  let Inst{2-0}   = imm3;
2781
2782  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
2783}
2784
// Wraps the 64-bit vgx2 index class: real instruction, _PSEUDO record,
// selection pattern for `intrinsic`, and an alias without the "vgx2" suffix.
2785multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op,
2786                                              RegisterOperand multi_vector_ty,
2787                                              ZPRRegOp vector_ty, ValueType vt,
2788                                              SDPatternOperator intrinsic> {
2789  def NAME : sme2_multi_vec_array_vg2_index_64b<op, multi_vector_ty, vector_ty,
2790                                                mnemonic>, SMEPseudo2Instr<NAME, 1>;
2791
2792  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>;
2793
2794  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexD32b_timm, tileslice16>;
2795
2796  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
2797        (!cast<Instruction>(NAME) MatrixOp64:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2798        multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
2799}
2800
// Base format for indexed four-register (vgx4) ZA-array instructions.
// The 7-bit `op` is split: op{6-4} -> Inst{12-10} and op{3-0} -> Inst{6-3}.
// Zn is a 3-bit field here (Inst{9-7}). Derived records pass `?` for the op
// bits they later override with their own index field `i`.
2801class sme2_multi_vec_array_vg4_index<bit sz, bits<7> op, MatrixOperand matrix_ty,
2802                                     RegisterOperand multi_vector_ty,
2803                                     ZPRRegOp vector_ty, Operand index_ty,
2804                                     string mnemonic>
2805    : I<(outs matrix_ty:$ZAda),
2806        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2807         multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
2808         mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i",
2809        "", []>, Sched<[]> {
2810  bits<4> Zm;
2811  bits<2> Rv;
2812  bits<3> Zn;
2813  bits<3> imm3;
2814  let Inst{31-23} = 0b110000010;
2815  let Inst{22}    = sz;
2816  let Inst{21-20} = 0b01;
2817  let Inst{19-16} = Zm;
2818  let Inst{15}    = 0b1;
2819  let Inst{14-13} = Rv;
2820  let Inst{12-10} = op{6-4};
2821  let Inst{9-7}   = Zn;
2822  let Inst{6-3}   = op{3-0};
2823  let Inst{2-0}   = imm3;
2824
2825  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
2826}
2827
2828// SME2 multi-vec ternary indexed four registers 32-bit
// Produces the real instruction (NAME), a _PSEUDO record, a selection pattern
// for `intrinsic`, and an alias that accepts the syntax without "vgx4".
2829multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op,
2830                                              RegisterOperand multi_vector_ty,
2831                                              ZPRRegOp vector_ty, ValueType vt,
2832                                              SDPatternOperator intrinsic> {
2833  def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32,  multi_vector_ty,
2834                                            vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2835   bits<2> i;
2836   let Inst{11-10} = i;  // fills the two `?` op bits left open above
2837  }
2838
2839  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
2840
2841  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
2842
2843  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2844        (!cast<Instruction>(NAME) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2845        multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>;
2846}
2847
2848// SME2.1 multi-vec ternary indexed four registers 16-bit (FP8)
// Defines only the instruction and its "vgx4"-less alias; no pseudo or
// selection pattern is created here.
2849multiclass sme2p1_multi_vec_array_vg4_index_f8f16<string mnemonic, bits<3> op,
2850                                                  RegisterOperand multi_vector_ty,
2851                                                  ZPRRegOp zpr_ty> {
2852  def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16,
2853                                            multi_vector_ty, zpr_ty,
2854                                            VectorIndexH, mnemonic>{
2855    bits<3> i;
2856    let Inst{11-10} = i{2-1};  // upper index bits fill the two middle `?` op bits
2857    let Inst{3}     = i{0};    // lowest index bit fills the trailing `?` op bit
2858  }
2859
2860  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2861        (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
2862        sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
2863}
2864
2865// SME2.1 multi-vec ternary indexed four registers 16-bit
// Produces the real instruction (NAME), a _PSEUDO record, a selection pattern
// for `intrinsic`, and an alias that accepts the syntax without "vgx4".
2866multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<3> op,
2867                                                RegisterOperand multi_vector_ty,
2868                                                ZPRRegOp vector_ty, ValueType vt,
2869                                                SDPatternOperator intrinsic> {
2870  def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16,
2871                                            multi_vector_ty, vector_ty,
2872                                            VectorIndexH, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2873    bits<3> i;
2874    let Inst{11-10} = i{2-1};  // upper index bits fill the two middle `?` op bits
2875    let Inst{3}     = i{0};    // lowest index bit fills the trailing `?` op bit
2876  }
2877
2878  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexH32b_timm, SMEMatrixArray>;
2879
2880  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexH32b_timm, tileslice16>;
2881
2882  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2883        (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv,
2884        sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexH:$i), 0>;
2885}
2886
2887// SME2 multi-vec ternary indexed four registers 64-bit
// Stand-alone format: the 3-bit `op` is split into Inst{11} (op{2}) and
// Inst{4-3} (op{1-0}); the single index bit `i1` sits in Inst{10}.
2888class sme2_multi_vec_array_vg4_index_64b<bits<3> op,
2889                                         RegisterOperand multi_vector_ty,
2890                                         ZPRRegOp vector_ty,
2891                                         string mnemonic>
2892    : I<(outs MatrixOp64:$ZAda),
2893        (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2894         multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
2895        mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1",
2896        "", []>, Sched<[]> {
2897  bits<4> Zm;
2898  bits<2> Rv;
2899  bits<1> i1;
2900  bits<3> Zn;
2901  bits<3> imm3;
2902  let Inst{31-20} = 0b110000011101;
2903  let Inst{19-16} = Zm;
2904  let Inst{15}    = 0b1;
2905  let Inst{14-13} = Rv;
2906  let Inst{12}    = 0b0;
2907  let Inst{11}    = op{2};
2908  let Inst{10}    = i1;
2909  let Inst{9-7}   = Zn;
2910  let Inst{6-5}   = 0b00;
2911  let Inst{4-3}   = op{1-0};
2912  let Inst{2-0}   = imm3;
2913
2914  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
2915}
2916
// Wraps the 64-bit vgx4 index class: real instruction, _PSEUDO record,
// selection pattern for `intrinsic`, and an alias without the "vgx4" suffix.
2917multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
2918                                              RegisterOperand multi_vector_ty,
2919                                              ZPRRegOp vector_ty, ValueType vty,
2920                                              SDPatternOperator intrinsic> {
2921  def NAME : sme2_multi_vec_array_vg4_index_64b<op, multi_vector_ty, vector_ty,
2922                                                mnemonic>, SMEPseudo2Instr<NAME, 1>;
2923
2924  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>;
2925
2926  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vty, VectorIndexD32b_timm, tileslice16>;
2927
2928  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
2929        (!cast<Instruction>(NAME) MatrixOp64:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2930        multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
2931}
2932
2933// FMLAL (multiple and indexed vector, FP8 to FP16)
// Shared format for the vgx2 and vgx4 forms: `vg4` selects both Inst{15} and
// the "vgx2"/"vgx4" text in the asm string. Bits 9-6 (the Zn field) are left
// for the instantiating multiclass to fill in.
2934class sme2_fp8_fmlal_vg24_index_za16<bits<2> sz, bit vg4, bits<3> op,
2935                                          RegisterOperand multi_vector_ty, string mnemonic>
2936    : I<(outs MatrixOp16:$ZAda),
2937        (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
2938         multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2939         mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
2940         "", []>, Sched<[]> {
2941  bits<4> Zm;
2942  bits<2> Rv;
2943  bits<4> i;
2944  bits<2> imm2;
2945  let Inst{31-24} = 0b11000001;
2946  let Inst{23-22} = sz;
2947  let Inst{21-20} = 0b01;
2948  let Inst{19-16} = Zm;
2949  let Inst{15}    = vg4;
2950  let Inst{14-13} = Rv;
2951  let Inst{12}    = op{2};
2952  let Inst{11-10} = i{3-2};  // 4-bit index is split: high half here ...
2953  let Inst{5-4}   = op{1-0};
2954  let Inst{3-2}   = i{1-0};  // ... low half here
2955  let Inst{1-0}   = imm2;
2956
2957  let Uses = [FPMR, FPCR];  // implicit register reads
2958  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
2959}
2960
// Two-register instantiation of sme2_fp8_fmlal_vg24_index_za16 (vg4 = 0,
// op = 0b111): adds the 4-bit Zn field, a _PSEUDO record, a selection pattern
// and an alias without the "vgx2" suffix.
2961multiclass sme2_fp8_fmlal_index_za16_vgx2<string mnemonic, SDPatternOperator intrinsic> {
2962  def NAME : sme2_fp8_fmlal_vg24_index_za16<0b10, 0b0, 0b111, ZZ_b_mul_r, mnemonic>,  SMEPseudo2Instr<NAME, 1> {
2963    bits<4> Zn;
2964    let Inst{9-6} = Zn;
2965  }
2966  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s2range, ZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
2967
2968  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm2s2range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s2>;
2969
2970  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2971                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
2972                                            ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
2973}
2974
// Four-register instantiation of sme2_fp8_fmlal_vg24_index_za16 (vg4 = 1,
// op = 0b110): adds a 3-bit Zn field with Inst{6} fixed to zero, a _PSEUDO
// record, a selection pattern and an alias without the "vgx4" suffix.
2975multiclass sme2_fp8_fmlal_index_za16_vgx4<string mnemonic, SDPatternOperator intrinsic> {
2976  def NAME: sme2_fp8_fmlal_vg24_index_za16<0b10, 0b1, 0b110, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2977    bits<3> Zn;
2978    let Inst{9-7} = Zn;
2979    let Inst{6}   = 0b0;
2980  }
2981
2982  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s2range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
2983
2984  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm2s2range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s2>;
2985
  // NOTE(review): the alias names the offset operand $imm, whereas the
  // instruction class and the vgx2 alias call it $imm2.
2986  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
2987                 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm,
2988                                           ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
2989}
2990
2991//===----------------------------------------------------------------------===//
2992// FMLAL (single and indexed vector, FP8 to FP16)
// Single-vector source form. The 4-bit index `i` is scattered over
// Inst{15}, Inst{11-10} and Inst{3}; the 2-bit `op` over Inst{12} and Inst{4}.
2993class sme2_fp8_fmlal_index_za16<string mnemonic, bits<2> sz,bits<2> op>
2994    : I<(outs MatrixOp16:$ZAda),
2995        (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2996        mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2997        "", []>, Sched<[]> {
2998  bits<4> Zm;
2999  bits<2> Rv;
3000  bits<4> i;
3001  bits<5> Zn;
3002  bits<3> imm3;
3003  let Inst{31-24} = 0b11000001;
3004  let Inst{23-22} = sz;
3005  let Inst{21-20} = 0b00;
3006  let Inst{19-16} = Zm;
3007  let Inst{15}    = i{3};
3008  let Inst{14-13} = Rv;
3009  let Inst{12}    = op{1};
3010  let Inst{11-10} = i{2-1};
3011  let Inst{9-5}   = Zn;
3012  let Inst{4}     = op{0};
3013  let Inst{3}     = i{0};
3014  let Inst{2-0}   = imm3;
3015
3016  let Uses = [FPMR, FPCR];  // implicit register reads
3017  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3018}
3019
// Instantiates the single-vector FP8 FMLAL format with sz = 0b11, op = 0b00,
// plus its _PSEUDO record and the selection pattern for `intrinsic`.
3020multiclass sme2_fp8_fmlal_index_za16<string mnemonic, SDPatternOperator intrinsic> {
3021  def NAME : sme2_fp8_fmlal_index_za16<mnemonic, 0b11, 0b00>, SMEPseudo2Instr<NAME, 1>;
3022
3023  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm3s2range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
3024
3025  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm3s2range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange3s2>;
3026}
3027
3028// SME2 multi-vec indexed long long MLA one source 32-bit
// The 4-bit index `i` occupies Inst{15} (top bit) and Inst{12-10} (low three).
3029class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
3030    : I<(outs MatrixOp32:$ZAda),
3031        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
3032        mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
3033        "", []>, Sched<[]> {
3034  bits<4> Zm;
3035  bits<2> Rv;
3036  bits<4> i;
3037  bits<5> Zn;
3038  bits<2> imm2;
3039  let Inst{31-24} = 0b11000001;
3040  let Inst{23-22} = sz;
3041  let Inst{21-20} = 0b00;
3042  let Inst{19-16} = Zm;
3043  let Inst{15}    = i{3};
3044  let Inst{14-13} = Rv;
3045  let Inst{12-10} = i{2-0};
3046  let Inst{9-5}   = Zn;
3047  let Inst{4-2}   = op;
3048  let Inst{1-0}   = imm2;
3049
3050  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3051}
3052
// Wraps the one-source 32-bit indexed format: real instruction (with an
// optional implicit-use register list, empty by default), _PSEUDO record and
// the selection pattern for `intrinsic`.
3053multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic, list<Register> uses=[]> {
3054  def NAME : sme2_mla_ll_array_index_32b<mnemonic, sz, op>, SMEPseudo2Instr<NAME, 1> {
3055    let Uses = uses;
3056  }
3057
3058  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
3059
3060  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s4>;
3061}
3062
3063// SME2 multi-vec indexed long long MLA one source 64-bit
3064
// The 3-bit index `i` occupies Inst{15} (top bit) and Inst{11-10} (low two);
// sources are 16-bit element registers (ZPR16 / ZPR4b16).
3065class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op>
3066    : I<(outs MatrixOp64:$ZAda),
3067        (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
3068        mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
3069        "", []>, Sched<[]> {
3070  bits<4> Zm;
3071  bits<2> Rv;
3072  bits<3> i;
3073  bits<5> Zn;
3074  bits<2> imm2;
3075  let Inst{31-20} = 0b110000011000;
3076  let Inst{19-16} = Zm;
3077  let Inst{15}    = i{2};
3078  let Inst{14-13} = Rv;
3079  let Inst{12}    = 0b0;
3080  let Inst{11-10} = i{1-0};
3081  let Inst{9-5}   = Zn;
3082  let Inst{4-3}   = op;
3083  let Inst{2}     = 0b0;
3084  let Inst{1-0}   = imm2;
3085
3086  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3087}
3088
// Wraps the one-source 64-bit indexed format: real instruction, _PSEUDO
// record and the selection pattern for `intrinsic` (nxv8i16 source type).
3089multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
3090  def NAME : sme2_mla_ll_array_index_64b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>;
3091
3092  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
3093
3094  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s4>;
3095}
3096
// Shared 32-bit indexed format for the two- and four-source long-long MLA
// forms: `vg4` selects both Inst{15} and the "vgx2"/"vgx4" asm text.
// Bits 9-6 (the Zn field) are left for the instantiating multiclass.
3097class sme2_mla_ll_array_vg24_index_32b<bits<2> sz, bit vg4, bits<3> op,
3098                                       RegisterOperand vector_ty,
3099                                       string mnemonic>
3100    : I<(outs MatrixOp32:$ZAda),
3101        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3102             vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
3103        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
3104        "", []>, Sched<[]> {
3105  bits<4> Zm;
3106  bits<2> Rv;
3107  bits<4> i;
3108  bit     imm;
3109  let Inst{31-24} = 0b11000001;
3110  let Inst{23-22} = sz;
3111  let Inst{21-20} = 0b01;
3112  let Inst{19-16} = Zm;
3113  let Inst{15}    = vg4;
3114  let Inst{14-13} = Rv;
3115  let Inst{12}    = 0b0;
3116  let Inst{11-10} = i{3-2};  // 4-bit index is split: high half here ...
3117  let Inst{5-3}   = op;
3118  let Inst{2-1}   = i{1-0};  // ... low half here
3119  let Inst{0}     = imm;
3120
3121  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3122}
3123
3124// SME2 multi-vec indexed long long MLA two sources 32-bit
3125
// vg4 = 0 instantiation: adds the 4-bit Zn field, optional implicit uses,
// a _PSEUDO record, a selection pattern and an alias without "vgx2".
3126multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic, list<Register> uses=[]> {
3127  def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3128   bits<4> Zn;
3129   let Inst{9-6} = Zn;
3130   let Uses = uses;
3131  }
3132
3133  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
3134
3135  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>;
3136
3137  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
3138                 (!cast<Instruction>(NAME) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
3139}
3140
3141// SME2 multi-vec indexed long long MLA four sources 32-bit
3142
// vg4 = 1 instantiation. `op` is 4 bits wide here: op{2-0} goes to the base
// class and op{3} is placed in Inst{6}, next to the 3-bit Zn field.
3143multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<2> sz, bits<4> op, SDPatternOperator intrinsic, list<Register> uses=[]> {
3144  def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b1, op{2-0}, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3145   bits<3> Zn;
3146   let Inst{9-7} = Zn;
3147   let Inst{6}   = op{3};
3148   let Uses = uses;
3149  }
3150
3151  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
3152
3153  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>;
3154
3155  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
3156                 (!cast<Instruction>(NAME) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
3157}
// Shared 64-bit indexed format for the two- and four-source long-long MLA
// forms: `vg4` selects both Inst{15} and the "vgx2"/"vgx4" asm text.
// Bits 9-6 (the Zn field) are left for the instantiating multiclass.
3158class sme2_mla_ll_array_vg24_index_64b<bit vg4,  bits<2> op,
3159                                       RegisterOperand vector_ty,
3160                                       string mnemonic>
3161    : I<(outs MatrixOp64:$ZAda),
3162        (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3163             vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
3164        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
3165        "", []>, Sched<[]> {
3166  bits<4> Zm;
3167  bits<2> Rv;
3168  bits<3> i;
3169  bit     imm;
3170  let Inst{31-20} = 0b110000011001;
3171  let Inst{19-16} = Zm;
3172  let Inst{15}    = vg4;
3173  let Inst{14-13} = Rv;
3174  let Inst{12-11} = 0b00;
3175  let Inst{10}    = i{2};    // 3-bit index is split: top bit here ...
3176  let Inst{5}     = 0b0;
3177  let Inst{4-3}   = op;
3178  let Inst{2-1}   = i{1-0};  // ... low two bits here
3179  let Inst{0}     = imm;
3180
3181  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3182}
3183
3184// SME2 multi-vec indexed long long MLA two sources 64-bit
3185
// vg4 = 0 instantiation: adds the 4-bit Zn field, a _PSEUDO record,
// a selection pattern and an alias without the "vgx2" suffix.
3186multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
3187  def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3188    bits<4> Zn;
3189    let Inst{9-6} = Zn;
3190  }
3191
3192  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
3193
3194  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>;
3195
3196  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
3197                 (!cast<Instruction>(NAME) MatrixOp64:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
3198}
3199
3200// SME2 multi-vec indexed long long MLA four sources 64-bit
3201
// vg4 = 1 instantiation: adds a 3-bit Zn field with Inst{6} fixed to zero,
// a _PSEUDO record, a selection pattern and an alias without "vgx4".
3202multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
3203  def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r,  mnemonic>, SMEPseudo2Instr<NAME, 1> {
3204    bits<3> Zn;
3205    let Inst{9-7} = Zn;
3206    let Inst{6}   = 0b0;
3207  }
3208
3209  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
3210
3211  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>;
3212
3213  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
3214                 (!cast<Instruction>(NAME) MatrixOp64:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
3215}
3216
3217
3218// SME2 multiple and single vector long long FMA one source
3219
// Non-indexed format with one $Zn and one $Zm vector. The 5-bit `op` is split
// into Inst{22} (op{4}), Inst{20} (op{3}) and Inst{4-2} (op{2-0}).
3220class sme2_mla_ll_array_single<string mnemonic, bits<5> op,
3221                               MatrixOperand matrix_ty, ZPRRegOp vector_ty,
3222                               ZPRRegOp zpr_ty>
3223    : I<(outs matrix_ty:$ZAda),
3224        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm,
3225             vector_ty:$Zn, zpr_ty:$Zm),
3226        mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm",
3227        "", []>, Sched<[]> {
3228  bits<4> Zm;
3229  bits<2> Rv;
3230  bits<5> Zn;
3231  bits<2> imm;
3232  let Inst{31-23} = 0b110000010;
3233  let Inst{22}    = op{4}; // sz
3234  let Inst{21}    = 0b1;
3235  let Inst{20}    = op{3}; // fp8
3236  let Inst{19-16} = Zm;
3237  let Inst{15}    = 0b0;
3238  let Inst{14-13} = Rv;
3239  let Inst{12-10} = 0b001;
3240  let Inst{9-5}   = Zn;
3241  let Inst{4-2}   = op{2-0};
3242  let Inst{1-0}   = imm;
3243
3244  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3245}
3246
// Wraps the single-source format: real instruction (with an optional
// implicit-use register list, empty by default), NAME#_PSEUDO record and the
// selection pattern for `intrinsic`.
3247multiclass sme2_mla_ll_array_single<string mnemonic, bits<5> op, MatrixOperand matrix_ty, ZPRRegOp vector_ty,
3248                                    ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic, list<Register> uses=[]> {
3249  def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty, zpr_ty>, SMEPseudo2Instr<NAME, 1> {
3250    let Uses = uses;
3251  }
3252
3253  def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s4range, vector_ty, zpr_ty, SMEMatrixArray>;
3254
3255  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty, vt, tileslicerange2s4>;
3256}
3257
// Shared non-indexed format for the two- and four-source "multi and single
// vector" long-long MLA forms. op{4} selects both Inst{20} and the
// "vgx2"/"vgx4" asm text; op{5} goes to Inst{22} and op{3-0} to Inst{4-1}.
// The asm string uses a single space after "$imm," like the other
// vgx2/vgx4 formats in this file.
3258class sme2_mla_ll_array_vg24_single<bits<6> op, MatrixOperand matrix_ty,
3259                                    RegisterOperand vector_ty, ZPRRegOp zpr_ty,
3260                                    string mnemonic>
3261    : I<(outs matrix_ty:$ZAda),
3262        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3263             vector_ty:$Zn, zpr_ty:$Zm),
3264        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm",
3265        "", []>, Sched<[]> {
3266  bits<4> Zm;
3267  bits<2> Rv;
3268  bits<5> Zn;
3269  bit     imm;
3270  let Inst{31-23} = 0b110000010;
3271  let Inst{22}    = op{5}; // sz
3272  let Inst{21}    = 0b1;
3273  let Inst{20}    = op{4}; // vg4
3274  let Inst{19-16} = Zm;
3275  let Inst{15}    = 0b0;
3276  let Inst{14-13} = Rv;
3277  let Inst{12-10} = 0b000;
3278  let Inst{9-5}   = Zn;
3279  let Inst{4-1}   = op{3-0};
3280  let Inst{0}     = imm;
3281
3282  let Constraints = "$ZAda = $_ZAda";  // accumulator: output tied to the $_ZAda input
3283}
3284
3285// SME2 single-multi long long MLA two and four sources
3286
// Common part of the vg2/vg4 single multiclasses: real instruction with the
// caller's implicit-use list, NAME#_PSEUDO record, and an alias that accepts
// the syntax without the "vgx2"/"vgx4" suffix. No selection pattern here.
3287multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<6> op,
3288                                          MatrixOperand matrix_ty,
3289                                          RegisterOperand multi_vector_ty,
3290                                          ZPRRegOp zpr_ty, list<Register> uses> {
3291  def NAME: sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3292    let Uses = uses;
3293  }
3294
3295  def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm1s4range, multi_vector_ty, zpr_ty, SMEMatrixArray>;
3296
3297  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
3298                 (!cast<Instruction>(NAME) matrix_ty:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
3299}
3300
// Two-source wrapper: reuses sme2_mla_ll_array_vg24_single for the records
// and adds the VG2 selection pattern for `intrinsic`.
3301multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<6> op,
3302                                        MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
3303                                        ZPRRegOp zpr_ty, ValueType vt,SDPatternOperator intrinsic, list<Register> uses=[]> {
3304  defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty, multi_vector_ty, zpr_ty, uses>;
3305
3306  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
3307}
3308
// Four-vector (VG4) single-multi long-long MLA: same structure as the VG2
// variant, but the selection pattern is SME2_ZA_TwoOp_VG4_Multi_Single_Pat.
// `uses` defaults to an empty register list.
3309multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<6> op,
3310                                        MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
3311                                        ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic, list<Register> uses=[]> {
3312  defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty, multi_vector_ty, zpr_ty, uses>;
3313
3314  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
3315}
3316
3317// SME2 multiple vectors long long MLA two sources
3318
// Instruction format for the multi-vector long-long MLA with two source
// vectors per operand (asm syntax carries the "vgx2" marker).
//   op{4}   - size bit, placed in Inst{22}
//   op{3-0} - placed in Inst{5-2}
// $ZAda is tied to the input $_ZAda through the Constraints string.
3319class sme2_mla_ll_array_vg2_multi<bits<5> op, MatrixOperand matrix_ty,
3320                                  RegisterOperand vector_ty,string mnemonic>
3321    : I<(outs matrix_ty:$ZAda),
3322        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3323             vector_ty:$Zn, vector_ty:$Zm),
3324        mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm",
3325        "", []>, Sched<[]> {
3326  bits<4> Zm; // 4-bit field, encoded in Inst{20-17}
3327  bits<2> Rv; // 2-bit field, encoded in Inst{14-13}
3328  bits<4> Zn; // 4-bit field, encoded in Inst{9-6}
3329  bit     imm;
3330  let Inst{31-23} = 0b110000011;
3331  let Inst{22}    = op{4};  // sz
3332  let Inst{21}    = 0b1;
3333  let Inst{20-17} = Zm;
3334  let Inst{16-15} = 0b00;
3335  let Inst{14-13} = Rv;
3336  let Inst{12-10} = 0b000;
3337  let Inst{9-6}   = Zn;
3338  let Inst{5-2}   = op{3-0};
3339  let Inst{1}     = 0b0;
3340  let Inst{0}     = imm;
3341
3342  let Constraints = "$ZAda = $_ZAda";
3343}
3344
// Instantiates the vgx2 multi-vector long-long MLA:
//   * NAME     - the real instruction (Uses taken from `uses`, default []);
//   * _PSEUDO  - the companion sme2_za_array_2op_multi_multi_pseudo;
//   * the SME2_ZA_TwoOp_VG2_Multi_Multi_Pat selection pattern for `intrinsic`;
//   * an InstAlias for the operand syntax without ", vgx2" (last argument 0).
3345multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<5> op,
3346                                       MatrixOperand matrix_ty,
3347                                       RegisterOperand vector_ty, ValueType vt,
3348                                       SDPatternOperator intrinsic, list<Register> uses=[]> {
3349  def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3350    let Uses = uses;
3351  }
3352
3353  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty, SMEMatrixArray>;
3354
3355  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
3356
3357  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
3358                 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
3359}
3360
3361// SME2 multiple vectors long long MLA four sources
3362
// Instruction format for the multi-vector long-long MLA with four source
// vectors per operand (asm syntax carries the "vgx4" marker). Compared with
// the vgx2 format, Zm and Zn shrink to 3-bit fields and Inst{17-15} is 0b010.
//   op{4}   - size bit, placed in Inst{22}
//   op{3-0} - placed in Inst{5-2}
// $ZAda is tied to the input $_ZAda through the Constraints string.
3363class sme2_mla_ll_array_vg4_multi<bits<5> op,MatrixOperand matrix_ty,
3364                                  RegisterOperand vector_ty,
3365                                  string mnemonic>
3366    : I<(outs matrix_ty:$ZAda),
3367        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3368             vector_ty:$Zn, vector_ty:$Zm),
3369        mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm",
3370        "", []>, Sched<[]> {
3371  bits<3> Zm; // 3-bit field, encoded in Inst{20-18}
3372  bits<2> Rv; // 2-bit field, encoded in Inst{14-13}
3373  bits<3> Zn; // 3-bit field, encoded in Inst{9-7}
3374  bit     imm;
3375  let Inst{31-23} = 0b110000011;
3376  let Inst{22}    = op{4}; // sz
3377  let Inst{21}    = 0b1;
3378  let Inst{20-18} = Zm;
3379  let Inst{17-15} = 0b010;
3380  let Inst{14-13} = Rv;
3381  let Inst{12-10} = 0b000;
3382  let Inst{9-7}   = Zn;
3383  let Inst{6}     = 0b0;
3384  let Inst{5-2}   = op{3-0};
3385  let Inst{1}     = 0b0;
3386  let Inst{0}     = imm;
3387
3388  let Constraints = "$ZAda = $_ZAda";
3389}
3390
// Instantiates the vgx4 multi-vector long-long MLA:
//   * NAME     - the real instruction (Uses taken from `uses`, default []);
//   * _PSEUDO  - the companion sme2_za_array_2op_multi_multi_pseudo;
//   * the SME2_ZA_TwoOp_VG4_Multi_Multi_Pat selection pattern for `intrinsic`;
//   * an InstAlias for the operand syntax without ", vgx4" (last argument 0).
3391multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<5> op,
3392                                       MatrixOperand matrix_ty,
3393                                       RegisterOperand vector_ty, ValueType vt,
3394                                       SDPatternOperator intrinsic, list<Register> uses=[]> {
3395  def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3396    let Uses = uses;
3397  }
3398
3399  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty, SMEMatrixArray>;
3400
3401  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
3402
3403  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
3404                 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
3405}
3406
3407//===----------------------------------------------------------------------===//
3408// SME2 Outer Product and Accumulate
3409
// Integer outer product on a 32-bit tile (TileOp32) with ZPR16 source
// vectors. Emits the real instruction, a _PSEUDO built from
// sme_outer_product_pseudo, and an SME_ZA_Tile_TwoPred_TwoVec_Pat selection
// pattern using timm32_0_3, nxv8i1 and nxv8i16.
3410multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
3411  def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3412    bits<2> ZAda; // 2-bit tile field, encoded in Inst{1-0}
3413    let Inst{1-0} = ZAda;
3414    let Inst{2}   = 0b0;
3415  }
3416
3417  def _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
3418
3419  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv8i1, nxv8i16>;
3420}
3421
// Outer product built on sme_outer_product_widening_inst with ZPR32 source
// vectors. Emits the real instruction, a _PSEUDO built from
// sme_outer_product_pseudo, and an SME_ZA_Tile_TwoPred_TwoVec_Pat selection
// pattern using timm32_0_3, nxv4i1 and nxv4i32.
3422multiclass  sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
3423  def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1>;
3424
3425  def _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
3426
3427  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1, nxv4i32>;
3428}
3429
3430//===----------------------------------------------------------------------===//
3431// SME2 Sparse Outer Product and Accumulate
3432
// Sparse outer-product format accumulating into a 16-bit tile (TileOp16).
// Operands: $Zn (zn_ty), $Zm (zm_ty), $Zk (ZK) and a VectorIndexS32b index
// printed directly after $Zk. The five opc bits are scattered over
// Inst{24}, Inst{21}, Inst{15}, Inst{13} and Inst{3}. The tile number is a
// single bit in Inst{0}. $ZAda is tied to the input $_ZAda.
3433class sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
3434    : I<(outs TileOp16:$ZAda),
3435        (ins  TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
3436        mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
3437        "", []>,
3438      Sched<[]> {
3439  bit ZAda;
3440  bits<4> Zn;
3441  bits<5> Zm;
3442  bits<3> Zk;
3443  bits<2> imm;
3444  let Inst{31-25} = 0b1000000;
3445  let Inst{24}    = opc{4};
3446  let Inst{23-22} = 0b01;
3447  let Inst{21}    = opc{3};
3448  let Inst{20-16} = Zm;
3449  let Inst{15}    = opc{2};
3450  let Inst{14}    = 0b0;
3451  let Inst{13}    = opc{1};
3452  let Inst{12-10} = Zk;
3453  let Inst{9-6}   = Zn;
3454  let Inst{5-4}   = imm;
3455  let Inst{3}     = opc{0};
3456  let Inst{2-1}   = 0b00;
3457  let Inst{0}     = ZAda;
3458
3459  let Constraints = "$ZAda = $_ZAda";
3460}
3461
// Sparse outer-product format accumulating into a 32-bit tile (TileOp32).
// Identical bit layout to sme_tmopa_16b except that the tile number is two
// bits wide (Inst{1-0}) and only Inst{2} is a fixed zero below opc{0}.
// $ZAda is tied to the input $_ZAda.
3462class sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
3463    : I<(outs TileOp32:$ZAda),
3464        (ins  TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
3465        mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
3466        "", []>,
3467      Sched<[]> {
3468  bits<2> ZAda;
3469  bits<4> Zn;
3470  bits<5> Zm;
3471  bits<3> Zk;
3472  bits<2> imm;
3473  let Inst{31-25} = 0b1000000;
3474  let Inst{24}    = opc{4};
3475  let Inst{23-22} = 0b01;
3476  let Inst{21}    = opc{3};
3477  let Inst{20-16} = Zm;
3478  let Inst{15}    = opc{2};
3479  let Inst{14}    = 0b0;
3480  let Inst{13}    = opc{1};
3481  let Inst{12-10} = Zk;
3482  let Inst{9-6}   = Zn;
3483  let Inst{5-4}   = imm;
3484  let Inst{3}     = opc{0};
3485  let Inst{2}     = 0b0;
3486  let Inst{1-0}   = ZAda;
3487
3488  let Constraints = "$ZAda = $_ZAda";
3489}
3490
3491
3492//===----------------------------------------------------------------------===//
3493// SME2 Zero Lookup Table.
// Instruction format that writes a ZTR register ($ZT) and takes no inputs.
// The operand is printed inside literal braces. Only the low four bits
// (Inst{3-0} = opc) vary; Inst{31-4} is a fixed 28-bit pattern.
3494class sme2_zero_zt<string mnemonic, bits<4> opc>
3495    : I<(outs ZTR:$ZT), (ins ),
3496         mnemonic, "\t\\{ $ZT \\}",
3497         "", []>, Sched<[]> {
3498  let Inst{31-4} = 0b1100000001001000000000000000;
3499  let Inst{3-0}  = opc;
3500}
3501
// Emits the real zero-ZT instruction, a NAME # _PSEUDO that takes the ZTR
// register as an input and relies on a custom inserter, and a pattern that
// selects int_aarch64_sme_zero_zt (immediate mapped through imm_to_zt) to
// that pseudo.
3502multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
3503  def NAME : sme2_zero_zt<mnemonic, opc>;
3504  def NAME # _PSEUDO
3505        : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
3506    // Translated to actual instruction in AArch64ISelLowering.cpp
3507    let usesCustomInserter = 1;
3508  }
3509  def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
3510          (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
3511}
3512
3513//===----------------------------------------------------------------------===//
3514// SME2 lookup table load/store
// Lookup-table load/store format addressed by [$Rn]. opc{7} picks the
// direction:
//   opc{7} = 1 - $ZTt is an input, no outputs, mayStore is set;
//   opc{7} = 0 - $ZTt is an output, mayLoad is set.
// opc{7-2} is encoded in Inst{21-16} and opc{1-0} in Inst{1-0}.
3515class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
3516    : I<!if(opc{7}, (outs ), (outs ZTR:$ZTt)),
3517        !if(opc{7}, (ins ZTR:$ZTt, GPR64sp:$Rn), (ins GPR64sp:$Rn)),
3518        mnemonic, "\t$ZTt, [$Rn]",
3519        "", []>, Sched<[]> {
3520  bits<5> Rn;
3521  let Inst{31-22} = 0b1110000100;
3522  let Inst{21-16} = opc{7-2};
3523  let Inst{15-10} = 0b100000;
3524  let Inst{9-5}   = Rn;
3525  let Inst{4-2}   = 0b000;
3526  let Inst{1-0}   = opc{1-0};
3527
3528  let mayLoad     = !not(opc{7});
3529  let mayStore    = opc{7};
3530}
3531
3532
// Emits the real load/store instruction, a NAME # _PSEUDO taking the ZTR
// register and a GPR64sp base (expanded through a custom inserter), and a
// pattern selecting `op` (immediate mapped through imm_to_zt, plus base) to
// that pseudo.
3533multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> {
3534  def NAME : sme2_spill_fill_vector<mnemonic, opc>;
3535  def NAME # _PSEUDO
3536      : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> {
3537    // Translated to actual instruction in AArch64ISelLowering.cpp
3538    let usesCustomInserter = 1;
3539  }
3540  def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
3541            (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
3542}
3543
3544//===----------------------------------------------------------------------===//
3545// SME2 move to/from lookup table
// Move from the lookup table to a general-purpose register:
// $Rt (GPR64) is the output, $ZTt[$imm3] the source. imm3 is a 3-bit field
// taken from a uimm3s8 operand and encoded in Inst{14-12}; opc fills
// Inst{11-5}.
3546class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
3547    : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3),
3548         mnemonic, "\t$Rt, $ZTt[$imm3]",
3549         "", []>, Sched<[]> {
3550  bits<3> imm3;
3551  bits<5> Rt;
3552  let Inst{31-15} = 0b11000000010011000;
3553  let Inst{14-12} = imm3;
3554  let Inst{11-5}  = opc;
3555  let Inst{4-0}   = Rt;
3556}
3557
// Move from a general-purpose register to the lookup table:
// $ZTt (ZTR) is the output, $Rt (GPR64) the source, written at $ZTt[$imm3].
// Same field layout as sme2_movt_zt_to_scalar; only the fixed Inst{31-15}
// pattern differs.
3558class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
3559    : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
3560         mnemonic, "\t$ZTt[$imm3], $Rt",
3561         "", []>, Sched<[]> {
3562  bits<3> imm3;
3563  bits<5> Rt;
3564  let Inst{31-15} = 0b11000000010011100;
3565  let Inst{14-12} = imm3;
3566  let Inst{11-5}  = opc;
3567  let Inst{4-0}   = Rt;
3568}
3569
3570// SME2 move vector to lookup table
// Move a vector register ($Zt, ZPRAny) into the lookup table at
// $ZTt[$off2, mul vl]. off2 is a 2-bit field taken from sme_elm_idx0_3 and
// encoded in Inst{13-12}; opc fills Inst{11-5}.
3571class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
3572   : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
3573        mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
3574        "", []>, Sched<[]> {
3575  bits<5> Zt;
3576  bits<2> off2;
3577  let Inst{31-14} = 0b110000000100111100;
3578  let Inst{13-12} = off2;
3579  let Inst{11-5}  = opc;
3580  let Inst{4-0}   = Zt;
3581}
3582
// Emits the real vector-to-ZT move, a NAME # _PSEUDO (custom inserter) that
// takes the ZTR register as an explicit input, and an InstAlias for the
// short form "$ZTt, $Zt", which maps to an offset of 0 (last argument 1).
// For every listed scalable vector type two patterns are generated:
//   * intrinsic_lane - forwards the sme_elm_idx0_3 immediate as the offset;
//   * intrinsic      - uses a fixed offset of 0.
3583multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc, SDPatternOperator intrinsic_lane, SDPatternOperator intrinsic> {
3584  def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
3585  def NAME # _PSEUDO
3586      : Pseudo<(outs), (ins ZTR:$ZT, sme_elm_idx0_3:$off2, ZPRAny:$Zt), []>, Sched<[]> {
3587    let usesCustomInserter = 1;
3588  }
3589  def : InstAlias<mnemonic # "\t$ZTt, $Zt",
3590                 (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
3591
3592  foreach vt = [nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16] in {
3593    def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), vt:$zn, sme_elm_idx0_3:$imm),
3594              (!cast<Instruction>(NAME # _PSEUDO) $zt, $imm, $zn)>;
3595    def : Pat<(intrinsic (imm_to_zt untyped:$zt), vt:$zn),
3596              (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
3597  }
3598}
3599
3600//===----------------------------------------------------------------------===//
3601// SME2 lookup table expand one register
// Base format for lookup-table expansion into one destination register.
// Inputs are the table ($ZTt), an index vector ($Zn) and a lane index ($i)
// printed directly after $Zn. opc{6-2} is placed in Inst{18-14} and opc{1-0}
// in Inst{11-10}; derived classes pass '?' bits in opc and overwrite the
// corresponding Inst bits with the lane index.
3602class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
3603                             AsmVectorIndexOpnd index_ty, string mnemonic>
3604    : I<(outs vector_ty:$Zd),
3605        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
3606        mnemonic, "\t$Zd, $ZTt, $Zn$i",
3607        "", []>, Sched<[]> {
3608  bits<5> Zn;
3609  bits<5> Zd;
3610  let Inst{31-19} = 0b1100000011001;
3611  let Inst{18-14} = opc{6-2};
3612  let Inst{13-12} = sz;
3613  let Inst{11-10} = opc{1-0};
3614  let Inst{9-5}   = Zn;
3615  let Inst{4-0}   = Zd;
3616}
3617
// LUTI2, single destination register: opc{6} = 1 and a 4-bit lane index
// (VectorIndexB32b_timm) that fills the four '?' bits of opc.
3618class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
3619                              string mnemonic>
3620    : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB32b_timm, mnemonic> {
3621  bits<4> i;
3622  let Inst{17-14} = i; // overrides opc{5-2} from the base class
3623}
3624
// Emits the _B/_H/_S LUTI2 instructions (sz = 0b00/0b01/0b10) and the
// selection patterns for `intrinsic`. The index vector is always nxv16i8;
// the result type picks the instruction:
//   nxv16i8                     -> _B
//   nxv8i16, nxv8f16, nxv8bf16  -> _H
//   nxv4i32, nxv4f32            -> _S
3625multiclass sme2_luti2_vector_index<string mnemonic, SDPatternOperator intrinsic> {
3626  def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>;
3627  def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
3628  def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;
3629
3630  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3631             (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3632  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3633             (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3634  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3635             (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3636  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3637             (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3638  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3639             (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3640  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3641             (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3642}
3643
// LUTI4, single destination register: opc{6-5} = 0b01 and a 3-bit lane index
// (VectorIndexH32b_timm) that fills the three '?' bits of opc.
3644class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
3645                              string mnemonic>
3646    : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty, VectorIndexH32b_timm, mnemonic> {
3647  bits<3> i;
3648  let Inst{16-14} = i; // overrides opc{4-2} from the base class
3649}
3650
// Emits the _B/_H/_S LUTI4 instructions (sz = 0b00/0b01/0b10) and the
// selection patterns for `intrinsic`. The index vector is always nxv16i8;
// the result type picks the instruction:
//   nxv16i8                     -> _B
//   nxv8i16, nxv8f16, nxv8bf16  -> _H
//   nxv4i32, nxv4f32            -> _S
3651multiclass sme2_luti4_vector_index<string mnemonic, SDPatternOperator intrinsic> {
3652  def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>;
3653  def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
3654  def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;
3655
3656  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3657             (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3658  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3659             (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3660  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3661             (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3662  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3663             (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3664  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3665             (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3666  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3667             (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3668}
3669
3670// SME2 lookup table expand two contiguous registers
// Base format for lookup-table expansion into two contiguous registers.
// Zd is a 4-bit field in Inst{4-1} with Inst{0} fixed to 0. opc{5-2} is
// placed in Inst{18-15} and opc{1-0} in Inst{11-10}; Inst{14} is fixed to 1.
// Derived classes overwrite the '?' bits of opc with the lane index.
3671class sme2_luti_vector_vg2_index<bits<2> sz, bits<6> opc, RegisterOperand vector_ty,
3672                                 AsmVectorIndexOpnd index_ty, string mnemonic>
3673    : I<(outs vector_ty:$Zd),
3674        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
3675        mnemonic, "\t$Zd, $ZTt, $Zn$i",
3676        "", []>, Sched<[]> {
3677  bits<5> Zn;
3678  bits<4> Zd;
3679  let Inst{31-19} = 0b1100000010001;
3680  let Inst{18-15} = opc{5-2};
3681  let Inst{14}    = 0b1;
3682  let Inst{13-12} = sz;
3683  let Inst{11-10} = opc{1-0};
3684  let Inst{9-5}   = Zn;
3685  let Inst{4-1}   = Zd;
3686  let Inst{0}     = 0b0;
3687}
3688
// LUTI2, two destination registers: opc{5} = 1 and a 3-bit lane index
// (VectorIndexH) that fills the three '?' bits of opc.
3689class sme2_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
3690                                  string mnemonic>
3691    : sme2_luti_vector_vg2_index<sz, {1,?,?,?,0,0}, vector_ty, VectorIndexH, mnemonic> {
3692  bits<3> i;
3693  let Inst{17-15} = i; // overrides opc{4-2} from the base class
3694}
3695
// Emits the _B/_H/_S two-register LUTI2 instructions (sz = 0b00/0b01/0b10)
// on the ZZ_*_mul_r register operands. No selection patterns are defined
// here.
3696multiclass sme2_luti2_vector_vg2_index<string mnemonic> {
3697  def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
3698  def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
3699  def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
3700}
3701
// LUTI4, two destination registers: opc{5-4} = 0b01 and a 2-bit lane index
// (VectorIndexS) that fills the two '?' bits of opc.
3702class sme2_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
3703                                 string mnemonic>
3704    : sme2_luti_vector_vg2_index<sz, {0,1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
3705  bits<2> i;
3706  let Inst{16-15} = i; // overrides opc{3-2} from the base class
3707}
3708
// Emits the _B/_H/_S two-register LUTI4 instructions (sz = 0b00/0b01/0b10)
// on the ZZ_*_mul_r register operands. No selection patterns are defined
// here.
3709multiclass sme2_luti4_vector_vg2_index<string mnemonic> {
3710  def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
3711  def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
3712  def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
3713}
3714
3715// SME2 lookup table expand four contiguous registers
// Base format for lookup-table expansion into four contiguous registers.
// Zd is a 3-bit field in Inst{4-2} with Inst{1-0} fixed to 0b00. opc{4-2} is
// placed in Inst{18-16} and opc{1-0} in Inst{11-10}; Inst{15-14} is fixed to
// 0b10. Derived classes overwrite the '?' bits of opc with the lane index.
3716class sme2_luti_vector_vg4_index<bits<2> sz, bits<5>opc, RegisterOperand vector_ty,
3717                                 AsmVectorIndexOpnd index_ty, string mnemonic>
3718    : I<(outs vector_ty:$Zd),
3719        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
3720        mnemonic, "\t$Zd, $ZTt, $Zn$i",
3721        "", []>, Sched<[]> {
3722  bits<5> Zn;
3723  bits<3> Zd;
3724  let Inst{31-19} = 0b1100000010001;
3725  let Inst{18-16} = opc{4-2};
3726  let Inst{15-14} = 0b10;
3727  let Inst{13-12} = sz;
3728  let Inst{11-10} = opc{1-0};
3729  let Inst{9-5}   = Zn;
3730  let Inst{4-2}   = Zd;
3731  let Inst{1-0}   = 0b00;
3732}
3733
// LUTI2, four destination registers: opc{4} = 1 and a 2-bit lane index
// (VectorIndexS) that fills the two '?' bits of opc.
3734class sme2_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
3735                                  string mnemonic>
3736    : sme2_luti_vector_vg4_index<sz, {1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
3737  bits<2> i;
3738  let Inst{17-16} = i; // overrides opc{3-2} from the base class
3739}
3740
// Emits the _B/_H/_S four-register LUTI2 instructions (sz = 0b00/0b01/0b10)
// on the ZZZZ_*_mul_r register operands. No selection patterns are defined
// here.
3741multiclass sme2_luti2_vector_vg4_index<string mnemonic> {
3742  def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>;
3743  def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
3744  def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
3745}
3746
// LUTI4, four destination registers: opc{4-3} = 0b01 and a 1-bit lane index
// (VectorIndexD) that fills the single '?' bit of opc.
3747class sme2_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
3748                                  string mnemonic>
3749    : sme2_luti_vector_vg4_index<sz, {0,1,?,0,0}, vector_ty, VectorIndexD, mnemonic> {
3750  bits<1> i;
3751  let Inst{16}    = i; // overrides opc{2} from the base class
3752}
3753
// Emits only the _H and _S four-register LUTI4 instructions (sz = 0b01 and
// 0b10); no _B variant is defined by this multiclass.
3754multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
3755  def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
3756  def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
3757}
3758
3759//===----------------------------------------------------------------------===//
3760// SME2 MOV
// Base format for moving a two-register vector group into a tile slice
// $ZAd[$Rs, $imm]. `sz` goes to Inst{23-22} and `v` to Inst{15}. Zn is a
// 4-bit field in Inst{9-6}. Inst{2-0} is deliberately left unset here: each
// def that derives from this class assigns the tile number and/or immediate
// bits. $ZAd is tied to the input $_ZAd.
3761class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
3762                                           RegisterOperand tile_ty,
3763                                           Operand index_ty,
3764                                           RegisterOperand vector_ty,
3765                                           string mnemonic>
3766   : I<(outs tile_ty:$ZAd),
3767       (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn),
3768       mnemonic, "\t$ZAd[$Rs, $imm], $Zn",
3769       "", []>, Sched<[]> {
3770  bits<2> Rs;
3771  bits<4> Zn;
3772  let Inst{31-24} = 0b11000000;
3773  let Inst{23-22} = sz;
3774  let Inst{21-16} = 0b000100;
3775  let Inst{15}    = v;
3776  let Inst{14-13} = Rs;
3777  let Inst{12-10} = 0b000;
3778  let Inst{9-6}   = Zn;
3779  let Inst{5-3}   = 0b000;
3780
3781  let Constraints = "$ZAd = $_ZAd";
3782}
3783
// Defines one InstAlias for `inst` with the syntax
//   <mnemonic> $ZAd[$Rs, $imm{, <vg_acronym>}], $Zn
// The ", <vg_acronym>" part is emitted only when vg_acronym is non-empty.
// `prefer` is forwarded as the last InstAlias argument (callers in this file
// pass 1 for "mov" and 0 for "mova").
3784multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
3785                                                  RegisterOperand tile_or_array_ty,
3786                                                  RegisterOperand  rv_ty,
3787                                                  Operand index_ty,
3788                                                  RegisterOperand vector_ty,
3789                                                  string mnemonic,
3790                                                  string vg_acronym=""> {
3791  def : InstAlias<mnemonic # "\t$ZAd[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn",
3792                  (inst tile_or_array_ty:$ZAd, rv_ty:$Rs, index_ty:$imm, vector_ty:$Zn), prefer>;
3793
3794}
3795
3796// SME2 move vector to tile, two registers
// Expands to the four element-size variants (_B, _H, _S, _D) of the
// two-register vector-to-tile move. `v` selects the TileVectorOpV* operand
// classes when set and the TileVectorOpH* ones otherwise. As the element size
// grows, bits in Inst{2-0} move from the slice immediate to the tile number:
//   _B: imm[2:0]          _H: ZAd[0], imm[1:0]
//   _S: ZAd[1:0], imm[0]  _D: ZAd[2:0]
// For every variant the multiclass also emits a pseudo, the selection
// patterns for `intrinsic`, a preferred "mov" alias and a non-preferred
// "mova" alias. Each alias is defined exactly once.
3797multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
3798
3799  def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v,
3800                                                !if(v, TileVectorOpV8,
3801                                                       TileVectorOpH8),
3802                                                uimm3s2range,  ZZ_b_mul_r,
3803                                                mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
3804    bits<3> imm;
3805    let Inst{2-0} = imm;
3806  }
3807
3808  def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v,
3809                                                !if(v, TileVectorOpV16,
3810                                                       TileVectorOpH16),
3811                                                uimm2s2range, ZZ_h_mul_r,
3812                                                mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
3813    bits<1> ZAd;
3814    bits<2> imm;
3815    let Inst{2}   = ZAd;
3816    let Inst{1-0} = imm;
3817  }
3818
3819  def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v,
3820                                                !if(v, TileVectorOpV32,
3821                                                       TileVectorOpH32),
3822                                                 uimm1s2range, ZZ_s_mul_r,
3823                                                 mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
3824    bits<2> ZAd;
3825    bits<1> imm;
3826    let Inst{2-1} = ZAd;
3827    let Inst{0}   = imm;
3828  }
3829
3830  def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v,
3831                                                !if(v, TileVectorOpV64,
3832                                                       TileVectorOpH64),
3833                                                uimm0s2range, ZZ_d_mul_r,
3834                                                mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
3835    bits<3> ZAd;
3836    let Inst{2-0} = ZAd;
3837  }
3838
  // Pseudo instructions, one per element size.
3839  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
3840  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
3841  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
3842  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
3843
  // Selection patterns: integer and floating-point types of the same element
  // width share one instruction.
3844  def : SME2_Tile_VG2_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm3s2range, tileslicerange3s2>;
3845  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm2s2range, tileslicerange2s2>;
3846  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm2s2range, tileslicerange2s2>;
3847  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm2s2range, tileslicerange2s2>;
3848  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm1s2range, tileslicerange1s2>;
3849  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm1s2range, tileslicerange1s2>;
3850  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s2range, tileslicerange0s2>;
3851  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s2range, tileslicerange0s2>;
3852
  // Preferred "mov" aliases (prefer = 1).
3853  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
3854                                                !if(v, TileVectorOpV8,
3855                                                       TileVectorOpH8),
3856                                                MatrixIndexGPR32Op12_15,
3857                                                uimm3s2range,  ZZ_b_mul_r,
3858                                                "mov">;
3859  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
3860                                                !if(v, TileVectorOpV16,
3861                                                       TileVectorOpH16),
3862                                                MatrixIndexGPR32Op12_15,
3863                                                uimm2s2range,  ZZ_h_mul_r,
3864                                                "mov">;
3865  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
3866                                                !if(v, TileVectorOpV32,
3867                                                       TileVectorOpH32),
3868                                                MatrixIndexGPR32Op12_15,
3869                                                uimm1s2range,  ZZ_s_mul_r,
3870                                                "mov">;
3871  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
3872                                                !if(v, TileVectorOpV64,
3873                                                       TileVectorOpH64),
3874                                                MatrixIndexGPR32Op12_15,
3875                                                uimm0s2range,  ZZ_d_mul_r,
3876                                                "mov">;
3877
  // Non-preferred "mova" aliases (prefer = 0). This group used to appear
  // twice verbatim; the redundant second copy has been dropped.
3878  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
3879                                                !if(v, TileVectorOpV8,
3880                                                       TileVectorOpH8),
3881                                                MatrixIndexGPR32Op12_15,
3882                                                uimm3s2range,  ZZ_b_mul_r,
3883                                                "mova">;
3884  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
3885                                                !if(v, TileVectorOpV16,
3886                                                       TileVectorOpH16),
3887                                                MatrixIndexGPR32Op12_15,
3888                                                uimm2s2range,  ZZ_h_mul_r,
3889                                                "mova">;
3890  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
3891                                                !if(v, TileVectorOpV32,
3892                                                       TileVectorOpH32),
3893                                                MatrixIndexGPR32Op12_15,
3894                                                uimm1s2range,  ZZ_s_mul_r,
3895                                                "mova">;
3896  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
3897                                                !if(v, TileVectorOpV64,
3898                                                       TileVectorOpH64),
3899                                                MatrixIndexGPR32Op12_15,
3900                                                uimm0s2range,  ZZ_d_mul_r,
3901                                                "mova">;
3927}
3928
// Instantiates sme2_mova_vec_to_tile_vg2_multi_base twice: _H with v = 0 and
// intrinsic int_h, and _V with v = 1 and intrinsic int_v.
3929multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic,
3930                                           SDPatternOperator int_h, SDPatternOperator int_v>{
3931 defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>;
3932 defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>;
3933}
3934
// Base format for moving a four-register vector group into a tile slice
// $ZAd[$Rs, $imm]. Differs from the two-register format in that
// Inst{12-10} is 0b001, Zn is a 3-bit field in Inst{9-7}, and Inst{2-0} is
// initialised from `op`. `op` may contain '?' bits that a derived def then
// overwrites with the tile number and/or immediate. $ZAd is tied to the
// input $_ZAd.
3935class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
3936                                           RegisterOperand tile_ty,
3937                                           Operand index_ty,
3938                                           RegisterOperand vector_ty,
3939                                           string mnemonic>
3940   : I<(outs tile_ty:$ZAd),
3941       (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
3942            vector_ty:$Zn),
3943       mnemonic,
3944       "\t$ZAd[$Rs, $imm], $Zn",
3945       "", []>, Sched<[]> {
3946  bits<2> Rs;
3947  bits<3> Zn;
3948  let Inst{31-24} = 0b11000000;
3949  let Inst{23-22} = sz;
3950  let Inst{21-16} = 0b000100;
3951  let Inst{15}    = v;
3952  let Inst{14-13} = Rs;
3953  let Inst{12-10} = 0b001;
3954  let Inst{9-7}   = Zn;
3955  let Inst{6-3}   = 0b0000;
3956  let Inst{2-0}   = op;
3957  let Constraints = "$ZAd = $_ZAd";
3958}
3959
3960// SME2 move vector to tile, four registers
// For one value of `v`, defines the _B/_H/_S/_D instructions, their
// _PSEUDO companions, the selection patterns for `intrinsic`, and the
// "mov"/"mova" assembly aliases.
//   v         - forwarded to Inst{15}; also picks the TileVectorOpV* (v = 1)
//               or TileVectorOpH* (v = 0) tile operand.
//   mnemonic  - mnemonic of the real instructions.
//   intrinsic - operator matched by the SME2_Tile_VG4_Multi_Pat patterns.
3961multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
3962
      // The `?` bits of `op` are filled in per element size below: _B uses
      // both low bits for imm, _H splits them between ZAd and imm, _S uses
      // both for ZAd, and _D uses all three op bits for ZAd.
3963  def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?},
3964                                                !if(v, TileVectorOpV8,
3965                                                       TileVectorOpH8),
3966                                                uimm2s4range, ZZZZ_b_mul_r,
3967                                                mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
3968    bits<2> imm;
3969    let Inst{1-0} = imm;
3970  }
3971
3972  def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, {0,?,?},
3973                                                !if(v, TileVectorOpV16,
3974                                                       TileVectorOpH16),
3975                                                uimm1s4range, ZZZZ_h_mul_r,
3976                                                mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
3977    bits<1> ZAd;
3978    bits<1> imm;
3979    let Inst{1}   = ZAd;
3980    let Inst{0}   = imm;
3981  }
3982
3983  def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, {0,?,?},
3984                                                !if(v, TileVectorOpV32,
3985                                                       TileVectorOpH32),
3986                                                 uimm0s4range, ZZZZ_s_mul_r,
3987                                                 mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
3988    bits<2> ZAd;
3989    let Inst{1-0} = ZAd;
3990  }
3991
3992  def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, {?,?,?},
3993                                                !if(v, TileVectorOpV64,
3994                                                       TileVectorOpH64),
3995                                                uimm0s4range, ZZZZ_d_mul_r,
3996                                                mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
3997    bits<3> ZAd;
3998    let Inst{2-0} = ZAd;
3999  }
4000
      // Pseudo companions. Each is given the same NAME # _<size> string that
      // the matching real instruction passes to SMEPseudo2Instr; the pseudo
      // class itself is defined outside this multiclass.
4001  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4002  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4003  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4004  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4005
      // Selection patterns for `intrinsic`: one per supported vector type,
      // grouped by the element-size variant they select.
4006  def : SME2_Tile_VG4_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm2s4range, tileslicerange2s4>;
4007  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm1s4range, tileslicerange1s4>;
4008  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm1s4range, tileslicerange1s4>;
4009  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm1s4range, tileslicerange1s4>;
4010  def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm0s4range, tileslicerange0s4>;
4011  def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm0s4range, tileslicerange0s4>;
4012  def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s4range, tileslicerange0s4>;
4013  def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s4range, tileslicerange0s4>;
4014
      // "mov" aliases, first argument 1.
      // NOTE(review): the first argument is presumably the InstAlias emit
      // priority (non-zero = used when printing), as in the sibling
      // sme2_mova_tile_or_array_to_vec_aliases - confirm against the
      // definition of sme2_mova_vec_to_tile_or_array_aliases.
4015  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
4016                                                !if(v, TileVectorOpV8,
4017                                                       TileVectorOpH8),
4018                                                MatrixIndexGPR32Op12_15,
4019                                                uimm2s4range, ZZZZ_b_mul_r,
4020                                                "mov">;
4021  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
4022                                                !if(v, TileVectorOpV16,
4023                                                       TileVectorOpH16),
4024                                                MatrixIndexGPR32Op12_15,
4025                                                uimm1s4range, ZZZZ_h_mul_r,
4026                                                "mov">;
4027  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
4028                                                !if(v, TileVectorOpV32,
4029                                                       TileVectorOpH32),
4030                                                MatrixIndexGPR32Op12_15,
4031                                                uimm0s4range, ZZZZ_s_mul_r,
4032                                                "mov">;
4033  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
4034                                                !if(v, TileVectorOpV64,
4035                                                       TileVectorOpH64),
4036                                                MatrixIndexGPR32Op12_15,
4037                                                uimm0s4range, ZZZZ_d_mul_r,
4038                                                "mov">;
4039
      // "mova" aliases, first argument 0, with the same operand types as the
      // instructions themselves.
4040  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
4041                                                !if(v, TileVectorOpV8,
4042                                                       TileVectorOpH8),
4043                                                MatrixIndexGPR32Op12_15,
4044                                                uimm2s4range, ZZZZ_b_mul_r,
4045                                                "mova">;
4046  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
4047                                                !if(v, TileVectorOpV16,
4048                                                       TileVectorOpH16),
4049                                                MatrixIndexGPR32Op12_15,
4050                                                uimm1s4range, ZZZZ_h_mul_r,
4051                                                "mova">;
4052  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
4053                                                !if(v, TileVectorOpV32,
4054                                                       TileVectorOpH32),
4055                                                MatrixIndexGPR32Op12_15,
4056                                                uimm0s4range, ZZZZ_s_mul_r,
4057                                                "mova">;
4058  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
4059                                                !if(v, TileVectorOpV64,
4060                                                       TileVectorOpH64),
4061                                                MatrixIndexGPR32Op12_15,
4062                                                uimm0s4range, ZZZZ_d_mul_r,
4063                                                "mova">;
4064
4065}
4066
// Top-level wrapper for the four-register vector-to-tile move.
// Instantiates the sme2_mova_vec_to_tile_vg4_multi_base multiclass twice:
//   _H : v = 0b0, selected through intrinsic int_h
//   _V : v = 0b1, selected through intrinsic int_v
// `mnemonic` is forwarded unchanged to both instantiations.
4067multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic,
4068                                           SDPatternOperator int_h, SDPatternOperator int_v>{
4069 defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>;
4070 defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>;
4071}
4072
4073// SME Move into Array
// Shared instruction format for the two- and four-register
// vector-to-array moves.
//   op         - placed in Inst{10-6}; derived records overwrite the bits
//                they pass as `?` with the Zn register field.
//   array_ty   - operand type of the destination array ($ZAd, tied $_ZAd).
//   vector_ty  - operand type of the source vector group ($Zn).
//   mnemonic   - assembly mnemonic.
//   vg_acronym - text printed after the immediate inside the brackets
//                ("$ZAd[$Rs, $imm, <vg_acronym>], $Zn"). It defaults to "",
//                but the separating ", " is emitted unconditionally, so
//                instantiations are expected to pass a non-empty value.
4074class sme2_mova_vec_to_array_vg24_multi< bits<5> op, RegisterOperand array_ty,
4075                                        RegisterOperand vector_ty,
4076                                        string mnemonic,
4077                                        string vg_acronym="">
4078   : I<(outs array_ty:$ZAd),
4079       (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm,
4080            vector_ty:$Zn),
4081       mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn",
4082       "", []>, Sched<[]> {
      // Rs is encoded in 2 bits (Inst{14-13}), imm in 3 bits (Inst{2-0}).
4083  bits<2> Rs;
4084  bits<3> imm;
4085  let Inst{31-15} = 0b11000000000001000;
4086  let Inst{14-13} = Rs;
4087  let Inst{12-11} = 0b01;
4088  let Inst{10-6}  = op;
4089  let Inst{5-3}   = 0b000;
4090  let Inst{2-0}   = imm;
4091
      // The array is both read and written: tie the output to $_ZAd.
4092  let Constraints = "$ZAd = $_ZAd";
4093}
4094
4095// MOVA (vector to array, two registers)
// Defines the instruction, its _PSEUDO companion, the selection patterns
// for `intrinsic`, and the "mova"/"mov" aliases.
// The real instruction is declared with MatrixOp64 / ZZ_d_mul_r and the
// "vgx2" acronym; the aliases add the other element-size spellings and the
// forms without the acronym.
4096multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator intrinsic> {
      // op = {0,?,?,?,?}: the four `?` bits (Inst{9-6}) take the 4-bit Zn.
4097  def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
4098                                               ZZ_d_mul_r, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1> {
4099   bits<4> Zn;
4100   let Inst{9-6} = Zn;
4101  }
4102
4103  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
4104
      // One selection pattern per supported vector type; all of them select
      // the single instruction defined above.
4105  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8,  sme_elm_idx0_7, tileslice16>;
4106  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16,  sme_elm_idx0_7, tileslice16>;
4107  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16,  sme_elm_idx0_7, tileslice16>;
4108  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
4109  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32,  sme_elm_idx0_7, tileslice16>;
4110  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32,  sme_elm_idx0_7, tileslice16>;
4111  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64,  sme_elm_idx0_7, tileslice16>;
4112  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64,  sme_elm_idx0_7, tileslice16>;
4113
      // "mova" without a vg acronym, for b/h/s/d (first argument 0).
4114  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4115                                                MatrixOp8,
4116                                                MatrixIndexGPR32Op8_11,
4117                                                sme_elm_idx0_7, ZZ_b_mul_r,
4118                                                "mova">;
4119  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4120                                                MatrixOp16,
4121                                                MatrixIndexGPR32Op8_11,
4122                                                sme_elm_idx0_7, ZZ_h_mul_r,
4123                                                "mova">;
4124  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4125                                                MatrixOp32,
4126                                                MatrixIndexGPR32Op8_11,
4127                                                sme_elm_idx0_7, ZZ_s_mul_r,
4128                                                "mova">;
4129  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4130                                                MatrixOp64,
4131                                                MatrixIndexGPR32Op8_11,
4132                                                sme_elm_idx0_7, ZZ_d_mul_r,
4133                                                "mova">;
4134
      // "mova ... vgx2" for b/h/s only; the MatrixOp64 / ZZ_d_mul_r operand
      // combination with "vgx2" is the one used by the instruction def above.
4135  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4136                                                MatrixOp8,
4137                                                MatrixIndexGPR32Op8_11,
4138                                                sme_elm_idx0_7, ZZ_b_mul_r,
4139                                                "mova", "vgx2">;
4140  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4141                                                MatrixOp16,
4142                                                MatrixIndexGPR32Op8_11,
4143                                                sme_elm_idx0_7, ZZ_h_mul_r,
4144                                                "mova", "vgx2">;
4145  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4146                                                MatrixOp32,
4147                                                MatrixIndexGPR32Op8_11,
4148                                                sme_elm_idx0_7, ZZ_s_mul_r,
4149                                                "mova", "vgx2">;
4150
      // "mov" without a vg acronym, for b/h/s/d (first argument 0).
4151  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4152                                                MatrixOp8,
4153                                                MatrixIndexGPR32Op8_11,
4154                                                sme_elm_idx0_7, ZZ_b_mul_r,
4155                                                "mov">;
4156  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4157                                                MatrixOp16,
4158                                                MatrixIndexGPR32Op8_11,
4159                                                sme_elm_idx0_7, ZZ_h_mul_r,
4160                                                "mov">;
4161  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4162                                                MatrixOp32,
4163                                                MatrixIndexGPR32Op8_11,
4164                                                sme_elm_idx0_7, ZZ_s_mul_r,
4165                                                "mov">;
4166  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4167                                                MatrixOp64,
4168                                                MatrixIndexGPR32Op8_11,
4169                                                sme_elm_idx0_7, ZZ_d_mul_r,
4170                                                "mov">;
4171
      // "mov ... vgx2" for b/h/s/d. Only the final .d form passes 1 as the
      // first argument.
      // NOTE(review): presumably that makes "mov ... za.d[..., vgx2]" the
      // spelling used when printing - confirm against the definition of
      // sme2_mova_vec_to_tile_or_array_aliases.
4172  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4173                                                MatrixOp8,
4174                                                MatrixIndexGPR32Op8_11,
4175                                                sme_elm_idx0_7, ZZ_b_mul_r,
4176                                                "mov", "vgx2">;
4177  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4178                                                MatrixOp16,
4179                                                MatrixIndexGPR32Op8_11,
4180                                                sme_elm_idx0_7, ZZ_h_mul_r,
4181                                                "mov", "vgx2">;
4182  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4183                                                MatrixOp32,
4184                                                MatrixIndexGPR32Op8_11,
4185                                                sme_elm_idx0_7, ZZ_s_mul_r,
4186                                                "mov", "vgx2">;
4187  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
4188                                                MatrixOp64,
4189                                                MatrixIndexGPR32Op8_11,
4190                                                sme_elm_idx0_7, ZZ_d_mul_r,
4191                                                "mov", "vgx2">;
4192}
4193
4194// MOVA (vector to array, four registers)
// Defines the instruction, its _PSEUDO companion, the selection patterns
// for `intrinsic`, and the "mova"/"mov" aliases.
// The real instruction is declared with MatrixOp64 / ZZZZ_d_mul_r and the
// "vgx4" acronym; the aliases add the other element-size spellings and the
// forms without the acronym.
4195multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator intrinsic> {
      // op = {1,?,?,?,0}: Inst{10} = 1 and Inst{6} = 0 are fixed, and the
      // three `?` bits (Inst{9-7}) take the 3-bit Zn.
4196  def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64,
4197                                               ZZZZ_d_mul_r, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
4198    bits<3> Zn;
4199    let Inst{9-7} = Zn;
4200  }
4201
4202  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
4203
      // One selection pattern per supported vector type; all of them select
      // the single instruction defined above.
4204  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8,  sme_elm_idx0_7, tileslice16>;
4205  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16,  sme_elm_idx0_7, tileslice16>;
4206  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16,  sme_elm_idx0_7, tileslice16>;
4207  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
4208  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32,  sme_elm_idx0_7, tileslice16>;
4209  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32,  sme_elm_idx0_7, tileslice16>;
4210  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64,  sme_elm_idx0_7, tileslice16>;
4211  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64,  sme_elm_idx0_7, tileslice16>;
4212
      // "mova" without a vg acronym, for b/h/s/d (first argument 0).
4213  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4214                                                MatrixOp8,
4215                                                MatrixIndexGPR32Op8_11,
4216                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
4217                                                "mova">;
4218  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4219                                                MatrixOp16,
4220                                                MatrixIndexGPR32Op8_11,
4221                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
4222                                                "mova">;
4223  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4224                                                MatrixOp32,
4225                                                MatrixIndexGPR32Op8_11,
4226                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
4227                                                "mova">;
4228  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4229                                                MatrixOp64,
4230                                                MatrixIndexGPR32Op8_11,
4231                                                sme_elm_idx0_7, ZZZZ_d_mul_r,
4232                                                "mova">;
4233
      // "mova ... vgx4" for b/h/s only; the MatrixOp64 / ZZZZ_d_mul_r operand
      // combination with "vgx4" is the one used by the instruction def above.
4234  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4235                                                MatrixOp8,
4236                                                MatrixIndexGPR32Op8_11,
4237                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
4238                                                "mova", "vgx4">;
4239  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4240                                                MatrixOp16,
4241                                                MatrixIndexGPR32Op8_11,
4242                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
4243                                                "mova", "vgx4">;
4244  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4245                                                MatrixOp32,
4246                                                MatrixIndexGPR32Op8_11,
4247                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
4248                                                "mova", "vgx4">;
4249
      // "mov" without a vg acronym, for b/h/s/d (first argument 0).
4250  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4251                                                MatrixOp8,
4252                                                MatrixIndexGPR32Op8_11,
4253                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
4254                                                "mov">;
4255  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4256                                                MatrixOp16,
4257                                                MatrixIndexGPR32Op8_11,
4258                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
4259                                                "mov">;
4260  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4261                                                MatrixOp32,
4262                                                MatrixIndexGPR32Op8_11,
4263                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
4264                                                "mov">;
4265  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4266                                                MatrixOp64,
4267                                                MatrixIndexGPR32Op8_11,
4268                                                sme_elm_idx0_7, ZZZZ_d_mul_r,
4269                                                "mov">;
4270
      // "mov ... vgx4" for b/h/s/d. Only the final .d form passes 1 as the
      // first argument.
      // NOTE(review): presumably that makes "mov ... za.d[..., vgx4]" the
      // spelling used when printing - confirm against the definition of
      // sme2_mova_vec_to_tile_or_array_aliases.
4271  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4272                                                MatrixOp8,
4273                                                MatrixIndexGPR32Op8_11,
4274                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
4275                                                "mov", "vgx4">;
4276  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4277                                                MatrixOp16,
4278                                                MatrixIndexGPR32Op8_11,
4279                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
4280                                                "mov", "vgx4">;
4281  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
4282                                                MatrixOp32,
4283                                                MatrixIndexGPR32Op8_11,
4284                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
4285                                                "mov", "vgx4">;
4286  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
4287                                                MatrixOp64,
4288                                                MatrixIndexGPR32Op8_11,
4289                                                sme_elm_idx0_7, ZZZZ_d_mul_r,
4290                                                "mov", "vgx4">;
4291
4292}
4293
// Instruction format for the two-register tile-to-vector move.
//   sz        - placed in Inst{23-22}.
//   v         - placed in Inst{15}.
//   op        - placed in Inst{10-8}. When op{1} is set the instruction also
//               has a tile output ($_ZAn) tied to the $ZAn input; otherwise
//               the vector group $Zd is the only output.
//   vector_ty - operand type of the destination vector group ($Zd).
//   tile_ty   - operand type of the source tile ($ZAn).
//   index_ty  - operand type of the $imm slice offset.
//   mnemonic  - assembly mnemonic; operands print as "$Zd, $ZAn[$Rs, $imm]".
// Inst{7-5} is not assigned here; records deriving from this class fill it
// with the tile number and/or immediate.
4294class sme2_mova_tile_to_vec_vg2_multi_base<bits<2> sz, bit v, bits<3> op,
4295                                           RegisterOperand vector_ty,
4296                                           RegisterOperand tile_ty,
4297                                           Operand index_ty,
4298                                           string mnemonic>
4299   : I<!if(op{1}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
4300       (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
4301       mnemonic,
4302       "\t$Zd, $ZAn[$Rs, $imm]",
4303       "", []>, Sched<[]> {
      // Zd is encoded in 4 bits (Inst{4-1}); Inst{0} is fixed to 0.
4304  bits<4> Zd;
4305  bits<2> Rs;
4306  let Inst{31-24} = 0b11000000;
4307  let Inst{23-22} = sz;
4308  let Inst{21-16} = 0b000110;
4309  let Inst{15}    = v;
4310  let Inst{14-13} = Rs;
4311  let Inst{12-11} = 0b00;
4312  let Inst{10-8}  = op;
4313  let Inst{4-1}   = Zd;
4314  let Inst{0}     = 0b0;
4315
      // Only the op{1} form has the extra tile output, so only it is tied.
4316  let Constraints = !if(op{1}, "$ZAn = $_ZAn", "");
4317}
4318
// Emits one anonymous InstAlias for a tile/array-to-vector move.
//   op               - forwarded as the third InstAlias argument (the alias
//                      emit priority in LLVM's InstAlias class).
//   inst             - instruction the alias resolves to.
//   vector_ty        - operand type of $Zd.
//   tile_or_array_ty - operand type of $ZAn.
//   rv_ty            - operand type of the $Rs slice index register.
//   index_ty         - operand type of $imm.
//   mnemonic         - alias mnemonic.
//   vg_acronym       - optional; when non-empty, ", <vg_acronym>" is appended
//                      after $imm inside the brackets.
4319multiclass sme2_mova_tile_or_array_to_vec_aliases<int op, Instruction inst,
4320                                                  RegisterOperand vector_ty,
4321                                                  RegisterOperand tile_or_array_ty,
4322                                                  RegisterOperand rv_ty,
4323                                                  Operand index_ty,
4324                                                  string mnemonic,
4325                                                  string vg_acronym=""> {
4326def : InstAlias<mnemonic # "\t$Zd, $ZAn[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
4327                  (inst vector_ty:$Zd, tile_or_array_ty:$ZAn, rv_ty:$Rs, index_ty:$imm), op>;
4328
4329}
4330
// For one value of `v`, defines the _B/_H/_S/_D two-register
// tile-to-vector instructions and their assembly aliases.
//   v        - forwarded to Inst{15}; also picks the TileVectorOpV* (v = 1)
//              or TileVectorOpH* (v = 0) tile operand.
//   opc      - forwarded as `op` to the instruction class (Inst{10-8}).
//   mnemonic - mnemonic of the real instructions.
4331multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemonic> {
4332
      // Inst{7-5} is shared between the tile number and the immediate:
      // _B uses all three bits for imm, _H 1 + 2, _S 2 + 1, and _D uses all
      // three bits for the tile number.
4333  def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r,
4334                                                !if(v, TileVectorOpV8,
4335                                                       TileVectorOpH8),
4336                                                 uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
4337    bits<3> imm;
4338    let Inst{7-5} = imm;
4339  }
4340
4341  def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r,
4342                                                !if(v, TileVectorOpV16,
4343                                                       TileVectorOpH16),
4344                                                 uimm2s2range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
4345    bits<1> ZAn;
4346    bits<2> imm;
4347    let Inst{7}   = ZAn;
4348    let Inst{6-5} = imm;
4349  }
4350
4351  def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r,
4352                                                !if(v, TileVectorOpV32,
4353                                                       TileVectorOpH32),
4354                                                 uimm1s2range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
4355    bits<2> ZAn;
4356    bits<1> imm;
4357    let Inst{7-6} = ZAn;
4358    let Inst{5}   = imm;
4359  }
4360
4361  def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r,
4362                                                !if(v, TileVectorOpV64,
4363                                                       TileVectorOpH64),
4364                                                uimm0s2range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
4365    bits<3> ZAn;
4366    let Inst{7-5} = ZAn;
4367  }
4368
      // The "mov" aliases (priority argument 1) are only generated when the
      // instruction mnemonic is "mova"; other mnemonics get no "mov" form.
4369  if !eq(mnemonic, "mova") then {
4370  defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _B),
4371                                                ZZ_b_mul_r,
4372                                               !if(v, TileVectorOpV8,
4373                                                      TileVectorOpH8),
4374                                                MatrixIndexGPR32Op12_15,
4375                                                uimm3s2range, "mov">;
4376  defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _H),
4377                                                ZZ_h_mul_r,
4378                                                !if(v, TileVectorOpV16,
4379                                                       TileVectorOpH16),
4380                                                MatrixIndexGPR32Op12_15,
4381                                                uimm2s2range, "mov">;
4382  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
4383                                                ZZ_s_mul_r,
4384                                                !if(v, TileVectorOpV32,
4385                                                       TileVectorOpH32),
4386                                                MatrixIndexGPR32Op12_15,
4387                                                uimm1s2range, "mov">;
4388  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
4389                                                ZZ_d_mul_r,
4390                                                !if(v, TileVectorOpV64,
4391                                                       TileVectorOpH64),
4392                                                MatrixIndexGPR32Op12_15,
4393                                                uimm0s2range, "mov">;
4394  }
4395
      // Aliases under the instruction's own mnemonic (priority argument 0),
      // generated for every mnemonic.
4396  defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _B),
4397                                                ZZ_b_mul_r,
4398                                               !if(v, TileVectorOpV8,
4399                                                      TileVectorOpH8),
4400                                                MatrixIndexGPR32Op12_15,
4401                                                uimm3s2range, mnemonic>;
4402  defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _H),
4403                                                ZZ_h_mul_r,
4404                                                !if(v, TileVectorOpV16,
4405                                                       TileVectorOpH16),
4406                                                MatrixIndexGPR32Op12_15,
4407                                                uimm2s2range, mnemonic>;
4408  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
4409                                                ZZ_s_mul_r,
4410                                                !if(v, TileVectorOpV32,
4411                                                       TileVectorOpH32),
4412                                                MatrixIndexGPR32Op12_15,
4413                                                uimm1s2range, mnemonic>;
4414  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
4415                                                ZZ_d_mul_r,
4416                                                !if(v, TileVectorOpV64,
4417                                                       TileVectorOpH64),
4418                                                MatrixIndexGPR32Op12_15,
4419                                                uimm0s2range, mnemonic>;
4420
4421}
4422
4423// SME2 move tile to vector, two registers
// Instantiates sme2_mova_tile_to_vec_vg2_multi_inst with opc = 0b000 for
// both values of v: _H (v = 0b0) and _V (v = 0b1). With op{1} clear, the
// instruction class declares no tile output, so the tile is an input only.
4424multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
4425 defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>;
4426 defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
4427}
4428
4429
4430// SME2p1 move tile to vector and zero tile, two registers
// Instantiates sme2_mova_tile_to_vec_vg2_multi_inst with opc = 0b010 for
// both values of v: _H (v = 0b0) and _V (v = 0b1). With op{1} set, the
// instruction class adds a tile output tied to the tile input.
// Also defines one _PSEUDO record per generated instruction (_H/_V crossed
// with _B/_H/_S/_D), each naming the instruction it accompanies.
4431multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
4432 defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
4433 defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
4434
4435
     // Pseudos for the v = 0 (_H) instructions.
4436 def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4437 def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4438 def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4439 def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4440
     // Pseudos for the v = 1 (_V) instructions.
4441 def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4442 def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4443 def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4444 def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4445}
4446
// Instruction format for the four-register tile-to-vector move.
//   sz        - placed in Inst{23-22}.
//   v         - placed in Inst{15}.
//   op        - placed in Inst{10-5}; derived records overwrite the bits
//               they pass as `?`. When op{4} is set the instruction also has
//               a tile output ($_ZAn) tied to the $ZAn input; otherwise the
//               vector group $Zd is the only output.
//   vector_ty - operand type of the destination vector group ($Zd).
//   tile_ty   - operand type of the source tile ($ZAn).
//   index_ty  - operand type of the $imm slice offset.
//   mnemonic  - assembly mnemonic; operands print as "$Zd, $ZAn[$Rs, $imm]".
4447class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
4448                                           RegisterOperand vector_ty,
4449                                           RegisterOperand tile_ty,
4450                                           Operand index_ty,
4451                                           string mnemonic>
4452   : I<!if(op{4}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
4453       (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
4454       mnemonic,
4455       "\t$Zd, $ZAn[$Rs, $imm]",
4456       "", []>, Sched<[]> {
      // Zd is encoded in 3 bits (Inst{4-2}); Inst{1-0} is fixed to 0.
4457  bits<3> Zd;
4458  bits<2> Rs;
4459  let Inst{31-24} = 0b11000000;
4460  let Inst{23-22} = sz;
4461  let Inst{21-16} = 0b000110;
4462  let Inst{15}    = v;
4463  let Inst{14-13} = Rs;
4464  let Inst{12-11} = 0b00;
4465  let Inst{10-5}  = op{5-0};
4466  let Inst{4-2}   = Zd;
4467  let Inst{1-0}   = 0b00;
4468
      // Only the op{4} form has the extra tile output, so only it is tied.
4469  let Constraints = !if(op{4}, "$ZAn = $_ZAn", "");
4470}
4471
// Instantiates the four-register tile-to-vector move for each element size
// (_B, _H, _S, _D) and attaches the assembly aliases for each of them.
//   v        - forwarded to Inst{15}; also picks the TileVectorOpV* operand
//              when set and the TileVectorOpH* operand when clear.
//   opc      - upper three bits of the 6-bit `op` field, i.e. Inst{10-8}.
//   mnemonic - assembly mnemonic; "mova" additionally receives "mov" aliases.
4472multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemonic> {
4473
  // Byte form: Inst{7} is fixed to 0, Inst{6-5} carry a 2-bit immediate.
  // No ZAn field is declared for this size.
4474  def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, {opc,0,?,?},
4475                                                ZZZZ_b_mul_r,
4476                                                !if(v, TileVectorOpV8,
4477                                                       TileVectorOpH8),
4478                                                uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
4479    bits<2> imm;
4480    let Inst{6-5} = imm;
4481  }
4482
  // Halfword form: 1-bit tile number in Inst{6}, 1-bit immediate in Inst{5}.
4483  def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, {opc,0,?,?},
4484                                                ZZZZ_h_mul_r,
4485                                                !if(v, TileVectorOpV16,
4486                                                       TileVectorOpH16),
4487                                                uimm1s4range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
4488    bits<1> ZAn;
4489    bits<1> imm;
4490    let Inst{6}   = ZAn;
4491    let Inst{5}   = imm;
4492  }
4493
  // Word form: 2-bit tile number in Inst{6-5}; no immediate bits are encoded.
4494  def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, {opc,0,?,?},
4495                                                ZZZZ_s_mul_r,
4496                                                !if(v, TileVectorOpV32,
4497                                                       TileVectorOpH32),
4498                                                 uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
4499    bits<2> ZAn;
4500    let Inst{6-5} = ZAn;
4501  }
4502
  // Doubleword form: the low three op bits are all left open so that a 3-bit
  // tile number can occupy Inst{7-5}; no immediate bits are encoded.
4503  def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, {opc,?,?,?},
4504                                                ZZZZ_d_mul_r,
4505                                                !if(v, TileVectorOpV64,
4506                                                       TileVectorOpH64),
4507                                                uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
4508    bits<3> ZAn;
4509    let Inst{7-5} = ZAn;
4510  }
4511
  // "mov" spellings exist only for the "mova" mnemonic.
  // NOTE(review): the leading 1/0 argument is defined by
  // sme2_mova_tile_or_array_to_vec_aliases outside this chunk; presumably it
  // is the alias emit/preference flag - confirm against that multiclass.
4512  if !eq(mnemonic, "mova") then {
4513  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _B),
4514                                                ZZZZ_b_mul_r,
4515                                                !if(v, TileVectorOpV8,
4516                                                      TileVectorOpH8),
4517                                                MatrixIndexGPR32Op12_15,
4518                                                uimm2s4range, "mov">;
4519  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _H),
4520                                                ZZZZ_h_mul_r,
4521                                                !if(v, TileVectorOpV16,
4522                                                       TileVectorOpH16),
4523                                                MatrixIndexGPR32Op12_15,
4524                                                uimm1s4range, "mov">;
4525  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
4526                                                ZZZZ_s_mul_r,
4527                                                !if(v, TileVectorOpV32,
4528                                                      TileVectorOpH32),
4529                                                MatrixIndexGPR32Op12_15,
4530                                                uimm0s4range, "mov">;
4531  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
4532                                                ZZZZ_d_mul_r,
4533                                                !if(v, TileVectorOpV64,
4534                                                       TileVectorOpH64),
4535                                                MatrixIndexGPR32Op12_15,
4536                                                uimm0s4range, "mov">;
4537  }
4538
  // Aliases using the instruction's own mnemonic, created for every mnemonic.
4539  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _B),
4540                                                ZZZZ_b_mul_r,
4541                                                !if(v, TileVectorOpV8,
4542                                                       TileVectorOpH8),
4543                                                MatrixIndexGPR32Op12_15,
4544                                                uimm2s4range, mnemonic>;
4545  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _H),
4546                                                ZZZZ_h_mul_r,
4547                                                !if(v, TileVectorOpV16,
4548                                                       TileVectorOpH16),
4549                                                MatrixIndexGPR32Op12_15,
4550                                                uimm1s4range, mnemonic>;
4551  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
4552                                                ZZZZ_s_mul_r,
4553                                                !if(v, TileVectorOpV32,
4554                                                      TileVectorOpH32),
4555                                                MatrixIndexGPR32Op12_15,
4556                                                uimm0s4range, mnemonic>;
4557  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
4558                                                ZZZZ_d_mul_r,
4559                                                !if(v, TileVectorOpV64,
4560                                                       TileVectorOpH64),
4561                                                MatrixIndexGPR32Op12_15,
4562                                                uimm0s4range, mnemonic>;
4563
4564}
4565
4566// SME2 move tile to vector, four registers
// Expands to a horizontal (_H, v = 0) and a vertical (_V, v = 1) family, both
// with opc = 0b100. With that opc, op{4} is clear, so the tile is input-only.
4567multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
4568 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>;
4569 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>;
4570}
4571
4572// SME2p1 move tile to vector and zero tile, four registers
// Expands to horizontal (_H) and vertical (_V) families with opc = 0b110.
// With that opc, op{4} is set, so the base class also lists the tile as a
// tied output. One *_PSEUDO def is added per direction and element size.
4573multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
4574 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
4575 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
4576
 // Pseudos for the horizontal forms. sme2_movez_to_tile_pseudo is declared
 // outside this chunk.
 // NOTE(review): the sme_elm_idx0_N operand presumably bounds the tile
 // number for each element size - confirm against the pseudo class.
4577 def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4578 def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4579 def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4580 def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4581
 // Pseudos for the vertical forms; same operand types as the horizontal ones.
4582 def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4583 def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4584 def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4585 def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4586}
4587
4588
// Base encoding class for the array-to-vector moves used by both the
// two-register and four-register multiclasses.
//   op         - op{3-1} go to Inst{10-8}, op{0} to Inst{1}. When op{2} is
//                set the array is also listed as an output ($_ZAn) and tied
//                to the $ZAn input.
//   vector_ty  - destination multi-vector register operand ($Zd).
//   array_ty   - ZA array operand ($ZAn).
//   mnemonic   - assembly mnemonic.
//   vg_acronym - text appended after the immediate inside the brackets of the
//                assembly string (the users in this file pass "vgx2"/"vgx4").
4589class sme2_mova_array_to_vec_vg24_multi<bits<4>op, RegisterOperand vector_ty,
4590                                        RegisterOperand array_ty,
4591                                        string mnemonic, string vg_acronym>
4592   : I<!if(op{2}, (outs vector_ty:$Zd, array_ty:$_ZAn), (outs vector_ty:$Zd)),
4593       (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm),
4594       mnemonic,
4595       "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]",
4596       "", []>, Sched<[]> {
  // Zd is not declared here: its width and bit position differ between the
  // two- and four-register forms, so the deriving defs encode it.
4597  bits<2> Rs;
4598  bits<3> imm;
4599  let Inst{31-15} = 0b11000000000001100;
4600  let Inst{14-13} = Rs;
4601  let Inst{12-11} = 0b01;
4602  let Inst{10-8}  = op{3-1};
4603  let Inst{7-5}   = imm;
4604  let Inst{1}     = op{0};
4605  let Inst{0}     = 0b0;
4606  let Constraints = !if(op{2}, "$ZAn = $_ZAn", "");
4607}
4608
4609// move array to vector, two registers.
// Defines the two-register array-to-vector move and its assembly aliases.
//   opc      - three upper bits of the base class `op`; op{0} is left as `?`
//              because Inst{1} is taken by Zd below.
//   mnemonic - assembly mnemonic; "mova" additionally receives "mov" aliases.
// Only one real instruction (NAME, using ZZ_d_mul_r / MatrixOp64) is created;
// every alias below, whatever its element size, resolves to that instruction.
4610multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
4611  def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64,
4612                                               mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>{
    // 4-bit register field in Inst{4-1}; this overrides the base class's
    // Inst{1} = op{0} assignment.
4613    bits<4> Zd;
4614    let Inst{4-1} = Zd;
4615  }
4616
  // Aliases with the instruction's mnemonic and no vg acronym argument,
  // for all four element sizes.
4617  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4618                                                ZZ_b_mul_r, MatrixOp8,
4619                                                MatrixIndexGPR32Op8_11,
4620                                                sme_elm_idx0_7, mnemonic>;
4621  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4622                                                ZZ_h_mul_r, MatrixOp16,
4623                                                MatrixIndexGPR32Op8_11,
4624                                                sme_elm_idx0_7, mnemonic>;
4625  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4626                                                ZZ_s_mul_r, MatrixOp32,
4627                                                MatrixIndexGPR32Op8_11,
4628                                                sme_elm_idx0_7, mnemonic>;
4629  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4630                                                ZZ_d_mul_r,  MatrixOp64,
4631                                                MatrixIndexGPR32Op8_11,
4632                                                sme_elm_idx0_7, mnemonic>;
4633
  // Aliases with the "vgx2" argument for the b/h/s sizes only; the d-sized
  // "vgx2" spelling is the instruction definition itself.
4634  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4635                                                ZZ_b_mul_r, MatrixOp8,
4636                                                MatrixIndexGPR32Op8_11,
4637                                                sme_elm_idx0_7, mnemonic, "vgx2">;
4638  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4639                                                ZZ_h_mul_r, MatrixOp16,
4640                                                MatrixIndexGPR32Op8_11,
4641                                                sme_elm_idx0_7, mnemonic, "vgx2">;
4642  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4643                                                ZZ_s_mul_r, MatrixOp32,
4644                                                MatrixIndexGPR32Op8_11,
4645                                                sme_elm_idx0_7, mnemonic, "vgx2">;
4646
  // "mov" spellings exist only for the "mova" mnemonic. Only the d-sized
  // "vgx2" alias passes 1 as the first argument.
  // NOTE(review): that argument is defined by the alias multiclass outside
  // this chunk; presumably it marks the preferred printed form - confirm.
4647  if !eq(mnemonic, "mova") then {
4648  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4649                                                ZZ_b_mul_r, MatrixOp8,
4650                                                MatrixIndexGPR32Op8_11,
4651                                                sme_elm_idx0_7, "mov">;
4652  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4653                                                ZZ_h_mul_r, MatrixOp16,
4654                                                MatrixIndexGPR32Op8_11,
4655                                                sme_elm_idx0_7, "mov">;
4656  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4657                                                ZZ_s_mul_r, MatrixOp32,
4658                                                MatrixIndexGPR32Op8_11,
4659                                                sme_elm_idx0_7, "mov">;
4660  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4661                                                ZZ_d_mul_r,  MatrixOp64,
4662                                                MatrixIndexGPR32Op8_11,
4663                                                sme_elm_idx0_7, "mov">;
4664
4665  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4666                                                ZZ_b_mul_r, MatrixOp8,
4667                                                MatrixIndexGPR32Op8_11,
4668                                                sme_elm_idx0_7, "mov", "vgx2">;
4669  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4670                                                ZZ_h_mul_r, MatrixOp16,
4671                                                MatrixIndexGPR32Op8_11,
4672                                                sme_elm_idx0_7, "mov", "vgx2">;
4673  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4674                                                ZZ_s_mul_r, MatrixOp32,
4675                                                MatrixIndexGPR32Op8_11,
4676                                                sme_elm_idx0_7, "mov", "vgx2">;
4677  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME),
4678                                                ZZ_d_mul_r,  MatrixOp64,
4679                                                MatrixIndexGPR32Op8_11,
4680                                                sme_elm_idx0_7, "mov", "vgx2">;
4681  }
4682}
4683
// Two-register array-to-vector move with opc = 0b010, plus a matching pseudo.
// With this opc the base class sees op{2} set, so the array operand is also a
// tied output. sme2_movaz_array_to_tile_pseudo is declared outside this chunk.
4684multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
4685  defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
4686  def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
4687}
4688
4689// move array to vector, four registers
// Defines the four-register array-to-vector move and its assembly aliases.
//   opc      - full 4-bit `op` value of the base class (opc{0} lands in
//              Inst{1}, which Zd does not cover in this form).
//   mnemonic - assembly mnemonic; "mova" additionally receives "mov" aliases.
// Only one real instruction (NAME, using ZZZZ_d_mul_r / MatrixOp64) is
// created; every alias below resolves to that instruction.
4690multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
4691  def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64,
4692                                               mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
    // 3-bit register field in Inst{4-2}.
4693    bits<3> Zd;
4694    let Inst{4-2} = Zd;
4695  }
4696
  // Aliases with the instruction's mnemonic and no vg acronym argument,
  // for all four element sizes.
4697  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4698                                                ZZZZ_b_mul_r, MatrixOp8,
4699                                                MatrixIndexGPR32Op8_11,
4700                                                sme_elm_idx0_7, mnemonic>;
4701  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4702                                                ZZZZ_h_mul_r, MatrixOp16,
4703                                                MatrixIndexGPR32Op8_11,
4704                                                sme_elm_idx0_7, mnemonic>;
4705  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4706                                                ZZZZ_s_mul_r, MatrixOp32,
4707                                                MatrixIndexGPR32Op8_11,
4708                                                sme_elm_idx0_7, mnemonic>;
4709  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4710                                                ZZZZ_d_mul_r, MatrixOp64,
4711                                                MatrixIndexGPR32Op8_11,
4712                                                sme_elm_idx0_7, mnemonic>;
4713
  // Aliases with the "vgx4" argument for the b/h/s sizes only; the d-sized
  // "vgx4" spelling is the instruction definition itself.
4714  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4715                                                ZZZZ_b_mul_r, MatrixOp8,
4716                                                MatrixIndexGPR32Op8_11,
4717                                                sme_elm_idx0_7, mnemonic, "vgx4">;
4718  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4719                                                ZZZZ_h_mul_r, MatrixOp16,
4720                                                MatrixIndexGPR32Op8_11,
4721                                                sme_elm_idx0_7, mnemonic, "vgx4">;
4722  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4723                                                ZZZZ_s_mul_r, MatrixOp32,
4724                                                MatrixIndexGPR32Op8_11,
4725                                                sme_elm_idx0_7, mnemonic, "vgx4">;
4726
  // "mov" spellings exist only for the "mova" mnemonic. Only the d-sized
  // "vgx4" alias passes 1 as the first argument.
  // NOTE(review): that argument is defined by the alias multiclass outside
  // this chunk; presumably it marks the preferred printed form - confirm.
4727  if !eq(mnemonic, "mova") then {
4728  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4729                                                ZZZZ_b_mul_r, MatrixOp8,
4730                                                MatrixIndexGPR32Op8_11,
4731                                                sme_elm_idx0_7, "mov">;
4732  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4733                                                ZZZZ_h_mul_r, MatrixOp16,
4734                                                MatrixIndexGPR32Op8_11,
4735                                                sme_elm_idx0_7, "mov">;
4736  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4737                                                ZZZZ_s_mul_r, MatrixOp32,
4738                                                MatrixIndexGPR32Op8_11,
4739                                                sme_elm_idx0_7, "mov">;
4740  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4741                                                ZZZZ_d_mul_r, MatrixOp64,
4742                                                MatrixIndexGPR32Op8_11,
4743                                                sme_elm_idx0_7, "mov">;
4744
4745  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4746                                                ZZZZ_b_mul_r, MatrixOp8,
4747                                                MatrixIndexGPR32Op8_11,
4748                                                sme_elm_idx0_7, "mov", "vgx4">;
4749  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4750                                                ZZZZ_h_mul_r, MatrixOp16,
4751                                                MatrixIndexGPR32Op8_11,
4752                                                sme_elm_idx0_7, "mov", "vgx4">;
4753  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4754                                                ZZZZ_s_mul_r, MatrixOp32,
4755                                                MatrixIndexGPR32Op8_11,
4756                                                sme_elm_idx0_7, "mov", "vgx4">;
4757  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME),
4758                                                ZZZZ_d_mul_r, MatrixOp64,
4759                                                MatrixIndexGPR32Op8_11,
4760                                                sme_elm_idx0_7, "mov", "vgx4">;
4761  }
4762}
4763
// Four-register array-to-vector move with opc = 0b1100, plus a matching
// pseudo. With this opc the base class sees op{2} set, so the array operand
// is also a tied output. sme2_movaz_array_to_tile_pseudo is declared outside
// this chunk.
4764multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
4765  defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;
4766  def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
4767}
4768
4769//===----------------------------------------------------------------------===//
4770// SME2 multi-vec saturating shift right narrow
// Encoding class for the two-register form: a ZZ_s_mul_r source ($Zn) and a
// tvecshiftR16 immediate ($imm4) produce a single ZPR16 result ($Zd).
//   mnemonic - assembly mnemonic.
//   op       - written to Inst{20}.
//   u        - written to Inst{5}.
4771class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
4772    : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
4773        mnemonic, "\t$Zd, $Zn, $imm4",
4774        "", []>, Sched<[]> {
4775  bits<4> imm4;
4776  bits<4> Zn;
4777  bits<5> Zd;
4778  let Inst{31-21} = 0b11000001111;
4779  let Inst{20}    = op;
4780  let Inst{19-16} = imm4;
4781  let Inst{15-10} = 0b110101;
4782  let Inst{9-6}   = Zn;
4783  let Inst{5}     = u;
4784  let Inst{4-0}   = Zd;
4785}
4786
// Defines the _H instruction of the two-register form and attaches one
// selection pattern for `intrinsic` (types nxv8i16 / nxv4i32, tvecshiftR16
// immediate). SME2_Sat_Shift_VG2_Pat is declared outside this chunk.
4787multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
4788  def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
4789
4790  def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
4791}
4792
// Base encoding class for the four-register form.
//   sz        - written to Inst{23-22}; a deriving def may override bit 22.
//   op        - op{2} goes to Inst{10}, op{1-0} to Inst{6-5}.
//   zpr_ty    - destination register operand ($Zd).
//   vector_ty - source multi-vector register operand ($Zn).
//   imm_ty    - shift immediate operand type ($imm).
//   mnemonic  - assembly mnemonic.
// The immediate is not encoded here: its width differs per deriving def, which
// therefore assigns Inst{20-16} (and, where needed, Inst{22}) itself.
4793class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
4794                                RegisterOperand vector_ty, Operand imm_ty,
4795                                string mnemonic>
4796    : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn, imm_ty:$imm),
4797        mnemonic, "\t$Zd, $Zn, $imm",
4798        "", []>, Sched<[]> {
4799  bits<3> Zn;
4800  bits<5> Zd;
4801  let Inst{31-24} = 0b11000001;
4802  let Inst{23-22} = sz;
4803  let Inst{21}    = 0b1;
4804  //  Inst{20-16} = imm5;
4805  let Inst{15-11} = 0b11011;
4806  let Inst{10}    = op{2};
4807  let Inst{9-7}   = Zn;
4808  let Inst{6-5}   = op{1-0};
4809  let Inst{4-0}   = Zd;
4810}
4811
// Defines the _B and _H instructions of the four-register form and attaches
// one selection pattern per instruction for `intrinsic`.
// SME2_Sat_Shift_VG4_Pat is declared outside this chunk.
4812multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
  // Byte result from a ZZZZ_s_mul_r source: sz is fixed to 0b01 and the 5-bit
  // immediate fills Inst{20-16}.
4813  def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
4814                                     mnemonic>{
4815    bits<5> imm;
4816    let Inst{20-16} = imm;
4817  }
  // Halfword result from a ZZZZ_d_mul_r source: only the top sz bit is fixed
  // (to 1); the low sz bit (Inst{22}) carries imm{5} of the 6-bit immediate.
4818  def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
4819                                      mnemonic> {
4820    bits<6> imm;
4821    let Inst{22}    = imm{5};
4822    let Inst{20-16} = imm{4-0};
4823  }
4824
4825  def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
4826  def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
4827}
4828
4829//===----------------------------------------------------------------------===//
4830// SME2 Multi-vector - SVE Select
// Base encoding class for the multi-vector select, shared by the two- and
// four-register forms. $Zd, $Zn and $Zm all use the same vector_ty, and the
// governing predicate $PNg is a PNRAny_p8to15 operand encoded in three bits.
//   sz        - written to Inst{23-22}.
//   op        - op{3-2} go to Inst{17-16}, op{1} to Inst{6}, op{0} to Inst{1};
//               bits passed as `?` are overlaid by register fields in the
//               deriving class.
//   vector_ty - multi-vector register operand type.
//   mnemonic  - assembly mnemonic.
// Zd/Zn/Zm are not declared here; the deriving classes declare and place them.
4831class sme2_sel_vector_vg24<bits<2> sz, bits<4> op, RegisterOperand vector_ty,
4832                           string mnemonic>
4833    : I<(outs vector_ty:$Zd),
4834        (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm),
4835        mnemonic, "\t$Zd, $PNg, $Zn, $Zm",
4836        "", []>, Sched<[]> {
4837  bits<3> PNg;
4838  let Inst{31-24} = 0b11000001;
4839  let Inst{23-22} = sz;
4840  let Inst{21}    = 0b1;
4841  let Inst{17-16} = op{3-2};
4842  let Inst{15-13} = 0b100;
4843  let Inst{12-10} = PNg;
4844  let Inst{6}     = op{1};
4845  let Inst{5}     = 0b0;
4846  let Inst{1}     = op{0};
4847  let Inst{0}     = 0b0;
4848}
4849
// Two-register select. Only op{2} is fixed (0 -> Inst{16}); the other op bits
// are `?` because the 4-bit register fields below cover Inst{17}, Inst{6} and
// Inst{1}.
4850class sme2_sel_vector_vg2<bits<2> sz, RegisterOperand vector_ty,
4851                          string mnemonic>
4852     : sme2_sel_vector_vg24<sz, {?,0,?,?}, vector_ty, mnemonic> {
4853  bits<4> Zm;
4854  bits<4> Zn;
4855  bits<4> Zd;
4856  let Inst{20-17} = Zm;
4857  let Inst{9-6}   = Zn;
4858  let Inst{4-1}   = Zd;
4859}
4860
// One two-register select per element size; sz runs 0b00..0b11 for B/H/S/D
// and is paired with the matching ZZ_*_mul_r operand.
4861multiclass sme2_sel_vector_vg2<string mnemonic>{
4862  def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>;
4863  def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>;
4864  def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>;
4865  def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>;
4866}
// Four-register select. op = 0b0100 fixes Inst{17} = 0, Inst{16} = 1,
// Inst{6} = 0 and Inst{1} = 0; the 3-bit register fields sit just above
// those fixed bits.
4867class sme2_sel_vector_vg4<bits<2> sz, RegisterOperand vector_ty,
4868                          string mnemonic>
4869     : sme2_sel_vector_vg24<sz, 0b0100, vector_ty, mnemonic> {
4870  bits<3> Zm;
4871  bits<3> Zn;
4872  bits<3> Zd;
4873  let Inst{20-18} = Zm;
4874  let Inst{9-7}   = Zn;
4875  let Inst{4-2}   = Zd;
4876}
// One four-register select per element size; sz runs 0b00..0b11 for B/H/S/D
// and is paired with the matching ZZZZ_*_mul_r operand.
4877multiclass sme2_sel_vector_vg4<string mnemonic> {
4878  def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>;
4879  def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>;
4880  def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>;
4881  def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>;
4882}
4883
4884//===----------------------------------------------------------------------===//
4885// Non contiguous Load and Store
4886
// Two-register load with a [base register, offset register] address.
//   msz             - written to Inst{14-13}.
//   n               - written to Inst{3}.
//   multi_vector_ty - destination multi-vector register operand ($Zt).
//   gpr_ty          - register operand type of the offset ($Rm).
//   mnemonic        - assembly mnemonic.
// The predicate is printed with a "/z" suffix. Marked mayLoad.
4887class sme2_ld_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
4888                                             RegisterOperand multi_vector_ty,
4889                                             RegisterOperand gpr_ty,
4890                                             string mnemonic>
4891   : I<(outs multi_vector_ty:$Zt),
4892       (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
4893       mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]",
4894       "", []>, Sched<[]> {
4895   bits<5> Rm;
4896   bits<3> PNg;
4897   bits<5> Rn;
4898   bits<4> Zt;
4899   let Inst{31-21} = 0b10100001000;
4900   let Inst{20-16} = Rm;
4901   let Inst{15}    = 0b0;
4902   let Inst{14-13} = msz;
4903   let Inst{12-10} = PNg;
4904   let Inst{9-5}   = Rn;
   // The 4-bit Zt field is split around the `n` bit at Inst{3}.
4905   let Inst{4}     = Zt{3};
4906   let Inst{3}     = n;
4907   let Inst{2-0}   = Zt{2-0};
4908
4909   let mayLoad = 1;
4910}
4911
// Four-register load with a [base register, offset register] address.
// Same operand list and parameters as the two-register class, but Inst{15} is
// 1, Zt is three bits wide and Inst{2} is fixed to 0.
//   msz             - written to Inst{14-13}.
//   n               - written to Inst{3}.
//   multi_vector_ty - destination multi-vector register operand ($Zt).
//   gpr_ty          - register operand type of the offset ($Rm).
//   mnemonic        - assembly mnemonic.
// Marked mayLoad.
4912class sme2_ld_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
4913                                             RegisterOperand multi_vector_ty,
4914                                             RegisterOperand gpr_ty,
4915                                             string mnemonic>
4916   : I<(outs multi_vector_ty:$Zt),
4917       (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
4918       mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]",
4919       "", []>, Sched<[]> {
4920   bits<5> Rm;
4921   bits<3> PNg;
4922   bits<5> Rn;
4923   bits<3> Zt;
4924   let Inst{31-21} = 0b10100001000;
4925   let Inst{20-16} = Rm;
4926   let Inst{15}    = 0b1;
4927   let Inst{14-13} = msz;
4928   let Inst{12-10} = PNg;
4929   let Inst{9-5}   = Rn;
   // The 3-bit Zt field is split: top bit in Inst{4}, low two bits in
   // Inst{1-0}, with `n` and a fixed 0 in between.
4930   let Inst{4}     = Zt{2};
4931   let Inst{3}     = n;
4932   let Inst{2}     = 0b0;
4933   let Inst{1-0}   = Zt{1-0};
4934
4935   let mayLoad = 1;
4936}
4937
// Base class for the two- and four-register loads with a
// [base register, immediate, mul vl] address.
//   msz             - written to Inst{14-13}.
//   n               - written to Inst{3}.
//   op              - op{1} goes to Inst{15}, op{0} to Inst{2}.
//   multi_vector_ty - destination multi-vector register operand ($Zt).
//   index_ty        - operand type of the 4-bit immediate ($imm4).
//   mnemonic        - assembly mnemonic.
// Zt is not declared here; the deriving defs declare and place it.
// Marked mayLoad.
4938class sme2_ld_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
4939                                                 RegisterOperand multi_vector_ty,
4940                                                 Operand index_ty,
4941                                                 string mnemonic>
4942    : I<(outs multi_vector_ty:$Zt),
4943        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
4944        mnemonic,  "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]",
4945        "", []>, Sched<[]> {
4946   bits<4> imm4;
4947   bits<3> PNg;
4948   bits<5> Rn;
4949   let Inst{31-20} = 0b101000010100;
4950   let Inst{19-16} = imm4;
4951   let Inst{15}    = op{1};
4952   let Inst{14-13} = msz;
4953   let Inst{12-10} = PNg;
4954   let Inst{9-5}   = Rn;
4955   let Inst{3}     = n;
4956   let Inst{2}     = op{0};
4957
4958   let mayLoad = 1;
4959}
4960
// Two-register immediate-offset load plus an alias for the offset-less
// spelling. Parameters are forwarded unchanged to the base class.
4961multiclass sme2_ld_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
4962                                                     RegisterOperand multi_vector_ty,
4963                                                     Operand index_ty,
4964                                                     string mnemonic>{
  // op = {0,?}: Inst{15} is 0, and Inst{2} is left open because the low Zt
  // bits occupy Inst{2-0}.
4965  def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
4966                                                        multi_vector_ty,
4967                                                        index_ty, mnemonic> {
4968    bits<4> Zt;
4969    let Inst{4} = Zt{3};
4970    let Inst{2-0} = Zt{2-0};
4971  }
4972
   // "[$Rn]" with no immediate maps to the instruction with an immediate of 0.
4973   def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
4974                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
4975}
4976
// Four-register immediate-offset load plus an alias for the offset-less
// spelling. Parameters are forwarded unchanged to the base class.
4977multiclass sme2_ld_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
4978                                                     RegisterOperand multi_vector_ty,
4979                                                     Operand index_ty,
4980                                                     string mnemonic> {
  // op = 0b10: Inst{15} is 1 and Inst{2} is 0; the 3-bit Zt is split between
  // Inst{4} and Inst{1-0}.
4981  def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
4982                                                        multi_vector_ty,
4983                                                        index_ty, mnemonic> {
4984    bits<3> Zt;
4985    let Inst{4} = Zt{2};
4986    let Inst{1-0} = Zt{1-0};
4987  }
4988
   // "[$Rn]" with no immediate maps to the instruction with an immediate of 0.
4989   def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
4990                   (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
4991}
4992
4993//===----------------------------------------------------------------------===//
4994// SME2 Non-Contiguous Store
// Two-register store with a [base register, offset register] address.
// There are no outputs; $Zt is the first input. The predicate is printed
// without a suffix.
//   msz             - written to Inst{14-13}.
//   n               - written to Inst{3}.
//   multi_vector_ty - multi-vector register operand being stored ($Zt).
//   gpr_ty          - register operand type of the offset ($Rm).
//   mnemonic        - assembly mnemonic.
// Marked mayStore.
4995class sme2_st_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
4996                                             RegisterOperand multi_vector_ty,
4997                                             RegisterOperand gpr_ty,
4998                                             string mnemonic>
4999   : I<(outs ),
5000       (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
5001       mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]",
5002       "", []>, Sched<[]> {
5003   bits<5> Rm;
5004   bits<3> PNg;
5005   bits<5> Rn;
5006   bits<4> Zt;
5007   let Inst{31-21} = 0b10100001001;
5008   let Inst{20-16} = Rm;
5009   let Inst{15}    = 0b0;
5010   let Inst{14-13} = msz;
5011   let Inst{12-10} = PNg;
5012   let Inst{9-5}   = Rn;
   // The 4-bit Zt field is split around the `n` bit at Inst{3}.
5013   let Inst{4}     = Zt{3};
5014   let Inst{3}     = n;
5015   let Inst{2-0}   = Zt{2-0};
5016
5017   let mayStore    = 1;
5018}
5019
// Four-register store with a [base register, offset register] address.
// Same operand list and parameters as the two-register store class, but
// Inst{15} is 1, Zt is three bits wide and Inst{2} is fixed to 0.
//   msz             - written to Inst{14-13}.
//   n               - written to Inst{3}.
//   multi_vector_ty - multi-vector register operand being stored ($Zt).
//   gpr_ty          - register operand type of the offset ($Rm).
//   mnemonic        - assembly mnemonic.
// Marked mayStore.
5020class sme2_st_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
5021                                             RegisterOperand multi_vector_ty,
5022                                             RegisterOperand gpr_ty,
5023                                             string mnemonic>
5024   : I<(outs ),
5025       (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
5026       mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]",
5027       "", []>, Sched<[]> {
5028   bits<5> Rm;
5029   bits<3> PNg;
5030   bits<5> Rn;
5031   bits<3> Zt;
5032   let Inst{31-21} = 0b10100001001;
5033   let Inst{20-16} = Rm;
5034   let Inst{15}     = 0b1;
5035   let Inst{14-13} = msz;
5036   let Inst{12-10} = PNg;
5037   let Inst{9-5}   = Rn;
   // The 3-bit Zt field is split: top bit in Inst{4}, low two bits in
   // Inst{1-0}, with `n` and a fixed 0 in between.
5038   let Inst{4}     = Zt{2};
5039   let Inst{3}     = n;
5040   let Inst{2}     = 0b0;
5041   let Inst{1-0}   = Zt{1-0};
5042
5043   let mayStore    = 1;
5044}
5045
// Base class for the two- and four-register stores with a
// [base register, immediate, mul vl] address. There are no outputs.
//   msz             - written to Inst{14-13}.
//   n               - written to Inst{3}.
//   op              - op{1} goes to Inst{15}, op{0} to Inst{2}.
//   multi_vector_ty - multi-vector register operand being stored ($Zt).
//   index_ty        - operand type of the 4-bit immediate ($imm4).
//   mnemonic        - assembly mnemonic.
// Zt is not declared here; the deriving defs declare and place it.
// Marked mayStore.
5046class sme2_st_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
5047                                                 RegisterOperand multi_vector_ty,
5048                                                 Operand index_ty,
5049                                                 string mnemonic>
5050    : I<(outs ),
5051        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
5052        mnemonic,  "\t$Zt, $PNg, [$Rn, $imm4, mul vl]",
5053        "", []>, Sched<[]> {
5054   bits<4> imm4;
5055   bits<3> PNg;
5056   bits<5> Rn;
5057   let Inst{31-20} = 0b101000010110;
5058   let Inst{19-16} = imm4;
5059   let Inst{15}    = op{1};
5060   let Inst{14-13} = msz;
5061   let Inst{12-10} = PNg;
5062   let Inst{9-5}   = Rn;
5063   let Inst{3}     = n;
5064   let Inst{2}     = op{0};
5065
5066   let mayStore    = 1;
5067}
5068
5069
// Two-register immediate-offset store plus an alias for the offset-less
// spelling. Parameters are forwarded unchanged to the base class.
5070multiclass sme2_st_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
5071                                                     RegisterOperand multi_vector_ty,
5072                                                     Operand index_ty,
5073                                                     string mnemonic> {
  // op = {0,?}: Inst{15} is 0, and Inst{2} is left open because the low Zt
  // bits occupy Inst{2-0}.
5074  def NAME: sme2_st_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
5075                                                       multi_vector_ty,
5076                                                       index_ty, mnemonic> {
5077    bits<4> Zt;
5078    let Inst{4}   = Zt{3};
5079    let Inst{2-0} = Zt{2-0};
5080  }
5081
    // "[$Rn]" with no immediate maps to the instruction with an immediate of 0.
5082    def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
5083                   (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,0), 1>;
5084}
5085
// Four-register immediate-offset store plus an alias for the offset-less
// spelling. Parameters are forwarded unchanged to the base class.
5086multiclass sme2_st_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
5087                                                     RegisterOperand multi_vector_ty,
5088                                                     Operand index_ty,
5089                                                     string mnemonic> {
  // op = 0b10: Inst{15} is 1 and Inst{2} is 0; the 3-bit Zt is split between
  // Inst{4} and Inst{1-0}.
5090  def NAME : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
5091                                                        multi_vector_ty,
5092                                                        index_ty, mnemonic> {
5093    bits<3> Zt;
5094    let Inst{4}   = Zt{2};
5095    let Inst{1-0} = Zt{1-0};
5096  }
5097
    // "[$Rn]" with no immediate maps to the instruction with an immediate of 0.
5098    def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
5099                   (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,0), 1>;
5100}
5101
5102//===----------------------------------------------------------------------===//
5103// SME2.1
5104//===----------------------------------------------------------------------===//
5105// SME zeroing move tile to vector
// Base encoding class for the single-vector zeroing tile-to-vector move.
// The tile appears both as an output ($ZAn) and as an input ($_ZAn), tied
// together by the constraint at the end of the body.
//   sz        - written to Inst{23-22}.
//   q         - written to Inst{16}.
//   v         - written to Inst{15}.
//   vector_ty - destination vector register operand ($Zd).
//   tile_ty   - ZA tile operand type.
//   index_ty  - immediate operand type used for $imm.
//   mnemonic  - assembly mnemonic.
// Inst{8-5} are not assigned here; ZAn and imm are not declared in this class.
5106class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty,
5107                                    RegisterOperand tile_ty, Operand index_ty,
5108                                    string mnemonic>
5109    : I<(outs vector_ty:$Zd, tile_ty:$ZAn),
5110        (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
5111        mnemonic, "\t$Zd, $ZAn[$Rs, $imm]",
5112        "", []>, Sched<[]> {
5113  bits<2> Rs;
5114  bits<5> Zd;
5115  let Inst{31-24} = 0b11000000;
5116  let Inst{23-22} = sz;
5117  let Inst{21-17} = 0b00001;
5118  let Inst{16}    = q;
5119  let Inst{15}    = v;
5120  let Inst{14-13} = Rs;
5121  let Inst{12-9}  = 0b0001;
5122  let Inst{4-0}   = Zd;
5123  let Constraints = "$ZAn = $_ZAn";
5124}
5125
// Instantiates the _B/_H/_S/_D/_Q variants of the class of the same name.
//   v : forwarded to the class (Inst{15}); it also selects the TileVectorOpV*
//       operand class when set and TileVectorOpH* otherwise.
// Each variant fills Inst{8-5} with its tile number (ZAn) in the upper bits
// and its immediate in the lower bits; the split is
//   _B: 0 + 4,  _H: 1 + 3,  _S: 2 + 2,  _D: 3 + 1,  _Q: 4 + 0.
// Every record also derives from SMEPseudo2Instr<NAME # _<suffix>, 1>.
5126multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
5127  def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
5128                                    !if(v, TileVectorOpV8, TileVectorOpH8),
5129                                    sme_elm_idx0_15, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
5130    bits<4> imm;
5131    let Inst{8-5} = imm;
5132  }
5133
5134  def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
5135                                    !if(v, TileVectorOpV16, TileVectorOpH16),
5136                                    sme_elm_idx0_7, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
5137    bits<1> ZAn;
5138    bits<3> imm;
5139    let Inst{8}   = ZAn;
5140    let Inst{7-5} = imm;
5141  }
5142
5143  def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
5144                                    !if(v, TileVectorOpV32, TileVectorOpH32),
5145                                    sme_elm_idx0_3, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
5146    bits<2> ZAn;
5147    bits<2> imm;
5148    let Inst{8-7} = ZAn;
5149    let Inst{6-5} = imm;
5150  }
5151
5152  def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
5153                                    !if(v, TileVectorOpV64, TileVectorOpH64),
5154                                    sme_elm_idx0_1, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
5155    bits<3> ZAn;
5156    bits<1> imm;
5157    let Inst{8-6} = ZAn;
5158    let Inst{5}   = imm;
5159  }
5160
  // _Q shares sz = 0b11 with _D and is distinguished by q = 1; it has no
  // immediate field, so all of Inst{8-5} holds the tile number.
5161  def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
5162                                    !if(v, TileVectorOpV128, TileVectorOpH128),
5163                                    sme_elm_idx0_0, mnemonic>, SMEPseudo2Instr<NAME # _Q, 1> {
5164    bits<4> ZAn;
5165    let Inst{8-5} = ZAn;
5166  }
5167}
5168
// Top-level multiclass for the zeroing tile-to-vector move. It emits:
//   * the real instructions, via sme2p1_movaz_tile_to_vec_base with v = 0
//     (suffix _H) and v = 1 (suffix _V);
//   * one sme2_movez_to_tile_pseudo record per (direction, element size);
//   * SME2_Tile_Movaz_Pat selection patterns for each vector type.
// intrinsic_horiz / intrinsic_vert are used for the B/H/S/D patterns;
// intrinsic_horiz_q / intrinsic_vert_q are used for the Q patterns.
5169multiclass sme2p1_movaz_tile_to_vec<string mnemonic, SDPatternOperator intrinsic_horiz, SDPatternOperator intrinsic_vert,
5170                                    SDPatternOperator intrinsic_horiz_q, SDPatternOperator intrinsic_vert_q>{
5171 defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
5172 defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;
5173
 // Pseudos for the _H records. The two index operand classes passed per size
 // are the same pairs used by the patterns further down.
5174 def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0,  sme_elm_idx0_15, ZPR8,   SMEMatrixTileB>;
5175 def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1,  sme_elm_idx0_7,  ZPR16,  SMEMatrixTileH>;
5176 def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3,  sme_elm_idx0_3,  ZPR32,  SMEMatrixTileS>;
5177 def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7,  sme_elm_idx0_1,  ZPR64,  SMEMatrixTileD>;
5178 def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_Q, sme_elm_idx0_15, sme_elm_idx0_0,  ZPR128, SMEMatrixTileQ>;
5179
 // Pseudos for the _V records, mirroring the _H set above.
5180 def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
5181 def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
5182 def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
5183 def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64, SMEMatrixTileD>;
5184 def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;
5185
 // _H_{B,H,S,D}: integer types first, then the bf16/f16/f32/f64 types, which
 // reuse the H/S/D records of matching element width.
5186 def : SME2_Tile_Movaz_Pat<NAME # _H_B, intrinsic_horiz, nxv16i8,sme_elm_idx0_0,  sme_elm_idx0_15, tileslice8>;
5187 def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
5188 def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
5189 def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
5190 def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
5191 def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
5192 def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
5193 def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
5194
 // _V_{B,H,S,D}: same type list as above, using intrinsic_vert.
5195 def : SME2_Tile_Movaz_Pat<NAME # _V_B, intrinsic_vert, nxv16i8, sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
5196 def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
5197 def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
5198 def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
5199 def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
5200 def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
5201 def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
5202 def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
5203
5204 // _H_Q: every vector type maps to the single _H_Q record via intrinsic_horiz_q.
5205 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5206 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5207 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5208 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5209 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5210 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5211 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5212 def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5213
5214 // _V_Q: every vector type maps to the single _V_Q record via intrinsic_vert_q.
5215 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5216 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5217 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5218 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5219 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5220 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5221 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5222 def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
5223}
5224
5225//===----------------------------------------------------------------------===//
5226// SME2.1 multiple vectors zero array
5227
// Base instruction class for the SME2.1 "zero array" forms.
//   opc        : six opcode bits, encoded in two halves:
//                opc{5-3} -> Inst{17-15}, opc{2-0} -> Inst{2-0}
//   index_ty   : operand class of $imm
//   vg_acronym : when non-empty, appended inside the brackets of the asm
//                string as ", <vg_acronym>"
// $ZAd is tied to the input $_ZAd by the constraint below.
5228class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
5229                         string vg_acronym="">
5230    : I<(outs MatrixOp64:$ZAd),
5231        (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm),
5232        mnemonic, "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
5233        "", []>, Sched<[]> {
5234  bits <2> Rv;
5235  let Inst{31-18} = 0b11000000000011;
5236  let Inst{17-15} = opc{5-3};
5237  let Inst{14-13} = Rv;
5238  let Inst{12-3} = 0b0000000000;
5239  let Inst{2-0}  = opc{2-0};
5240  let Constraints = "$ZAd = $_ZAd";
5241}
5242
// Instantiates the eight variants of the sme2p1_zero_matrix class, then their
// pseudos and selection patterns.
// In each record the '?' entries of the opc argument are left unset and the
// record body overrides the corresponding low Inst bits with its immediate:
// 3 bits for _VG2_Z/_2Z/_VG4_Z, 2 bits for _VG2_2Z/_VG4_2Z/_4Z, and 1 bit for
// _VG2_4Z/_VG4_4Z.
5243multiclass sme2p1_zero_matrix<string mnemonic> {
5244  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_Z , 1> {
5245    bits<3> imm;
5246    let Inst{2-0} = imm;
5247  }
5248  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _2Z, 1> {
5249    bits<3> imm;
5250    let Inst{2-0} = imm;
5251  }
5252  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_2Z, 1> {
5253    bits<2> imm;
5254    let Inst{1-0} = imm;
5255  }
5256  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_2Z, 1> {
5257    bits<2> imm;
5258    let Inst{1-0} = imm;
5259  }
5260  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_Z, 1> {
5261    bits<3> imm;
5262    let Inst{2-0} = imm;
5263  }
5264  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _4Z, 1> {
5265    bits<2> imm;
5266    let Inst{1-0} = imm;
5267  }
5268  def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_4Z, 1> {
5269    bits<1> imm;
5270    let Inst{0}   = imm;
5271  }
5272  def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_4Z, 1> {
5273    bits<1> imm;
5274    let Inst{0}   = imm;
5275  }
5276
  // One pseudo per record above.
  // NOTE(review): for the range forms, the pseudos and patterns use an
  // immediate operand class whose name is one step narrower than the real
  // instruction's (e.g. _2Z: uimm3s2range above vs uimm2s2range here;
  // _VG2_4Z: uimm1s4range vs uimm0s4range) -- confirm this is intentional.
  // NOTE(review): the pseudo class name is spelled "sem2p1_..." rather than
  // "sme2p1_..."; it is defined outside this section, so the spelling here
  // must match that definition.
5277  def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_Z, sme_elm_idx0_7, SMEMatrixArray>;
5278  def NAME # _VG4_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_Z, sme_elm_idx0_7, SMEMatrixArray>;
5279  def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _2Z, uimm2s2range, SMEMatrixArray>;
5280  def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_2Z, uimm1s2range, SMEMatrixArray>;
5281  def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_2Z, uimm1s2range, SMEMatrixArray>;
5282  def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _4Z, uimm1s4range, SMEMatrixArray>;
5283  def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_4Z, uimm0s4range, SMEMatrixArray>;
5284  def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_4Z, uimm0s4range, SMEMatrixArray>;
5285
  // Patterns: each int_aarch64_sme_zero_za64_* intrinsic selects the matching
  // pseudo, using the same immediate operand class as that pseudo.
5286  def : SME2_Zero_Matrix_Pat<NAME # _VG2_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x2, sme_elm_idx0_7, tileslice16>;
5287  def : SME2_Zero_Matrix_Pat<NAME # _VG4_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x4, sme_elm_idx0_7, tileslice16>;
5288  def : SME2_Zero_Matrix_Pat<NAME # _2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x1, uimm2s2range, tileslicerange2s2>;
5289  def : SME2_Zero_Matrix_Pat<NAME # _VG2_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x2, uimm1s2range, tileslicerange1s2>;
5290  def : SME2_Zero_Matrix_Pat<NAME # _VG4_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x4, uimm1s2range, tileslicerange1s2>;
5291  def : SME2_Zero_Matrix_Pat<NAME # _4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x1, uimm1s4range, tileslicerange1s4>;
5292  def : SME2_Zero_Matrix_Pat<NAME # _VG2_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x2, uimm0s4range, tileslicerange0s4>;
5293  def : SME2_Zero_Matrix_Pat<NAME # _VG4_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x4, uimm0s4range, tileslicerange0s4>;
5294}
5295
5296//===----------------------------------------------------------------------===//
5297// SME2.1 lookup table expand two non-contiguous registers
5298
// Base class for the SME2.1 lookup-table forms that write a two-register
// destination.
//   op        -> Inst{18-15} (subclasses leave some bits unset and override
//                them with the index)
//   sz        -> Inst{13-12}
//   vector_ty : operand class of the $Zd output
//   index_ty  : operand class of the $i index, printed directly after $Zn
5299class sme2p1_luti_vector_vg2_index<bits<4> op, bits<2> sz, RegisterOperand vector_ty,
5300                                   AsmVectorIndexOpnd index_ty,
5301                                   string mnemonic>
5302    :  I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
5303          mnemonic, "\t$Zd, $ZTt, $Zn$i",
5304          "", []>, Sched<[]> {
5305  bits<5> Zn;
5306  bits<4> Zd;
5307  let Inst{31-19} = 0b1100000010011;
5308  let Inst{18-15} = op;
5309  let Inst{14}    = 0b1;
5310  let Inst{13-12} = sz;
5311  let Inst{11-10} = 0b00;
5312  let Inst{9-5}   = Zn;
  // Zd is encoded non-contiguously around a fixed zero at Inst{3}:
  // Zd{3} -> Inst{4}, Zd{2-0} -> Inst{2-0}.
5313  let Inst{4}     = Zd{3};
5314  let Inst{3}     = 0b0;
5315  let Inst{2-0}   = Zd{2-0};
5316}
5317
// luti2, two-register destination: passes op = {1,?,?,?} to the base class, so
// Inst{18} is 1, and fills the three unset op bits (Inst{17-15}) with the
// 3-bit index i.
5318class sme2p1_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
5319                                    AsmVectorIndexOpnd index_ty,
5320                                    string mnemonic>
5321  : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty, mnemonic> {
5322  bits<3> i;
5323  let Inst{17-15} = i;
5324}
5325
// Instantiates the _B (sz = 0b00, ZZ_b_strided) and _H (sz = 0b01,
// ZZ_h_strided) variants of the class of the same name; both use VectorIndexH
// as the index operand class.
5326multiclass sme2p1_luti2_vector_vg2_index<string mnemonic> {
5327  def _B : sme2p1_luti2_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexH,
5328                                         mnemonic>;
5329  def _H : sme2p1_luti2_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexH,
5330                                         mnemonic>;
5331}
5332
// luti4, two-register destination: passes op = {0b01,?,?} to the base class,
// so Inst{18-17} is 0b01, and fills the two unset op bits (Inst{16-15}) with
// the 2-bit index i.
5333class sme2p1_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
5334                                    AsmVectorIndexOpnd index_ty,
5335                                    string mnemonic>
5336  : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty, mnemonic> {
5337  bits<2> i;
5338  let Inst{16-15} = i;
5339}
// Instantiates the _B (sz = 0b00, ZZ_b_strided) and _H (sz = 0b01,
// ZZ_h_strided) variants of the class of the same name; both use VectorIndexS
// as the index operand class.
5340multiclass sme2p1_luti4_vector_vg2_index<string mnemonic> {
5341  def _B : sme2p1_luti4_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexS,
5342                                         mnemonic>;
5343  def _H : sme2p1_luti4_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexS,
5344                                         mnemonic>;
5345}
5346
5347// SME2.1 lookup table expand four non-contiguous registers
// Base class for the SME2.1 lookup-table forms that write a four-register
// destination.
//   op        -> Inst{18-16} (subclasses leave some bits unset and override
//                them with the index)
//   sz        -> Inst{13-12}
//   vector_ty : operand class of the $Zd output
//   index_ty  : operand class of the $i index, printed directly after $Zn
5348class sme2p1_luti_vector_vg4_index<bits<3> op, bits<2> sz, RegisterOperand vector_ty,
5349                                   AsmVectorIndexOpnd index_ty,
5350                                   string mnemonic>
5351    :  I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
5352          mnemonic, "\t$Zd, $ZTt, $Zn$i",
5353          "", []>, Sched<[]> {
5354  bits<5> Zn;
5355  bits<3> Zd;
5356  let Inst{31-19} = 0b1100000010011;
5357  let Inst{18-16} = op;
5358  let Inst{15-14} = 0b10;
5359  let Inst{13-12} = sz;
5360  let Inst{11-10} = 0b00;
5361  let Inst{9-5}   = Zn;
  // Zd is encoded non-contiguously around two fixed zeros at Inst{3-2}:
  // Zd{2} -> Inst{4}, Zd{1-0} -> Inst{1-0}.
5362  let Inst{4}     = Zd{2};
5363  let Inst{3-2}   = 0b00;
5364  let Inst{1-0}   = Zd{1-0};
5365}
5366
// luti2, four-register destination: passes op = {1,?,?} to the base class, so
// Inst{18} is 1, and fills the two unset op bits (Inst{17-16}) with the 2-bit
// index i.
5367class sme2p1_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
5368                                    AsmVectorIndexOpnd index_ty,
5369                                    string mnemonic>
5370  : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty, mnemonic> {
5371  bits<2> i;
5372  let Inst{17-16} = i;
5373}
5374
// Instantiates the _B (sz = 0b00, ZZZZ_b_strided) and _H (sz = 0b01,
// ZZZZ_h_strided) variants of the class of the same name; both use
// VectorIndexS as the index operand class.
5375multiclass sme2p1_luti2_vector_vg4_index<string mnemonic> {
5376  def _B : sme2p1_luti2_vector_vg4_index<0b00, ZZZZ_b_strided, VectorIndexS,
5377                                         mnemonic>;
5378  def _H : sme2p1_luti2_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexS,
5379                                         mnemonic>;
5380}
5381
// luti4, four-register destination: passes op = {0b01,?} to the base class, so
// Inst{18-17} is 0b01, and fills the one unset op bit (Inst{16}) with the
// single-bit index i.
5382class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
5383                                    AsmVectorIndexOpnd index_ty,
5384                                    string mnemonic>
5385  : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty, mnemonic> {
5386  bit i;
5387  let Inst{16}    = i;
5388}
5389
// Only an _H variant is defined for this form: sz = 0b01, ZZZZ_h_strided
// destination, VectorIndexD index operand class.
5390multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
5391  def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
5392}
5393
5394// SME2 lookup table two source registers expand to four contiguous destination registers
// Instruction class with a ZZZZ_b_mul_r destination ($Zd) and ZTR ($ZTt) plus
// ZZ_mul_r ($Zn) sources; no index operand.
//   sz -> Inst{13-12}
//   op -> Inst{11-10}
// Zn (4 bits) occupies Inst{9-6} and Zd (3 bits) occupies Inst{4-2}; the bits
// below each field (Inst{5} and Inst{1-0}) are fixed to zero.
5395class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
5396  : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
5397       mnemonic, "\t$Zd, $ZTt, $Zn",
5398       "", []>, Sched<[]> {
5399  bits<4> Zn;
5400  bits<3> Zd;
5401  let Inst{31-14} = 0b110000001000101100;
5402  let Inst{13-12} = sz;
5403  let Inst{11-10} = op;
5404  let Inst{9-6}   = Zn;
5405  let Inst{5}     = 0b0;
5406  let Inst{4-2}   = Zd;
5407  let Inst{1-0}   = 0b00;
5408}
5409
5410// SME2 lookup table two source registers expand to four non-contiguous destination registers
// Variant of the two-source luti4 form whose destination uses the
// ZZZZ_b_strided operand class.
//   sz -> Inst{13-12}
//   op -> Inst{11-10}
// Zn (4 bits) occupies Inst{9-6}. Zd (3 bits) is encoded non-contiguously:
// Zd{2} -> Inst{4}, Zd{1-0} -> Inst{1-0}, with Inst{3-2} fixed to zero.
5411class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
5412   : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
5413        mnemonic, "\t$Zd, $ZTt, $Zn",
5414        "", []>, Sched<[]> {
5415  bits<4> Zn;
5416  bits<3> Zd;
5417  let Inst{31-14} = 0b110000001001101100;
5418  let Inst{13-12} = sz;
5419  let Inst{11-10} = op;
5420  let Inst{9-6}   = Zn;
5421  let Inst{5}     = 0b0;
5422  let Inst{4}     = Zd{2};
5423  let Inst{3-2}   = 0b00;
5424  let Inst{1-0}   = Zd{1-0};
5425}
5426
// Defines the _2ZZ and _4ZZ records from the existing
// sme2_sve_destructive_vector_vg{2,4}_single classes, both with arguments
// 0b00 and 0b0011000, a ZZ_h_mul_r / ZZZZ_h_mul_r multi-vector operand and a
// ZPR4b16 single-vector operand.
5427multiclass sme2_bfscale_single<string mnemonic> {
5428  def _2ZZ : sme2_sve_destructive_vector_vg2_single<0b00, 0b0011000, ZZ_h_mul_r, ZPR4b16, mnemonic>;
5429  def _4ZZ : sme2_sve_destructive_vector_vg4_single<0b00, 0b0011000, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
5430}
5431
// Defines the _2Z2Z and _4Z4Z records from the existing
// sme2_sve_destructive_vector_vg{2,4}_multi classes, both with arguments 0b00
// and 0b0011000 and a ZZ_h_mul_r / ZZZZ_h_mul_r operand class.
5432multiclass sme2_bfscale_multi<string mnemonic> {
5433  def _2Z2Z : sme2_sve_destructive_vector_vg2_multi<0b00, 0b0011000, ZZ_h_mul_r, mnemonic>;
5434  def _4Z4Z : sme2_sve_destructive_vector_vg4_multi<0b00, 0b0011000, ZZZZ_h_mul_r, mnemonic>;
5435}
5436
// Quarter-tile outer-product instruction class with a TileOp32 accumulator.
//   M -> Inst{20}
//   N -> Inst{9}
//   S -> Inst{4}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a 2-bit field in
// Inst{1-0}. $ZAda is tied to the input $_ZAda by the constraint below.
5437class sme2_bf16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5438    : I<(outs TileOp32:$ZAda),
5439        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5440        mnemonic, "\t$ZAda, $Zn, $Zm",
5441        "", []>, Sched<[]> {
5442  bits<2> ZAda;
5443  bits<3> Zn;
5444  bits<3> Zm;
5445
5446  let Inst{31-21} = 0b10000001000;
5447  let Inst{20} = M;
5448  let Inst{19-17} = Zm;
5449  let Inst{16-10} = 0b0000000;
5450  let Inst{9} = N;
5451  let Inst{8-6} = Zn;
5452  let Inst{5} = 0;
5453  let Inst{4} = S;
5454  let Inst{3-2} = 0b00;
5455  let Inst{1-0} = ZAda;
5456
5457  let Constraints = "$ZAda = $_ZAda";
5458}
5459
// Instantiates the four operand-shape variants of
// sme2_bf16_fp32_quarter_tile_outer_product. S is forwarded unchanged.
// N is 1 exactly when $Zn uses the ZZ_h_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_h_mul_r_Hi class; otherwise the ZPR16Mul2_Lo/_Hi
// classes are used.
5460multiclass sme2_bfmop4as_widening<bit S, string mnemonic> {
5461  // Single vectors
5462  def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
5463
5464  // Multiple and single vectors
5465  def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
5466
5467  // Single and multiple vectors
5468  def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
5469
5470  // Multiple vectors
5471  def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
5472}
5473
// Three-operand instruction class: $Zd and $Zn share the vector_ty operand
// class, $Zm uses zpr_ty.
//   size -> Inst{23-22}
// Zm, Zn and Zd are 4-bit fields at Inst{20-17}, Inst{9-6} and Inst{4-1};
// the bit directly below Zn and Zd (Inst{5}, Inst{0}) is fixed to zero.
5474class sme2_multi2_fmul_sm<bits<2> size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty>
5475    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm),
5476        mnemonic, "\t$Zd, $Zn, $Zm",
5477        "", []>, Sched<[]> {
5478  bits<4> Zd;
5479  bits<4> Zn;
5480  bits<4> Zm;
5481
5482  let Inst{31-24} = 0b11000001;
5483  let Inst{23-22} = size;
5484  let Inst{21}    = 0b1;
5485  let Inst{20-17} = Zm;
5486  let Inst{16-10} = 0b0111010;
5487  let Inst{9-6}   = Zn;
5488  let Inst{5}     = 0b0;
5489  let Inst{4-1}   = Zd;
5490  let Inst{0}     = 0b0;
5491}
5492
// Instantiates the _H/_S/_D variants (size = 0b01/0b10/0b11) of the class of
// the same name, pairing ZZ_{h,s,d}_mul_r with ZPR4b{16,32,64}.
5493multiclass sme2_multi2_fmul_sm<string mnemonic> {
5494  def _H : sme2_multi2_fmul_sm<0b01, mnemonic, ZZ_h_mul_r, ZPR4b16>;
5495  def _S : sme2_multi2_fmul_sm<0b10, mnemonic, ZZ_s_mul_r, ZPR4b32>;
5496  def _D : sme2_multi2_fmul_sm<0b11, mnemonic, ZZ_d_mul_r, ZPR4b64>;
5497}
5498
// Three-operand instruction class: $Zd and $Zn share the vector_ty operand
// class, $Zm uses zpr_ty.
//   size -> Inst{23-22}
// Zm is a 4-bit field at Inst{20-17}; Zn and Zd are 3-bit fields at
// Inst{9-7} and Inst{4-2}, each followed by two fixed zero bits.
// Differs from sme2_multi2_fmul_sm in Inst{16} (1 here) and in the narrower
// Zn/Zd fields.
5499class sme2_multi4_fmul_sm<bits<2> size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty>
5500    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm),
5501        mnemonic, "\t$Zd, $Zn, $Zm",
5502        "", []>, Sched<[]> {
5503  bits<3> Zd;
5504  bits<3> Zn;
5505  bits<4> Zm;
5506
5507  let Inst{31-24} = 0b11000001;
5508  let Inst{23-22} = size;
5509  let Inst{21}    = 0b1;
5510  let Inst{20-17} = Zm;
5511  let Inst{16-10} = 0b1111010;
5512  let Inst{9-7}   = Zn;
5513  let Inst{6-5}   = 0b00;
5514  let Inst{4-2}   = Zd;
5515  let Inst{1-0}   = 0b00;
5516}
5517
// Instantiates the _H/_S/_D variants (size = 0b01/0b10/0b11) of the class of
// the same name, pairing ZZZZ_{h,s,d}_mul_r with ZPR4b{16,32,64}.
5518multiclass sme2_multi4_fmul_sm<string mnemonic> {
5519  def _H : sme2_multi4_fmul_sm<0b01, mnemonic, ZZZZ_h_mul_r, ZPR4b16>;
5520  def _S : sme2_multi4_fmul_sm<0b10, mnemonic, ZZZZ_s_mul_r, ZPR4b32>;
5521  def _D : sme2_multi4_fmul_sm<0b11, mnemonic, ZZZZ_d_mul_r, ZPR4b64>;
5522}
5523
// Defines _2ZZ and _4ZZ from sme2_multi2_fmul_sm / sme2_multi4_fmul_sm using
// size = 0b00 (the value not used by the _H/_S/_D variants of those classes)
// with the ZZ_h_mul_r / ZZZZ_h_mul_r and ZPR4b16 operand classes.
5524multiclass sme2_bfmul_single<string mnemonic> {
5525  def _2ZZ  : sme2_multi2_fmul_sm<0b00, mnemonic, ZZ_h_mul_r,   ZPR4b16>;
5526  def _4ZZ  : sme2_multi4_fmul_sm<0b00, mnemonic, ZZZZ_h_mul_r, ZPR4b16>;
5527}
5528
// Three-operand instruction class in which $Zd, $Zn and $Zm all use the same
// vector_ty operand class.
//   size -> Inst{23-22}
// Zm, Zn and Zd are 4-bit fields at Inst{20-17}, Inst{9-6} and Inst{4-1};
// Inst{5} and Inst{0} are fixed to zero.
5529class sme2_multi2_fmul_mm<bits<2> size, string mnemonic, RegisterOperand vector_ty>
5530    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
5531        mnemonic, "\t$Zd, $Zn, $Zm",
5532        "", []>, Sched<[]> {
5533  bits<4> Zd;
5534  bits<4> Zn;
5535  bits<4> Zm;
5536
5537  let Inst{31-24} = 0b11000001;
5538  let Inst{23-22} = size;
5539  let Inst{21}    = 0b1;
5540  let Inst{20-17} = Zm;
5541  let Inst{16-10} = 0b0111001;
5542  let Inst{9-6}   = Zn;
5543  let Inst{5}     = 0b0;
5544  let Inst{4-1}   = Zd;
5545  let Inst{0}     = 0b0;
5546}
5547
// Instantiates the _H/_S/_D variants (size = 0b01/0b10/0b11) of the class of
// the same name with the ZZ_{h,s,d}_mul_r operand classes.
5548multiclass sme2_multi2_fmul_mm<string mnemonic> {
5549  def _H : sme2_multi2_fmul_mm<0b01, mnemonic, ZZ_h_mul_r>;
5550  def _S : sme2_multi2_fmul_mm<0b10, mnemonic, ZZ_s_mul_r>;
5551  def _D : sme2_multi2_fmul_mm<0b11, mnemonic, ZZ_d_mul_r>;
5552}
5553
// Three-operand instruction class in which $Zd, $Zn and $Zm all use the same
// vector_ty operand class.
//   size -> Inst{23-22}
// All three register fields are 3 bits wide: Zm at Inst{20-18}, Zn at
// Inst{9-7}, Zd at Inst{4-2}. Because Zm is one bit narrower than in
// sme2_multi2_fmul_mm, the fixed opcode field below it is one bit wider
// (Inst{17-10}).
5554class sme2_multi4_fmul_mm<bits<2> size, string mnemonic, RegisterOperand vector_ty>
5555    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
5556        mnemonic, "\t$Zd, $Zn, $Zm",
5557        "", []>, Sched<[]> {
5558  bits<3> Zd;
5559  bits<3> Zn;
5560  bits<3> Zm;
5561
5562  let Inst{31-24} = 0b11000001;
5563  let Inst{23-22} = size;
5564  let Inst{21}    = 0b1;
5565  let Inst{20-18} = Zm;
5566  let Inst{17-10} = 0b01111001;
5567  let Inst{9-7}   = Zn;
5568  let Inst{6-5}   = 0b00;
5569  let Inst{4-2}   = Zd;
5570  let Inst{1-0}   = 0b00;
5571}
5572
// Instantiates the _H/_S/_D variants (size = 0b01/0b10/0b11) of the class of
// the same name with the ZZZZ_{h,s,d}_mul_r operand classes.
5573multiclass sme2_multi4_fmul_mm<string mnemonic> {
5574  def _H : sme2_multi4_fmul_mm<0b01, mnemonic, ZZZZ_h_mul_r>;
5575  def _S : sme2_multi4_fmul_mm<0b10, mnemonic, ZZZZ_s_mul_r>;
5576  def _D : sme2_multi4_fmul_mm<0b11, mnemonic, ZZZZ_d_mul_r>;
5577}
5578
// Defines _2Z2Z and _4Z4Z from sme2_multi2_fmul_mm / sme2_multi4_fmul_mm using
// size = 0b00 (the value not used by the _H/_S/_D variants of those classes)
// with the ZZ_h_mul_r / ZZZZ_h_mul_r operand classes.
5579multiclass sme2_bfmul_multi<string mnemonic> {
5580  def _2Z2Z : sme2_multi2_fmul_mm<0b00, mnemonic, ZZ_h_mul_r>;
5581  def _4Z4Z : sme2_multi4_fmul_mm<0b00, mnemonic, ZZZZ_h_mul_r>;
5582}
5583
// Quarter-tile outer-product instruction class with a TileOp16 accumulator.
//   M -> Inst{20}
//   N -> Inst{9}
//   S -> Inst{4}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a single bit in
// Inst{0}, with Inst{3-1} fixed to 0b100. $ZAda is tied to the input $_ZAda.
5584class sme2_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5585    : I<(outs TileOp16:$ZAda),
5586        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5587        mnemonic, "\t$ZAda, $Zn, $Zm",
5588        "", []>, Sched<[]> {
5589  bit ZAda;
5590  bits<3> Zn;
5591  bits<3> Zm;
5592
5593  let Inst{31-21} = 0b10000001000;
5594  let Inst{20} = M;
5595  let Inst{19-17} = Zm;
5596  let Inst{16-10} = 0b0000000;
5597  let Inst{9} = N;
5598  let Inst{8-6} = Zn;
5599  let Inst{5} = 0;
5600  let Inst{4} = S;
5601  let Inst{3-1} = 0b100;
5602  let Inst{0} = ZAda;
5603
5604  let Constraints = "$ZAda = $_ZAda";
5605}
5606
// Instantiates the four operand-shape variants of
// sme2_fp16_quarter_tile_outer_product. S is forwarded unchanged.
// N is 1 exactly when $Zn uses the ZZ_h_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_h_mul_r_Hi class; otherwise the ZPR16Mul2_Lo/_Hi
// classes are used.
5607multiclass sme2_fmop4as_fp16_non_widening<bit S, string mnemonic> {
5608  // Single vectors
5609  def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
5610
5611  // Multiple and single vectors
5612  def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
5613
5614  // Single and multiple vectors
5615  def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
5616
5617  // Multiple vectors
5618  def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
5619}
5620
// Quarter-tile outer-product instruction class with a TileOp32 accumulator.
// Unlike the sibling classes that take an S bit, this one has no S parameter:
// Inst{5-2} is fixed to zero.
//   M -> Inst{20}
//   N -> Inst{9}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a 2-bit field in
// Inst{1-0}. $ZAda is tied to the input $_ZAda.
5621class sme2_fp8_fp32_quarter_tile_outer_product<bit M, bit N, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5622    : I<(outs TileOp32:$ZAda),
5623        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5624        mnemonic, "\t$ZAda, $Zn, $Zm",
5625        "", []>, Sched<[]> {
5626  bits<2> ZAda;
5627  bits<3> Zn;
5628  bits<3> Zm;
5629
5630  let Inst{31-21} = 0b10000000001;
5631  let Inst{20} = M;
5632  let Inst{19-17} = Zm;
5633  let Inst{16-10} = 0b0000000;
5634  let Inst{9} = N;
5635  let Inst{8-6} = Zn;
5636  let Inst{5-2} = 0b0000;
5637  let Inst{1-0} = ZAda;
5638
5639  let Constraints = "$ZAda = $_ZAda";
5640}
5641
// Instantiates the four operand-shape variants of
// sme2_fp8_fp32_quarter_tile_outer_product.
// N is 1 exactly when $Zn uses the ZZ_b_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_b_mul_r_Hi class; otherwise the ZPR8Mul2_Lo/_Hi
// classes are used.
5642multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic> {
5643  // Single vectors
5644  def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>;
5645
5646  // Multiple and single vectors
5647  def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>;
5648
5649  // Single and multiple vectors
5650  def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>;
5651
5652  // Multiple vectors
5653  def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
5654}
5655
// Quarter-tile outer-product instruction class with a TileOp16 accumulator.
//   M -> Inst{20}
//   N -> Inst{9}
//   S -> Inst{4}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a single bit in
// Inst{0}, with Inst{3-1} fixed to 0b100. $ZAda is tied to the input $_ZAda.
// The field layout matches sme2_fp16_quarter_tile_outer_product; only the
// fixed Inst{31-21} value differs (it ends in 1 here).
5656class sme2_bf16_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5657    : I<(outs TileOp16:$ZAda),
5658        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5659        mnemonic, "\t$ZAda, $Zn, $Zm",
5660        "", []>, Sched<[]> {
5661  bit ZAda;
5662  bits<3> Zn;
5663  bits<3> Zm;
5664
5665  let Inst{31-21} = 0b10000001001;
5666  let Inst{20} = M;
5667  let Inst{19-17} = Zm;
5668  let Inst{16-10} = 0b0000000;
5669  let Inst{9} = N;
5670  let Inst{8-6} = Zn;
5671  let Inst{5} = 0;
5672  let Inst{4} = S;
5673  let Inst{3-1} = 0b100;
5674  let Inst{0} = ZAda;
5675
5676  let Constraints = "$ZAda = $_ZAda";
5677}
5678
// Instantiates the four operand-shape variants of
// sme2_bf16_fp16_quarter_tile_outer_product. S is forwarded unchanged.
// N is 1 exactly when $Zn uses the ZZ_h_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_h_mul_r_Hi class; otherwise the ZPR16Mul2_Lo/_Hi
// classes are used.
5679multiclass sme2_bfmop4as_non_widening<bit S, string mnemonic> {
5680  // Single vectors
5681  def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
5682
5683  // Multiple and single vectors
5684  def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
5685
5686  // Single and multiple vectors
5687  def _MZ2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
5688
5689  // Multiple vectors
5690  def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
5691}
5692
// Quarter-tile outer-product instruction class with a TileOp32 accumulator.
//   M -> Inst{20}
//   N -> Inst{9}
//   S -> Inst{4}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a 2-bit field in
// Inst{1-0}, with Inst{3-2} fixed to zero. $ZAda is tied to the input $_ZAda.
5693class sme2_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5694    : I<(outs TileOp32:$ZAda),
5695        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5696        mnemonic, "\t$ZAda, $Zn, $Zm",
5697        "", []>, Sched<[]> {
5698  bits<2> ZAda;
5699  bits<3> Zn;
5700  bits<3> Zm;
5701
5702  let Inst{31-21} = 0b10000000000;
5703  let Inst{20} = M;
5704  let Inst{19-17} = Zm;
5705  let Inst{16-10} = 0b0000000;
5706  let Inst{9} = N;
5707  let Inst{8-6} = Zn;
5708  let Inst{5} = 0;
5709  let Inst{4} = S;
5710  let Inst{3-2} = 0b00;
5711  let Inst{1-0} = ZAda;
5712
5713  let Constraints = "$ZAda = $_ZAda";
5714}
5715
// Instantiates the four operand-shape variants of
// sme2_fp32_quarter_tile_outer_product. S is forwarded unchanged.
// N is 1 exactly when $Zn uses the ZZ_s_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_s_mul_r_Hi class; otherwise the ZPR32Mul2_Lo/_Hi
// classes are used.
5716multiclass sme2_fmop4as_fp32_non_widening<bit S, string mnemonic> {
5717  // Single vectors
5718  def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>;
5719
5720  // Multiple and single vectors
5721  def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>;
5722
5723  // Single and multiple vectors
5724  def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, ZZ_s_mul_r_Hi>;
5725
5726  // Multiple vectors
5727  def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>;
5728}
5729
// Quarter-tile outer-product instruction class with a TileOp64 accumulator.
//   M -> Inst{20}
//   N -> Inst{9}
//   S -> Inst{4}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a 3-bit field in
// Inst{2-0}, with Inst{3} fixed to 1. $ZAda is tied to the input $_ZAda.
5730class sme2_fp64_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5731    : I<(outs TileOp64:$ZAda),
5732        (ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5733        mnemonic, "\t$ZAda, $Zn, $Zm",
5734        "", []>, Sched<[]> {
5735  bits<3> ZAda;
5736  bits<3> Zn;
5737  bits<3> Zm;
5738
5739  let Inst{31-21} = 0b10000000110;
5740  let Inst{20} = M;
5741  let Inst{19-17} = Zm;
5742  let Inst{16-10} = 0b0000000;
5743  let Inst{9} = N;
5744  let Inst{8-6} = Zn;
5745  let Inst{5} = 0;
5746  let Inst{4} = S;
5747  let Inst{3} = 0b1;
5748  let Inst{2-0} = ZAda;
5749
5750  let Constraints = "$ZAda = $_ZAda";
5751}
5752
// Instantiates the four operand-shape variants of
// sme2_fp64_quarter_tile_outer_product. S is forwarded unchanged.
// N is 1 exactly when $Zn uses the ZZ_d_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_d_mul_r_Hi class; otherwise the ZPR64Mul2_Lo/_Hi
// classes are used.
5753multiclass sme2_fmop4as_fp64_non_widening<bit S, string mnemonic> {
5754  // Single vectors
5755  def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>;
5756
5757  // Multiple and single vectors
5758  def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>;
5759
5760  // Single and multiple vectors
5761  def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>;
5762
5763  // Multiple vectors
5764  def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>;
5765}
5766
// Quarter-tile outer-product instruction class with a TileOp32 accumulator.
//   M -> Inst{20}
//   N -> Inst{9}
//   S -> Inst{4}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a 2-bit field in
// Inst{1-0}, with Inst{3-2} fixed to zero. $ZAda is tied to the input $_ZAda.
// Shares its Inst{31-21} value with sme2_bf16_fp16_quarter_tile_outer_product;
// the two differ in Inst{3} (0 here, 1 there).
5767class sme2_fp16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5768    : I<(outs TileOp32:$ZAda),
5769        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5770        mnemonic, "\t$ZAda, $Zn, $Zm",
5771        "", []>, Sched<[]> {
5772  bits<2> ZAda;
5773  bits<3> Zn;
5774  bits<3> Zm;
5775
5776  let Inst{31-21} = 0b10000001001;
5777  let Inst{20} = M;
5778  let Inst{19-17} = Zm;
5779  let Inst{16-10} = 0b0000000;
5780  let Inst{9} = N;
5781  let Inst{8-6} = Zn;
5782  let Inst{5} = 0;
5783  let Inst{4} = S;
5784  let Inst{3-2} = 0b00;
5785  let Inst{1-0} = ZAda;
5786
5787  let Constraints = "$ZAda = $_ZAda";
5788}
5789
// Instantiates the four operand-shape variants of
// sme2_fp16_fp32_quarter_tile_outer_product. S is forwarded unchanged.
// N is 1 exactly when $Zn uses the ZZ_h_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_h_mul_r_Hi class; otherwise the ZPR16Mul2_Lo/_Hi
// classes are used.
5790multiclass sme2_fmop4as_fp16_fp32_widening<bit S, string mnemonic> {
5791  // Single vectors
5792  def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
5793
5794  // Multiple and single vectors
5795  def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
5796
5797  // Single and multiple vectors
5798  def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
5799
5800  // Multiple vectors
5801  def _M2Z2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
5802}
5803
// Quarter-tile outer-product instruction class with a TileOp16 accumulator.
// There is no S parameter: Inst{5-1} is fixed to 0b00100.
//   M -> Inst{20}
//   N -> Inst{9}
//   zn_ty, zm_ty : operand classes of $Zn and $Zm
// Zn, Zm are 3-bit fields (Inst{8-6}, Inst{19-17}); ZAda is a single bit in
// Inst{0}. $ZAda is tied to the input $_ZAda.
// Shares its Inst{31-21} value with sme2_fp8_fp32_quarter_tile_outer_product;
// the two differ in Inst{3} (1 here, 0 there).
5804class sme2_fp8_fp16_quarter_tile_outer_product<bit M, bit N, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
5805    : I<(outs TileOp16:$ZAda),
5806        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
5807        mnemonic, "\t$ZAda, $Zn, $Zm",
5808        "", []>, Sched<[]> {
5809  bit     ZAda;
5810  bits<3> Zn;
5811  bits<3> Zm;
5812
5813  let Inst{31-21} = 0b10000000001;
5814  let Inst{20} = M;
5815  let Inst{19-17} = Zm;
5816  let Inst{16-10} = 0b0000000;
5817  let Inst{9} = N;
5818  let Inst{8-6} = Zn;
5819  let Inst{5-1} = 0b00100;
5820  let Inst{0} = ZAda;
5821
5822  let Constraints = "$ZAda = $_ZAda";
5823}
5824
// Instantiates the four operand-shape variants of
// sme2_fp8_fp16_quarter_tile_outer_product.
// N is 1 exactly when $Zn uses the ZZ_b_mul_r_Lo class, and M is 1 exactly
// when $Zm uses the ZZ_b_mul_r_Hi class; otherwise the ZPR8Mul2_Lo/_Hi
// classes are used.
5825multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> {
5826  // Single vectors
5827  def _MZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>;
5828
5829  // Multiple and single vectors
5830  def _M2ZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>;
5831
5832  // Single and multiple vectors
5833  def _MZ2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>;
5834
5835  // Multiple vectors
5836  def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
5837}
5838
5839// FP8 SME FDOT instructions
5840
// FP8 indexed dot product accumulating into a 16-bit-element ZA array
// (MatrixOp16), with a two-vector $Zn operand (ZZ_b_mul_r).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   op        - 3-bit opcode selector spliced into the base-class op field.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic, bits<3> op,
                                          SDPatternOperator intrinsic> {
  def NAME
      : sme2_multi_vec_array_vg2_index<0b11, {op{2},?,?,op{1-0},?},
                                       MatrixOp16, ZZ_b_mul_r, ZPR4b8,
                                       VectorIndexH32b_timm, mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    // 3-bit element index: upper two bits go to Inst{11-10}, low bit to
    // Inst{3}.
    bits<3> i;
    let Inst{3}     = i{0};
    let Inst{11-10} = i{2-1};

    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm,
                      VectorIndexH32b_timm:$i),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r,
                                             ZPR4b8, VectorIndexH32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexH32b_timm, tileslice16>;
}
5863
// FP8 indexed dot product accumulating into a 16-bit-element ZA array
// (MatrixOp16), with a four-vector $Zn operand (ZZZZ_b_mul_r).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_index_za16_vg1x4<string mnemonic,
                                          SDPatternOperator intrinsic> {
  def NAME
      : sme2_multi_vec_array_vg4_index<0b0, {0b1,?,?,0b100,?},
                                       MatrixOp16, ZZZZ_b_mul_r, ZPR4b8,
                                       VectorIndexH32b_timm, mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    // 3-bit element index: upper two bits go to Inst{11-10}, low bit to
    // Inst{3}.
    bits<3> i;
    let Inst{3}     = i{0};
    let Inst{11-10} = i{2-1};

    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm,
                      VectorIndexH32b_timm:$i),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                             ZZZZ_b_mul_r, ZPR4b8,
                                             VectorIndexH32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexH32b_timm, tileslice16>;
}
5886
// FP8 indexed dot product accumulating into a 32-bit-element ZA array
// (MatrixOp32), with a two-vector $Zn operand (ZZ_b_mul_r).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_index_za32_vg1x2<string mnemonic,
                                          SDPatternOperator intrinsic> {
  def NAME
      : sme2_multi_vec_array_vg2_index<0b01, {0b0,?,?,0b111},
                                       MatrixOp32, ZZ_b_mul_r, ZPR4b8,
                                       VectorIndexS32b_timm, mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    // 2-bit element index, encoded whole in Inst{11-10}.
    bits<2> i;
    let Inst{11-10} = i;

    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm,
                      VectorIndexS32b_timm:$i),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r,
                                             ZPR4b8, VectorIndexS32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexS32b_timm, tileslice16>;
}
5906
// FP8 indexed dot product accumulating into a 32-bit-element ZA array
// (MatrixOp32), with a four-vector $Zn operand (ZZZZ_b_mul_r).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_index_za32_vg1x4<string mnemonic,
                                          SDPatternOperator intrinsic> {
  def NAME
      : sme2_multi_vec_array_vg4_index<0b1, {0b0,?,?,0b0,0b001},
                                       MatrixOp32, ZZZZ_b_mul_r, ZPR4b8,
                                       VectorIndexS32b_timm, mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    // 2-bit element index, encoded whole in Inst{11-10}.
    bits<2> i;
    let Inst{11-10} = i;

    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm,
                      VectorIndexS32b_timm:$i),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                             ZZZZ_b_mul_r, ZPR4b8,
                                             VectorIndexS32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexS32b_timm, tileslice16>;
}
5926
// FP8 indexed FDOTV-style instruction built on
// sme2_fp8_multi_vec_array_vg4_index.  Produces the real instruction (NAME),
// a _PSEUDO record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   T         - single selector bit forwarded to the instruction class.
//   intrinsic - operator matched by the selection pattern.
// NOTE(review): the pseudo and the pattern use the two-vector helpers
// (ZZ_b_mul_r / ..._VG2_...) although the multiclass name says vg1x4;
// presumably the $Zn operand is a vector pair here - confirm against
// sme2_fp8_multi_vec_array_vg4_index.
multiclass sme2_fp8_fdotv_index_za32_vg1x4<string mnemonic, bit T,
                                           SDPatternOperator intrinsic> {
  def NAME
      : sme2_fp8_multi_vec_array_vg4_index<mnemonic, T>,
        SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r,
                                             ZPR4b8, VectorIndexS32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexS32b_timm, tileslice16>;
}
5935
// FP8 dot product, "multi and single" form: a two-vector $Zn operand (ZZ_b)
// and a single-vector $Zm operand (ZPR4b8).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   op        - 7-bit opcode selector forwarded to the instruction class.
//   matrix_op - ZA array operand class used for $ZAd.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_single_vg1x2<string mnemonic, bits<7> op,
                                      MatrixOperand matrix_op,
                                      SDPatternOperator intrinsic> {
  def NAME
      : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_op, ZZ_b, ZPR4b8,
                                               mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZ_b:$Zn, ZPR4b8:$Zm),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, ZZ_b,
                                              ZPR4b8, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                           ZPR4b8, nxv16i8, tileslice16>;
}
5951
// FP8 dot product, "multi and single" form: a four-vector $Zn operand
// (ZZZZ_b) and a single-vector $Zm operand (ZPR4b8).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   op        - 7-bit opcode selector forwarded to the instruction class.
//   matrix_op - ZA array operand class used for $ZAd.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_single_vg1x4<string mnemonic, bits<7> op,
                                      MatrixOperand matrix_op,
                                      SDPatternOperator intrinsic> {
  def NAME
      : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_op, ZZZZ_b, ZPR4b8,
                                               mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZZZ_b:$Zn, ZPR4b8:$Zm),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b,
                                              ZPR4b8, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                           ZPR4b8, nxv16i8, tileslice16>;
}
5967
// FP8 dot product, "multi and multi" form: both $Zn and $Zm are two-vector
// operands (ZZ_b_mul_r).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   op        - 7-bit opcode selector forwarded to the instruction class.
//   matrix_op - ZA array operand class used for $ZAd.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_multi_vg1x2<string mnemonic, bits<7> op,
                                     MatrixOperand matrix_op,
                                     SDPatternOperator intrinsic> {
  def NAME
      : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_op, ZZ_b_mul_r,
                                             mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZZ_b_mul_r:$Zm),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          nxv16i8, tileslice16>;
}
5983
// FP8 dot product, "multi and multi" form: both $Zn and $Zm are four-vector
// operands (ZZZZ_b_mul_r).
// Produces: the real instruction (NAME), an extra assembly alias, a _PSEUDO
// record, and a selection pattern binding `intrinsic` to NAME.
//   mnemonic  - assembly mnemonic.
//   op        - 7-bit opcode selector forwarded to the instruction class.
//   matrix_op - ZA array operand class used for $ZAd.
//   intrinsic - operator matched by the selection pattern.
multiclass sme2_fp8_fdot_multi_vg1x4<string mnemonic, bits<7> op,
                                     MatrixOperand matrix_op,
                                     SDPatternOperator intrinsic> {
  def NAME
      : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_op, ZZZZ_b_mul_r,
                                             mnemonic>,
        SMEPseudo2Instr<NAME, 1> {
    let Uses = [FPMR, FPCR];
  }

  // Additional assembly spelling for NAME.
  // NOTE(review): presumably the form without an explicit vector-group
  // suffix - confirm against the base class's asm string.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn,
                      ZZZZ_b_mul_r:$Zm),
                  0>;

  def _PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                             ZZZZ_b_mul_r, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          nxv16i8, tileslice16>;
}
5999