//=-- SMEInstrFormats.td -  AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//

// Complex patterns producing a single i32-typed operand. Each one is matched
// by the C++ selector named in its string (an ImmToReg<Reg, N> instantiation).
// NOTE(review): the second template argument looks like the highest tile
// number of the register class - confirm against the selector implementation.
def imm_to_tile8   : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAB0, 0>",  []>;
def imm_to_tile16  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAH0, 1>",  []>;
def imm_to_tile32  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAS0, 3>",  []>;
def imm_to_tile64  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAD0, 7>",  []>;
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAQ0, 15>", []>;
def imm_to_zt      : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZT0, 0>",   []>;

// Complex patterns producing two operands from an i32 tile-slice index, one
// per element size. All of them are matched by SelectSMETileSlice<A, B>.
// NOTE(review): A/B presumably are the maximum immediate offset and its
// scale - confirm against the selector implementation.
def tileslice8   : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16  : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
def tileslice32  : ComplexPattern<i32 , 2, "SelectSMETileSlice<3, 1>", []>;
def tileslice64  : ComplexPattern<i32 , 2, "SelectSMETileSlice<1, 1>", []>;
def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0, 1>", []>; // nop

// Tile-slice range variants using a second selector argument of 2.
def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6, 2>", []>;
def tileslicerange1s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<2, 2>", []>;
def tileslicerange0s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 2>", []>;

// Tile-slice range variants using a second selector argument of 4.
def tileslicerange2s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
def tileslicerange1s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<4, 4>", []>;
def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;
// Two-operand pointer complex pattern matched by
// SelectAddrModeIndexedSVE<0, 15>; the selector is also handed the root node.
let WantsRoot = true in
def am_sme_indexed_b4 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0, 15>">;

// The FORM_TRANSPOSED_REG_TUPLE pseudos defined below are intended to
// improve register allocation for intrinsics which use strided and contiguous
// multi-vector registers, avoiding unnecessary copies.
// If the operands of the pseudo are copies where the source register is in
// the StridedOrContiguous class, the pseudo is used to provide a hint to the
// register allocator suggesting a contiguous multi-vector register which
// matches the subregister sequence used by the operands.
// If the operands do not match this pattern, the pseudos are expanded
// to a REG_SEQUENCE using the post-isel hook.

def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO :
  Pseudo<(outs ZPR2:$tup),
         (ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{
  let hasSideEffects = 0;
  let hasPostISelHook = 1;
}

def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
  Pseudo<(outs ZPR4:$tup),
         (ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>, Sched<[]>{
  let hasSideEffects = 0;
  let hasPostISelHook = 1;
}

// SelectionDAG nodes for ZA loads/stores: no results, three operands
// (integer, pointer, integer); both are chained and have side effects.
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
                             [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
                             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;

//===----------------------------------------------------------------------===//
// SME Pseudo Classes
//===----------------------------------------------------------------------===//

// Instruction mapping over all SMEPseudo2Instr records: rows are grouped by
// PseudoName, the key column is IsInstr == 0 (the pseudo) and the value
// column is IsInstr == 1 (the real instruction).
def getSMEPseudoMap : InstrMapping {
  let FilterClass = "SMEPseudo2Instr";
  let RowFields = ["PseudoName"];
  let ColFields = ["IsInstr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Mixin tagging a record with the row name and the pseudo/instruction flag
// consumed by getSMEPseudoMap.
class SMEPseudo2Instr<string name, bit instr> {
  string PseudoName = name;
  bit IsInstr = instr;
}

// Pseudo for outer products: takes the tile as an i32 immediate, two
// predicates and two vectors of type zpr_ty.
class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
                          zpr_ty:$zn, zpr_ty:$zm), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// ZA-array pseudo: slice index (register + immediate), a multi-vector $Zn and
// a single vector $Zm.
class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
                                            ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// ZA-array pseudo: slice index and two multi-vector operands.
class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// ZA-array pseudo: slice index, a multi-vector $Zn, a single vector $Zm and an
// extra immediate $i.
class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
                                           ZPRRegOp zpr_ty, Operand imm_ty, SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// Pseudo taking a slice index (W8-W11 class register + immediate) and a
// multi-vector source.
class sme2_move_to_za_pseudo<string name, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// Pseudo taking a tile immediate, a slice index (W12-W15 class register +
// immediate) and a multi-vector source.
class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// Pseudo taking only a slice index (register + immediate).
class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// Pseudo producing a vector result from a tile immediate and a slice index.
class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand vector_ty, SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// Pseudo producing a multi-vector result from a slice index.
class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
                                      SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

//===----------------------------------------------------------------------===//
// SME pattern match helpers.
156//===----------------------------------------------------------------------===// 157 158class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, 159 ValueType vt, ComplexPattern tileslice> 160 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm), 161 (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>; 162 163 164class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, 165 ValueType vt, ComplexPattern tileslice> 166 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm), 167 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2), 168 zpr_ty:$Zm)>; 169class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, 170 ValueType vt, ComplexPattern tileslice> 171 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), 172 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm), 173 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 174 (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), 175 zpr_ty:$Zm)>; 176 177class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice> 178 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2), 179 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 180 (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), 181 (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; 182 183class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice> 184 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), 185 vt:$Zn1, 
vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4), 186 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 187 (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), 188 (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>; 189 190class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, 191 Operand imm_ty, ComplexPattern tileslice> 192 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)), 193 (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>; 194 195 196class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, 197 Operand imm_ty, ComplexPattern tileslice> 198 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)), 199 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 200 (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>; 201 202class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, 203 Operand imm_ty, ComplexPattern tileslice> 204 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), 205 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)), 206 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 207 (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), 208 zpr_ty:$Zm, imm_ty:$i)>; 209 210class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty> 211 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), 212 (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; 213 214class 
SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty> 215 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))), 216 (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3), 217 imm_ty:$i)>; 218 219class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt> 220 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)), 221 (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>; 222 223class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice> 224 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2), 225 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>; 226 227class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice> 228 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), 229 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; 230 231class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice> 232 : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2), 233 (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>; 234 235class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice> 
236 : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), 237 (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; 238 239class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice> 240 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))), 241 (!cast<Instruction>(name) $base, $offset)>; 242 243class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, Operand tile_imm, Operand index_ty, ComplexPattern tileslice> 244 : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), 245 (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset)>; 246 247 248//===----------------------------------------------------------------------===// 249// SME pattern match helpers. 250//===----------------------------------------------------------------------===// 251 252class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic, Operand imm_ty, ValueType pg_ty, ValueType vt> 253 : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm), 254 (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>; 255 256 257//===----------------------------------------------------------------------===// 258// SME smstart/smstop 259//===----------------------------------------------------------------------===// 260 261// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or 262// both fields: 263// 264// MSR SVCRSM, #<imm1> 265// MSR SVCRZA, #<imm1> 266// MSR SVCRSMZA, #<imm1> 267// 268// It's tricky to using the existing pstate operand defined in 269// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2, 270// when these fields are also encoded in CRm[3:1]. 
// MSR (immediate) writing an SVCR pstate field: the 3-bit field selector is
// encoded in Inst{11-9} and the 1-bit immediate in Inst{8}.
def MSRpstatesvcrImm1
  : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
                      "\t$pstatefield, $imm">,
    Sched<[WriteSys]> {
  bits<3> pstatefield;
  bit imm;
  let Inst{18-16} = 0b011; // op1
  let Inst{11-9} = pstatefield;
  let Inst{8} = imm;
  let Inst{7-5} = 0b011; // op2
  let hasPostISelHook = 1;
}

// smstart aliases use immediate 1; the field selector is 0b011 (no suffix),
// 0b001 ("sm") or 0b010 ("za").
def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;

// smstop aliases use the same field selectors with immediate 0.
def : InstAlias<"smstop",     (MSRpstatesvcrImm1 0b011, 0b0)>;
def : InstAlias<"smstop sm",  (MSRpstatesvcrImm1 0b001, 0b0)>;
def : InstAlias<"smstop za",  (MSRpstatesvcrImm1 0b010, 0b0)>;


//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//

// Common encoding for FP outer products with two predicates and two vector
// sources. The tile is both read and written ($ZAda is tied to $_ZAda); the
// tile number bits (Inst{2-0}) are filled in by the users of this class.
class sme_fp_outer_product_inst<bit S, bits<2> sz, bits<2> op, MatrixTileOperand za_ty,
                                ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1000000;
  let Inst{24} = op{1};
  let Inst{23} = 0b1;
  let Inst{22-21} = sz;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5} = Zn;
  let Inst{4} = S;
  let Inst{3} = op{0};

  let Constraints = "$ZAda = $_ZAda";
}

// Each outer-product multiclass below defines the real instruction (NAME), a
// NAME_PSEUDO linked to it through SMEPseudo2Instr, and a pattern selecting
// the intrinsic onto the pseudo.

// 32-bit tile (2-bit tile number), nxv4f32 sources.
multiclass sme_outer_product_fp32<bit S, bits<2> sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, sz, 0b00, TileOp32, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2} = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<zpr_ty, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
}

// 32-bit tile, byte-element sources; the instruction reads FPMR and FPCR.
multiclass sme2_fp8_fmopa_za32<string mnemonic, SDPatternOperator intrinsic> {
  def NAME : sme_fp_outer_product_inst<0, 0b01, 0b00, TileOp32, ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2} = 0b0;

    let Uses = [FPMR, FPCR];
  }

  let mayStore = 1, mayLoad = 1 in
  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv16i1, nxv16i8>;
}

// 64-bit tile (3-bit tile number), nxv2f64 sources.
multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b10, 0b00, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
}

// 16-bit tile (1-bit tile number), byte-element sources; the instruction reads
// FPMR and FPCR.
multiclass sme2_fp8_fmopa_za16<string mnemonic, SDPatternOperator intrinsic> {
  def NAME : sme_fp_outer_product_inst<0, {0, 0b1}, 0b01, TileOp16, ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<1> ZAda;
    let Inst{2-1} = 0b00;
    let Inst{0} = ZAda;

    let Uses = [FPMR, FPCR];
  }

  let mayStore = 1, mayLoad = 1 in
  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_1, nxv16i1, nxv16i8>;
}

// 16-bit tile (1-bit tile number), halfword-element sources of type `vt`.
multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, ValueType vt, SDPatternOperator intrinsic = null_frag> {
  def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b11, TileOp16, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<1> ZAda;
    let Inst{2-1} = 0b00;
    let Inst{0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_1, nxv8i1, vt>;
}

// Common encoding for integer outer products. opc = {u0, u1, S}; the tile
// number bits are filled in by the users of this class.
class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                 string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1010000;
  let Inst{24} = opc{2}; // u0
  let Inst{23} = 0b1;
  let Inst{22} = sz;
  let Inst{21} = opc{1}; // u1
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5} = Zn;
  let Inst{4} = opc{0}; //S;
  let Inst{3} = sme2;

  let Constraints = "$ZAda = $_ZAda";
}

// Byte-element sources accumulating into a 32-bit tile.
multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0, TileOp32,
                                        ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2} = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1, nxv16i8>;
}

// Halfword-element sources accumulating into a 64-bit tile.
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
                                        ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
}

// Unpredicated outer product into a 32-bit tile taking an additional $Zk
// register and a 2-bit index; the five opc bits are scattered over
// Inst{24,21,15,13,3}.
class sme_int_sparse_outer_product_i32<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk, VectorIndexS32b:$imm),
        mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
        "", []>,
      Sched<[]> {
  bits<2> ZAda;
  bits<4> Zn;
  bits<5> Zm;
  bits<3> Zk;
  bits<2> imm;
  let Inst{31-25} = 0b1000000;
  let Inst{24} = opc{4};
  let Inst{23-22} = 0b01;
  let Inst{21} = opc{3};
  let Inst{20-16} = Zm;
  let Inst{15} = opc{2};
  let Inst{14} = 0b0;
  let Inst{13} = opc{1};
  let Inst{12-10} = Zk;
  let Inst{9-6} = Zn;
  let Inst{5-4} = imm;
  let Inst{3} = opc{0};
  let Inst{2} = 0b0;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// Predicated outer product into a 32-bit tile. Inst{24} is the inverse of
// opc{2}, which is itself placed in Inst{3}.
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  bits<2> ZAda;
  let Inst{31-25} = 0b1000000;
  let Inst{24} = !if(opc{2}, 0, 1);
  let Inst{23-22} = 0b10;
  let Inst{21} = opc{1};
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5} = Zn;
  let Inst{4} = opc{0};
  let Inst{3} = opc{2};
  let Inst{2} = 0b0;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// nxv8bf16 sources accumulating into a 32-bit tile.
multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8bf16>;
}

// nxv8f16 sources accumulating into a 32-bit tile.
multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
}

// Unpredicated "quarter" outer product into a 64-bit tile. zn_u_pair is
// {u0, N} and zm_u_pair is {u1, M}; $Zn and $Zm are encoded in 3 bits each.
class sme_quarter_outer_product_i64<bits<2> zn_u_pair, bits<2> zm_u_pair, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp64:$ZAda),
        (ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<3> ZAda;
  bits<3> Zn;
  bits<3> Zm;
  let Inst{31-25} = 0b1010000;
  let Inst{24} = zn_u_pair{1}; // u0
  let Inst{23-22} = 0b11;
  let Inst{21} = zm_u_pair{1}; // u1
  let Inst{20} = zm_u_pair{0}; // M
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = zn_u_pair{0}; // N
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = subtr;
  let Inst{3} = 0b1;
  let Inst{2-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// Unpredicated "quarter" outer product into a 32-bit tile; same operand
// pairing as the 64-bit form above.
class sme_quarter_outer_product_i8_i32<bits<2> zn_u_pair, bits<2> zm_u_pair, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;
  let Inst{31-25} = 0b1000000;
  let Inst{24} = zn_u_pair{1}; // u0
  let Inst{23-22} = 0b00;
  let Inst{21} = zm_u_pair{1}; // u1
  let Inst{20} = zm_u_pair{0}; // M
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0100000;
  let Inst{9} = zn_u_pair{0}; // N
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = subtr;
  let Inst{3-2} = 0b00;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// Unpredicated "quarter" outer product into a 32-bit tile with a single u0
// bit and separate N / M bits.
class sme_quarter_outer_product_i16_i32<bit u0, bit N, bit M, bit subtr, RegisterOperand zn_ty, RegisterOperand zm_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;
  let Inst{31-25} = 0b1000000;
  let Inst{24} = u0;
  let Inst{23-21} = 0b000;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0100000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = subtr;
  let Inst{3-2} = 0b10;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// The three multiclasses below each define four variants: N and M are each 0
// or 1, and a value of 1 switches that operand from the single-register class
// to the ZZ_*_mul_r register-list class.

multiclass sme_quarter_outer_product_i8_i32<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
  def _MZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 0}, subtr,
                                                   ZPR8Mul2_Lo, ZPR8Mul2_Hi, mnemonic>;
  def _M2ZZ_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 0}, subtr,
                                                    ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, mnemonic>;
  def _MZ2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 0}, {zm_u, 1}, subtr,
                                                    ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, mnemonic>;
  def _M2Z2Z_BToS : sme_quarter_outer_product_i8_i32<{zn_u, 1}, {zm_u, 1}, subtr,
                                                     ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, mnemonic>;
}

multiclass sme_quarter_outer_product_i16_i32<bit unsigned, bit subtr, string mnemonic>{
  def _MZZ_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b0, subtr,
                                                    ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
  def _M2ZZ_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b0, subtr,
                                                     ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
  def _MZ2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b0, 0b1, subtr,
                                                     ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
  def _M2Z2Z_HToS : sme_quarter_outer_product_i16_i32<unsigned, 0b1, 0b1, subtr,
                                                      ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
}

multiclass sme_quarter_outer_product_i64<bit zn_u, bit zm_u, bit subtr, string mnemonic>{
  def _MZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 0}, subtr,
                                                ZPR16Mul2_Lo, ZPR16Mul2_Hi, mnemonic>;
  def _M2ZZ_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 0}, subtr,
                                                 ZZ_h_mul_r_Lo, ZPR16Mul2_Hi, mnemonic>;
  def _MZ2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 0}, {zm_u, 1}, subtr,
                                                 ZPR16Mul2_Lo, ZZ_h_mul_r_Hi, mnemonic>;
  def _M2Z2Z_HtoD : sme_quarter_outer_product_i64<{zn_u, 1}, {zm_u, 1}, subtr,
                                                  ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//

// Common encoding: two predicates and one vector source; the tile is tied
// ($ZAda = $_ZAda) and its number bits are filled in by the users.
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
                                  ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs tile_ty:$ZAda),
        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
        "", []>, Sched<[]> {
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-23} = 0b110000001;
  let Inst{22} = op;
  let Inst{21-17} = 0b01000;
  let Inst{16} = V;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5} = Zn;
  let Inst{4-3} = 0b00;

  let Constraints = "$ZAda = $_ZAda";
}

// Pseudo counterpart taking the tile as an i32 immediate.
class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let SMEMatrixType = za_flag;
  let usesCustomInserter = 1;
}

// 32-bit element form: instruction, _PSEUDO_S and the selection pattern.
multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic, SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2} = 0b0;
    let Inst{1-0} = ZAda;
  }

  def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
            (nxv4i32 ZPR32:$zn)),
            (!cast<Instruction>(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, $zn)>;
}

// 64-bit element form; the selection pattern is guarded by HasSMEI16I64.
multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic, SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;

  let Predicates = [HasSMEI16I64] in {
    def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
              (nxv2i64 ZPR64:$zn)),
              (!cast<Instruction>(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>;
  }
}

//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//

// Common encoding for scalar+scalar tile-slice loads; Inst{3-0} (tile number
// and slice immediate) is filled in per element size by sme_mem_ld_v_ss.
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
                         string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24} = Q;
  let Inst{23-22} = msz;
  let Inst{21} = 0b0;
  let Inst{20-16} = Rm;
  let Inst{15} = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5} = Rn;
  let Inst{4} = 0b0;

  let mayLoad = 1;
}

// Binds the operand list and assembly string; is_col is passed as the V bit.
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_ld_ss_base<
        Q, is_col, msz, (outs tile_ty:$ZAt),
        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
             gpr_ty:$Rm),
        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;

// Assembly aliases for one element size: the brace-less spelling of the tile
// slice, and the forms that omit $Rm (encoded as XZR). Only the braced
// XZR form has its last InstAlias argument set to 1.
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
                                   MatrixTileVectorOperand tile_ty,
                                   Operand imm_ty, RegisterOperand gpr_ty,
                                   string pg_suffix=""> {
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
  // Default XZR offset aliases
  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}

// Instantiates the aliases for the b/h/w/d/q element sizes, choosing the
// vertical or horizontal tile-vector operand from is_col.
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
                              string pg_suffix=""> {
  defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
                                 !if(is_col, TileVectorOpV128, TileVectorOpH128),
                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}

// Load aliases: "ld1" mnemonic prefix with a "/z" predicate suffix.
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}

// Selection patterns for a load intrinsic: one for a plain base address
// (offset register XZR) and one, with higher AddedComplexity, for a
// base + register address matched by `addr`.
multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
                                  Operand tile_ty, Operand offset_ty,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // base, tileslice
  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;

  // reg + reg, tileslice
  let AddedComplexity = 1 in {
    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                    tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                              offset_ty:$imm))),
              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
  }
}

// Load pseudo taking the tile and the slice offset as i32 immediates.
class sme_load_pseudo
    : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
                          i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let usesCustomInserter = 1;
  let mayLoad = 1;
}

// Defines the five element-size load instructions for one direction (is_col),
// their aliases, the matching pseudos and the intrinsic patterns. Inst{3-0}
// is split between tile number and slice immediate: 0+4 bits for _B, 1+3 for
// _H, 2+2 for _S, 3+1 for _D and 4+0 for _Q.
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3} = ZAt;
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;
    let Inst{0} = imm;
  }
  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;
  }

  defm : sme_mem_ld_ss_aliases<NAME, is_col>;

  // Pseudo instructions for lowering intrinsics, using immediates instead of
  // tile registers.
  def _PSEUDO_B : sme_load_pseudo;
  def _PSEUDO_H : sme_load_pseudo;
  def _PSEUDO_S : sme_load_pseudo;
  def _PSEUDO_D : sme_load_pseudo;
  def _PSEUDO_Q : sme_load_pseudo;

  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                !if(is_col, int_aarch64_sme_ld1b_vert,
                                            int_aarch64_sme_ld1b_horiz),
                                sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
                                tileslice8>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                !if(is_col, int_aarch64_sme_ld1h_vert,
                                            int_aarch64_sme_ld1h_horiz),
                                timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
                                tileslice16>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                !if(is_col, int_aarch64_sme_ld1w_vert,
                                            int_aarch64_sme_ld1w_horiz),
                                timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
                                tileslice32>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                !if(is_col, int_aarch64_sme_ld1d_vert,
                                            int_aarch64_sme_ld1d_horiz),
                                timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
                                tileslice64>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                !if(is_col, int_aarch64_sme_ld1q_vert,
                                            int_aarch64_sme_ld1q_horiz),
                                timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
                                tileslice128>;
}

// Instantiates the horizontal (_H, is_col = 0) and vertical (_V, is_col = 1)
// load families.
multiclass sme_mem_ld_ss<string mnemonic> {
  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
}

//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//

class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
                         string mnemonic, string argstr>
    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24} = Q;
  let Inst{23-22} = msz;
  let Inst{21} =
0b1; 878 let Inst{20-16} = Rm; 879 let Inst{15} = V; 880 let Inst{14-13} = Rv; 881 let Inst{12-10} = Pg; 882 let Inst{9-5} = Rn; 883 let Inst{4} = 0b0; 884 885 let mayStore = 1; 886 let hasSideEffects = 1; 887} 888 889class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic, 890 MatrixTileVectorOperand tile_ty, bit is_col, 891 Operand imm_ty, RegisterOperand gpr_ty> 892 : sme_mem_st_ss_base< 893 Q, is_col, msz, 894 (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, 895 GPR64sp:$Rn, gpr_ty:$Rm), 896 mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">; 897 898multiclass sme_mem_st_ss_aliases<string inst, bit is_col> { 899 defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>; 900} 901 902multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store, 903 Operand offset_ty, 904 ComplexPattern imm2tile, 905 ComplexPattern addr, 906 ComplexPattern tileslice> { 907 // base, tileslice 908 def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile), 909 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), 910 (Inst $tile, $idx, $imm, $pg, $base, XZR)>; 911 912 // reg + reg, tileslice 913 let AddedComplexity = 1 in { 914 def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset), 915 (imm2tile untyped:$tile), 916 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))), 917 (Inst $tile, $idx, $imm, $pg, $base, $offset)>; 918 } 919} 920 921multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> { 922 def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b", 923 !if(is_col, TileVectorOpV8, TileVectorOpH8), 924 is_col, sme_elm_idx0_15, GPR64shifted8> { 925 bits<4> imm; 926 let Inst{3-0} = imm; 927 } 928 def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h", 929 !if(is_col, TileVectorOpV16, TileVectorOpH16), 930 is_col, sme_elm_idx0_7, GPR64shifted16> { 931 bits<1> ZAt; 932 bits<3> imm; 933 let Inst{3} = ZAt; 934 let Inst{2-0} = imm; 935 } 936 def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w", 937 
!if(is_col, TileVectorOpV32, TileVectorOpH32), 938 is_col, sme_elm_idx0_3, GPR64shifted32> { 939 bits<2> ZAt; 940 bits<2> imm; 941 let Inst{3-2} = ZAt; 942 let Inst{1-0} = imm; 943 } 944 def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d", 945 !if(is_col, TileVectorOpV64, TileVectorOpH64), 946 is_col, sme_elm_idx0_1, GPR64shifted64> { 947 bits<3> ZAt; 948 bits<1> imm; 949 let Inst{3-1} = ZAt; 950 let Inst{0} = imm; 951 } 952 def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q", 953 !if(is_col, TileVectorOpV128, TileVectorOpH128), 954 is_col, sme_elm_idx0_0, GPR64shifted128> { 955 bits<4> ZAt; 956 let Inst{3-0} = ZAt; 957 } 958 959 defm : sme_mem_st_ss_aliases<NAME, is_col>; 960 961 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B), 962 !if(is_col, int_aarch64_sme_st1b_vert, 963 int_aarch64_sme_st1b_horiz), 964 timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0, 965 tileslice8>; 966 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H), 967 !if(is_col, int_aarch64_sme_st1h_vert, 968 int_aarch64_sme_st1h_horiz), 969 timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1, 970 tileslice16>; 971 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S), 972 !if(is_col, int_aarch64_sme_st1w_vert, 973 int_aarch64_sme_st1w_horiz), 974 timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2, 975 tileslice32>; 976 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D), 977 !if(is_col, int_aarch64_sme_st1d_vert, 978 int_aarch64_sme_st1d_horiz), 979 timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3, 980 tileslice64>; 981 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q), 982 !if(is_col, int_aarch64_sme_st1q_vert, 983 int_aarch64_sme_st1q_horiz), 984 sme_elm_idx0_0, imm_to_tile128, 985 am_sve_regreg_lsl4, tileslice128>; 986} 987 988multiclass sme_mem_st_ss<string mnemonic> { 989 defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>; 990 defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>; 991} 992 
993//===----------------------------------------------------------------------===// 994// SME Save and Restore Array 995//===----------------------------------------------------------------------===// 996 997class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr> 998 : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "", 999 []>, 1000 Sched<[]> { 1001 bits<2> Rv; 1002 bits<5> Rn; 1003 bits<4> imm4; 1004 let Inst{31-22} = 0b1110000100; 1005 let Inst{21} = isStore; 1006 let Inst{20-15} = 0b000000; 1007 let Inst{14-13} = Rv; 1008 let Inst{12-10} = 0b000; 1009 let Inst{9-5} = Rn; 1010 let Inst{4} = 0b0; 1011 let Inst{3-0} = imm4; 1012} 1013 1014let mayStore = 1 in 1015class sme_spill_inst<string opcodestr> 1016 : sme_spill_fill_base<0b1, (outs), 1017 (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, 1018 sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 1019 imm32_0_15:$offset), 1020 opcodestr>; 1021let mayLoad = 1 in 1022class sme_fill_inst<string opcodestr> 1023 : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt), 1024 (ins MatrixIndexGPR32Op12_15:$Rv, 1025 sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 1026 imm32_0_15:$offset), 1027 opcodestr>; 1028multiclass sme_spill<string opcodestr> { 1029 def NAME : sme_spill_inst<opcodestr>; 1030 def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]", 1031 (!cast<Instruction>(NAME) MatrixOp:$ZAt, 1032 MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; 1033 1034 def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)), 1035 (!cast<Instruction>(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>; 1036} 1037 1038multiclass sme_fill<string opcodestr> { 1039 def NAME : sme_fill_inst<opcodestr>; 1040 def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]", 1041 (!cast<Instruction>(NAME) MatrixOp:$ZAt, 1042 MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; 1043 def NAME # _PSEUDO 
1044 : Pseudo<(outs), 1045 (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4, 1046 GPR64sp:$base), []>, 1047 Sched<[]> { 1048 // Translated to actual instruction in AArch64ISelLowering.cpp 1049 let usesCustomInserter = 1; 1050 let mayLoad = 1; 1051 } 1052 def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm), 1053 (!cast<Instruction>(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>; 1054} 1055 1056//===----------------------------------------------------------------------===// 1057// Move instructions 1058//===----------------------------------------------------------------------===// 1059 1060class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins, 1061 string mnemonic, string argstr> 1062 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> { 1063 bits<2> Rv; 1064 bits<3> Pg; 1065 bits<5> Zn; 1066 let Inst{31-24} = 0b11000000; 1067 let Inst{23-22} = sz; 1068 let Inst{21-17} = 0b00000; 1069 let Inst{16} = Q; 1070 let Inst{15} = V; 1071 let Inst{14-13} = Rv; 1072 let Inst{12-10} = Pg; 1073 let Inst{9-5} = Zn; 1074 let Inst{4} = 0b0; 1075} 1076 1077class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty, 1078 bit is_col, Operand imm_ty, ZPRRegOp zpr_ty, 1079 string mnemonic> 1080 : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd), 1081 (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1082 mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{ 1083 1084 let Constraints = "$ZAd = $_ZAd"; 1085} 1086 1087 1088multiclass sme_vector_to_tile_aliases<Instruction inst, 1089 MatrixTileVectorOperand tile_ty, 1090 ZPRRegOp zpr_ty, Operand imm_ty> { 1091 def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn", 1092 (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>; 1093} 1094 1095multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt, 1096 ValueType ppr_vt, 
Operand imm_ty, 1097 Operand offset_ty, 1098 SDPatternOperator op, 1099 ComplexPattern tileslice> { 1100 def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, 1101 offset_ty:$imm)), 1102 (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), 1103 (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>; 1104} 1105 1106class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag> 1107 : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, 1108 i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>, 1109 Sched<[]> { 1110 // Translated to the actual instructions in AArch64ISelLowering.cpp 1111 let SMEMatrixType = za_flag; 1112 let usesCustomInserter = 1; 1113} 1114 1115multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> { 1116 def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8, 1117 TileVectorOpH8), 1118 is_col, sme_elm_idx0_15, ZPR8, mnemonic>, 1119 SMEPseudo2Instr<NAME # _B, 1> { 1120 bits<4> imm; 1121 let Inst{3-0} = imm; 1122 } 1123 def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16, 1124 TileVectorOpH16), 1125 is_col, sme_elm_idx0_7, ZPR16, mnemonic>, 1126 SMEPseudo2Instr<NAME # _H, 1> { 1127 bits<1> ZAd; 1128 bits<3> imm; 1129 let Inst{3} = ZAd; 1130 let Inst{2-0} = imm; 1131 } 1132 def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32, 1133 TileVectorOpH32), 1134 is_col, sme_elm_idx0_3, ZPR32, mnemonic>, 1135 SMEPseudo2Instr<NAME # _S, 1> { 1136 bits<2> ZAd; 1137 bits<2> imm; 1138 let Inst{3-2} = ZAd; 1139 let Inst{1-0} = imm; 1140 } 1141 def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64, 1142 TileVectorOpH64), 1143 is_col, sme_elm_idx0_1, ZPR64, mnemonic>, 1144 SMEPseudo2Instr<NAME # _D, 1> { 1145 bits<3> ZAd; 1146 bits<1> imm; 1147 let Inst{3-1} = ZAd; 1148 let Inst{0} = imm; 1149 } 1150 def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128, 1151 TileVectorOpH128), 1152 is_col, sme_elm_idx0_0, ZPR128, mnemonic>, 1153 SMEPseudo2Instr<NAME # _Q, 1> { 1154 bits<4> 
ZAd; 1155 bits<1> imm; 1156 let Inst{3-0} = ZAd; 1157 } 1158 1159 // Pseudo instructions for lowering intrinsics, using immediates instead of 1160 // tile registers. 1161 def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>, SMEPseudo2Instr<NAME # _B, 0>; 1162 def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>, SMEPseudo2Instr<NAME # _H, 0>; 1163 def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>, SMEPseudo2Instr<NAME # _S, 0>; 1164 def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>, SMEPseudo2Instr<NAME # _D, 0>; 1165 def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>, SMEPseudo2Instr<NAME # _Q, 0>; 1166 1167 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B), 1168 !if(is_col, TileVectorOpV8, 1169 TileVectorOpH8), 1170 ZPR8, sme_elm_idx0_15>; 1171 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H), 1172 !if(is_col, TileVectorOpV16, 1173 TileVectorOpH16), 1174 ZPR16, sme_elm_idx0_7>; 1175 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S), 1176 !if(is_col, TileVectorOpV32, 1177 TileVectorOpH32), 1178 ZPR32, sme_elm_idx0_3>; 1179 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D), 1180 !if(is_col, TileVectorOpV64, 1181 TileVectorOpH64), 1182 ZPR64, sme_elm_idx0_1>; 1183 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q), 1184 !if(is_col, TileVectorOpV128, 1185 TileVectorOpH128), 1186 ZPR128, sme_elm_idx0_0>; 1187 1188 defvar op = !if(is_col, int_aarch64_sme_write_vert, 1189 int_aarch64_sme_write_horiz); 1190 1191 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B), 1192 nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15, 1193 op, tileslice8>; 1194 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H), 1195 nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, 1196 op, tileslice16>; 1197 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H), 1198 nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, 1199 op, tileslice16>; 1200 defm : 
sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H), 1201 nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, 1202 op, tileslice16>; 1203 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S), 1204 nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, 1205 op, tileslice32>; 1206 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S), 1207 nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, 1208 op, tileslice32>; 1209 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D), 1210 nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, 1211 op, tileslice64>; 1212 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D), 1213 nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, 1214 op, tileslice64>; 1215 1216 defvar opq = !if(is_col, int_aarch64_sme_writeq_vert, 1217 int_aarch64_sme_writeq_horiz); 1218 1219 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1220 nxv16i8, nxv16i1, sme_elm_idx0_15, 1221 sme_elm_idx0_0, opq, tileslice128>; 1222 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1223 nxv8i16, nxv8i1, sme_elm_idx0_15, 1224 sme_elm_idx0_0, opq, tileslice128>; 1225 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1226 nxv8f16, nxv8i1, sme_elm_idx0_15, 1227 sme_elm_idx0_0, opq, tileslice128>; 1228 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1229 nxv8bf16, nxv8i1, sme_elm_idx0_15, 1230 sme_elm_idx0_0, opq, tileslice128>; 1231 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1232 nxv4i32, nxv4i1, sme_elm_idx0_15, 1233 sme_elm_idx0_0, opq, tileslice128>; 1234 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1235 nxv4f32, nxv4i1, sme_elm_idx0_15, 1236 sme_elm_idx0_0, opq, tileslice128>; 1237 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1238 nxv2i64, nxv2i1, sme_elm_idx0_15, 1239 sme_elm_idx0_0, opq, tileslice128>; 1240 defm : 
sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1241 nxv2f64, nxv2i1, sme_elm_idx0_15, 1242 sme_elm_idx0_0, opq, tileslice128>; 1243} 1244 1245multiclass sme_vector_to_tile<string mnemonic> { 1246 defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>; 1247 defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>; 1248} 1249 1250class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins, 1251 string mnemonic, string argstr> 1252 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> { 1253 bits<2> Rv; 1254 bits<3> Pg; 1255 bits<5> Zd; 1256 let Inst{31-24} = 0b11000000; 1257 let Inst{23-22} = sz; 1258 let Inst{21-17} = 0b00001; 1259 let Inst{16} = Q; 1260 let Inst{15} = V; 1261 let Inst{14-13} = Rv; 1262 let Inst{12-10} = Pg; 1263 let Inst{9} = 0b0; 1264 let Inst{4-0} = Zd; 1265} 1266 1267class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty, 1268 MatrixTileVectorOperand tile_ty, 1269 bit is_col, Operand imm_ty, string mnemonic> 1270 : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd), 1271 (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1272 mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> { 1273 1274 let Constraints = "$Zd = $_Zd"; 1275} 1276 1277multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty, 1278 MatrixTileVectorOperand tile_ty, 1279 Operand imm_ty > { 1280 def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]", 1281 (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>; 1282} 1283 1284multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt, 1285 ValueType ppr_vt, Operand offset_ty, 1286 ComplexPattern imm2tile, 1287 ComplexPattern tileslice, 1288 SDPatternOperator op> { 1289 def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), 1290 (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)), 1291 (inst $passthru, $pg, $tile, $idx, 0)>; 1292 let AddedComplexity = 1 in { 1293 def : 
Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), 1294 (imm2tile untyped:$tile), 1295 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, 1296 offset_ty:$imm)))), 1297 (inst $passthru, $pg, $tile, $idx, $imm)>; 1298 } 1299} 1300 1301multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> { 1302 def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8, 1303 TileVectorOpH8), 1304 is_col, sme_elm_idx0_15, mnemonic> { 1305 bits<4> imm; 1306 let Inst{8-5} = imm; 1307 } 1308 def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16, 1309 TileVectorOpH16), 1310 is_col, sme_elm_idx0_7, mnemonic> { 1311 bits<1> ZAn; 1312 bits<3> imm; 1313 let Inst{8} = ZAn; 1314 let Inst{7-5} = imm; 1315 } 1316 def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32, 1317 TileVectorOpH32), 1318 is_col, sme_elm_idx0_3, mnemonic> { 1319 bits<2> ZAn; 1320 bits<2> imm; 1321 let Inst{8-7} = ZAn; 1322 let Inst{6-5} = imm; 1323 } 1324 def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64, 1325 TileVectorOpH64), 1326 is_col, sme_elm_idx0_1, mnemonic> { 1327 bits<3> ZAn; 1328 bits<1> imm; 1329 let Inst{8-6} = ZAn; 1330 let Inst{5} = imm; 1331 } 1332 def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128, 1333 TileVectorOpH128), 1334 is_col, sme_elm_idx0_0, mnemonic> { 1335 bits<4> ZAn; 1336 let Inst{8-5} = ZAn; 1337 } 1338 1339 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8, 1340 !if(is_col, TileVectorOpV8, 1341 TileVectorOpH8), sme_elm_idx0_15>; 1342 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16, 1343 !if(is_col, TileVectorOpV16, 1344 TileVectorOpH16), sme_elm_idx0_7>; 1345 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32, 1346 !if(is_col, TileVectorOpV32, 1347 TileVectorOpH32), sme_elm_idx0_3>; 1348 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64, 1349 !if(is_col, 
TileVectorOpV64, 1350 TileVectorOpH64), sme_elm_idx0_1>; 1351 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128, 1352 !if(is_col, TileVectorOpV128, 1353 TileVectorOpH128), sme_elm_idx0_0>; 1354 1355 defvar op = !if(is_col, int_aarch64_sme_read_vert, 1356 int_aarch64_sme_read_horiz); 1357 1358 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B), 1359 nxv16i8, nxv16i1, sme_elm_idx0_15, 1360 imm_to_tile8, tileslice8, op>; 1361 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H), 1362 nxv8i16, nxv8i1, sme_elm_idx0_7, 1363 imm_to_tile16, tileslice16, op>; 1364 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H), 1365 nxv8f16, nxv8i1, sme_elm_idx0_7, 1366 imm_to_tile16, tileslice16, op>; 1367 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H), 1368 nxv8bf16, nxv8i1, sme_elm_idx0_7, 1369 imm_to_tile16, tileslice16, op>; 1370 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S), 1371 nxv4i32, nxv4i1, sme_elm_idx0_3, 1372 imm_to_tile32, tileslice32, op>; 1373 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S), 1374 nxv4f32, nxv4i1, sme_elm_idx0_3, 1375 imm_to_tile32, tileslice32, op>; 1376 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D), 1377 nxv2i64, nxv2i1, sme_elm_idx0_1, 1378 imm_to_tile64, tileslice64, op>; 1379 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D), 1380 nxv2f64, nxv2i1, sme_elm_idx0_1, 1381 imm_to_tile64, tileslice64, op>; 1382 1383 defvar opq = !if(is_col, int_aarch64_sme_readq_vert, 1384 int_aarch64_sme_readq_horiz); 1385 1386 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1387 nxv16i8, nxv16i1, sme_elm_idx0_0, 1388 imm_to_tile128, tileslice128, opq>; 1389 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1390 nxv8i16, nxv8i1, sme_elm_idx0_0, 1391 imm_to_tile128, tileslice128, opq>; 1392 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1393 nxv8f16, nxv8i1, 
sme_elm_idx0_0, 1394 imm_to_tile128, tileslice128, opq>; 1395 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1396 nxv8bf16, nxv8i1, sme_elm_idx0_0, 1397 imm_to_tile128, tileslice128, opq>; 1398 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1399 nxv4i32, nxv4i1, sme_elm_idx0_0, 1400 imm_to_tile128, tileslice128, opq>; 1401 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1402 nxv4f32, nxv4i1, sme_elm_idx0_0, 1403 imm_to_tile128, tileslice128, opq>; 1404 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1405 nxv2i64, nxv2i1, sme_elm_idx0_0, 1406 imm_to_tile128, tileslice128, opq>; 1407 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1408 nxv2f64, nxv2i1, sme_elm_idx0_0, 1409 imm_to_tile128, tileslice128, opq>; 1410} 1411 1412multiclass sme_tile_to_vector<string mnemonic> { 1413 defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>; 1414 defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>; 1415} 1416 1417//===----------------------------------------------------------------------===// 1418// SME Zero 1419//===----------------------------------------------------------------------===// 1420 1421// NOTE: This definition isn't really correct because there are outputs, i.e. 1422// the tile registers being zeroed. We fix this up in a custom inserter that 1423// marks the appropriate registers as being implicitly defined. 
// ZERO instruction format: an 8-bit tile mask in Inst{7-0}; every other bit
// is fixed. No outs are declared.
class sme_zero_inst<string mnemonic>
    : I<(outs), (ins MatrixTileList:$imm),
        mnemonic, "\t$imm", "", []>, Sched<[]> {
  bits<8> imm;
  let Inst{31-8} = 0b110000000000100000000000;
  let Inst{7-0}  = imm;
}

// ZERO instruction, the named-tile-list aliases (each maps a tile list
// spelling onto a fixed 8-bit mask), a pseudo taking the mask as an
// immediate, and the int_aarch64_sme_zero pattern that selects the pseudo.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
  def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
  def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;

  def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
      Sched<[]> {
    // Translated to the actual instructions in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
}

//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//

// REVD-style predicated permute on ZPR128 operands. The destination is tied
// to the $_Zd input (merging predication); the size field is fixed to 0b00.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<3> Pg;
  bits<5> Zn;
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
}

// The single instruction plus passthru patterns mapping op onto it for all
// integer and floating-point scalable vector types listed below.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
  def NAME : sve2_int_perm_revd<asm>;

  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME)>;

  def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8f16,  op, nxv8i1, nxv8f16,  !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4f32,  op, nxv4i1, nxv4f32,  !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2f64,  op, nxv2i1, nxv2f64,  !cast<Instruction>(NAME)>;

}

// Clamp instruction format. Destructive: $Zd is tied to $_Zd. sz selects the
// element size and U is placed in Inst{10}.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b01000100;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11000;
  let Inst{10}    = U;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize = zpr_ty.ElementSize;
}

// One clamp instruction per integer element size plus the matching
// three-operand selection patterns.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}

// Predicate select indexed by a register + immediate. The immediate and the
// element-size marker share Inst{23-22} and Inst{20-18}; those bits are
// assigned by the per-size definitions.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRorPNRAny:$Pd), (ins PPRorPNRAny:$Pn, ppr_ty:$Pm,
                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<4> Pn;
  bits<4> Pm;
  bits<4> Pd;
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{17-16} = Rv;
  let Inst{15-14} = 0b01;
  let Inst{13-10} = Pn;
  let Inst{9}     = 0b0;
  let Inst{8-5}   = Pm;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = Pd;
}

// One instruction per element size plus selection patterns: a bare index
// (immediate 0) and, preferred via AddedComplexity, an index + offset matched
// by the tileslice ComplexPattern of the same element size.
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }

  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;

  let AddedComplexity = 1 in {
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
               (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
              (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
               (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
              (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
               (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
              (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
               (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
              (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
  }
}

//===----------------------------------------------------------------------===//
// SME2 Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SME2 single-multi ternary
int/fp, two/four registers 1610 1611class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op, 1612 MatrixOperand matrix_ty, 1613 RegisterOperand multi_vector_ty, 1614 ZPRRegOp zpr_ty, 1615 string mnemonic> 1616 : I<(outs matrix_ty:$ZAd), 1617 (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, 1618 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 1619 mnemonic,"\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm", 1620 "", []> , Sched<[]> { 1621 bits<4> Zm; 1622 bits<5> Zn; 1623 bits<2> Rv; 1624 bits<3> imm3; 1625 let Inst{31-23} = 0b110000010; 1626 let Inst{22} = op{6}; //sz 1627 let Inst{21} = 0b1; 1628 let Inst{20} = op{5}; //vgx4 1629 let Inst{19-16} = Zm; 1630 let Inst{15} = 0b0; 1631 let Inst{14-13} = Rv; 1632 let Inst{12-10} = op{4-2}; 1633 let Inst{9-5} = Zn; 1634 let Inst{4-3} = op{1-0}; 1635 let Inst{2-0} = imm3; 1636 let Constraints = "$ZAd = $_ZAd"; 1637} 1638 1639multiclass sme2_dot_mla_add_sub_array_vg24_single<string mnemonic, bits<7> op, 1640 MatrixOperand matrix_ty, 1641 RegisterOperand multi_vector_ty, 1642 ZPRRegOp zpr_ty>{ 1643 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>; 1644 1645 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 1646 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; 1647} 1648 1649multiclass sme2_dot_mla_add_sub_array_vg2_single<string mnemonic, bits<7> op, 1650 MatrixOperand matrix_ty, 1651 RegisterOperand multi_vector_ty, 1652 ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{ 1653 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>; 1654 1655 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 1656 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; 1657 1658 def _PSEUDO : 
sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>; 1659 1660 def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>; 1661} 1662 1663multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic, bits<7> op, 1664 MatrixOperand matrix_ty, 1665 RegisterOperand multi_vector_ty, 1666 ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{ 1667 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>; 1668 1669 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 1670 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>; 1671 1672 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>; 1673 1674 def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>; 1675} 1676 1677//===----------------------------------------------------------------------===// 1678// SME2 multiple vectors ternary INT/FP two and four registers 1679class sme2_dot_mla_add_sub_array_vg2_multi<bits<7> op, 1680 MatrixOperand matrix_ty, 1681 RegisterOperand multi_vector_ty, 1682 string mnemonic> 1683 : I<(outs matrix_ty:$ZAd), 1684 (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, 1685 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 1686 mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm", 1687 "", []>, Sched<[]>{ 1688 bits<4> Zm; 1689 bits<4> Zn; 1690 bits<2> Rv; 1691 bits<3> imm3; 1692 let Inst{31-23} = 0b110000011; 1693 let Inst{22} = op{6}; //sz 1694 let Inst{21} = 0b1; 1695 let Inst{20-17} = Zm; 1696 let Inst{16-15} = 0b00; 1697 let Inst{14-13} = Rv; 1698 let Inst{12-10} = op{5-3}; 1699 let Inst{9-6} = Zn; 1700 let Inst{5-3} = op{2-0}; 1701 let Inst{2-0} = imm3; 1702 let Constraints = "$ZAd = $_ZAd"; 1703} 1704 1705multiclass 
sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<7> op, 1706 MatrixOperand matrix_ty, 1707 RegisterOperand multi_vector_ty, ValueType zpr_ty, 1708 SDPatternOperator intrinsic> { 1709 def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>; 1710 1711 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>; 1712 1713 def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>; 1714 1715 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 1716 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; 1717} 1718 1719class sme2_dot_mla_add_sub_array_vg4_multi<bits<7> op, 1720 MatrixOperand matrix_ty, 1721 RegisterOperand multi_vector_ty, 1722 string mnemonic> 1723 : I<(outs matrix_ty:$ZAd), 1724 (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, 1725 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 1726 mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm", 1727 "", []>, Sched<[]>{ 1728 bits<3> Zm; 1729 bits<3> Zn; 1730 bits<2> Rv; 1731 bits<3> imm3; 1732 let Inst{31-23} = 0b110000011; 1733 let Inst{22} = op{6}; //sz 1734 let Inst{21} = 0b1; 1735 let Inst{20-18} = Zm; 1736 let Inst{17-15} = 0b010; 1737 let Inst{14-13} = Rv; 1738 let Inst{12-10} = op{5-3}; 1739 let Inst{9-7} = Zn; 1740 let Inst{6} = 0b0; 1741 let Inst{5-3} = op{2-0}; 1742 let Inst{2-0} = imm3; 1743 let Constraints = "$ZAd = $_ZAd"; 1744} 1745 1746multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<7> op, 1747 MatrixOperand matrix_ty, 1748 RegisterOperand multi_vector_ty, 1749 ValueType zpr_ty, SDPatternOperator intrinsic>{ 1750 def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>; 1751 1752 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, 
SMEMatrixArray>; 1753 1754 def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>; 1755 1756 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 1757 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>; 1758} 1759 1760//===----------------------------------------------------------------------===// 1761// SME2 multiple vectors binary two or four registers 1762 1763class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4, bits<3> op, 1764 MatrixOperand matrix_ty, 1765 RegisterOperand vector_ty> 1766 : I<(outs matrix_ty:$ZAdn), 1767 (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 1768 mnemonic, "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm", 1769 "", []>, Sched<[]> { 1770 bits<2> Rv; 1771 bits<3> imm3; 1772 let Inst{31-23} = 0b110000011; 1773 let Inst{22} = sz; 1774 let Inst{21-19} = 0b100; 1775 let Inst{18} = op{2}; 1776 let Inst{17} = 0b0; 1777 let Inst{16} = vg4; 1778 let Inst{15} = 0b0; 1779 let Inst{14-13} = Rv; 1780 let Inst{12-10} = 0b111; 1781 let Inst{5} = 0b0; 1782 let Inst{4-3} = op{1-0}; 1783 let Inst{2-0} = imm3; 1784 1785 let Constraints = "$ZAdn = $_ZAdn"; 1786} 1787 1788class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op, 1789 MatrixOperand matrix_ty, 1790 RegisterOperand vector_ty> 1791 : sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op, matrix_ty, vector_ty> { 1792 bits<4> Zm; 1793 let Inst{9-6} = Zm; 1794} 1795 1796 1797multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op, 1798 MatrixOperand matrix_ty, 1799 RegisterOperand vector_ty, 1800 ValueType vty, 1801 SDPatternOperator intrinsic> { 1802 def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>, 1803 SMEPseudo2Instr<NAME, 1>; 1804 def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm", 1805 (!cast<Instruction>(NAME) 
matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>; 1806 1807 def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>; 1808 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>; 1809} 1810 1811class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op, 1812 MatrixOperand matrix_ty, 1813 RegisterOperand vector_ty> 1814 : sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op, matrix_ty, vector_ty> { 1815 bits<3> Zm; 1816 let Inst{9-7} = Zm; 1817 let Inst{6} = 0b0; 1818} 1819 1820multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op, 1821 MatrixOperand matrix_ty, 1822 RegisterOperand vector_ty, 1823 ValueType vty, 1824 SDPatternOperator intrinsic> { 1825 def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>, 1826 SMEPseudo2Instr<NAME, 1>; 1827 def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm", 1828 (!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>; 1829 1830 def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>; 1831 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>; 1832} 1833 1834//===----------------------------------------------------------------------===// 1835// SME2 Multi-vector - Multiple and Single SVE Destructive 1836// Two and Four registers 1837 1838class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op, 1839 RegisterOperand vector_ty, 1840 ZPRRegOp zpr_ty, 1841 string mnemonic> 1842 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), 1843 mnemonic, "\t$Zdn, $_Zdn, $Zm", 1844 "", []>, Sched<[]> { 1845 bits<4> Zm; 1846 bits<4> Zdn; 1847 let Inst{31-24} = 0b11000001; 1848 let Inst{23-22} = sz; 1849 let Inst{21-20} = 0b10; 1850 let Inst{19-16} = Zm; 1851 let Inst{15-11} = 0b10100; 1852 let Inst{10-5} = op{6-1}; 1853 let Inst{4-1} = Zdn; 1854 let Inst{0} = 
op{0}; 1855 1856 let Constraints = "$Zdn = $_Zdn"; 1857} 1858 1859multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> { 1860 def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; 1861 def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; 1862 def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; 1863} 1864 1865multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> { 1866 def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>; 1867 def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; 1868 def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; 1869 def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; 1870} 1871 1872// SME2.1 fmax/fmin instructions. 1873multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7>op> { 1874 def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r, 1875 ZPR4b16, mnemonic>; 1876} 1877 1878class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op, 1879 RegisterOperand vector_ty, 1880 ZPRRegOp zpr_ty, 1881 string mnemonic> 1882 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), 1883 mnemonic, "\t$Zdn, $_Zdn, $Zm", 1884 "", []>, Sched<[]> { 1885 bits<4> Zm; 1886 bits<3> Zdn; 1887 let Inst{31-24} = 0b11000001; 1888 let Inst{23-22} = sz; 1889 let Inst{21-20} = 0b10; 1890 let Inst{19-16} = Zm; 1891 let Inst{15-11} = 0b10101; 1892 let Inst{10-5} = op{6-1}; 1893 let Inst{4-2} = Zdn; 1894 let Inst{1} = 0b0; 1895 let Inst{0} = op{0}; 1896 1897 let Constraints = "$Zdn = $_Zdn"; 1898} 1899 1900multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> { 1901 def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; 1902 def _S : 
sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; 1903 def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; 1904} 1905 1906multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> { 1907 def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>; 1908 def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; 1909 def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; 1910 def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; 1911} 1912 1913// SME2.1 fmax/fmin instructions. 1914multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7>op> { 1915 def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r, 1916 ZPR4b16, mnemonic>; 1917} 1918 1919class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op, 1920 RegisterOperand vector_ty, 1921 string mnemonic> 1922 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm), 1923 mnemonic, "\t$Zdn, $_Zdn, $Zm", 1924 "", []>, Sched<[]> { 1925 bits<4> Zm; 1926 bits<4> Zdn; 1927 let Inst{31-24} = 0b11000001; 1928 let Inst{23-22} = sz; 1929 let Inst{21} = 0b1; 1930 let Inst{20-17} = Zm; 1931 let Inst{16-11} = 0b010110; 1932 let Inst{10-5} = op{6-1}; 1933 let Inst{4-1} = Zdn; 1934 let Inst{0} = op{0}; 1935 1936 let Constraints = "$Zdn = $_Zdn"; 1937} 1938 1939multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> { 1940 def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>; 1941 def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>; 1942 def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>; 1943} 1944 1945multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> { 1946 def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, 
ZZ_b_mul_r, mnemonic>; 1947 def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>; 1948 def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>; 1949 def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>; 1950} 1951 1952// SME2.1 fmax/fmin instructions. 1953multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7>op> { 1954 def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r, 1955 mnemonic>; 1956} 1957 1958class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op, 1959 RegisterOperand vector_ty, 1960 string mnemonic> 1961 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm), 1962 mnemonic, "\t$Zdn, $_Zdn, $Zm", 1963 "", []>, Sched<[]> { 1964 bits<3> Zm; 1965 bits<3> Zdn; 1966 let Inst{31-24} = 0b11000001; 1967 let Inst{23-22} = sz; 1968 let Inst{21} = 0b1; 1969 let Inst{20-18} = Zm; 1970 let Inst{17-11} = 0b0010111; 1971 let Inst{10-5} = op{6-1}; 1972 let Inst{4-2} = Zdn; 1973 let Inst{1} = 0b0; 1974 let Inst{0} = op{0}; 1975 1976 let Constraints = "$Zdn = $_Zdn"; 1977} 1978 1979multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> { 1980 def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>; 1981 def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>; 1982 def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>; 1983} 1984 1985multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> { 1986 def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r, mnemonic>; 1987 def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>; 1988 def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>; 1989 def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>; 1990} 1991 1992// SME2.1 fmax/fmin instructions. 
// Only an _H record is defined, with sz = 0b00.
multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources

// Shared encoding for the indexed long multiply-accumulate forms
// ("..., $Zn, $Zm$i3"). An empty `vg_acronym` drops the suffix from the asm
// string and clears Inst{20}; a non-empty one appends it and sets Inst{20}.
// Inst{15}, Inst{11-5} and Inst{2-0} are left for subclasses/defs to assign.
class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
                                     RegisterOperand multi_vector_ty,
                                     string mnemonic, string vg_acronym="">
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
        mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = op0;
  let Inst{21}    = 0b0;
  let Inst{20}    = !if(!eq(vg_acronym, ""), 0, 1);
  let Inst{19-16} = Zm;
  let Inst{14-13} = Rv;
  let Inst{12}    = 0b1;
  let Inst{4-3}   = op;

  let Constraints = "$ZAda = $_ZAda";
}

// Single-vector indexed form (_HtoS): 5-bit $Zn, 3-bit slice immediate, and
// the lane index split across Inst{15} and Inst{11-10}.
multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
                                             mnemonic>, SMEPseudo2Instr<NAME # _HtoS, 1> {
    bits<3> i3;
    bits<5> Zn;
    bits<3> imm;
    let Inst{15}    = i3{2};
    let Inst{11-10} = i3{1-0};
    let Inst{9-5}   = Zn;
    let Inst{2-0}   = imm;
  }

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange3s2>;
}

// vgx2 indexed form: 4-bit $Zn in Inst{9-6}, 2-bit slice immediate, lane
// index split across Inst{11-10} and Inst{2}; Inst{15} is 0.
class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
                                     mnemonic, "vgx2"> {
  bits<3> i3;
  bits<4> Zn;
  bits<2> imm;
  let Inst{15}    = 0b0;
  let Inst{11-10} = i3{2-1};
  let Inst{9-6}   = Zn;
  let Inst{5}     = 0b0;
  let Inst{2}     = i3{0};
  let Inst{1-0}   = imm;
}

// Floating-point vgx2 indexed instantiation (op0 = 0b10) with pseudo,
// pattern and suffix-less alias.
multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// Integer vgx2 indexed instantiation (op0 = 0b11). Note the record suffix is
// _S here, unlike the _HtoS suffix used by the sibling multiclasses.
multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                 (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// vgx4 indexed form: 3-bit $Zn in Inst{9-7}, Inst{6-5} zero, Inst{15} is 1.
class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
                                     mnemonic, "vgx4"> {
  bits<3> i3;
  bits<3> Zn;
  bits<2> imm;
  let Inst{15}    = 0b1;
  let Inst{11-10} = i3{2-1};
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = 0b00;
  let Inst{2}     = i3{0};
  let Inst{1-0}   = imm;
}

// Floating-point vgx4 indexed instantiation (op0 = 0b10).
multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// Integer vgx4 indexed instantiation (op0 = 0b11); pattern type is nxv8i16.
multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// Shared encoding for the non-indexed long multiply-accumulate forms.
// An empty `vg_acronym` sets Inst{10} and omits the asm suffix; otherwise
// Inst{10} is cleared. Inst{20-16}, Inst{9-5} and Inst{2-0} are assigned by
// subclasses/defs.
class sme2_mla_long_array<bits<2>op0, bits<2> op,
                          MatrixOperand matrix_ty,
                          Operand index_ty,
                          RegisterOperand first_vector_ty,
                          RegisterOperand second_vector_ty,
                          string mnemonic, string vg_acronym="">
   : I<(outs matrix_ty:$ZAda),
       (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
            index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm),
       mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm",
       "", []> , Sched<[]> {
  bits<2> Rv;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = op0;
  let Inst{21}    = 0b1;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-11} = 0b01;
  let Inst{10}    = !if(!eq(vg_acronym, ""), 1, 0);
  let Inst{4-3}   = op;

  let Constraints = "$ZAda = $_ZAda";
}

// Single/single form (_HtoS) on MatrixOp32 with Inst{20} = 0, plus pseudo
// and selection pattern.
multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array<op0, op, MatrixOp32, uimm3s2range, ZPR16, ZPR4b16,
                                  mnemonic> , SMEPseudo2Instr<NAME # _HtoS, 1>{
    bits<4> Zm;
    bits<5> Zn;
    bits<3> imm;
    let Inst{20}    = 0b0;
    let Inst{19-16} = Zm;
    let Inst{9-5}   = Zn;
    let Inst{2-0}   = imm;
  }

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
}

// Byte-source single/single form accumulating into MatrixOp16; Inst{20} = 1
// and the instruction is marked as reading FPMR and FPCR.
class sme2_mla_long_array_single_16b<string mnemonic>
    : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
  bits<4> Zm;
  bits<5> Zn;
  bits<3> imm;
  let Inst{20}    = 0b1;
  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{2-0}   = imm;
  let Uses = [FPMR, FPCR];
}

// Instruction, pseudo and nxv16i8 pattern for the 16-bit-accumulator form.
multiclass sme2_fp8_fmlal_single_za16<string mnemonic, SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_single_16b<mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm3s2range, ZPR8, ZPR4b8, SMEMatrixArray>;

  def: SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm3s2range, ZPR4b8, nxv16i8, tileslicerange3s2>;
}

// Multi/single form for two or four registers: `vg4` goes to Inst{20},
// `o2` to Inst{2}, and the slice immediate is 2 bits wide.
class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
                                      MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
                                      ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
    : sme2_mla_long_array<op0, op, matrix_ty, uimm2s2range, multi_vector_ty, zpr_ty,
                          mnemonic, vg_acronym> {
  bits<4> Zm;
  bits<5> Zn;
  bits<2> imm;
  let Inst{20}    = vg4;
  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{2}     = o2;
  let Inst{1-0}   = imm;
}

// Floating-point vgx2 multi/single instantiation. The 3-bit `op` is split as
// {op[2:1], o2 = op[0]}; `uses` lets the caller attach implicit register
// reads (empty by default).
multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
                                             ValueType zpr_ty, SDPatternOperator intrinsic, list<Register> uses=[]> {
  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
                                             vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty,
                                                      vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                   uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
}

// Integer vgx2 multi/single instantiation (op0 = 0b01, o2 = 0) on ZZ_h.
multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic,
                                              "vgx2">, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h, ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}

// Floating-point vgx4 multi/single instantiation; same shape as the vgx2
// multiclass with vg4 = 1 and the VG4 pattern class.
multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
                                             ValueType zpr_ty, SDPatternOperator intrinsic, list<Register> uses=[]> {
  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
                                             vector_ty, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
                                                      SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                   uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
}

// Integer vgx4 multi/single instantiation (op0 = 0b01, o2 = 0) on ZZZZ_h.
multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic,
                                              "vgx4">, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h, ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}

// vgx2 multi/multi form: 4-bit $Zm/$Zn; op{1-0} is forwarded to the base
// class and op{2} is placed in Inst{5}.
class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<3> op,
                                    MatrixOperand matrix_ty, RegisterOperand multi_vector_ty>
    : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
                          mnemonic, "vgx2"> {
  bits<4> Zm;
  bits<4> Zn;
  bits<2> imm;
  let Inst{20-17} = Zm;
  let Inst{16}    = 0b0;
  let Inst{9-6}   = Zn;
  let Inst{5}     = op{2}; // fp8
  let Inst{2}     = 0b0;
  let Inst{1-0}   = imm;
}

// Floating-point vgx2 multi/multi instantiation (op0 = 0b10) with optional
// implicit register uses.
multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
                                            SDPatternOperator intrinsic, list<Register> uses=[]> {
  def NAME : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
             SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                   uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}

// Integer vgx2 multi/multi instantiation (op0 = 0b11); the 2-bit `op` is
// widened to 3 bits with a leading zero.
multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZ_h_mul_r>,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
}

// vgx4 multi/multi form: 3-bit $Zm/$Zn, Inst{17} = 0, Inst{16} = 1,
// Inst{6} = 0; op{2} is placed in Inst{5}.
class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<3> op,
                                    MatrixOperand matrix_ty,
                                    RegisterOperand multi_vector_ty>
    : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
                          mnemonic, "vgx4"> {
  bits<3> Zm;
  bits<3> Zn;
  bits<2> imm;
  let Inst{20-18} = Zm;
  let Inst{17}    = 0b0;
  let Inst{16}    = 0b1;
  let Inst{9-7}   = Zn;
  let Inst{6}     = 0b0;
  let Inst{5}     = op{2}; //fp8
  let Inst{2}     = 0b0;
  let Inst{1-0}   = imm;
}

// Floating-point vgx4 multi/multi instantiation (op0 = 0b10) with optional
// implicit register uses.
multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
                                            SDPatternOperator intrinsic, list<Register> uses=[]> {
  def NAME : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
             SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                   uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}

// Integer vgx4 multi/multi instantiation (op0 = 0b11).
multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZZZ_h_mul_r>,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
}

//===----------------------------------------------------------------------===//
// Two-register "$Zd, $Zn" format with 4-bit register fields. The 5-bit `op`
// is split as op{4-1} -> Inst{19-16} and op{0} -> Inst{5}.
class sme2_frint_cvt_vg2_multi<bits<2>sz, bits<5>op, RegisterOperand first_ty,
                               RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<4> Zn;
  bits<4> Zd;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b10;
  let Inst{19-16} = op{4-1};
  let Inst{15-10} = 0b111000;
  let Inst{9-6}   = Zn;
  let Inst{5}     = op{0};
  let Inst{4-1}   = Zd;
  let Inst{0}     = 0b0;
}

// SME2 multi-vec FP to int convert two registers
// SME2 multi-vec int to FP two registers
multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
  def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>;
}

// SME2 multi-vec FRINT two registers
multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
  def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>;
}

// Four-register "$Zd, $Zn" format with 3-bit register fields. The 7-bit `op`
// is split as op{6-3} -> Inst{19-16}, op{2-1} -> Inst{6-5}, op{0} -> Inst{1}.
class sme2_frint_zip_cvt_vg4_multi<bits<2>sz, bits<7>op, RegisterOperand first_ty,
                                   RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<3> Zn;
  bits<3> Zd;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b11;
  let Inst{19-16} = op{6-3};
  let Inst{15-10} = 0b111000;
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = op{2-1};
  let Inst{4-2}   = Zd;
  let Inst{1}     = op{0};
  let Inst{0}     = 0b0;
}

// SME2 multi-vec FP to int convert four registers
// SME2 multi-vec int to FP four registers
multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
}

// SME2 multi-vec ZIP four registers (_B/_H/_S/_D element-size forms)
multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
  def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r,
                                        mnemonic>;
  def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r,
                                        mnemonic>;
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                        mnemonic>;
  def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r,
                                        mnemonic>;
}

// SME2 multi-vec quadwords ZIP four registers
multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
  def NAME: sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r,
                                         mnemonic>;
}

// SME2 multi-vec FRINT four registers
multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                        mnemonic>;
}

// Two-register source to single-register destination: 4-bit $Zn, 5-bit $Zd.
// `op` is split as op{4} -> Inst{22}, op{3-1} -> Inst{18-16}, op{0} -> Inst{5}.
class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
                          RegisterOperand first_ty, RegisterOperand second_ty>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<4> Zn;
  bits<5> Zd;
  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{4};
  let Inst{21-19} = 0b100;
  let Inst{18-16} = op{3-1};
  let Inst{15-10} = 0b111000;
  let Inst{9-6}   = Zn;
  let Inst{5}     = op{0};
  let Inst{4-0}   = Zd;
}

// SME2 multi-vec FP down convert two registers
// SME2 multi-vec int down convert two registers
multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
                               ValueType in_vt, SDPatternOperator intrinsic> {
  def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}

// SME2 multi-vec FP8 down convert two registers
// The instruction is flagged mayLoad = 1 / mayStore = 0 and as reading
// FPMR and FPCR; the pattern builds the ZPR2Mul2 source pair with
// REG_SEQUENCE from the two intrinsic operands.
multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op, ValueType in_vt, SDPatternOperator intrinsic> {
  def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>{
    let mayLoad = 1;
    let mayStore = 0;
    let Uses = [FPMR, FPCR];
  }
  def : Pat<(nxv16i8 (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
            (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
}

// Single-register source to two-register destination: 5-bit $Zn, 4-bit $Zd,
// with `u` in Inst{0}.
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
                               RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-19} = 0b100;
  let Inst{18-16} = op;
  let Inst{15-10} = 0b111000;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
  let Inst{0}     = u;
}

// SME2 multi-vec unpack two registers
multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
  def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u, ZZ_h_mul_r, ZPR8, mnemonic>;
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u, ZZ_s_mul_r, ZPR16, mnemonic>;
  def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u, ZZ_d_mul_r, ZPR32, mnemonic>;
}

// SME2.1 multi-vec convert two registers
multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
}

// SME2 multi-vec FP8 up convert two registers
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
  def NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>{
    let Uses = [FPMR, FPCR];
  }
}


// Four-register source to single-register destination: 3-bit $Zn, 5-bit $Zd.
// `op` is split as op{2} -> Inst{22} and op{1-0} -> Inst{6-5}; `op2` fills
// Inst{19-16}.
class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2, RegisterOperand first_ty,
                          RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<3> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b11000001;
  let Inst{23}    = sz;
  let Inst{22}    = op{2};
  let Inst{21-20} = 0b11;
  let Inst{19-16} = op2;
  let Inst{15-10} = 0b111000;
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = op{1-0};
  let Inst{4-0}   = Zd;
}

// SME2 multi-vec int down convert four registers
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
  def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
  def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;

  def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
  def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}

//SME2 multi-vec FP8 down convert four registers
// Flagged mayLoad = 1 / mayStore = 0 and as reading FPMR and FPCR.
multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N, SDPatternOperator intrinsic> {
  def NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic> {
    let mayLoad = 1;
    let mayStore = 0;
    let Uses = [FPMR, FPCR];
  }
  def : SME2_Cvt_VG4_Pat<NAME, intrinsic, nxv16i8, nxv4f32>;
}

// Two-register source to four-register destination: 4-bit $Zn in Inst{9-6},
// 3-bit $Zd in Inst{4-2}, `u` in Inst{0}.
class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
                           RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-10} = 0b110101111000;
  let Inst{9-6}   = Zn;
  let Inst{5}     = 0b0;
  let Inst{4-2}   = Zd;
  let Inst{1}     = 0b0;
  let Inst{0}     = u;
}

// SME2 multi-vec UNPK four registers
multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
  def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>;
  def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>;
  def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>;
}
//===----------------------------------------------------------------------===//
// SME2 multi-vec CLAMP registers

// Fields common to the two- and four-register clamp encodings.
//   sz -> Inst{23-22}, op1 -> Inst{12-10}, u -> Inst{0}
// The destination list is also read ($_Zd), hence the tie constraint.
// Inst{4-1} is left for the derived classes, which know the width of Zd.
class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
                                   RegisterOperand multi_vector_ty,
                                   ZPRRegOp vector_ty, string mnemonic>
    : I<(outs multi_vector_ty:$Zd),
        (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>,
      Sched<[]>{
  bits<5> Zm;
  bits<5> Zn;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Zm;
  let Inst{15-13} = 0b110;
  let Inst{12-10} = op1;
  let Inst{9-5}   = Zn;
  let Inst{0}     = u;

  let Constraints = "$Zd = $_Zd";
}

// Two-register clamp: Zd is a 4-bit field occupying Inst{4-1}.
class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty, string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                   mnemonic>{
  bits<4> Zd;
  let Inst{4-1} = Zd;
}

// Floating-point clamp, two registers: _H/_S/_D with op1 = 0b000, u = 0.
multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic>{
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>;
}

// Integer clamp, two registers: _B/_H/_S/_D with op1 = 0b001; the caller's
// u bit is forwarded into Inst{0}.
multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u>{
  def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8,  mnemonic>;
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64, mnemonic>;
}

// SME2.1 multi-vec FCLAMP two registers
// Same layout as the FP clamp _H record, but with sz = 0b00.
multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                       mnemonic>;
}

// Four-register clamp: Zd is a 3-bit field in Inst{4-2}; Inst{1} is fixed 0.
class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty, string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                   mnemonic>{
  bits<3> Zd;
  let Inst{4-2} = Zd;
  let Inst{1}   = 0b0;
}

// Floating-point clamp, four registers: _H/_S/_D with op1 = 0b010, u = 0.
multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic>{
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>;
}

// Integer clamp, four registers: _B/_H/_S/_D with op1 = 0b011.
multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u>{
  def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8,  mnemonic>;
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>;
}

// SME2.1 multi-vec FCLAMP four registers
// Same layout as the FP clamp _H record, but with sz = 0b00.
multiclass sme2p1_bfclamp_vector_vg4_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
                                       mnemonic>;
}

// SME2 multi-vec ZIP two registers
//   sz -> Inst{23-22}, q -> Inst{10}, u -> Inst{0}
// Two single-vector sources (5-bit fields) produce one register pair
// (4-bit Zd field in Inst{4-1}).
class sme2_zip_vector_vg2<bits<2> sz, bit q, bit u,
                          RegisterOperand multi_vector_ty,
                          ZPRRegOp vector_ty, string mnemonic>
    : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>,
      Sched<[]>{
  bits<4> Zd;
  bits<5> Zm;
  bits<5> Zn;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11010;
  let Inst{10}    = q;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
  let Inst{0}     = u;
}

2649multiclass sme2_zip_vector_vg2<string mnemonic, bit op> { 2650 def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>; 2651 def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>; 2652 def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>; 2653 def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>; 2654 def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>; 2655} 2656 2657//===----------------------------------------------------------------------===// 2658// SME2 Dot Products and MLA 2659class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty, 2660 RegisterOperand multi_vector_ty, 2661 ZPRRegOp vector_ty, Operand index_ty, 2662 string mnemonic> 2663 : I<(outs matrix_ty:$ZAda), 2664 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2665 multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i), 2666 mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i", 2667 "", []>, Sched<[]> { 2668 bits<4> Zm; 2669 bits<2> Rv; 2670 bits<4> Zn; 2671 bits<3> imm3; 2672 let Inst{31-24} = 0b11000001; 2673 let Inst{23-22} = sz; 2674 let Inst{21-20} = 0b01; 2675 let Inst{19-16} = Zm; 2676 let Inst{15} = 0b0; 2677 let Inst{14-13} = Rv; 2678 let Inst{12-10} = op{5-3}; 2679 let Inst{9-6} = Zn; 2680 let Inst{5-3} = op{2-0}; 2681 let Inst{2-0} = imm3; 2682 2683 let Constraints = "$ZAda = $_ZAda"; 2684} 2685 2686// SME2 multi-vec ternary indexed two registers 32-bit 2687multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<4> op, 2688 RegisterOperand multi_vector_ty, 2689 ZPRRegOp vector_ty, ValueType vt, 2690 SDPatternOperator intrinsic> { 2691 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty, 2692 VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2693 bits<2> i; 2694 let Inst{11-10} = i; 2695 } 2696 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, 
sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>; 2697 2698 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>; 2699 2700 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2701 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2702 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>; 2703} 2704 2705// SME2.1 multi-vec ternary indexed two registers 16-bit 2706multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op, 2707 RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, 2708 ValueType vt, SDPatternOperator intrinsic> { 2709 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16, 2710 multi_vector_ty, vector_ty, 2711 VectorIndexH, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2712 bits<3> i; 2713 let Inst{11-10} = i{2-1}; 2714 let Inst{3} = i{0}; 2715 } 2716 2717 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexH32b, SMEMatrixArray>; 2718 2719 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexH32b_timm, tileslice16>; 2720 2721 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2722 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2723 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexH:$i), 0>; 2724} 2725 2726// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers 2727multiclass sme2p1_multi_vec_array_vg2_index_f8f16<string mnemonic, bits<2> sz, bits<3> op, 2728 RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { 2729 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16, 2730 multi_vector_ty, zpr_ty, 2731 VectorIndexH, mnemonic> { 2732 bits<3> i; 2733 let Inst{11-10} = i{2-1}; 2734 let Inst{3} = i{0}; 2735 } 2736 2737 def : InstAlias<mnemonic 
# "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2738 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2739 multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; 2740} 2741 2742// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision 2743// two registers 2744class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T> 2745 : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32, 2746 ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> { 2747 2748 bits<2> i; 2749 let Inst{10} = i{1}; 2750 let Inst{3} = i{0}; 2751 let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}"); 2752 let Uses = [FPMR, FPCR]; 2753} 2754 2755// SME2 multi-vec ternary indexed two registers 64-bit 2756 2757class sme2_multi_vec_array_vg2_index_64b<bits<2> op, 2758 RegisterOperand multi_vector_ty, 2759 ZPRRegOp vector_ty, 2760 string mnemonic> 2761 : I<(outs MatrixOp64:$ZAda), 2762 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2763 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 2764 mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1", 2765 "", []>, Sched<[]> { 2766 bits<4> Zm; 2767 bits<2> Rv; 2768 bits<1> i1; 2769 bits<4> Zn; 2770 bits<3> imm3; 2771 let Inst{31-20} = 0b110000011101; 2772 let Inst{19-16} = Zm; 2773 let Inst{15} = 0b0; 2774 let Inst{14-13} = Rv; 2775 let Inst{12-11} = 0b00; 2776 let Inst{10} = i1; 2777 let Inst{9-6} = Zn; 2778 let Inst{5} = 0b0; 2779 let Inst{4-3} = op; 2780 let Inst{2-0} = imm3; 2781 2782 let Constraints = "$ZAda = $_ZAda"; 2783} 2784 2785multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op, 2786 RegisterOperand multi_vector_ty, 2787 ZPRRegOp vector_ty, ValueType vt, 2788 SDPatternOperator intrinsic> { 2789 def NAME : sme2_multi_vec_array_vg2_index_64b<op, multi_vector_ty, vector_ty, 2790 mnemonic>, SMEPseudo2Instr<NAME, 1>; 2791 2792 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, 
multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>; 2793 2794 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexD32b_timm, tileslice16>; 2795 2796 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1", 2797 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2798 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; 2799} 2800 2801class sme2_multi_vec_array_vg4_index<bit sz, bits<7> op, MatrixOperand matrix_ty, 2802 RegisterOperand multi_vector_ty, 2803 ZPRRegOp vector_ty, Operand index_ty, 2804 string mnemonic> 2805 : I<(outs matrix_ty:$ZAda), 2806 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2807 multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i), 2808 mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i", 2809 "", []>, Sched<[]> { 2810 bits<4> Zm; 2811 bits<2> Rv; 2812 bits<3> Zn; 2813 bits<3> imm3; 2814 let Inst{31-23} = 0b110000010; 2815 let Inst{22} = sz; 2816 let Inst{21-20} = 0b01; 2817 let Inst{19-16} = Zm; 2818 let Inst{15} = 0b1; 2819 let Inst{14-13} = Rv; 2820 let Inst{12-10} = op{6-4}; 2821 let Inst{9-7} = Zn; 2822 let Inst{6-3} = op{3-0}; 2823 let Inst{2-0} = imm3; 2824 2825 let Constraints = "$ZAda = $_ZAda"; 2826} 2827 2828// SME2 multi-vec ternary indexed four registers 32-bit 2829multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op, 2830 RegisterOperand multi_vector_ty, 2831 ZPRRegOp vector_ty, ValueType vt, 2832 SDPatternOperator intrinsic> { 2833 def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty, 2834 vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2835 bits<2> i; 2836 let Inst{11-10} = i; 2837 } 2838 2839 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>; 2840 2841 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, 
intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>; 2842 2843 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2844 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2845 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>; 2846} 2847 2848// SME2.1 multi-vec ternary indexed four registers 16-bit (FP8) 2849multiclass sme2p1_multi_vec_array_vg4_index_f8f16<string mnemonic, bits<3> op, 2850 RegisterOperand multi_vector_ty, 2851 ZPRRegOp zpr_ty> { 2852 def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, 2853 multi_vector_ty, zpr_ty, 2854 VectorIndexH, mnemonic>{ 2855 bits<3> i; 2856 let Inst{11-10} = i{2-1}; 2857 let Inst{3} = i{0}; 2858 } 2859 2860 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2861 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 2862 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; 2863} 2864 2865// SME2.1 multi-vec ternary indexed four registers 16-bit 2866multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<3> op, 2867 RegisterOperand multi_vector_ty, 2868 ZPRRegOp vector_ty, ValueType vt, 2869 SDPatternOperator intrinsic> { 2870 def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, 2871 multi_vector_ty, vector_ty, 2872 VectorIndexH, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2873 bits<3> i; 2874 let Inst{11-10} = i{2-1}; 2875 let Inst{3} = i{0}; 2876 } 2877 2878 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexH32b_timm, SMEMatrixArray>; 2879 2880 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexH32b_timm, tileslice16>; 2881 2882 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2883 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 2884 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, 
vector_ty:$Zm, VectorIndexH:$i), 0>; 2885} 2886 2887// SME2 multi-vec ternary indexed four registers 64-bit 2888class sme2_multi_vec_array_vg4_index_64b<bits<3> op, 2889 RegisterOperand multi_vector_ty, 2890 ZPRRegOp vector_ty, 2891 string mnemonic> 2892 : I<(outs MatrixOp64:$ZAda), 2893 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2894 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 2895 mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1", 2896 "", []>, Sched<[]> { 2897 bits<4> Zm; 2898 bits<2> Rv; 2899 bits<1> i1; 2900 bits<3> Zn; 2901 bits<3> imm3; 2902 let Inst{31-20} = 0b110000011101; 2903 let Inst{19-16} = Zm; 2904 let Inst{15} = 0b1; 2905 let Inst{14-13} = Rv; 2906 let Inst{12} = 0b0; 2907 let Inst{11} = op{2}; 2908 let Inst{10} = i1; 2909 let Inst{9-7} = Zn; 2910 let Inst{6-5} = 0b00; 2911 let Inst{4-3} = op{1-0}; 2912 let Inst{2-0} = imm3; 2913 2914 let Constraints = "$ZAda = $_ZAda"; 2915} 2916 2917multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op, 2918 RegisterOperand multi_vector_ty, 2919 ZPRRegOp vector_ty, ValueType vty, 2920 SDPatternOperator intrinsic> { 2921 def NAME : sme2_multi_vec_array_vg4_index_64b<op, multi_vector_ty, vector_ty, 2922 mnemonic>, SMEPseudo2Instr<NAME, 1>; 2923 2924 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>; 2925 2926 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vty, VectorIndexD32b_timm, tileslice16>; 2927 2928 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1", 2929 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2930 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; 2931} 2932 2933// FMLAL (multiple and indexed vector, FP8 to FP16) 2934class sme2_fp8_fmlal_vg24_index_za16<bits<2> sz, bit vg4, bits<3> op, 2935 RegisterOperand multi_vector_ty, string 
mnemonic> 2936 : I<(outs MatrixOp16:$ZAda), 2937 (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, 2938 multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 2939 mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", 2940 "", []>, Sched<[]> { 2941 bits<4> Zm; 2942 bits<2> Rv; 2943 bits<4> i; 2944 bits<2> imm2; 2945 let Inst{31-24} = 0b11000001; 2946 let Inst{23-22} = sz; 2947 let Inst{21-20} = 0b01; 2948 let Inst{19-16} = Zm; 2949 let Inst{15} = vg4; 2950 let Inst{14-13} = Rv; 2951 let Inst{12} = op{2}; 2952 let Inst{11-10} = i{3-2}; 2953 let Inst{5-4} = op{1-0}; 2954 let Inst{3-2} = i{1-0}; 2955 let Inst{1-0} = imm2; 2956 2957 let Uses = [FPMR, FPCR]; 2958 let Constraints = "$ZAda = $_ZAda"; 2959} 2960 2961multiclass sme2_fp8_fmlal_index_za16_vgx2<string mnemonic, SDPatternOperator intrinsic> { 2962 def NAME : sme2_fp8_fmlal_vg24_index_za16<0b10, 0b0, 0b111, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2963 bits<4> Zn; 2964 let Inst{9-6} = Zn; 2965 } 2966 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s2range, ZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 2967 2968 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm2s2range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s2>; 2969 2970 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 2971 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, 2972 ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>; 2973} 2974 2975multiclass sme2_fp8_fmlal_index_za16_vgx4<string mnemonic, SDPatternOperator intrinsic> { 2976 def NAME: sme2_fp8_fmlal_vg24_index_za16<0b10, 0b1, 0b110, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2977 bits<3> Zn; 2978 let Inst{9-7} = Zn; 2979 let Inst{6} = 0b0; 2980 } 2981 2982 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s2range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 2983 2984 def : 
SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm2s2range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s2>; 2985 2986 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i", 2987 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, 2988 ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>; 2989} 2990 2991//===----------------------------------------------------------------------===// 2992// FMLAL (single and indexed vector, FP8 to FP16) 2993class sme2_fp8_fmlal_index_za16<string mnemonic, bits<2> sz,bits<2> op> 2994 : I<(outs MatrixOp16:$ZAda), 2995 (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 2996 mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2997 "", []>, Sched<[]> { 2998 bits<4> Zm; 2999 bits<2> Rv; 3000 bits<4> i; 3001 bits<5> Zn; 3002 bits<3> imm3; 3003 let Inst{31-24} = 0b11000001; 3004 let Inst{23-22} = sz; 3005 let Inst{21-20} = 0b00; 3006 let Inst{19-16} = Zm; 3007 let Inst{15} = i{3}; 3008 let Inst{14-13} = Rv; 3009 let Inst{12} = op{1}; 3010 let Inst{11-10} = i{2-1}; 3011 let Inst{9-5} = Zn; 3012 let Inst{4} = op{0}; 3013 let Inst{3} = i{0}; 3014 let Inst{2-0} = imm3; 3015 3016 let Uses = [FPMR, FPCR]; 3017 let Constraints = "$ZAda = $_ZAda"; 3018} 3019 3020multiclass sme2_fp8_fmlal_index_za16<string mnemonic, SDPatternOperator intrinsic> { 3021 def NAME : sme2_fp8_fmlal_index_za16<mnemonic, 0b11, 0b00>, SMEPseudo2Instr<NAME, 1>; 3022 3023 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm3s2range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 3024 3025 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm3s2range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange3s2>; 3026} 3027 3028// SME2 multi-vec indexed long long MLA one source 32-bit 3029class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op> 3030 : I<(outs MatrixOp32:$ZAda), 3031 (ins MatrixOp32:$_ZAda, 
MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 3032 mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 3033 "", []>, Sched<[]> { 3034 bits<4> Zm; 3035 bits<2> Rv; 3036 bits<4> i; 3037 bits<5> Zn; 3038 bits<2> imm2; 3039 let Inst{31-24} = 0b11000001; 3040 let Inst{23-22} = sz; 3041 let Inst{21-20} = 0b00; 3042 let Inst{19-16} = Zm; 3043 let Inst{15} = i{3}; 3044 let Inst{14-13} = Rv; 3045 let Inst{12-10} = i{2-0}; 3046 let Inst{9-5} = Zn; 3047 let Inst{4-2} = op; 3048 let Inst{1-0} = imm2; 3049 3050 let Constraints = "$ZAda = $_ZAda"; 3051} 3052 3053multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic, list<Register> uses=[]> { 3054 def NAME : sme2_mla_ll_array_index_32b<mnemonic, sz, op>, SMEPseudo2Instr<NAME, 1> { 3055 let Uses = uses; 3056 } 3057 3058 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 3059 3060 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s4>; 3061} 3062 3063// SME2 multi-vec indexed long long MLA one source 64-bit 3064 3065class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op> 3066 : I<(outs MatrixOp64:$ZAda), 3067 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 3068 mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 3069 "", []>, Sched<[]> { 3070 bits<4> Zm; 3071 bits<2> Rv; 3072 bits<3> i; 3073 bits<5> Zn; 3074 bits<2> imm2; 3075 let Inst{31-20} = 0b110000011000; 3076 let Inst{19-16} = Zm; 3077 let Inst{15} = i{2}; 3078 let Inst{14-13} = Rv; 3079 let Inst{12} = 0b0; 3080 let Inst{11-10} = i{1-0}; 3081 let Inst{9-5} = Zn; 3082 let Inst{4-3} = op; 3083 let Inst{2} = 0b0; 3084 let Inst{1-0} = imm2; 3085 3086 let Constraints = "$ZAda = $_ZAda"; 3087} 3088 3089multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op, 
SDPatternOperator intrinsic> { 3090 def NAME : sme2_mla_ll_array_index_64b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>; 3091 3092 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>; 3093 3094 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s4>; 3095} 3096 3097class sme2_mla_ll_array_vg24_index_32b<bits<2> sz, bit vg4, bits<3> op, 3098 RegisterOperand vector_ty, 3099 string mnemonic> 3100 : I<(outs MatrixOp32:$ZAda), 3101 (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, 3102 vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 3103 mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", 3104 "", []>, Sched<[]> { 3105 bits<4> Zm; 3106 bits<2> Rv; 3107 bits<4> i; 3108 bit imm; 3109 let Inst{31-24} = 0b11000001; 3110 let Inst{23-22} = sz; 3111 let Inst{21-20} = 0b01; 3112 let Inst{19-16} = Zm; 3113 let Inst{15} = vg4; 3114 let Inst{14-13} = Rv; 3115 let Inst{12} = 0b0; 3116 let Inst{11-10} = i{3-2}; 3117 let Inst{5-3} = op; 3118 let Inst{2-1} = i{1-0}; 3119 let Inst{0} = imm; 3120 3121 let Constraints = "$ZAda = $_ZAda"; 3122} 3123 3124//SME2 multi-vec indexed long long MLA two sources 32-bit 3125 3126multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic, list<Register> uses=[]> { 3127 def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> { 3128 bits<4> Zn; 3129 let Inst{9-6} = Zn; 3130 let Uses = uses; 3131 } 3132 3133 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 3134 3135 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>; 3136 3137 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i", 3138 
(!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>; 3139} 3140 3141// SME2 multi-vec indexed long long MLA four sources 32-bit 3142 3143multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<2> sz, bits<4> op, SDPatternOperator intrinsic, list<Register> uses=[]> { 3144 def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b1, op{2-0}, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> { 3145 bits<3> Zn; 3146 let Inst{9-7} = Zn; 3147 let Inst{6} = op{3}; 3148 let Uses = uses; 3149 } 3150 3151 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 3152 3153 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>; 3154 3155 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i", 3156 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>; 3157} 3158class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op, 3159 RegisterOperand vector_ty, 3160 string mnemonic> 3161 : I<(outs MatrixOp64:$ZAda), 3162 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, 3163 vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 3164 mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", 3165 "", []>, Sched<[]> { 3166 bits<4> Zm; 3167 bits<2> Rv; 3168 bits<3> i; 3169 bit imm; 3170 let Inst{31-20} = 0b110000011001; 3171 let Inst{19-16} = Zm; 3172 let Inst{15} = vg4; 3173 let Inst{14-13} = Rv; 3174 let Inst{12-11} = 0b00; 3175 let Inst{10} = i{2}; 3176 let Inst{5} = 0b0; 3177 let Inst{4-3} = op; 3178 let Inst{2-1} = i{1-0}; 3179 let Inst{0} = imm; 3180 3181 let Constraints = "$ZAda = $_ZAda"; 3182} 3183 3184// SME2 multi-vec indexed long long MLA two sources 64-bit 3185 3186multiclass 
sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> { 3187 def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> { 3188 bits<4> Zn; 3189 let Inst{9-6} = Zn; 3190 } 3191 3192 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>; 3193 3194 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>; 3195 3196 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i", 3197 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>; 3198} 3199 3200// SME2 multi-vec indexed long long MLA four sources 64-bit 3201 3202multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> { 3203 def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> { 3204 bits<3> Zn; 3205 let Inst{9-7} = Zn; 3206 let Inst{6} = 0b0; 3207 } 3208 3209 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>; 3210 3211 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>; 3212 3213 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i", 3214 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>; 3215} 3216 3217 3218//SME2 multiple and single vector long long FMA one source 3219 3220class sme2_mla_ll_array_single<string mnemonic, bits<5> op, 3221 MatrixOperand matrix_ty, ZPRRegOp vector_ty, 3222 ZPRRegOp zpr_ty> 3223 : I<(outs matrix_ty:$ZAda), 3224 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm, 3225 vector_ty:$Zn, zpr_ty:$Zm), 
3226 mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm", 3227 "", []>, Sched<[]> { 3228 bits<4> Zm; 3229 bits<2> Rv; 3230 bits<5> Zn; 3231 bits<2> imm; 3232 let Inst{31-23} = 0b110000010; 3233 let Inst{22} = op{4}; //sz 3234 let Inst{21} = 0b1; 3235 let Inst{20} = op{3}; //fp8 3236 let Inst{19-16} = Zm; 3237 let Inst{15} = 0b0; 3238 let Inst{14-13} = Rv; 3239 let Inst{12-10} = 0b001; 3240 let Inst{9-5} = Zn; 3241 let Inst{4-2} = op{2-0}; 3242 let Inst{1-0} = imm; 3243 3244 let Constraints = "$ZAda = $_ZAda"; 3245} 3246 3247multiclass sme2_mla_ll_array_single<string mnemonic, bits<5> op, MatrixOperand matrix_ty, ZPRRegOp vector_ty, 3248 ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic, list<Register> uses=[]> { 3249 def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty, zpr_ty>, SMEPseudo2Instr<NAME, 1> { 3250 let Uses = uses; 3251 } 3252 3253 def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s4range, vector_ty, zpr_ty, SMEMatrixArray>; 3254 3255 def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty, vt, tileslicerange2s4>; 3256} 3257 3258class sme2_mla_ll_array_vg24_single<bits<6> op, MatrixOperand matrix_ty, 3259 RegisterOperand vector_ty, ZPRRegOp zpr_ty, 3260 string mnemonic> 3261 : I<(outs matrix_ty:$ZAda), 3262 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, 3263 vector_ty:$Zn, zpr_ty:$Zm), 3264 mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm", 3265 "", []>, Sched<[]> { 3266 bits<4> Zm; 3267 bits<2> Rv; 3268 bits<5> Zn; 3269 bit imm; 3270 let Inst{31-23} = 0b110000010; 3271 let Inst{22} = op{5}; //sz 3272 let Inst{21} = 0b1; 3273 let Inst{20} = op{4}; //vg4 3274 let Inst{19-16} = Zm; 3275 let Inst{15} = 0b0; 3276 let Inst{14-13} = Rv; 3277 let Inst{12-10} = 0b000; 3278 let Inst{9-5} = Zn; 3279 let Inst{4-1} = op{3-0}; 3280 let Inst{0} = imm; 3281 3282 let Constraints = "$ZAda = $_ZAda"; 3283} 3284 3285//SME2 single-multi long long MLA two and four 
// sources

// Common part of the vg2/vg4 "multi x single" multiclasses: the real
// instruction (with `uses` forwarded to `Uses`), its pseudo, and a
// non-printed alias that omits the vector-group suffix.
multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<6> op,
                                         MatrixOperand matrix_ty,
                                         RegisterOperand multi_vector_ty,
                                         ZPRRegOp zpr_ty,
                                         list<Register> uses> {
  def NAME : sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty,
                                           zpr_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def NAME # _PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME, uimm1s4range,
                                              multi_vector_ty, zpr_ty,
                                              SMEMatrixArray>;

  // Emit priority 0: accepted by the assembler, never chosen for printing.
  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            uimm1s4range:$imm,
                                            multi_vector_ty:$Zn,
                                            zpr_ty:$Zm),
                  0>;
}

// Two-register form: shared definitions plus the VG2 selection pattern.
multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<6> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand multi_vector_ty,
                                        ZPRRegOp zpr_ty, ValueType vt,
                                        SDPatternOperator intrinsic,
                                        list<Register> uses=[]> {
  defm NAME : sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty,
                                            multi_vector_ty, zpr_ty, uses>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range,
                                           zpr_ty, vt, tileslicerange1s4>;
}

// Four-register form: shared definitions plus the VG4 selection pattern.
multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<6> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand multi_vector_ty,
                                        ZPRRegOp zpr_ty, ValueType vt,
                                        SDPatternOperator intrinsic,
                                        list<Register> uses=[]> {
  defm NAME : sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty,
                                            multi_vector_ty, zpr_ty, uses>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range,
                                           zpr_ty, vt, tileslicerange1s4>;
}

// SME2 multiple vectors long long MLA two sources

// Format with two multi-vector sources of the same register class; both
// $Zn and $Zm are 4-bit fields. Always printed with the "vgx2" suffix.
class sme2_mla_ll_array_vg2_multi<bits<5> op, MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty, string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<4> Zn;
  bit     imm;

  // The output matrix operand is tied to the input copy.
  let Constraints = "$ZAda = $_ZAda";

  // Constant encoding bits.
  let Inst{31-23} = 0b110000011;
  let Inst{21}    = 0b1;
  let Inst{16-15} = 0b00;
  let Inst{12-10} = 0b000;
  let Inst{1}     = 0b0;

  // Bits supplied by `op`.
  let Inst{22}    = op{4}; // sz
  let Inst{5-2}   = op{3-0};

  // Operand fields.
  let Inst{20-17} = Zm;
  let Inst{14-13} = Rv;
  let Inst{9-6}   = Zn;
  let Inst{0}     = imm;
}

// Real instruction, pseudo, selection pattern and suffix-less alias for the
// two-source multi-vector form.
multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<5> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand vector_ty,
                                       ValueType vt,
                                       SDPatternOperator intrinsic,
                                       list<Register> uses=[]> {
  def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def _PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt,
                                          tileslicerange1s4>;

  // Emit priority 0: accepted by the assembler, never chosen for printing.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            uimm1s4range:$imm,
                                            vector_ty:$Zn,
                                            vector_ty:$Zm),
                  0>;
}

// SME2 multiple vectors long long MLA four sources

// Four-source counterpart: $Zn and $Zm shrink to 3-bit fields and the freed
// bits (Inst{17-15}, Inst{6}) are fixed. Always printed with "vgx4".
class sme2_mla_ll_array_vg4_multi<bits<5> op, MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty,
                                  string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<3> Zm;
  bits<2> Rv;
  bits<3> Zn;
  bit     imm;

  // The output matrix operand is tied to the input copy.
  let Constraints = "$ZAda = $_ZAda";

  // Constant encoding bits.
  let Inst{31-23} = 0b110000011;
  let Inst{21}    = 0b1;
  let Inst{17-15} = 0b010;
  let Inst{12-10} = 0b000;
  let Inst{6}     = 0b0;
  let Inst{1}     = 0b0;

  // Bits supplied by `op`.
  let Inst{22}    = op{4}; // sz
  let Inst{5-2}   = op{3-0};

  // Operand fields.
  let Inst{20-18} = Zm;
  let Inst{14-13} = Rv;
  let Inst{9-7}   = Zn;
  let Inst{0}     = imm;
}

// Real instruction, pseudo, selection pattern and suffix-less alias for the
// four-source multi-vector form.
multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<5> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand vector_ty,
                                       ValueType vt,
                                       SDPatternOperator intrinsic,
                                       list<Register> uses=[]> {
  def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    let Uses = uses;
  }

  def _PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt,
                                          tileslicerange1s4>;

  // Emit priority 0: accepted by the assembler, never chosen for printing.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda,
                                            MatrixIndexGPR32Op8_11:$Rv,
                                            uimm1s4range:$imm,
                                            vector_ty:$Zn,
                                            vector_ty:$Zm),
                  0>;
}

//===----------------------------------------------------------------------===//
// SME2 Outer Product and Accumulate

// Outer product on ZPR16 sources into a 32-bit tile: the tile number is a
// 2-bit field in Inst{1-0} and Inst{2} is fixed to zero.
multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op,
                              SDPatternOperator intrinsic> {
  def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16,
                                        mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2}   = 0b0;
    let Inst{1-0} = ZAda;
  }

  def _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>,
                SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv8i1,
                                       nxv8i16>;
}

// Outer product on ZPR32 sources, built on the widening outer-product
// instruction class.
multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op,
                               SDPatternOperator intrinsic> {
  def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>,
                SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1,
                                       nxv4i32>;
}

//===----------------------------------------------------------------------===//
// SME2 Sparse Outer Product and
// Accumulate

// Format with a 16-bit tile destination. Operands are the tile ($ZAda, one
// bit), two vector sources ($Zn, $Zm), a $Zk register and an index ($imm).
// The five `opc` bits are scattered over Inst{24}, {21}, {15}, {13} and {3}.
class sme_tmopa_16b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty,
                    string mnemonic>
    : I<(outs TileOp16:$ZAda),
        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk,
             VectorIndexS32b:$imm),
        mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
        "", []>,
      Sched<[]> {
  bit     ZAda;
  bits<4> Zn;
  bits<5> Zm;
  bits<3> Zk;
  bits<2> imm;

  // The output tile operand is tied to the input copy.
  let Constraints = "$ZAda = $_ZAda";

  // Constant encoding bits.
  let Inst{31-25} = 0b1000000;
  let Inst{23-22} = 0b01;
  let Inst{14}    = 0b0;
  let Inst{2-1}   = 0b00;

  // Bits supplied by `opc`.
  let Inst{24}    = opc{4};
  let Inst{21}    = opc{3};
  let Inst{15}    = opc{2};
  let Inst{13}    = opc{1};
  let Inst{3}     = opc{0};

  // Operand fields.
  let Inst{20-16} = Zm;
  let Inst{12-10} = Zk;
  let Inst{9-6}   = Zn;
  let Inst{5-4}   = imm;
  let Inst{0}     = ZAda;
}

// Same layout with a 32-bit tile destination: the tile number widens to two
// bits (Inst{1-0}), leaving only Inst{2} as a fixed zero.
class sme_tmopa_32b<bits<5> opc, RegisterOperand zn_ty, RegisterOperand zm_ty,
                    string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm, ZK:$Zk,
             VectorIndexS32b:$imm),
        mnemonic, "\t$ZAda, $Zn, $Zm, $Zk$imm",
        "", []>,
      Sched<[]> {
  bits<2> ZAda;
  bits<4> Zn;
  bits<5> Zm;
  bits<3> Zk;
  bits<2> imm;

  // The output tile operand is tied to the input copy.
  let Constraints = "$ZAda = $_ZAda";

  // Constant encoding bits.
  let Inst{31-25} = 0b1000000;
  let Inst{23-22} = 0b01;
  let Inst{14}    = 0b0;
  let Inst{2}     = 0b0;

  // Bits supplied by `opc`.
  let Inst{24}    = opc{4};
  let Inst{21}    = opc{3};
  let Inst{15}    = opc{2};
  let Inst{13}    = opc{1};
  let Inst{3}     = opc{0};

  // Operand fields.
  let Inst{20-16} = Zm;
  let Inst{12-10} = Zk;
  let Inst{9-6}   = Zn;
  let Inst{5-4}   = imm;
  let Inst{1-0}   = ZAda;
}


//===----------------------------------------------------------------------===//
// SME2 Zero Lookup Table.
// Instruction with a single ZTR output and no inputs; only the low four
// bits of the encoding vary (`opc`).
class sme2_zero_zt<string mnemonic, bits<4> opc>
    : I<(outs ZTR:$ZT), (ins ),
        mnemonic, "\t\\{ $ZT \\}",
        "", []>,
      Sched<[]> {
  let Inst{3-0}  = opc;
  let Inst{31-4} = 0b1100000001001000000000000000;
}

// Real instruction, a custom-inserted pseudo, and the pattern that maps
// int_aarch64_sme_zero_zt onto the pseudo.
multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
  def NAME : sme2_zero_zt<mnemonic, opc>;

  def NAME # _PSEUDO : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
    // Translated to actual instruction in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
            (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
}

//===----------------------------------------------------------------------===//
// SME2 lookup table load/store

// One format for both directions, selected by opc{7}: when set, $ZTt is an
// input and the instruction may store; when clear, $ZTt is an output and the
// instruction may load.
class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
    : I<!if(opc{7}, (outs ), (outs ZTR:$ZTt)),
        !if(opc{7}, (ins ZTR:$ZTt, GPR64sp:$Rn), (ins GPR64sp:$Rn)),
        mnemonic, "\t$ZTt, [$Rn]",
        "", []>,
      Sched<[]> {
  bits<5> Rn;

  let mayStore    = opc{7};
  let mayLoad     = !not(opc{7});

  // Constant encoding bits.
  let Inst{31-22} = 0b1110000100;
  let Inst{15-10} = 0b100000;
  let Inst{4-2}   = 0b000;

  // Bits supplied by `opc`.
  let Inst{21-16} = opc{7-2};
  let Inst{1-0}   = opc{1-0};

  // Operand field.
  let Inst{9-5}   = Rn;
}


// Real instruction, a custom-inserted pseudo taking ($ZTt, $base), and the
// pattern that maps `op` onto the pseudo.
multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc,
                                  SDPatternOperator op> {
  def NAME : sme2_spill_fill_vector<mnemonic, opc>;

  def NAME # _PSEUDO
      : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> {
    // Translated to actual instruction in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
            (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
}

//===----------------------------------------------------------------------===//
// SME2 move to/from lookup table

// Format with a GPR64 destination; $imm3 selects the indexed part of $ZTt.
class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
    : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3),
        mnemonic, "\t$Rt, $ZTt[$imm3]",
        "", []>,
      Sched<[]> {
  bits<3> imm3;
  bits<5> Rt;

  let Inst{31-15} = 0b11000000010011000;
  let Inst{11-5}  = opc;
  let Inst{14-12} = imm3;
  let Inst{4-0}   = Rt;
}

// Format with a ZTR destination and a GPR64 source; same field layout as the
// class above with a different fixed prefix.
class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
    : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
        mnemonic, "\t$ZTt[$imm3], $Rt",
        "", []>,
      Sched<[]> {
  bits<3> imm3;
  bits<5> Rt;

  let Inst{31-15} = 0b11000000010011100;
  let Inst{11-5}  = opc;
  let Inst{14-12} = imm3;
  let Inst{4-0}   = Rt;
}

// SME2 move vector to lookup table
class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
    : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
        mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
        "", []>,
      Sched<[]> {
  bits<5> Zt;
  bits<2> off2;

  let Inst{31-14} = 0b110000000100111100;
  let Inst{11-5}  = opc;
  let Inst{13-12} = off2;
  let Inst{4-0}   = Zt;
}

// Real instruction, a custom-inserted pseudo, a printed alias for offset 0
// that omits the index, and patterns for every listed vector type.
// `intrinsic_lane` carries an explicit offset; `intrinsic` selects offset 0.
multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc,
                              SDPatternOperator intrinsic_lane,
                              SDPatternOperator intrinsic> {
  def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;

  def NAME # _PSEUDO
      : Pseudo<(outs), (ins ZTR:$ZT, sme_elm_idx0_3:$off2, ZPRAny:$Zt), []>,
        Sched<[]> {
    let usesCustomInserter = 1;
  }

  def : InstAlias<mnemonic # "\t$ZTt, $Zt",
                  (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;

  foreach vt = [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
                nxv8f16, nxv4f32, nxv2f64, nxv8bf16] in {
    def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), vt:$zn,
                              sme_elm_idx0_3:$imm),
              (!cast<Instruction>(NAME # _PSEUDO) $zt, $imm, $zn)>;
    def : Pat<(intrinsic (imm_to_zt untyped:$zt), vt:$zn),
              (!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
  }
}

//===----------------------------------------------------------------------===//
// SME2 lookup table expand one register

// Base format: `opc` is split between Inst{18-14} and Inst{11-10}. Derived
// classes overlay the index operand on part of Inst{18-14}.
class sme2_luti_vector_index<bits<2> sz, bits<7> opc,
                             RegisterOperand vector_ty,
                             AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<5> Zd;

  let Inst{31-19} = 0b1100000011001;
  let Inst{13-12} = sz;
  let Inst{18-14} = opc{6-2};
  let Inst{11-10} = opc{1-0};
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;
}

// 4-bit index placed in Inst{17-14}, filling the `?` bits of the opc value.
class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
                              string mnemonic>
    : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty,
                             VectorIndexB32b_timm, mnemonic> {
  bits<4> i;
  let Inst{17-14} = i;
}

// _B/_H/_S instructions plus one pattern per result type. The table index
// vector is always nxv16i8; the result type picks the instruction.
multiclass sme2_luti2_vector_index<string mnemonic,
                                   SDPatternOperator intrinsic> {
  def _B : sme2_luti2_vector_index<0b00, ZPR8,  mnemonic>;
  def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
  def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;

  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                 (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
}

// 3-bit index placed in Inst{16-14}, filling the `?` bits of the opc value.
class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
                              string mnemonic>
    : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty,
                             VectorIndexH32b_timm, mnemonic> {
  bits<3> i;
  let Inst{16-14} = i;
}

// Same structure as the luti2 multiclass, using the H-sized index operand.
multiclass sme2_luti4_vector_index<string mnemonic,
                                   SDPatternOperator intrinsic> {
  def _B : sme2_luti4_vector_index<0b00, ZPR8,  mnemonic>;
  def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
  def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;

  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                 (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
}

// SME2 lookup table expand two contiguous
// registers

// Base format for the two-register destination: $Zd is a 4-bit field in
// Inst{4-1} with Inst{0} fixed to zero. `opc` is split between Inst{18-15}
// and Inst{11-10}; derived classes overlay the index on part of Inst{18-15}.
class sme2_luti_vector_vg2_index<bits<2> sz, bits<6> opc,
                                 RegisterOperand vector_ty,
                                 AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;

  // Constant encoding bits.
  let Inst{31-19} = 0b1100000010001;
  let Inst{14}    = 0b1;
  let Inst{0}     = 0b0;

  // Size and opcode bits.
  let Inst{13-12} = sz;
  let Inst{18-15} = opc{5-2};
  let Inst{11-10} = opc{1-0};

  // Operand fields.
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
}

// 3-bit index in Inst{17-15}.
class sme2_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg2_index<sz, {1,?,?,?,0,0}, vector_ty, VectorIndexH,
                                 mnemonic> {
  bits<3> i;
  let Inst{17-15} = i;
}

multiclass sme2_luti2_vector_vg2_index<string mnemonic> {
  def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
}

// 2-bit index in Inst{16-15}.
class sme2_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg2_index<sz, {0,1,?,?,0,0}, vector_ty, VectorIndexS,
                                 mnemonic> {
  bits<2> i;
  let Inst{16-15} = i;
}

multiclass sme2_luti4_vector_vg2_index<string mnemonic> {
  def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
}

// SME2 lookup table expand four contiguous registers

// Base format for the four-register destination: $Zd is a 3-bit field in
// Inst{4-2} with Inst{1-0} fixed to zero.
class sme2_luti_vector_vg4_index<bits<2> sz, bits<5>opc,
                                 RegisterOperand vector_ty,
                                 AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<3> Zd;

  // Constant encoding bits.
  let Inst{31-19} = 0b1100000010001;
  let Inst{15-14} = 0b10;
  let Inst{1-0}   = 0b00;

  // Size and opcode bits.
  let Inst{13-12} = sz;
  let Inst{18-16} = opc{4-2};
  let Inst{11-10} = opc{1-0};

  // Operand fields.
  let Inst{9-5}   = Zn;
  let Inst{4-2}   = Zd;
}

// 2-bit index in Inst{17-16}.
class sme2_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg4_index<sz, {1,?,?,0,0}, vector_ty, VectorIndexS,
                                 mnemonic> {
  bits<2> i;
  let Inst{17-16} = i;
}

multiclass sme2_luti2_vector_vg4_index<string mnemonic> {
  def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
}

// 1-bit index in Inst{16}.
class sme2_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg4_index<sz, {0,1,?,0,0}, vector_ty, VectorIndexD,
                                 mnemonic> {
  bits<1> i;
  let Inst{16} = i;
}

// Only the _H and _S variants are defined for this form.
multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
  def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2 MOV

// Format for moving a two-register group into a tile slice. Inst{2-0} is
// left unset here; each size variant fills it with tile number and/or
// immediate bits.
class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           RegisterOperand vector_ty,
                                           string mnemonic>
    : I<(outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
             vector_ty:$Zn),
        mnemonic, "\t$ZAd[$Rs, $imm], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<4> Zn;

  // The output tile operand is tied to the input copy.
  let Constraints = "$ZAd = $_ZAd";

  // Constant encoding bits.
  let Inst{31-24} = 0b11000000;
  let Inst{21-16} = 0b000100;
  let Inst{12-10} = 0b000;
  let Inst{5-3}   = 0b000;

  // Size and direction selectors.
  let Inst{23-22} = sz;
  let Inst{15}    = v;

  // Operand fields.
  let Inst{14-13} = Rs;
  let Inst{9-6}   = Zn;
}

// Emits one InstAlias of `inst` spelled with `mnemonic`. When `vg_acronym`
// is non-empty it is appended inside the brackets after ", ". `prefer` is
// passed through as the alias emit priority.
multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
                                                  RegisterOperand tile_or_array_ty,
                                                  RegisterOperand rv_ty,
                                                  Operand index_ty,
                                                  RegisterOperand vector_ty,
                                                  string mnemonic,
                                                  string vg_acronym=""> {
  def : InstAlias<mnemonic # "\t$ZAd[$Rs, $imm"
                    # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym)
                    # "], $Zn",
                  (inst tile_or_array_ty:$ZAd, rv_ty:$Rs, index_ty:$imm,
                        vector_ty:$Zn),
                  prefer>;
}

// SME2 move vector to tile, two registers
//
// Defines the _B/_H/_S/_D instructions for one value of `v` (which picks the
// TileVectorOpV* or TileVectorOpH* operand), their pseudos, the selection
// patterns, and the "mov"/"mova" assembler aliases. Each alias is declared
// exactly once, matching the four-register multiclass; the original text
// repeated the whole "mova" group a second time with identical arguments.
multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic,
                                                SDPatternOperator intrinsic> {

  // Inst{2-0} layout per element size: the tile number takes the high bits
  // and the slice immediate the low bits.
  def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v,
                                                !if(v, TileVectorOpV8,
                                                       TileVectorOpH8),
                                                uimm3s2range, ZZ_b_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }

  def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v,
                                                !if(v, TileVectorOpV16,
                                                       TileVectorOpH16),
                                                uimm2s2range, ZZ_h_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAd;
    bits<2> imm;
    let Inst{2}   = ZAd;
    let Inst{1-0} = imm;
  }

  def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v,
                                                !if(v, TileVectorOpV32,
                                                       TileVectorOpH32),
                                                uimm1s2range, ZZ_s_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAd;
    bits<1> imm;
    let Inst{2-1} = ZAd;
    let Inst{0}   = imm;
  }

  def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v,
                                                !if(v, TileVectorOpV64,
                                                       TileVectorOpH64),
                                                uimm0s2range, ZZ_d_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAd;
    let Inst{2-0} = ZAd;
  }

  // Pseudos paired with the real instructions above.
  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;

  // Selection patterns, one per supported vector type.
  def : SME2_Tile_VG2_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm3s2range, tileslicerange3s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm1s2range, tileslicerange1s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm1s2range, tileslicerange1s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s2range, tileslicerange0s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s2range, tileslicerange0s2>;

  // "mov" spelling, emit priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8,
                                                       TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, ZZ_b_mul_r,
                                                "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16,
                                                       TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, ZZ_h_mul_r,
                                                "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32,
                                                       TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, ZZ_s_mul_r,
                                                "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64,
                                                       TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, ZZ_d_mul_r,
                                                "mov">;

  // "mova" spelling, emit priority 0 (assembler-only).
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8,
                                                       TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, ZZ_b_mul_r,
                                                "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16,
                                                       TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, ZZ_h_mul_r,
                                                "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32,
                                                       TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, ZZ_s_mul_r,
                                                "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64,
                                                       TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, ZZ_d_mul_r,
                                                "mova">;
}

// Instantiates the base multiclass twice: _H with v = 0 and `int_h`,
// _V with v = 1 and `int_v`.
multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic,
                                           SDPatternOperator int_h,
                                           SDPatternOperator int_v> {
  defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>;
  defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>;
}

// Format for moving a four-register group into a tile slice. $Zn is a 3-bit
// field in Inst{9-7}; Inst{2-0} takes `op`, whose `?` bits are overlaid by
// each size variant with tile number and/or immediate bits.
class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           RegisterOperand vector_ty,
                                           string mnemonic>
    : I<(outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
             vector_ty:$Zn),
        mnemonic,
        "\t$ZAd[$Rs, $imm], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<3> Zn;

  // The output tile operand is tied to the input copy.
  let Constraints = "$ZAd = $_ZAd";

  // Constant encoding bits.
  let Inst{31-24} = 0b11000000;
  let Inst{21-16} = 0b000100;
  let Inst{12-10} = 0b001;
  let Inst{6-3}   = 0b0000;

  // Size, direction and opcode selectors.
  let Inst{23-22} = sz;
  let Inst{15}    = v;
  let Inst{2-0}   = op;

  // Operand fields.
  let Inst{14-13} = Rs;
  let Inst{9-7}   = Zn;
}

// SME2 move vector to tile, four registers
//
// Defines the _B/_H/_S/_D instructions for one value of `v` (which picks the
// TileVectorOpV* or TileVectorOpH* operand), their pseudos, the selection
// patterns, and the "mov"/"mova" assembler aliases.
multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic,
                                                SDPatternOperator intrinsic> {

  def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?},
                                                !if(v, TileVectorOpV8,
                                                       TileVectorOpH8),
                                                uimm2s4range, ZZZZ_b_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }

  def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, {0,?,?},
                                                !if(v, TileVectorOpV16,
                                                       TileVectorOpH16),
                                                uimm1s4range, ZZZZ_h_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAd;
    bits<1> imm;
    let Inst{1} = ZAd;
    let Inst{0} = imm;
  }

  def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, {0,?,?},
                                                !if(v, TileVectorOpV32,
                                                       TileVectorOpH32),
                                                uimm0s4range, ZZZZ_s_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAd;
    let Inst{1-0} = ZAd;
  }

  def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, {?,?,?},
                                                !if(v, TileVectorOpV64,
                                                       TileVectorOpH64),
                                                uimm0s4range, ZZZZ_d_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAd;
    let Inst{2-0} = ZAd;
  }

  // Pseudos paired with the real instructions above.
  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;

  // Selection patterns, one per supported vector type (same order as before).
  def : SME2_Tile_VG4_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm2s4range, tileslicerange2s4>;
  foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in
    def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, vt, uimm1s4range, tileslicerange1s4>;
  foreach vt = [nxv4i32, nxv4f32] in
    def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, vt, uimm0s4range, tileslicerange0s4>;
  foreach vt = [nxv2i64, nxv2f64] in
    def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, vt, uimm0s4range, tileslicerange0s4>;

  // "mov" spelling, emit priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8,
                                                       TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s4range, ZZZZ_b_mul_r,
                                                "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16,
                                                       TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s4range, ZZZZ_h_mul_r,
                                                "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32,
                                                       TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_s_mul_r,
                                                "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64,
                                                       TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_d_mul_r,
                                                "mov">;

  // "mova" spelling, emit priority 0 (assembler-only).
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8,
                                                       TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s4range, ZZZZ_b_mul_r,
                                                "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16,
                                                       TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s4range, ZZZZ_h_mul_r,
                                                "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32,
                                                       TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_s_mul_r,
                                                "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64,
                                                       TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_d_mul_r,
                                                "mova">;

}

// Instantiates the base multiclass twice: _H with v = 0 and `int_h`,
// _V with v = 1 and `int_v`.
multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic,
                                           SDPatternOperator int_h,
                                           SDPatternOperator int_v> {
  defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>;
  defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>;
}

// SME Move into Array

// Format shared by the two- and four-register "vector to array" moves.
// `op` fills Inst{10-6}; users overlay $Zn on its `?` bits. `vg_acronym` is
// always printed inside the brackets after the immediate.
class sme2_mova_vec_to_array_vg24_multi< bits<5> op, RegisterOperand array_ty,
                                        RegisterOperand vector_ty,
                                        string mnemonic,
                                        string vg_acronym="">
    : I<(outs array_ty:$ZAd),
        (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm,
             vector_ty:$Zn),
        mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<3> imm;

  // The output array operand is tied to the input copy.
  let Constraints = "$ZAd = $_ZAd";

  // Constant encoding bits.
  let Inst{31-15} = 0b11000000000001000;
  let Inst{12-11} = 0b01;
  let Inst{5-3}   = 0b000;

  // Opcode bits.
  let Inst{10-6}  = op;

  // Operand fields.
  let Inst{14-13} = Rs;
  let Inst{2-0}   = imm;
}

// MOVA (vector to array, two registers)
//
// The real instruction is defined on MatrixOp64 / ZZ_d_mul_r and printed with
// "vgx2". Aliases accept the other element-size spellings, the "mov"
// mnemonic, and the form without the "vgx2" suffix; only the
// "mov ... vgx2" alias on the 64-bit operands has emit priority 1.
multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic,
                                            SDPatternOperator intrinsic> {
  def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
                                               ZZ_d_mul_r, mnemonic, "vgx2">,
             SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;

  // One selection pattern per vector type; all use the same index operand
  // and slice selector.
  foreach vt = [nxv16i8, nxv8i16, nxv8f16, nxv8bf16,
                nxv4i32, nxv4f32, nxv2i64, nxv2f64] in
    def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vt, sme_elm_idx0_7, tileslice16>;

  // Assembler-only aliases (emit priority 0), first for "mova" then "mov":
  // four suffix-less spellings followed by three "vgx2" spellings.
  foreach mn = ["mova", "mov"] in {
    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp8,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_b_mul_r,
                                                  mn>;
    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp16,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_h_mul_r,
                                                  mn>;
    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp32,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_s_mul_r,
                                                  mn>;
    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp64,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_d_mul_r,
                                                  mn>;

    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp8,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_b_mul_r,
                                                  mn, "vgx2">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp16,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_h_mul_r,
                                                  mn, "vgx2">;
    defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                  MatrixOp32,
                                                  MatrixIndexGPR32Op8_11,
                                                  sme_elm_idx0_7, ZZ_s_mul_r,
                                                  mn, "vgx2">;
  }

  // The one alias with emit priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
                                                MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r,
                                                "mov", "vgx2">;
}

// MOVA (vector to array, four
registers) 4195multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator intrinsic> { 4196 def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64, 4197 ZZZZ_d_mul_r, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> { 4198 bits<3> Zn; 4199 let Inst{9-7} = Zn; 4200 } 4201 4202 def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>; 4203 4204 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>; 4205 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>; 4206 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>; 4207 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>; 4208 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>; 4209 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>; 4210 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>; 4211 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>; 4212 4213 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4214 MatrixOp8, 4215 MatrixIndexGPR32Op8_11, 4216 sme_elm_idx0_7, ZZZZ_b_mul_r, 4217 "mova">; 4218 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4219 MatrixOp16, 4220 MatrixIndexGPR32Op8_11, 4221 sme_elm_idx0_7, ZZZZ_h_mul_r, 4222 "mova">; 4223 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4224 MatrixOp32, 4225 MatrixIndexGPR32Op8_11, 4226 sme_elm_idx0_7, ZZZZ_s_mul_r, 4227 "mova">; 4228 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4229 MatrixOp64, 4230 MatrixIndexGPR32Op8_11, 4231 sme_elm_idx0_7, ZZZZ_d_mul_r, 4232 "mova">; 4233 4234 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4235 MatrixOp8, 4236 MatrixIndexGPR32Op8_11, 4237 sme_elm_idx0_7, 
ZZZZ_b_mul_r, 4238 "mova", "vgx4">; 4239 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4240 MatrixOp16, 4241 MatrixIndexGPR32Op8_11, 4242 sme_elm_idx0_7, ZZZZ_h_mul_r, 4243 "mova", "vgx4">; 4244 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4245 MatrixOp32, 4246 MatrixIndexGPR32Op8_11, 4247 sme_elm_idx0_7, ZZZZ_s_mul_r, 4248 "mova", "vgx4">; 4249 4250 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4251 MatrixOp8, 4252 MatrixIndexGPR32Op8_11, 4253 sme_elm_idx0_7, ZZZZ_b_mul_r, 4254 "mov">; 4255 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4256 MatrixOp16, 4257 MatrixIndexGPR32Op8_11, 4258 sme_elm_idx0_7, ZZZZ_h_mul_r, 4259 "mov">; 4260 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4261 MatrixOp32, 4262 MatrixIndexGPR32Op8_11, 4263 sme_elm_idx0_7, ZZZZ_s_mul_r, 4264 "mov">; 4265 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4266 MatrixOp64, 4267 MatrixIndexGPR32Op8_11, 4268 sme_elm_idx0_7, ZZZZ_d_mul_r, 4269 "mov">; 4270 4271 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4272 MatrixOp8, 4273 MatrixIndexGPR32Op8_11, 4274 sme_elm_idx0_7, ZZZZ_b_mul_r, 4275 "mov", "vgx4">; 4276 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4277 MatrixOp16, 4278 MatrixIndexGPR32Op8_11, 4279 sme_elm_idx0_7, ZZZZ_h_mul_r, 4280 "mov", "vgx4">; 4281 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), 4282 MatrixOp32, 4283 MatrixIndexGPR32Op8_11, 4284 sme_elm_idx0_7, ZZZZ_s_mul_r, 4285 "mov", "vgx4">; 4286 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME), 4287 MatrixOp64, 4288 MatrixIndexGPR32Op8_11, 4289 sme_elm_idx0_7, ZZZZ_d_mul_r, 4290 "mov", "vgx4">; 4291 4292} 4293 4294class sme2_mova_tile_to_vec_vg2_multi_base<bits<2> sz, bit v, bits<3> op, 4295 RegisterOperand vector_ty, 4296 RegisterOperand tile_ty, 4297 
Operand index_ty, 4298 string mnemonic> 4299 : I<!if(op{1}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)), 4300 (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm), 4301 mnemonic, 4302 "\t$Zd, $ZAn[$Rs, $imm]", 4303 "", []>, Sched<[]> { 4304 bits<4> Zd; 4305 bits<2> Rs; 4306 let Inst{31-24} = 0b11000000; 4307 let Inst{23-22} = sz; 4308 let Inst{21-16} = 0b000110; 4309 let Inst{15} = v; 4310 let Inst{14-13} = Rs; 4311 let Inst{12-11} = 0b00; 4312 let Inst{10-8} = op; 4313 let Inst{4-1} = Zd; 4314 let Inst{0} = 0b0; 4315 4316 let Constraints = !if(op{1}, "$ZAn = $_ZAn", ""); 4317} 4318 4319multiclass sme2_mova_tile_or_array_to_vec_aliases<int op, Instruction inst, 4320 RegisterOperand vector_ty, 4321 RegisterOperand tile_or_array_ty, 4322 RegisterOperand rv_ty, 4323 Operand index_ty, 4324 string mnemonic, 4325 string vg_acronym=""> { 4326def : InstAlias<mnemonic # "\t$Zd, $ZAn[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]", 4327 (inst vector_ty:$Zd, tile_or_array_ty:$ZAn, rv_ty:$Rs, index_ty:$imm), op>; 4328 4329} 4330 4331multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemonic> { 4332 4333 def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r, 4334 !if(v, TileVectorOpV8, 4335 TileVectorOpH8), 4336 uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> { 4337 bits<3> imm; 4338 let Inst{7-5} = imm; 4339 } 4340 4341 def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r, 4342 !if(v, TileVectorOpV16, 4343 TileVectorOpH16), 4344 uimm2s2range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> { 4345 bits<1> ZAn; 4346 bits<2> imm; 4347 let Inst{7} = ZAn; 4348 let Inst{6-5} = imm; 4349 } 4350 4351 def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r, 4352 !if(v, TileVectorOpV32, 4353 TileVectorOpH32), 4354 uimm1s2range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> { 4355 bits<2> ZAn; 4356 bits<1> imm; 4357 let Inst{7-6} = ZAn; 4358 let Inst{5} = imm; 4359 } 4360 
4361 def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r, 4362 !if(v, TileVectorOpV64, 4363 TileVectorOpH64), 4364 uimm0s2range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> { 4365 bits<3> ZAn; 4366 let Inst{7-5} = ZAn; 4367 } 4368 4369 if !eq(mnemonic, "mova") then { 4370 defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _B), 4371 ZZ_b_mul_r, 4372 !if(v, TileVectorOpV8, 4373 TileVectorOpH8), 4374 MatrixIndexGPR32Op12_15, 4375 uimm3s2range, "mov">; 4376 defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _H), 4377 ZZ_h_mul_r, 4378 !if(v, TileVectorOpV16, 4379 TileVectorOpH16), 4380 MatrixIndexGPR32Op12_15, 4381 uimm2s2range, "mov">; 4382 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S), 4383 ZZ_s_mul_r, 4384 !if(v, TileVectorOpV32, 4385 TileVectorOpH32), 4386 MatrixIndexGPR32Op12_15, 4387 uimm1s2range, "mov">; 4388 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D), 4389 ZZ_d_mul_r, 4390 !if(v, TileVectorOpV64, 4391 TileVectorOpH64), 4392 MatrixIndexGPR32Op12_15, 4393 uimm0s2range, "mov">; 4394 } 4395 4396 defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _B), 4397 ZZ_b_mul_r, 4398 !if(v, TileVectorOpV8, 4399 TileVectorOpH8), 4400 MatrixIndexGPR32Op12_15, 4401 uimm3s2range, mnemonic>; 4402 defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _H), 4403 ZZ_h_mul_r, 4404 !if(v, TileVectorOpV16, 4405 TileVectorOpH16), 4406 MatrixIndexGPR32Op12_15, 4407 uimm2s2range, mnemonic>; 4408 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S), 4409 ZZ_s_mul_r, 4410 !if(v, TileVectorOpV32, 4411 TileVectorOpH32), 4412 MatrixIndexGPR32Op12_15, 4413 uimm1s2range, mnemonic>; 4414 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D), 4415 ZZ_d_mul_r, 4416 !if(v, TileVectorOpV64, 4417 TileVectorOpH64), 4418 MatrixIndexGPR32Op12_15, 4419 uimm0s2range, mnemonic>; 4420 4421} 

// SME2 move tile to vector, two registers
// _H is instantiated with v = 0 and _V with v = 1; opc is 0b000 for both.
multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic> {
  defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>;
  defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
}


// SME2p1 move tile to vector and zero tile, two registers
// Uses opc = 0b010 (opc{1} set, so the base class also lists the tile as a
// tied output) and adds one pseudo per direction and element size.
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic> {
  defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
  defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;


  def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;

  def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
}

// Encoding for the four-register tile-to-vector moves. `op` covers
// Inst{10-5}; instantiations leave its low bits unset ('?') and fill them
// with the tile number / slice offset. When op{4} is set the tile is also
// listed as an output and tied to the input tile operand.
class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
                                           RegisterOperand vector_ty,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           string mnemonic>
    : I<!if(op{4}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
        (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
        mnemonic,
        "\t$Zd, $ZAn[$Rs, $imm]",
        "", []>, Sched<[]> {
  bits<3> Zd;
  bits<2> Rs;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-16} = 0b000110;
  let Inst{15}    = v;
  let Inst{14-13} = Rs;
  let Inst{12-11} = 0b00;
  let Inst{10-5}  = op{5-0};
  let Inst{4-2}   = Zd;
  let Inst{1-0}   = 0b00;

  let Constraints = !if(op{4}, "$ZAn = $_ZAn", "");
}

// Four-register tile-to-vector instructions for each element size. `v`
// selects the TileVectorOpV* operand (v = 1) or the TileVectorOpH* operand
// (v = 0). _B/_H/_S fix Inst{7} to 0 via {opc,0,?,?}; _D leaves Inst{7-5}
// open for its 3-bit tile number.
multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemonic> {

  def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, {opc,0,?,?},
                                                ZZZZ_b_mul_r,
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                uimm2s4range, mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<2> imm;
    let Inst{6-5} = imm;
  }

  def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, {opc,0,?,?},
                                                ZZZZ_h_mul_r,
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                uimm1s4range, mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAn;
    bits<1> imm;
    let Inst{6}   = ZAn;
    let Inst{5}   = imm;
  }

  def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, {opc,0,?,?},
                                                ZZZZ_s_mul_r,
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                uimm0s4range, mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAn;
    let Inst{6-5} = ZAn;
  }

  def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, {opc,?,?,?},
                                                ZZZZ_d_mul_r,
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                uimm0s4range, mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAn;
    let Inst{7-5} = ZAn;
  }

  // The "mov" spelling exists only for "mova" and gets emit priority 1.
  if !eq(mnemonic, "mova") then {
  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _B),
      ZZZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8),
      MatrixIndexGPR32Op12_15, uimm2s4range, "mov">;
  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _H),
      ZZZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16),
      MatrixIndexGPR32Op12_15, uimm1s4range, "mov">;
  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
      ZZZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32),
      MatrixIndexGPR32Op12_15, uimm0s4range, "mov">;
  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
      ZZZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64),
      MatrixIndexGPR32Op12_15, uimm0s4range, "mov">;
  }

  // Aliases under the instruction's own mnemonic, emit priority 0.
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _B),
      ZZZZ_b_mul_r, !if(v, TileVectorOpV8, TileVectorOpH8),
      MatrixIndexGPR32Op12_15, uimm2s4range, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _H),
      ZZZZ_h_mul_r, !if(v, TileVectorOpV16, TileVectorOpH16),
      MatrixIndexGPR32Op12_15, uimm1s4range, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
      ZZZZ_s_mul_r, !if(v, TileVectorOpV32, TileVectorOpH32),
      MatrixIndexGPR32Op12_15, uimm0s4range, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
      ZZZZ_d_mul_r, !if(v, TileVectorOpV64, TileVectorOpH64),
      MatrixIndexGPR32Op12_15, uimm0s4range, mnemonic>;

}

// SME2 move tile to vector, four registers
multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic> {
  defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>;
  defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>;
}

// SME2p1 move tile to vector and zero tile, four registers
// Uses opc = 0b110 and adds one pseudo per direction and element size.
multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic> {
  defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
  defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;

  def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;

  def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
}


// Encoding shared by the vgx2 and vgx4 array-to-vector moves. op{3-1} goes to
// Inst{10-8} and op{0} to Inst{1}. When op{2} is set the array is also listed
// as an output and tied to the input array operand. Inst{4-2} (and Inst{1}
// when op{0} is left unset) are filled by the instantiating multiclasses.
class sme2_mova_array_to_vec_vg24_multi<bits<4> op, RegisterOperand vector_ty,
                                        RegisterOperand array_ty,
                                        string mnemonic, string vg_acronym>
    : I<!if(op{2}, (outs vector_ty:$Zd, array_ty:$_ZAn), (outs vector_ty:$Zd)),
        (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm),
        mnemonic,
        "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]",
        "", []>, Sched<[]> {
  bits<2> Rs;
  bits<3> imm;
  let Inst{31-15} = 0b11000000000001100;
  let Inst{14-13} = Rs;
  let Inst{12-11} = 0b01;
  let Inst{10-8}  = op{3-1};
  let Inst{7-5}   = imm;
  let Inst{1}     = op{0};
  let Inst{0}     = 0b0;
  let Constraints = !if(op{2}, "$ZAn = $_ZAn", "");
}

// move array to vector, two registers.
4610multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> { 4611 def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64, 4612 mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>{ 4613 bits<4> Zd; 4614 let Inst{4-1} = Zd; 4615 } 4616 4617 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4618 ZZ_b_mul_r, MatrixOp8, 4619 MatrixIndexGPR32Op8_11, 4620 sme_elm_idx0_7, mnemonic>; 4621 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4622 ZZ_h_mul_r, MatrixOp16, 4623 MatrixIndexGPR32Op8_11, 4624 sme_elm_idx0_7, mnemonic>; 4625 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4626 ZZ_s_mul_r, MatrixOp32, 4627 MatrixIndexGPR32Op8_11, 4628 sme_elm_idx0_7, mnemonic>; 4629 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4630 ZZ_d_mul_r, MatrixOp64, 4631 MatrixIndexGPR32Op8_11, 4632 sme_elm_idx0_7, mnemonic>; 4633 4634 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4635 ZZ_b_mul_r, MatrixOp8, 4636 MatrixIndexGPR32Op8_11, 4637 sme_elm_idx0_7, mnemonic, "vgx2">; 4638 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4639 ZZ_h_mul_r, MatrixOp16, 4640 MatrixIndexGPR32Op8_11, 4641 sme_elm_idx0_7, mnemonic, "vgx2">; 4642 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4643 ZZ_s_mul_r, MatrixOp32, 4644 MatrixIndexGPR32Op8_11, 4645 sme_elm_idx0_7, mnemonic, "vgx2">; 4646 4647 if !eq(mnemonic, "mova") then { 4648 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4649 ZZ_b_mul_r, MatrixOp8, 4650 MatrixIndexGPR32Op8_11, 4651 sme_elm_idx0_7, "mov">; 4652 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4653 ZZ_h_mul_r, MatrixOp16, 4654 MatrixIndexGPR32Op8_11, 4655 sme_elm_idx0_7, "mov">; 4656 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4657 ZZ_s_mul_r, MatrixOp32, 4658 MatrixIndexGPR32Op8_11, 
4659 sme_elm_idx0_7, "mov">; 4660 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4661 ZZ_d_mul_r, MatrixOp64, 4662 MatrixIndexGPR32Op8_11, 4663 sme_elm_idx0_7, "mov">; 4664 4665 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4666 ZZ_b_mul_r, MatrixOp8, 4667 MatrixIndexGPR32Op8_11, 4668 sme_elm_idx0_7, "mov", "vgx2">; 4669 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4670 ZZ_h_mul_r, MatrixOp16, 4671 MatrixIndexGPR32Op8_11, 4672 sme_elm_idx0_7, "mov", "vgx2">; 4673 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4674 ZZ_s_mul_r, MatrixOp32, 4675 MatrixIndexGPR32Op8_11, 4676 sme_elm_idx0_7, "mov", "vgx2">; 4677 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME), 4678 ZZ_d_mul_r, MatrixOp64, 4679 MatrixIndexGPR32Op8_11, 4680 sme_elm_idx0_7, "mov", "vgx2">; 4681 } 4682} 4683 4684multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> { 4685 defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>; 4686 def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>; 4687} 4688 4689// move array to vector, four registers 4690multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> { 4691 def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64, 4692 mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> { 4693 bits<3> Zd; 4694 let Inst{4-2} = Zd; 4695 } 4696 4697 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4698 ZZZZ_b_mul_r, MatrixOp8, 4699 MatrixIndexGPR32Op8_11, 4700 sme_elm_idx0_7, mnemonic>; 4701 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4702 ZZZZ_h_mul_r, MatrixOp16, 4703 MatrixIndexGPR32Op8_11, 4704 sme_elm_idx0_7, mnemonic>; 4705 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4706 ZZZZ_s_mul_r, MatrixOp32, 4707 MatrixIndexGPR32Op8_11, 4708 sme_elm_idx0_7, mnemonic>; 
4709 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4710 ZZZZ_d_mul_r, MatrixOp64, 4711 MatrixIndexGPR32Op8_11, 4712 sme_elm_idx0_7, mnemonic>; 4713 4714 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4715 ZZZZ_b_mul_r, MatrixOp8, 4716 MatrixIndexGPR32Op8_11, 4717 sme_elm_idx0_7, mnemonic, "vgx4">; 4718 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4719 ZZZZ_h_mul_r, MatrixOp16, 4720 MatrixIndexGPR32Op8_11, 4721 sme_elm_idx0_7, mnemonic, "vgx4">; 4722 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4723 ZZZZ_s_mul_r, MatrixOp32, 4724 MatrixIndexGPR32Op8_11, 4725 sme_elm_idx0_7, mnemonic, "vgx4">; 4726 4727 if !eq(mnemonic, "mova") then { 4728 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4729 ZZZZ_b_mul_r, MatrixOp8, 4730 MatrixIndexGPR32Op8_11, 4731 sme_elm_idx0_7, "mov">; 4732 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4733 ZZZZ_h_mul_r, MatrixOp16, 4734 MatrixIndexGPR32Op8_11, 4735 sme_elm_idx0_7, "mov">; 4736 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4737 ZZZZ_s_mul_r, MatrixOp32, 4738 MatrixIndexGPR32Op8_11, 4739 sme_elm_idx0_7, "mov">; 4740 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4741 ZZZZ_d_mul_r, MatrixOp64, 4742 MatrixIndexGPR32Op8_11, 4743 sme_elm_idx0_7, "mov">; 4744 4745 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4746 ZZZZ_b_mul_r, MatrixOp8, 4747 MatrixIndexGPR32Op8_11, 4748 sme_elm_idx0_7, "mov", "vgx4">; 4749 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4750 ZZZZ_h_mul_r, MatrixOp16, 4751 MatrixIndexGPR32Op8_11, 4752 sme_elm_idx0_7, "mov", "vgx4">; 4753 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4754 ZZZZ_s_mul_r, MatrixOp32, 4755 MatrixIndexGPR32Op8_11, 4756 sme_elm_idx0_7, "mov", "vgx4">; 4757 defm : 
sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME), 4758 ZZZZ_d_mul_r, MatrixOp64, 4759 MatrixIndexGPR32Op8_11, 4760 sme_elm_idx0_7, "mov", "vgx4">; 4761 } 4762} 4763 4764multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> { 4765 defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>; 4766 def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>; 4767} 4768 4769//===----------------------------------------------------------------------===// 4770// SME2 multi-vec saturating shift right narrow 4771class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> 4772 : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), 4773 mnemonic, "\t$Zd, $Zn, $imm4", 4774 "", []>, Sched<[]> { 4775 bits<4> imm4; 4776 bits<4> Zn; 4777 bits<5> Zd; 4778 let Inst{31-21} = 0b11000001111; 4779 let Inst{20} = op; 4780 let Inst{19-16} = imm4; 4781 let Inst{15-10} = 0b110101; 4782 let Inst{9-6} = Zn; 4783 let Inst{5} = u; 4784 let Inst{4-0} = Zd; 4785} 4786 4787multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> { 4788 def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>; 4789 4790 def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; 4791} 4792 4793class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, 4794 RegisterOperand vector_ty, Operand imm_ty, 4795 string mnemonic> 4796 : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn, imm_ty:$imm), 4797 mnemonic, "\t$Zd, $Zn, $imm", 4798 "", []>, Sched<[]> { 4799 bits<3> Zn; 4800 bits<5> Zd; 4801 let Inst{31-24} = 0b11000001; 4802 let Inst{23-22} = sz; 4803 let Inst{21} = 0b1; 4804 // Inst{20-16} = imm5; 4805 let Inst{15-11} = 0b11011; 4806 let Inst{10} = op{2}; 4807 let Inst{9-7} = Zn; 4808 let Inst{6-5} = op{1-0}; 4809 let Inst{4-0} = Zd; 4810} 4811 4812multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> { 4813 def _B : 
sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, 4814 mnemonic>{ 4815 bits<5> imm; 4816 let Inst{20-16} = imm; 4817 } 4818 def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, 4819 mnemonic> { 4820 bits<6> imm; 4821 let Inst{22} = imm{5}; 4822 let Inst{20-16} = imm{4-0}; 4823 } 4824 4825 def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>; 4826 def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>; 4827} 4828 4829//===----------------------------------------------------------------------===// 4830// SME2 Multi-vector - SVE Select 4831class sme2_sel_vector_vg24<bits<2> sz, bits<4> op, RegisterOperand vector_ty, 4832 string mnemonic> 4833 : I<(outs vector_ty:$Zd), 4834 (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm), 4835 mnemonic, "\t$Zd, $PNg, $Zn, $Zm", 4836 "", []>, Sched<[]> { 4837 bits<3> PNg; 4838 let Inst{31-24} = 0b11000001; 4839 let Inst{23-22} = sz; 4840 let Inst{21} = 0b1; 4841 let Inst{17-16} = op{3-2}; 4842 let Inst{15-13} = 0b100; 4843 let Inst{12-10} = PNg; 4844 let Inst{6} = op{1}; 4845 let Inst{5} = 0b0; 4846 let Inst{1} = op{0}; 4847 let Inst{0} = 0b0; 4848} 4849 4850class sme2_sel_vector_vg2<bits<2> sz, RegisterOperand vector_ty, 4851 string mnemonic> 4852 : sme2_sel_vector_vg24<sz, {?,0,?,?}, vector_ty, mnemonic> { 4853 bits<4> Zm; 4854 bits<4> Zn; 4855 bits<4> Zd; 4856 let Inst{20-17} = Zm; 4857 let Inst{9-6} = Zn; 4858 let Inst{4-1} = Zd; 4859} 4860 4861multiclass sme2_sel_vector_vg2<string mnemonic>{ 4862 def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>; 4863 def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>; 4864 def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>; 4865 def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>; 4866} 4867class sme2_sel_vector_vg4<bits<2> sz, RegisterOperand vector_ty, 4868 string mnemonic> 4869 : sme2_sel_vector_vg24<sz, 0b0100, vector_ty, mnemonic> { 4870 bits<3> Zm; 4871 
bits<3> Zn; 4872 bits<3> Zd; 4873 let Inst{20-18} = Zm; 4874 let Inst{9-7} = Zn; 4875 let Inst{4-2} = Zd; 4876} 4877multiclass sme2_sel_vector_vg4<string mnemonic> { 4878 def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>; 4879 def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>; 4880 def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>; 4881 def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>; 4882} 4883 4884//===----------------------------------------------------------------------===// 4885// Non contiguous Load and Store 4886 4887class sme2_ld_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n, 4888 RegisterOperand multi_vector_ty, 4889 RegisterOperand gpr_ty, 4890 string mnemonic> 4891 : I<(outs multi_vector_ty:$Zt), 4892 (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), 4893 mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", 4894 "", []>, Sched<[]> { 4895 bits<5> Rm; 4896 bits<3> PNg; 4897 bits<5> Rn; 4898 bits<4> Zt; 4899 let Inst{31-21} = 0b10100001000; 4900 let Inst{20-16} = Rm; 4901 let Inst{15} = 0b0; 4902 let Inst{14-13} = msz; 4903 let Inst{12-10} = PNg; 4904 let Inst{9-5} = Rn; 4905 let Inst{4} = Zt{3}; 4906 let Inst{3} = n; 4907 let Inst{2-0} = Zt{2-0}; 4908 4909 let mayLoad = 1; 4910} 4911 4912class sme2_ld_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n, 4913 RegisterOperand multi_vector_ty, 4914 RegisterOperand gpr_ty, 4915 string mnemonic> 4916 : I<(outs multi_vector_ty:$Zt), 4917 (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), 4918 mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", 4919 "", []>, Sched<[]> { 4920 bits<5> Rm; 4921 bits<3> PNg; 4922 bits<5> Rn; 4923 bits<3> Zt; 4924 let Inst{31-21} = 0b10100001000; 4925 let Inst{20-16} = Rm; 4926 let Inst{15} = 0b1; 4927 let Inst{14-13} = msz; 4928 let Inst{12-10} = PNg; 4929 let Inst{9-5} = Rn; 4930 let Inst{4} = Zt{2}; 4931 let Inst{3} = n; 4932 let Inst{2} = 0b0; 4933 let Inst{1-0} = Zt{1-0}; 4934 4935 let mayLoad = 1; 4936} 4937 4938class 
sme2_ld_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op, 4939 RegisterOperand multi_vector_ty, 4940 Operand index_ty, 4941 string mnemonic> 4942 : I<(outs multi_vector_ty:$Zt), 4943 (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4), 4944 mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]", 4945 "", []>, Sched<[]> { 4946 bits<4> imm4; 4947 bits<3> PNg; 4948 bits<5> Rn; 4949 let Inst{31-20} = 0b101000010100; 4950 let Inst{19-16} = imm4; 4951 let Inst{15} = op{1}; 4952 let Inst{14-13} = msz; 4953 let Inst{12-10} = PNg; 4954 let Inst{9-5} = Rn; 4955 let Inst{3} = n; 4956 let Inst{2} = op{0}; 4957 4958 let mayLoad = 1; 4959} 4960 4961multiclass sme2_ld_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n, 4962 RegisterOperand multi_vector_ty, 4963 Operand index_ty, 4964 string mnemonic>{ 4965 def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, {0,?}, 4966 multi_vector_ty, 4967 index_ty, mnemonic> { 4968 bits<4> Zt; 4969 let Inst{4} = Zt{3}; 4970 let Inst{2-0} = Zt{2-0}; 4971 } 4972 4973 def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]", 4974 (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>; 4975} 4976 4977multiclass sme2_ld_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n, 4978 RegisterOperand multi_vector_ty, 4979 Operand index_ty, 4980 string mnemonic> { 4981 def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, 0b10, 4982 multi_vector_ty, 4983 index_ty, mnemonic> { 4984 bits<3> Zt; 4985 let Inst{4} = Zt{2}; 4986 let Inst{1-0} = Zt{1-0}; 4987 } 4988 4989 def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]", 4990 (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>; 4991} 4992 4993//===----------------------------------------------------------------------===// 4994// SME2 Non-Contiguous Store 4995class sme2_st_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n, 4996 RegisterOperand multi_vector_ty, 4997 RegisterOperand gpr_ty, 4998 string mnemonic> 4999 : I<(outs 
), 5000 (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), 5001 mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", 5002 "", []>, Sched<[]> { 5003 bits<5> Rm; 5004 bits<3> PNg; 5005 bits<5> Rn; 5006 bits<4> Zt; 5007 let Inst{31-21} = 0b10100001001; 5008 let Inst{20-16} = Rm; 5009 let Inst{15} = 0b0; 5010 let Inst{14-13} = msz; 5011 let Inst{12-10} = PNg; 5012 let Inst{9-5} = Rn; 5013 let Inst{4} = Zt{3}; 5014 let Inst{3} = n; 5015 let Inst{2-0} = Zt{2-0}; 5016 5017 let mayStore = 1; 5018} 5019 5020class sme2_st_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n, 5021 RegisterOperand multi_vector_ty, 5022 RegisterOperand gpr_ty, 5023 string mnemonic> 5024 : I<(outs ), 5025 (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), 5026 mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", 5027 "", []>, Sched<[]> { 5028 bits<5> Rm; 5029 bits<3> PNg; 5030 bits<5> Rn; 5031 bits<3> Zt; 5032 let Inst{31-21} = 0b10100001001; 5033 let Inst{20-16} = Rm; 5034 let Inst{15} = 0b1; 5035 let Inst{14-13} = msz; 5036 let Inst{12-10} = PNg; 5037 let Inst{9-5} = Rn; 5038 let Inst{4} = Zt{2}; 5039 let Inst{3} = n; 5040 let Inst{2} = 0b0; 5041 let Inst{1-0} = Zt{1-0}; 5042 5043 let mayStore = 1; 5044} 5045 5046class sme2_st_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op, 5047 RegisterOperand multi_vector_ty, 5048 Operand index_ty, 5049 string mnemonic> 5050 : I<(outs ), 5051 (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4), 5052 mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]", 5053 "", []>, Sched<[]> { 5054 bits<4> imm4; 5055 bits<3> PNg; 5056 bits<5> Rn; 5057 let Inst{31-20} = 0b101000010110; 5058 let Inst{19-16} = imm4; 5059 let Inst{15} = op{1}; 5060 let Inst{14-13} = msz; 5061 let Inst{12-10} = PNg; 5062 let Inst{9-5} = Rn; 5063 let Inst{3} = n; 5064 let Inst{2} = op{0}; 5065 5066 let mayStore = 1; 5067} 5068 5069 5070multiclass sme2_st_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n, 5071 RegisterOperand multi_vector_ty, 5072 
Operand index_ty,
                                                     string mnemonic> {
  // Two-register store: op is {0,?}, so Inst{2} is filled by the 4-bit $Zt.
  def NAME: sme2_st_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
                                                       multi_vector_ty,
                                                       index_ty, mnemonic> {
    bits<4> Zt;
    let Inst{4} = Zt{3};
    let Inst{2-0} = Zt{2-0};
  }

  // Assembly form without an offset; 0 is passed for the immediate.
  def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
}

// Four-register form of the immediate-offset store. op is 0b10, and the
// 3-bit $Zt is split across Inst{4} and Inst{1-0}.
multiclass sme2_st_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
                                                     RegisterOperand multi_vector_ty,
                                                     Operand index_ty,
                                                     string mnemonic> {
  def NAME : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
                                                        multi_vector_ty,
                                                        index_ty, mnemonic> {
    bits<3> Zt;
    let Inst{4} = Zt{2};
    let Inst{1-0} = Zt{1-0};
  }

  // Assembly form without an offset; 0 is passed for the immediate.
  def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
}

//===----------------------------------------------------------------------===//
// SME2.1
//===----------------------------------------------------------------------===//
// SME zeroing move array to vector
//
// The tile operand is listed both as an output ($ZAn) and as an input
// ($_ZAn); the constraint ties the two together. Inst{8-5} is not assigned
// in this class.
class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty,
                                    RegisterOperand tile_ty, Operand index_ty,
                                    string mnemonic>
    : I<(outs vector_ty:$Zd, tile_ty:$ZAn),
        (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
        mnemonic, "\t$Zd, $ZAn[$Rs, $imm]",
        "", []>, Sched<[]> {
  bits<2> Rs;
  bits<5> Zd;

  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-17} = 0b00001;
  let Inst{16} = q;
  let Inst{15} = v;
  let Inst{14-13} = Rs;
  let Inst{12-9} = 0b0001;
  let Inst{4-0} = Zd;

  let Constraints = "$ZAn = $_ZAn";
}

multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
  def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
                                         !if(v, TileVectorOpV8, TileVectorOpH8),
sme_elm_idx0_15, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
    // Byte form: no tile field, all of Inst{8-5} is the index.
    bits<4> imm;
    let Inst{8-5} = imm;
  }

  // Halfword form: 1 tile bit, 3 index bits.
  def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
                                         !if(v, TileVectorOpV16, TileVectorOpH16),
                                         sme_elm_idx0_7, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAn;
    bits<3> imm;
    let Inst{8} = ZAn;
    let Inst{7-5} = imm;
  }

  // Word form: 2 tile bits, 2 index bits.
  def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
                                         !if(v, TileVectorOpV32, TileVectorOpH32),
                                         sme_elm_idx0_3, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAn;
    bits<2> imm;
    let Inst{8-7} = ZAn;
    let Inst{6-5} = imm;
  }

  // Doubleword form: 3 tile bits, 1 index bit.
  def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
                                         !if(v, TileVectorOpV64, TileVectorOpH64),
                                         sme_elm_idx0_1, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAn;
    bits<1> imm;
    let Inst{8-6} = ZAn;
    let Inst{5} = imm;
  }

  // Quadword form (q = 1): all of Inst{8-5} is the tile, no index bits.
  def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
                                         !if(v, TileVectorOpV128, TileVectorOpH128),
                                         sme_elm_idx0_0, mnemonic>, SMEPseudo2Instr<NAME # _Q, 1> {
    bits<4> ZAn;
    let Inst{8-5} = ZAn;
  }
}

// Instantiates the horizontal (v = 0, suffix _H) and vertical (v = 1, suffix
// _V) instruction sets, then one pseudo per real instruction.
multiclass sme2p1_movaz_tile_to_vec<string mnemonic, SDPatternOperator intrinsic_horiz, SDPatternOperator intrinsic_vert,
                                    SDPatternOperator intrinsic_horiz_q, SDPatternOperator intrinsic_vert_q>{
  defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
  defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;

  def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
  def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
  def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
  def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64,
SMEMatrixTileD>;
  def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;

  // Pseudos for the vertical (_V) instructions.
  def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, sme_elm_idx0_15, ZPR8, SMEMatrixTileB>;
  def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, sme_elm_idx0_7, ZPR16, SMEMatrixTileH>;
  def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, sme_elm_idx0_3, ZPR32, SMEMatrixTileS>;
  def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, sme_elm_idx0_1, ZPR64, SMEMatrixTileD>;
  def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_Q, sme_elm_idx0_15, sme_elm_idx0_0, ZPR128, SMEMatrixTileQ>;

  // Patterns for intrinsic_horiz: integer vector types first, then the
  // floating-point types that reuse the _H_H, _H_S and _H_D instructions.
  def : SME2_Tile_Movaz_Pat<NAME # _H_B, intrinsic_horiz, nxv16i8, sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;

  // Patterns for intrinsic_vert.
  def : SME2_Tile_Movaz_Pat<NAME # _V_B, intrinsic_vert, nxv16i8, sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8i16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_S,
intrinsic_vert, nxv4i32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2i64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8f16, sme_elm_idx0_1, sme_elm_idx0_7, tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4f32, sme_elm_idx0_3, sme_elm_idx0_3, tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2f64, sme_elm_idx0_7, sme_elm_idx0_1, tileslice64>;

  // _H_Q: every vector type maps intrinsic_horiz_q onto the same instruction.
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;

  // _V_Q: the same set of types for intrinsic_vert_q.
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv16i8, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8i16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4i32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2i64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8f16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4f32, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2f64, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
}

//===----------------------------------------------------------------------===//
// SME2.1 multiple vectors zero array

// opc is split: its top three bits go to Inst{17-15} and its low three bits
// to Inst{2-0}. The assembly string appends ", <vg_acronym>" inside the
// brackets only when vg_acronym is non-empty. $ZAd is tied to the $_ZAd input.
class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
                         string vg_acronym="">
    : I<(outs MatrixOp64:$ZAd),
        (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm),
        mnemonic, "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
        "", []>, Sched<[]> {
  bits<2> Rv;

  let Inst{31-18} = 0b11000000000011;
  let Inst{17-15} = opc{5-3};
  let Inst{14-13} = Rv;
  let Inst{12-3} = 0b0000000000;
  let Inst{2-0} = opc{2-0};

  let Constraints = "$ZAd = $_ZAd";
}

// Each def passes an opc whose trailing bits are '?', then fills those bits
// with its own immediate.
multiclass sme2p1_zero_matrix<string mnemonic> {
  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_Z , 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _2Z, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_2Z, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_2Z, 1> {
    bits<2> imm;
let Inst{1-0} = imm;
  }
  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_Z, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _4Z, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_4Z, 1> {
    bits<1> imm;
    let Inst{0} = imm;
  }
  def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_4Z, 1> {
    bits<1> imm;
    let Inst{0} = imm;
  }

  // One pseudo per real instruction above.
  // NOTE(review): the range operands given to the pseudos are narrower than
  // those on the matching real defs (e.g. _2Z is uimm3s2range on the def but
  // uimm2s2range here) - confirm this is intended.
  def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_Z, sme_elm_idx0_7, SMEMatrixArray>;
  def NAME # _VG4_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_Z, sme_elm_idx0_7, SMEMatrixArray>;
  def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _2Z, uimm2s2range, SMEMatrixArray>;
  def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_2Z, uimm1s2range, SMEMatrixArray>;
  def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_2Z, uimm1s2range, SMEMatrixArray>;
  def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _4Z, uimm1s4range, SMEMatrixArray>;
  def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_4Z, uimm0s4range, SMEMatrixArray>;
  def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_4Z, uimm0s4range, SMEMatrixArray>;

  // Selection patterns from the zero_za64 intrinsics to the pseudos.
  def : SME2_Zero_Matrix_Pat<NAME # _VG2_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x2, sme_elm_idx0_7, tileslice16>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG4_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x4, sme_elm_idx0_7, tileslice16>;
  def : SME2_Zero_Matrix_Pat<NAME # _2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x1, uimm2s2range, tileslicerange2s2>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG2_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x2, uimm1s2range,
tileslicerange1s2>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG4_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x4, uimm1s2range, tileslicerange1s2>;
  def : SME2_Zero_Matrix_Pat<NAME # _4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x1, uimm1s4range, tileslicerange1s4>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG2_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x2, uimm0s4range, tileslicerange0s4>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG4_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x4, uimm0s4range, tileslicerange0s4>;
}

//===----------------------------------------------------------------------===//
// SME2.1 lookup table expand two non-contiguous registers

// Common format: the 4-bit $Zd is split across Inst{4} and Inst{2-0}, with
// Inst{3} fixed to 0. op occupies Inst{18-15}; subclasses leave its low bits
// as '?' and overwrite them with the index.
class sme2p1_luti_vector_vg2_index<bits<4> op, bits<2> sz, RegisterOperand vector_ty,
                                   AsmVectorIndexOpnd index_ty,
                                   string mnemonic>
    : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>, Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;

  let Inst{31-19} = 0b1100000010011;
  let Inst{18-15} = op;
  let Inst{14} = 0b1;
  let Inst{13-12} = sz;
  let Inst{11-10} = 0b00;
  let Inst{9-5} = Zn;
  let Inst{4} = Zd{3};
  let Inst{3} = 0b0;
  let Inst{2-0} = Zd{2-0};
}

// luti2 form: the top bit of op is 1 and a 3-bit index fills Inst{17-15}.
class sme2p1_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty, mnemonic> {
  bits<3> i;
  let Inst{17-15} = i;
}

multiclass sme2p1_luti2_vector_vg2_index<string mnemonic> {
  def _B : sme2p1_luti2_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexH,
                                         mnemonic>;
  def _H : sme2p1_luti2_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexH,
                                         mnemonic>;
}

class sme2p1_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty, mnemonic>
{
  bits<2> i;
  let Inst{16-15} = i;
}
multiclass sme2p1_luti4_vector_vg2_index<string mnemonic> {
  def _B : sme2p1_luti4_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexS,
                                         mnemonic>;
  def _H : sme2p1_luti4_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexS,
                                         mnemonic>;
}

// SME2.1 lookup table expand four non-contiguous registers
//
// Common format: the 3-bit $Zd is split across Inst{4} and Inst{1-0}, with
// Inst{3-2} fixed to 0. op occupies Inst{18-16}; subclasses leave its low
// bits as '?' and overwrite them with the index.
class sme2p1_luti_vector_vg4_index<bits<3> op, bits<2> sz, RegisterOperand vector_ty,
                                   AsmVectorIndexOpnd index_ty,
                                   string mnemonic>
    : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>, Sched<[]> {
  bits<5> Zn;
  bits<3> Zd;

  let Inst{31-19} = 0b1100000010011;
  let Inst{18-16} = op;
  let Inst{15-14} = 0b10;
  let Inst{13-12} = sz;
  let Inst{11-10} = 0b00;
  let Inst{9-5} = Zn;
  let Inst{4} = Zd{2};
  let Inst{3-2} = 0b00;
  let Inst{1-0} = Zd{1-0};
}

// luti2 form: the top bit of op is 1 and a 2-bit index fills Inst{17-16}.
class sme2p1_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty, mnemonic> {
  bits<2> i;
  let Inst{17-16} = i;
}

multiclass sme2p1_luti2_vector_vg4_index<string mnemonic> {
  def _B : sme2p1_luti2_vector_vg4_index<0b00, ZZZZ_b_strided, VectorIndexS,
                                         mnemonic>;
  def _H : sme2p1_luti2_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexS,
                                         mnemonic>;
}

// luti4 form: op starts with 0b01 and a single index bit fills Inst{16}.
class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty, mnemonic> {
  bit i;
  let Inst{16} = i;
}

// Only a halfword (_H) variant is defined for this form.
multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
  def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
}

// SME2 lookup table two source registers expand to four contiguous destination registers
class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
    : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
        mnemonic, "\t$Zd, $ZTt, $Zn",
        "", []>, Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;

  let Inst{31-14} = 0b110000001000101100;
  let Inst{13-12} = sz;
  let Inst{11-10} = op;
  let Inst{9-6} = Zn;
  let Inst{5} = 0b0;
  let Inst{4-2} = Zd;
  let Inst{1-0} = 0b00;
}

// SME2 lookup table two source registers expand to four non-contiguous destination registers
//
// Same operands as the contiguous form, but the destination is
// ZZZZ_b_strided and $Zd is split across Inst{4} and Inst{1-0}.
class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
    : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
        mnemonic, "\t$Zd, $ZTt, $Zn",
        "", []>, Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;

  let Inst{31-14} = 0b110000001001101100;
  let Inst{13-12} = sz;
  let Inst{11-10} = op;
  let Inst{9-6} = Zn;
  let Inst{5} = 0b0;
  let Inst{4} = Zd{2};
  let Inst{3-2} = 0b00;
  let Inst{1-0} = Zd{1-0};
}

// Two- and four-register "multi and single vector" variants.
multiclass sme2_bfscale_single<string mnemonic> {
  def _2ZZ : sme2_sve_destructive_vector_vg2_single<0b00, 0b0011000, ZZ_h_mul_r, ZPR4b16, mnemonic>;
  def _4ZZ : sme2_sve_destructive_vector_vg4_single<0b00, 0b0011000, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
}

// Two- and four-register "multi and multi vector" variants.
multiclass sme2_bfscale_multi<string mnemonic> {
  def _2Z2Z : sme2_sve_destructive_vector_vg2_multi<0b00, 0b0011000, ZZ_h_mul_r, mnemonic>;
  def _4Z4Z : sme2_sve_destructive_vector_vg4_multi<0b00, 0b0011000, ZZZZ_h_mul_r, mnemonic>;
}

class sme2_bf16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000001000;
  let Inst{20} = M;
  let
Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = S;
  let Inst{3-2} = 0b00;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// Four operand-shape variants: N (Inst{9}) is set when $Zn is a register
// pair and M (Inst{20}) when $Zm is a register pair.
multiclass sme2_bfmop4as_widening<bit S, string mnemonic> {
  // Single vectors
  def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
}

// Two-register multiply by a single vector: $Zd and $Zn use vector_ty and
// $Zm uses zpr_ty. All three operand fields are 4 bits wide.
class sme2_multi2_fmul_sm<bits<2> size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty>
    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zd;
  bits<4> Zn;
  bits<4> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = size;
  let Inst{21} = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-10} = 0b0111010;
  let Inst{9-6} = Zn;
  let Inst{5} = 0b0;
  let Inst{4-1} = Zd;
  let Inst{0} = 0b0;
}

multiclass sme2_multi2_fmul_sm<string mnemonic> {
  def _H : sme2_multi2_fmul_sm<0b01, mnemonic, ZZ_h_mul_r, ZPR4b16>;
  def _S : sme2_multi2_fmul_sm<0b10, mnemonic, ZZ_s_mul_r, ZPR4b32>;
  def _D : sme2_multi2_fmul_sm<0b11, mnemonic, ZZ_d_mul_r, ZPR4b64>;
}

class sme2_multi4_fmul_sm<bits<2> size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty>
    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<3> Zd;
bits<3> Zn;
  bits<4> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = size;
  let Inst{21} = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-10} = 0b1111010;
  let Inst{9-7} = Zn;
  let Inst{6-5} = 0b00;
  let Inst{4-2} = Zd;
  let Inst{1-0} = 0b00;
}

multiclass sme2_multi4_fmul_sm<string mnemonic> {
  def _H : sme2_multi4_fmul_sm<0b01, mnemonic, ZZZZ_h_mul_r, ZPR4b16>;
  def _S : sme2_multi4_fmul_sm<0b10, mnemonic, ZZZZ_s_mul_r, ZPR4b32>;
  def _D : sme2_multi4_fmul_sm<0b11, mnemonic, ZZZZ_d_mul_r, ZPR4b64>;
}

// Reuses the two formats above with size = 0b00 and the halfword operands.
multiclass sme2_bfmul_single<string mnemonic> {
  def _2ZZ : sme2_multi2_fmul_sm<0b00, mnemonic, ZZ_h_mul_r, ZPR4b16>;
  def _4ZZ : sme2_multi4_fmul_sm<0b00, mnemonic, ZZZZ_h_mul_r, ZPR4b16>;
}

// Two-register multiply where both sources and the destination share
// vector_ty. All three operand fields are 4 bits wide.
class sme2_multi2_fmul_mm<bits<2> size, string mnemonic, RegisterOperand vector_ty>
    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zd;
  bits<4> Zn;
  bits<4> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = size;
  let Inst{21} = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-10} = 0b0111001;
  let Inst{9-6} = Zn;
  let Inst{5} = 0b0;
  let Inst{4-1} = Zd;
  let Inst{0} = 0b0;
}

multiclass sme2_multi2_fmul_mm<string mnemonic> {
  def _H : sme2_multi2_fmul_mm<0b01, mnemonic, ZZ_h_mul_r>;
  def _S : sme2_multi2_fmul_mm<0b10, mnemonic, ZZ_s_mul_r>;
  def _D : sme2_multi2_fmul_mm<0b11, mnemonic, ZZ_d_mul_r>;
}

// Four-register counterpart: operand fields are 3 bits wide and $Zm sits in
// Inst{20-18}.
class sme2_multi4_fmul_mm<bits<2> size, string mnemonic, RegisterOperand vector_ty>
    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<3> Zd;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = size;
  let Inst{21} = 0b1;
  let Inst{20-18} = Zm;
  let Inst{17-10} =
0b01111001;
  let Inst{9-7} = Zn;
  let Inst{6-5} = 0b00;
  let Inst{4-2} = Zd;
  let Inst{1-0} = 0b00;
}

multiclass sme2_multi4_fmul_mm<string mnemonic> {
  def _H : sme2_multi4_fmul_mm<0b01, mnemonic, ZZZZ_h_mul_r>;
  def _S : sme2_multi4_fmul_mm<0b10, mnemonic, ZZZZ_s_mul_r>;
  def _D : sme2_multi4_fmul_mm<0b11, mnemonic, ZZZZ_d_mul_r>;
}

// Reuses the multi-vector formats with size = 0b00 and halfword operands.
multiclass sme2_bfmul_multi<string mnemonic> {
  def _2Z2Z : sme2_multi2_fmul_mm<0b00, mnemonic, ZZ_h_mul_r>;
  def _4Z4Z : sme2_multi4_fmul_mm<0b00, mnemonic, ZZZZ_h_mul_r>;
}

// Quarter-tile outer product accumulating into a 16-bit tile. $ZAda is a
// single bit in Inst{0} and is tied to the $_ZAda input.
class sme2_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp16:$ZAda),
        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bit ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000001000;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = S;
  let Inst{3-1} = 0b100;
  let Inst{0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// Four operand-shape variants: N is set when $Zn is a register pair and M
// when $Zm is a register pair.
multiclass sme2_fmop4as_fp16_non_widening<bit S, string mnemonic> {
  // Single vectors
  def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
}

class sme2_fp8_fp32_quarter_tile_outer_product<bit M, bit N, string mnemonic, RegisterOperand zn_ty, RegisterOperand
zm_ty>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  // This format has no S parameter; Inst{5-2} is fixed to zero.
  let Inst{31-21} = 0b10000000001;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5-2} = 0b0000;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic> {
  // Single vectors
  def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
}

// Quarter-tile outer product accumulating into a 16-bit tile. $ZAda is a
// single bit in Inst{0} and is tied to the $_ZAda input.
class sme2_bf16_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp16:$ZAda),
        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bit ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000001001;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = S;
  let Inst{3-1} = 0b100;
  let Inst{0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_bfmop4as_non_widening<bit S, string mnemonic> {
  // Single vectors
  def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
}

// Quarter-tile outer product accumulating into a 32-bit tile. The 2-bit
// $ZAda sits in Inst{1-0} and is tied to the $_ZAda input.
class sme2_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000000000;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = S;
  let Inst{3-2} = 0b00;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4as_fp32_non_widening<bit S, string mnemonic> {
  // Single vectors
  def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, ZZ_s_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>;
}

class sme2_fp64_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp64:$ZAda),
        (ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>,
Sched<[]> {
  bits<3> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000000110;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = S;
  let Inst{3} = 0b1;
  let Inst{2-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// Four operand-shape variants: N is set when $Zn is a register pair and M
// when $Zm is a register pair.
multiclass sme2_fmop4as_fp64_non_widening<bit S, string mnemonic> {
  // Single vectors
  def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>;
}

// Quarter-tile outer product accumulating into a 32-bit tile. The 2-bit
// $ZAda sits in Inst{1-0} and is tied to the $_ZAda input.
class sme2_fp16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<2> ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000001001;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5} = 0;
  let Inst{4} = S;
  let Inst{3-2} = 0b00;
  let Inst{1-0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4as_fp16_fp32_widening<bit S, string mnemonic> {
  // Single vectors
  def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_HtoS :
sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
}

// Quarter-tile outer product accumulating into a 16-bit tile. This format
// has no S parameter; Inst{5-1} is fixed and $ZAda is the single bit Inst{0}.
class sme2_fp8_fp16_quarter_tile_outer_product<bit M, bit N, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
    : I<(outs TileOp16:$ZAda),
        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
        mnemonic, "\t$ZAda, $Zn, $Zm",
        "", []>, Sched<[]> {
  bit ZAda;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-21} = 0b10000000001;
  let Inst{20} = M;
  let Inst{19-17} = Zm;
  let Inst{16-10} = 0b0000000;
  let Inst{9} = N;
  let Inst{8-6} = Zn;
  let Inst{5-1} = 0b00100;
  let Inst{0} = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> {
  // Single vectors
  def _MZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>;

  // Multiple and single vectors
  def _M2ZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>;

  // Single and multiple vectors
  def _MZ2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>;

  // Multiple vectors
  def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
}

// FP8 SME FDOT instructions

multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic, bits<3> op,
                                          SDPatternOperator intrinsic> {
  def NAME : sme2_multi_vec_array_vg2_index<0b11, {op{2},?,?,op{1-0},?}, MatrixOp16,
                                            ZZ_b_mul_r, ZPR4b8,
                                            VectorIndexH32b_timm, mnemonic>,
                                            SMEPseudo2Instr<NAME, 1>{
let Uses=[FPMR, FPCR]; 5848 5849 bits<3> i; 5850 let Inst{11-10} = i{2-1}; 5851 let Inst{3} = i{0}; 5852 } 5853 5854 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 5855 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 5856 ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexH32b_timm:$i), 0>; 5857 5858 5859 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, ZPR4b8, VectorIndexH32b_timm, SMEMatrixArray>; 5860 5861 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexH32b_timm, tileslice16>; 5862} 5863 5864multiclass sme2_fp8_fdot_index_za16_vg1x4<string mnemonic, 5865 SDPatternOperator intrinsic> { 5866 def NAME : sme2_multi_vec_array_vg4_index<0b0, {0b1,?,?,0b100,?}, MatrixOp16, 5867 ZZZZ_b_mul_r, ZPR4b8, 5868 VectorIndexH32b_timm, mnemonic>, 5869 SMEPseudo2Instr<NAME, 1> { 5870 let Uses=[FPMR, FPCR]; 5871 5872 bits<3> i; 5873 let Inst{11-10} = i{2-1}; 5874 let Inst{3} = i{0}; 5875 } 5876 5877 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 5878 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 5879 sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexH32b_timm:$i), 0>; 5880 5881 5882 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b_mul_r, ZPR4b8, VectorIndexH32b_timm, SMEMatrixArray>; 5883 5884 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexH32b_timm, tileslice16>; 5885} 5886 5887multiclass sme2_fp8_fdot_index_za32_vg1x2<string mnemonic, 5888 SDPatternOperator intrinsic> { 5889 def NAME : sme2_multi_vec_array_vg2_index<0b01, {0b0,?,?,0b111}, MatrixOp32, ZZ_b_mul_r, ZPR4b8, 5890 VectorIndexS32b_timm, mnemonic>, 5891 SMEPseudo2Instr<NAME, 1> { 5892 let Uses=[FPMR, FPCR]; 5893 5894 bits<2> i; 5895 let Inst{11-10} = i; 5896 } 5897 5898 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 5899 
(!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 5900 ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexS32b_timm:$i), 0>; 5901 5902 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, ZPR4b8, VectorIndexS32b_timm, SMEMatrixArray>; 5903 5904 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexS32b_timm, tileslice16>; 5905} 5906 5907multiclass sme2_fp8_fdot_index_za32_vg1x4<string mnemonic, 5908 SDPatternOperator intrinsic> { 5909 def NAME : sme2_multi_vec_array_vg4_index<0b1, {0b0,?,?,0b0,0b001}, MatrixOp32, ZZZZ_b_mul_r, 5910 ZPR4b8, VectorIndexS32b_timm, mnemonic>, 5911 SMEPseudo2Instr<NAME, 1> { 5912 let Uses=[FPMR, FPCR]; 5913 5914 bits<2> i; 5915 let Inst{11-10} = i; 5916 } 5917 5918 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 5919 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 5920 ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexS32b_timm:$i), 0>; 5921 5922 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b_mul_r, ZPR4b8, VectorIndexS32b_timm, SMEMatrixArray>; 5923 5924 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexS32b_timm, tileslice16>; 5925} 5926 5927multiclass sme2_fp8_fdotv_index_za32_vg1x4<string mnemonic, bit T, SDPatternOperator intrinsic> { 5928 def NAME : sme2_fp8_multi_vec_array_vg4_index<mnemonic, T>, 5929 SMEPseudo2Instr<NAME, 1>; 5930 5931 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, ZPR4b8, VectorIndexS32b_timm, SMEMatrixArray>; 5932 5933 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexS32b_timm, tileslice16>; 5934} 5935 5936multiclass sme2_fp8_fdot_single_vg1x2<string mnemonic, bits<7> op, 5937 MatrixOperand matrix_op, 5938 SDPatternOperator intrinsic> { 5939 def NAME: 
sme2_dot_mla_add_sub_array_vg24_single<op, matrix_op, ZZ_b, ZPR4b8, mnemonic>, 5940 SMEPseudo2Instr<NAME, 1> { 5941 let Uses=[FPMR, FPCR]; 5942 } 5943 5944 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 5945 (!cast<Instruction>(NAME) matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b:$Zn, ZPR4b8:$Zm), 0>; 5946 5947 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, ZZ_b, ZPR4b8, SMEMatrixArray>; 5948 5949 def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, tileslice16>; 5950} 5951 5952multiclass sme2_fp8_fdot_single_vg1x4<string mnemonic, bits<7> op, 5953 MatrixOperand matrix_op, 5954 SDPatternOperator intrinsic> { 5955 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_op, ZZZZ_b, ZPR4b8, mnemonic>, 5956 SMEPseudo2Instr<NAME, 1> { 5957 let Uses=[FPMR, FPCR]; 5958 } 5959 5960 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 5961 (!cast<Instruction>(NAME) matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b:$Zn, ZPR4b8:$Zm), 0>; 5962 5963 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b, ZPR4b8, SMEMatrixArray>; 5964 5965 def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, tileslice16>; 5966} 5967 5968multiclass sme2_fp8_fdot_multi_vg1x2<string mnemonic, bits<7> op, 5969 MatrixOperand matrix_op, 5970 SDPatternOperator intrinsic> { 5971 def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_op, ZZ_b_mul_r, mnemonic>, 5972 SMEPseudo2Instr<NAME, 1> { 5973 let Uses=[FPMR, FPCR]; 5974 } 5975 5976 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 5977 (!cast<Instruction>(NAME) matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZZ_b_mul_r:$Zm), 0>; 5978 5979 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, SMEMatrixArray>; 5980 5981 def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, 
sme_elm_idx0_7, nxv16i8, tileslice16>; 5982} 5983 5984multiclass sme2_fp8_fdot_multi_vg1x4<string mnemonic, bits<7> op, 5985 MatrixOperand matrix_op, 5986 SDPatternOperator intrinsic> { 5987 def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_op, ZZZZ_b_mul_r, mnemonic>, 5988 SMEPseudo2Instr<NAME, 1> { 5989 let Uses=[FPMR, FPCR]; 5990 } 5991 5992 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm", 5993 (!cast<Instruction>(NAME) matrix_op:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZZZZ_b_mul_r:$Zm), 0>; 5994 5995 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b_mul_r, SMEMatrixArray>; 5996 5997 def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, nxv16i8, tileslice16>; 5998} 5999