1//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// AArch64 Scalable Vector Extension (SVE) Instruction definitions. 10// 11//===----------------------------------------------------------------------===// 12 13// For predicated nodes where the entire operation is controlled by a governing 14// predicate, please stick to a similar naming convention as used for the 15// ISD nodes: 16// 17// SDNode <=> AArch64ISD 18// ------------------------------- 19// _m<n> <=> _MERGE_OP<n> 20// _mt <=> _MERGE_PASSTHRU 21// _z <=> _MERGE_ZERO 22// _p <=> _PRED 23// 24// Given the context of this file, it is not strictly necessary to use _p to 25// distinguish predicated from unpredicated nodes given that most SVE 26// instructions are predicated. 27 28// Contiguous loads - node definitions 29// 30def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [ 31 SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, 32 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> 33]>; 34 35def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; 36def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; 37 38// Non-faulting & first-faulting loads - node definitions 39// 40def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 41def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 42 43def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 44def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 45 46// Contiguous load and replicate - node definitions 47// 48 49def SDT_AArch64_LD1Replicate : SDTypeProfile<1, 2, [ 50 SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, 51 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> 52]>; 53 54def AArch64ld1rq_z : SDNode<"AArch64ISD::LD1RQ_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; 55def AArch64ld1ro_z : SDNode<"AArch64ISD::LD1RO_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; 56 57// Gather loads - node definitions 58// 59def SDT_AArch64_GATHER_SV : SDTypeProfile<1, 4, [ 60 SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>, 61 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> 62]>; 63 64def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [ 65 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>, 66 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> 67]>; 68 69def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 70def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 71def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 72def AArch64ld1_gather_sxtw_z : SDNode<"AArch64ISD::GLD1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 73def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 74def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 75def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; 76 77def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 78def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 79def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 80def AArch64ld1s_gather_sxtw_z : SDNode<"AArch64ISD::GLD1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 81def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 82def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; 83def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; 84 85def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 86def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 87def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 88def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 89def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 90def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 91def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 92 93def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 94def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 95def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 96def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 97def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 98def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 99def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; 100 101def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; 102def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; 103// Gather vector base + scalar offset 104def AArch64ld1q_gather_z: SDNode<"AArch64ISD::GLD1Q_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; 105 106// Contiguous stores - node definitions 107// 108def SDT_AArch64_ST1 : SDTypeProfile<0, 4, [ 109 SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, 110 SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2> 111]>; 112 113def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>; 114 115// Scatter stores - node definitions 116// 117def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [ 118 SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>, 119 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> 120]>; 121 122def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [ 123 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>, 124 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> 125]>; 126 127def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; 128def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; 129def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; 130def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; 131def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; 132def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; 133def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; 134 135def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; 136 137// Scatter vector base + scalar offset 138def AArch64st1q_scatter : SDNode<"AArch64ISD::SST1Q_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; 139 140// AArch64 SVE/SVE2 - the remaining node definitions 141// 142 143// SVE CNT/INC/RDVL 144def sve_rdvl_imm : ComplexPattern<i64, 1, "SelectRDVLImm<-32, 31, 16>">; 145def sve_cnth_imm : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, 8>">; 146def sve_cntw_imm : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, 4>">; 147def sve_cntd_imm : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, 2>">; 148 149// SVE DEC 150def sve_cnth_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -8>">; 151def sve_cntw_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -4>">; 152def sve_cntd_imm_neg : ComplexPattern<i64, 1, "SelectRDVLImm<1, 16, -2>">; 153 154def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>; 155def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>; 156def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; 157def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; 158def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>; 159def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>; 160def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>; 161def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>; 162def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; 163def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; 164def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; 165def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; 166def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; 167def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; 168def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; 169def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; 170def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; 171 172def SDT_AArch64Arith : SDTypeProfile<1, 3, [ 173 SDTCisVec<0>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, 174 SDTCisSameAs<2,3>, SDTCisSameNumEltsAs<0,1> 175]>; 176 177def SDT_AArch64FMA : SDTypeProfile<1, 4, [ 178 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>, 179 SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>, 180 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4> 181]>; 182 183// Predicated operations with the result of inactive lanes being unspecified. 184def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; 185def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; 186def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; 187def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>; 188def AArch64fmax_p : SDNode<"AArch64ISD::FMAX_PRED", SDT_AArch64Arith>; 189def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>; 190def AArch64fmin_p : SDNode<"AArch64ISD::FMIN_PRED", SDT_AArch64Arith>; 191def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>; 192def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; 193def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; 194def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; 195def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; 196def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>; 197def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>; 198def AArch64shadd_p : SDNode<"AArch64ISD::HADDS_PRED", SDT_AArch64Arith>; 199def AArch64srhadd_p : SDNode<"AArch64ISD::RHADDS_PRED", SDT_AArch64Arith>; 200def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; 201def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; 202def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; 203def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>; 204def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>; 205def AArch64uhadd_p : SDNode<"AArch64ISD::HADDU_PRED", SDT_AArch64Arith>; 206def AArch64urhadd_p : SDNode<"AArch64ISD::RHADDU_PRED", SDT_AArch64Arith>; 207def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; 208def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; 209def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; 210def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>; 211 212def AArch64fadd_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3), 213 (AArch64fadd_p node:$op1, node:$op2, node:$op3), [{ 214 return N->getFlags().hasAllowContract(); 215}]>; 216def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3), 217 (AArch64fsub_p node:$op1, node:$op2, node:$op3), [{ 218 return N->getFlags().hasAllowContract(); 219}]>; 220 221def AArch64fminnm_p_nnan : PatFrag<(ops node:$op1, node:$op2, node:$op3), 222 (AArch64fminnm_p node:$op1, node:$op2, node:$op3), [{ 223 return N->getFlags().hasNoNaNs(); 224}]>; 225 226def AArch64fmaxnm_p_nnan : PatFrag<(ops node:$op1, node:$op2, node:$op3), 227 (AArch64fmaxnm_p node:$op1, node:$op2, node:$op3), [{ 228 return N->getFlags().hasNoNaNs(); 229}]>; 230 231def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ 232 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>, 233 SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> 234]>; 235 236def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>; 237def AArch64urshri_p_node : SDNode<"AArch64ISD::URSHR_I_PRED", SDT_AArch64Arith_Imm>; 238 239def AArch64urshri_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), 240 [(int_aarch64_sve_urshr node:$op1, node:$op2, node:$op3), 241 (AArch64urshri_p_node node:$op1, node:$op2, node:$op3)]>; 242 243def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ 244 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>, 245 SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4> 246]>; 247 248// Predicated operations with the result of inactive lanes provided by the last operand. 249def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>; 250def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>; 251def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; 252def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>; 253def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>; 254def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>; 255def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; 256def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; 257def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; 258def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; 259def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; 260def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>; 261def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; 262def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; 263def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; 264def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; 265def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>; 266def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>; 267def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>; 268def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arith>; 269def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>; 270def AArch64revd_mt : SDNode<"AArch64ISD::REVD_MERGE_PASSTHRU", SDT_AArch64Arith>; 271 272def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt), 273 (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{ 274 return N->getFlags().hasNoSignedZeros(); 275}]>; 276 277// These are like the above but we don't yet have need for ISD nodes. They allow 278// a single pattern to match intrinsic and ISD operand layouts. 279def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>; 280def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>; 281def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>; 282 283def AArch64fmul_m1 : VSelectPredOrPassthruPatFrags<int_aarch64_sve_fmul, AArch64fmul_p>; 284def AArch64fadd_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [ 285 (int_aarch64_sve_fadd node:$pg, node:$op1, node:$op2), 286 (vselect node:$pg, (AArch64fadd_p (SVEAllActive), node:$op1, node:$op2), node:$op1) 287]>; 288def AArch64fsub_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [ 289 (int_aarch64_sve_fsub node:$pg, node:$op1, node:$op2), 290 (vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2), node:$op1) 291]>; 292def AArch64fsubr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [ 293 (int_aarch64_sve_fsubr node:$pg, node:$op1, node:$op2), 294 (vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op2, node:$op1), node:$op1) 295]>; 296 297def AArch64shadd : PatFrags<(ops node:$pg, node:$op1, node:$op2), 298 [(int_aarch64_sve_shadd node:$pg, node:$op1, node:$op2), 299 (AArch64shadd_p node:$pg, node:$op1, node:$op2)]>; 300def AArch64uhadd : PatFrags<(ops node:$pg, node:$op1, node:$op2), 301 [(int_aarch64_sve_uhadd node:$pg, node:$op1, node:$op2), 302 (AArch64uhadd_p node:$pg, node:$op1, node:$op2)]>; 303def AArch64srhadd : PatFrags<(ops node:$pg, node:$op1, node:$op2), 304 [(int_aarch64_sve_srhadd node:$pg, node:$op1, node:$op2), 305 (AArch64srhadd_p node:$pg, node:$op1, node:$op2)]>; 306def AArch64urhadd : PatFrags<(ops node:$pg, node:$op1, node:$op2), 307 [(int_aarch64_sve_urhadd node:$pg, node:$op1, node:$op2), 308 (AArch64urhadd_p node:$pg, node:$op1, node:$op2)]>; 309 310def AArch64saba : PatFrags<(ops node:$op1, node:$op2, node:$op3), 311 [(int_aarch64_sve_saba node:$op1, node:$op2, node:$op3), 312 (add node:$op1, (AArch64sabd_p (SVEAllActive), node:$op2, node:$op3))]>; 313 314def AArch64uaba : PatFrags<(ops node:$op1, node:$op2, node:$op3), 315 [(int_aarch64_sve_uaba node:$op1, node:$op2, node:$op3), 316 (add node:$op1, (AArch64uabd_p (SVEAllActive), node:$op2, node:$op3))]>; 317 318def AArch64usra : PatFrags<(ops node:$op1, node:$op2, node:$op3), 319 [(int_aarch64_sve_usra node:$op1, node:$op2, node:$op3), 320 (add node:$op1, (AArch64lsr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>; 321 322def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3), 323 [(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3), 324 (add node:$op1, (AArch64asr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>; 325 326// Replace pattern min(max(v1,v2),v3) by clamp 327def AArch64sclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm), 328 [(int_aarch64_sve_sclamp node:$Zd, node:$Zn, node:$Zm), 329 (AArch64smin_p (SVEAllActive), 330 (AArch64smax_p (SVEAllActive), node:$Zd, node:$Zn), 331 node:$Zm) 332 ]>; 333def AArch64uclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm), 334 [(int_aarch64_sve_uclamp node:$Zd, node:$Zn, node:$Zm), 335 (AArch64umin_p (SVEAllActive), 336 (AArch64umax_p (SVEAllActive), node:$Zd, node:$Zn), 337 node:$Zm) 338 ]>; 339def AArch64fclamp : PatFrags<(ops node:$Zd, node:$Zn, node:$Zm), 340 [(int_aarch64_sve_fclamp node:$Zd, node:$Zn, node:$Zm), 341 (AArch64fminnm_p (SVEAllActive), 342 (AArch64fmaxnm_p (SVEAllActive), node:$Zd, node:$Zn), 343 node:$Zm) 344 ]>; 345 346def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ 347 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, 348 SDTCVecEltisVT<1,i1> 349]>; 350 351def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ 352 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, 353 SDTCVecEltisVT<1,i1> 354]>; 355 356def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; 357def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; 358def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; 359def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; 360def AArch64fcvtx_mt : SDNode<"AArch64ISD::FCVTX_MERGE_PASSTHRU", SDT_AArch64FCVT>; 361def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; 362def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; 363 364def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, 365 [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>; 366def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; 367def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; 368def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; 369 370def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), 371 [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), 372 (AArch64fadda_p_node (SVEAllActive), node:$op2, 373 (vselect node:$op1, node:$op3, (splat_vector (f16 fpimm_minus0)))), 374 (AArch64fadda_p_node (SVEAllActive), node:$op2, 375 (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))), 376 (AArch64fadda_p_node (SVEAllActive), node:$op2, 377 (vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>; 378 379def SDT_AArch64PTest : SDTypeProfile<1, 2, [ 380 SDTCisVT<0, i32>, // out flags 381 SDTCisVec<1>, // governing predicate 382 SDTCisSameAs<2, 1> // source predicate 383]>; 384def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>; 385def AArch64ptest_any : SDNode<"AArch64ISD::PTEST_ANY", SDT_AArch64PTest>; 386 387def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, 388 [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0, 1>]>; 389def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>; 390 391def AArch64splice : SDNode<"AArch64ISD::SPLICE", SDT_AArch64Arith>; 392 393def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>; 394 395def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), 396 (AArch64mul_p node:$pred, node:$src1, node:$src2), [{ 397 return N->hasOneUse(); 398}]>; 399 400def AArch64fmul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), 401 (AArch64fmul_p node:$pred, node:$src1, node:$src2), [{ 402 return N->hasOneUse(); 403}]>; 404 405 406def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2), 407 [(int_aarch64_sve_fabd_u node:$pg, node:$op1, node:$op2), 408 (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>; 409 410def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), 411 [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za)]>; 412 413def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), 414 [(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm), 415 (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za), 416 (AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za)]>; 417 418def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), 419 [(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm), 420 (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))), 421 (AArch64fneg_mt_nsz node:$pg, (AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (undef))]>; 422 423def AArch64fnmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), 424 [(int_aarch64_sve_fnmls_u node:$pg, node:$za, node:$zn, node:$zm), 425 (AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>; 426 427def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2), 428 (AArch64fsub_p node:$pg, node:$op2, node:$op1)>; 429 430def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [ 431 SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, 432 SDTCisSameAs<0,1>, SDTCisSameAs<1,2> 433]>; 434 435def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>; 436 437def SDT_AArch64addw : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; 438 439def AArch64saddwt : SDNode<"AArch64ISD::SADDWT", SDT_AArch64addw>; 440def AArch64saddwb : SDNode<"AArch64ISD::SADDWB", SDT_AArch64addw>; 441def AArch64uaddwt : SDNode<"AArch64ISD::UADDWT", SDT_AArch64addw>; 442def AArch64uaddwb : SDNode<"AArch64ISD::UADDWB", SDT_AArch64addw>; 443 444def AArch64bic : PatFrags<(ops node:$op1, node:$op2), 445 [(and node:$op1, (xor node:$op2, (splat_vector (i32 -1)))), 446 (and node:$op1, (xor node:$op2, (splat_vector (i64 -1)))), 447 (and node:$op1, (xor node:$op2, (SVEAllActive))), 448 (AArch64bic_node node:$op1, node:$op2)]>; 449 450def AArch64subr : PatFrag<(ops node:$op1, node:$op2), 451 (sub node:$op2, node:$op1)>; 452 453def AArch64subr_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), 454 [(int_aarch64_sve_subr node:$pg, node:$op1, node:$op2), 455 (vselect node:$pg, (sub node:$op2, node:$op1), node:$op1)]>; 456 457def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), 458 [(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3), 459 (vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>; 460def AArch64mla_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), 461 [(int_aarch64_sve_mla_u node:$pred, node:$op1, node:$op2, node:$op3), 462 (add node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>; 463def AArch64mad_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), 464 [(int_aarch64_sve_mad node:$pred, node:$op1, node:$op2, node:$op3), 465 (vselect node:$pred, (add node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op1, node:$op2)), node:$op1), 466 (vselect node:$pred, (add node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>; 467def AArch64mls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), 468 [(int_aarch64_sve_mls node:$pred, node:$op1, node:$op2, node:$op3), 469 (vselect node:$pred, (sub node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>; 470def AArch64mls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), 471 [(int_aarch64_sve_mls_u node:$pred, node:$op1, node:$op2, node:$op3), 472 (sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>; 473def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), 474 [(int_aarch64_sve_msb node:$pred, node:$op1, node:$op2, node:$op3), 475 (vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op1, node:$op2)), node:$op1), 476 (vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>; 477def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3), 478 [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3), 479 (xor node:$op1, (xor node:$op2, node:$op3))]>; 480def AArch64bcax : PatFrags<(ops node:$op1, node:$op2, node:$op3), 481 [(int_aarch64_sve_bcax node:$op1, node:$op2, node:$op3), 482 (xor node:$op1, (and node:$op2, (vnot node:$op3)))]>; 483 484def AArch64fmla_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), 485 [(int_aarch64_sve_fmla node:$pg, node:$za, node:$zn, node:$zm), 486 (vselect node:$pg, (AArch64fadd_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za), 487 (vselect node:$pg, (AArch64fma_p (SVEAllActive), node:$zn, node:$zm, node:$za), node:$za)]>; 488 489def AArch64fmls_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), 490 [(int_aarch64_sve_fmls node:$pg, node:$za, node:$zn, node:$zm), 491 (vselect node:$pg, (AArch64fsub_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za), 492 (vselect node:$pg, (AArch64fma_p (SVEAllActive), (AArch64fneg_mt (SVEAllActive), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>; 493 494def AArch64famin_p : PatFrags<(ops node:$pred, node:$op1, node:$op2), 495 [(int_aarch64_sve_famin_u node:$pred, node:$op1, node:$op2), 496 (AArch64fmin_p node:$pred, 497 (AArch64fabs_mt node:$pred, node:$op1, undef), 498 (AArch64fabs_mt node:$pred, node:$op2, undef)), 499 (AArch64fminnm_p_nnan node:$pred, 500 (AArch64fabs_mt node:$pred, node:$op1, undef), 501 (AArch64fabs_mt node:$pred, node:$op2, undef))]>; 502 503def AArch64famax_p : PatFrags<(ops node:$pred, node:$op1, node:$op2), 504 [(int_aarch64_sve_famax_u node:$pred, node:$op1, node:$op2), 505 (AArch64fmax_p node:$pred, 506 (AArch64fabs_mt node:$pred, node:$op1, undef), 507 (AArch64fabs_mt node:$pred, node:$op2, undef)), 508 (AArch64fmaxnm_p_nnan node:$pred, 509 (AArch64fabs_mt node:$pred, node:$op1, undef), 510 (AArch64fabs_mt node:$pred, node:$op2, undef))]>; 511 512def AArch64add_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_add, add>; 513def AArch64sub_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_sub, sub>; 514def AArch64mul_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_mul, AArch64mul_p>; 515def AArch64and_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_and, and>; 516def AArch64orr_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_orr, or>; 517def AArch64eor_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_eor, xor>; 518def AArch64smax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_smax, AArch64smax_p>; 519def AArch64umax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_umax, AArch64umax_p>; 520def AArch64smin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_smin, AArch64smin_p>; 521def AArch64umin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_umin, AArch64umin_p>; 522def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm, AArch64fminnm_p>; 523def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>; 524def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>; 525def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>; 526 527def AArch64fadd : PatFrags<(ops node:$op1, node:$op2), 528 [(fadd node:$op1, node:$op2), 529 (AArch64fadd_p (SVEAllActive), node:$op1, node:$op2)]>; 530 531def AArch64fmul : PatFrags<(ops node:$op1, node:$op2), 532 [(fmul node:$op1, node:$op2), 533 (AArch64fmul_p (SVEAllActive), node:$op1, node:$op2)]>; 534 535def AArch64fsub : PatFrags<(ops node:$op1, node:$op2), 536 [(fsub node:$op1, node:$op2), 537 (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2)]>; 538 539def AArch64mul : PatFrag<(ops node:$op1, node:$op2), 540 (AArch64mul_p (SVEAnyPredicate), node:$op1, node:$op2)>; 541 542def AArch64smulh : PatFrag<(ops node:$op1, node:$op2), 543 (AArch64smulh_p (SVEAnyPredicate), node:$op1, node:$op2)>; 544 545def AArch64umulh : PatFrag<(ops node:$op1, node:$op2), 546 (AArch64umulh_p (SVEAnyPredicate), node:$op1, node:$op2)>; 547 548 549def AArch64bsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3), 550 [(int_aarch64_sve_bsl node:$Op1, node:$Op2, node:$Op3), 551 (AArch64bsp node:$Op3, node:$Op1, node:$Op2)]>; 552 553def AArch64nbsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3), 554 [(int_aarch64_sve_nbsl node:$Op1, node:$Op2, node:$Op3), 555 (vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2))]>; 556 557 558let Predicates = [HasSVE] in { 559 def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; 560 def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; 561 def RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>; 562 def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>; 563 def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>; 564} // End HasSVE 565 566let Predicates = [HasSVE_or_SME] in { 567 defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>; 568 defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>; 569 defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>; 570 defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd", uaddsat>; 571 defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub", ssubsat>; 572 defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub", usubsat>; 573 574 defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>; 575 defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>; 576 defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>; 577 defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", AArch64bic>; 578 579 defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", AArch64add_m1, DestructiveBinaryComm>; 580 defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", AArch64sub_m1, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">; 581 defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", AArch64subr_m1, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>; 582 583 defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", AArch64orr_m1, DestructiveBinaryComm>; 584 defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", AArch64eor_m1, DestructiveBinaryComm>; 585 defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", AArch64and_m1, DestructiveBinaryComm>; 586 defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", "BIC_ZPZZ", int_aarch64_sve_bic, DestructiveBinary>; 587} // End HasSVE_or_SME 588 589let Predicates = [HasSVE_or_SME, UseExperimentalZeroingPseudos] in { 590 defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>; 591 defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>; 592 defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>; 593 594 defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_orr>; 595 defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_eor>; 596 defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_and>; 597 defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_bic>; 598} // End HasSVE_or_SME, UseExperimentalZeroingPseudos 599 600let Predicates = [HasSVE_or_SME] in { 601 defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>; 602 defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>; 603 defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr", AArch64subr>; 604 defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat, ssubsat>; 605 defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>; 606 defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>; 607 defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>; 608 609 defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>; 610 defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", AArch64msb_m1, "MLS_ZPmZZ", /*isReverseInstr*/ 1>; 611 defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1, "MLA_ZPZZZ", "MAD_ZPmZZ">; 612 defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", AArch64mls_m1, "MLS_ZPZZZ", "MSB_ZPmZZ">; 613 614 defm MLA_ZPZZZ : sve_int_3op_p_mladdsub<AArch64mla_p>; 615 defm MLS_ZPZZZ : sve_int_3op_p_mladdsub<AArch64mls_p>; 616 617 // SVE predicated integer reductions. 618 defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>; 619 defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>; 620 defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>; 621 defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>; 622 defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>; 623 defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_p>; 624 defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_p>; 625 defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_p>; 626 defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_p>; 627 628 defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>; 629 defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>; 630 defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>; 631 defm BIC_ZI : sve_int_log_imm_bic<AArch64bic>; 632 633 defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>; 634 defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>; 635 defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>; 636 defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>; 637 638 defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>; 639 defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", AArch64mul_m1, DestructiveBinaryComm>; 640 defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>; 641 defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>; 642 643 defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>; 644 defm SMULH_ZPZZ : sve_int_bin_pred_bhsd<AArch64smulh_p>; 645 defm UMULH_ZPZZ : sve_int_bin_pred_bhsd<AArch64umulh_p>; 646 647 defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">; 648 defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">; 649 defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", /*isReverseInstr*/ 1>; 650 defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", int_aarch64_sve_udivr, DestructiveBinaryCommWithRev, "UDIV_ZPmZ", /*isReverseInstr*/ 1>; 651 652 defm SDIV_ZPZZ : sve_int_bin_pred_sd<AArch64sdiv_p>; 653 defm UDIV_ZPZZ : sve_int_bin_pred_sd<AArch64udiv_p>; 654 655 defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", AArch64sdot>; 656 defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", AArch64udot>; 657 658 defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; 659 defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; 660 661 defm SXTB_ZPmZ : sve_int_un_pred_arit_h<0b000, "sxtb", AArch64sxt_mt>; 662 defm UXTB_ZPmZ : sve_int_un_pred_arit_h<0b001, "uxtb", AArch64uxt_mt>; 663 defm SXTH_ZPmZ : sve_int_un_pred_arit_w<0b010, "sxth", AArch64sxt_mt>; 664 defm UXTH_ZPmZ : sve_int_un_pred_arit_w<0b011, "uxth", AArch64uxt_mt>; 665 defm SXTW_ZPmZ : sve_int_un_pred_arit_d<0b100, "sxtw", AArch64sxt_mt>; 666 defm UXTW_ZPmZ : sve_int_un_pred_arit_d<0b101, "uxtw", AArch64uxt_mt>; 667 defm ABS_ZPmZ : sve_int_un_pred_arit< 0b110, "abs", AArch64abs_mt>; 668 defm NEG_ZPmZ : sve_int_un_pred_arit< 0b111, "neg", AArch64neg_mt>; 669 670 defm CLS_ZPmZ : sve_int_un_pred_arit_bitwise< 0b000, "cls", AArch64cls_mt>; 671 defm CLZ_ZPmZ : sve_int_un_pred_arit_bitwise< 0b001, "clz", AArch64clz_mt>; 672 defm CNT_ZPmZ : sve_int_un_pred_arit_bitwise< 0b010, "cnt", AArch64cnt_mt>; 673 defm CNOT_ZPmZ : sve_int_un_pred_arit_bitwise< 0b011, "cnot", AArch64cnot_mt>; 674 defm NOT_ZPmZ : sve_int_un_pred_arit_bitwise< 0b110, "not", AArch64not_mt>; 675 defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>; 676 defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>; 677 678 foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in { 679 // No dedicated instruction, so just clear the sign bit. 680 def : Pat<(VT (fabs VT:$op)), 681 (AND_ZI $op, (i64 (logical_imm64_XFORM(i64 0x7fff7fff7fff7fff))))>; 682 // No dedicated instruction, so just invert the sign bit. 683 def : Pat<(VT (fneg VT:$op)), 684 (EOR_ZI $op, (i64 (logical_imm64_XFORM(i64 0x8000800080008000))))>; 685 } 686 687 // zext(cmpeq(x, splat(0))) -> cnot(x) 688 def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))), 689 (CNOT_ZPmZ_B $Op2, $Pg, $Op2)>; 690 def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)))), 691 (CNOT_ZPmZ_H $Op2, $Pg, $Op2)>; 692 def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)))), 693 (CNOT_ZPmZ_S $Op2, $Pg, $Op2)>; 694 def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))), 695 (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>; 696 697 defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", AArch64smax_m1, DestructiveBinaryComm>; 698 defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", AArch64umax_m1, DestructiveBinaryComm>; 699 defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", AArch64smin_m1, DestructiveBinaryComm>; 700 defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", AArch64umin_m1, DestructiveBinaryComm>; 701 defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>; 702 defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>; 703 704 defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>; 705 defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>; 706 defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>; 707 defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>; 708 defm SABD_ZPZZ : sve_int_bin_pred_bhsd<AArch64sabd_p>; 709 defm UABD_ZPZZ : sve_int_bin_pred_bhsd<AArch64uabd_p>; 710 711 defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", AArch64frecpe>; 712 defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>; 713 714 defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>; 715 defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>; 716 defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>; 717 defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>; 718 defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>; 719 defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>; 720 defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>; 721 defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>; 722 723 defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fadd_p>; 724 defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsub_p>; 725 defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, AArch64fmul_p>; 726 defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsubr_p>; 727 defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmaxnm_p>; 728 defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fminnm_p>; 729 defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmax_p>; 730 defm FMIN_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmin_p>; 731 732 let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { 733 defm FADD_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>; 734 defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>; 735 defm FMUL_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>; 736 defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>; 737 defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>; 738 defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>; 739 defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>; 740 defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>; 741 } 742 743 defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>; 744 defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; 745 defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>; 746 defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", AArch64fsubr_m1, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>; 747 defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", AArch64fmaxnm_m1, DestructiveBinaryComm>; 748 defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", AArch64fminnm_m1, DestructiveBinaryComm>; 749 defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", AArch64fmax_m1, DestructiveBinaryComm>; 750 defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", AArch64fmin_m1, DestructiveBinaryComm>; 751 defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>; 752 defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>; 753 defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>; 754 defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", /*isReverseInstr*/ 1>; 755 defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">; 756 757 defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>; 758 defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>; 759 defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>; 760 defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmaxnm_p>; 761 defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>; 762 defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>; 763 defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>; 764 defm FABD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fabd_p>; 765 defm FMULX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_fmulx_u>; 766 defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>; 767} // End HasSVE_or_SME 768 769let Predicates = [HasSVE_or_SME, UseExperimentalZeroingPseudos] in { 770 defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>; 771 defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>; 772 defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>; 773 defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>; 774 defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>; 775 defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>; 776 defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>; 777 defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>; 778 defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>; 779 defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>; 780 defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>; 781 defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>; 782} // End HasSVE_or_SME, UseExperimentalZeroingPseudos 783 784let Predicates = [HasSVE_or_SME] in { 785 defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", AArch64fadd>; 786 defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", AArch64fsub>; 787 defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", AArch64fmul>; 788} // End HasSVE_or_SME 789 790let Predicates = [HasSVE] in { 791 defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; 792} // End HasSVE 793 794let Predicates = [HasSVE_or_SME] in { 795 defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>; 796 defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>; 797} // End HasSVE_or_SME 798 799let Predicates = [HasSVE] in { 800 defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>; 801} // End HasSVE 802 803let Predicates = [HasSVE_or_SME] in { 804 defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>; 805 defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>; 806 807 defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", AArch64fmla_m1, "FMAD_ZPmZZ">; 808 defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls", "FMLS_ZPZZZ", AArch64fmls_m1, "FMSB_ZPmZZ">; 809 defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla", "FNMLA_ZPZZZ", int_aarch64_sve_fnmla, "FNMAD_ZPmZZ">; 810 defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls", "FNMLS_ZPZZZ", int_aarch64_sve_fnmls, "FNMSB_ZPmZZ">; 811 812 defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad", int_aarch64_sve_fmad, "FMLA_ZPmZZ", /*isReverseInstr*/ 1>; 813 defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb", int_aarch64_sve_fmsb, "FMLS_ZPmZZ", /*isReverseInstr*/ 1>; 814 defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>; 815 defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>; 816 817 defm FMLA_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fmla_p>; 818 defm FMLS_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fmls_p>; 819 defm FNMLA_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fnmla_p>; 820 defm FNMLS_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fnmls_p>; 821} // End HasSVE_or_SME 822 823let Predicates = [HasSVE] in { 824 defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>; 825} // End HasSVE 826 827let Predicates = [HasSVE_or_SME] in { 828 defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", int_aarch64_sve_fmla_lane>; 829 defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", int_aarch64_sve_fmls_lane>; 830 831 defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; 832 defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; 833} // End HasSVE_or_SME 834 835let Predicates = [HasSVE] in { 836 // SVE floating point reductions. 837 defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>; 838} // End HasSVE 839 840let Predicates = [HasSVE_or_SME] in { 841 defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>; 842 defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>; 843 defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>; 844 defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>; 845 defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>; 846 847 // Splat immediate (unpredicated) 848 defm DUP_ZI : sve_int_dup_imm<"dup">; 849 defm FDUP_ZI : sve_int_dup_fpimm<"fdup">; 850 defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">; 851 852 // Splat immediate (predicated) 853 defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy", AArch64dup_mt>; 854 defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy", AArch64dup_mt>; 855 defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; 856 857 // Splat scalar register (unpredicated, GPR or vector + element index) 858 defm DUP_ZR : sve_int_perm_dup_r<"dup", splat_vector>; 859 defm DUP_ZZI : sve_int_perm_dup_i<"dup">; 860 861 // Splat scalar register (predicated) 862 defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>; 863 defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>; 864 865 // Duplicate FP scalar into all vector elements 866 def : Pat<(nxv8f16 (splat_vector (f16 FPR16:$src))), 867 (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; 868 def : Pat<(nxv4f16 (splat_vector (f16 FPR16:$src))), 869 (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; 870 def : Pat<(nxv2f16 (splat_vector (f16 FPR16:$src))), 871 (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; 872 def : Pat<(nxv4f32 (splat_vector (f32 FPR32:$src))), 873 (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; 874 def : Pat<(nxv2f32 (splat_vector (f32 FPR32:$src))), 875 (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; 876 def : Pat<(nxv2f64 (splat_vector (f64 FPR64:$src))), 877 (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>; 878 def : Pat<(nxv8bf16 (splat_vector (bf16 FPR16:$src))), 879 (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; 880 def : Pat<(nxv4bf16 (splat_vector (bf16 FPR16:$src))), 881 (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; 882 def : Pat<(nxv2bf16 (splat_vector (bf16 FPR16:$src))), 883 (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; 884 885 // Duplicate +0.0 into all vector elements 886 def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; 887 def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; 888 def : Pat<(nxv2f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; 889 def : Pat<(nxv4f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>; 890 def : Pat<(nxv2f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>; 891 def : Pat<(nxv2f64 (splat_vector (f64 fpimm0))), (DUP_ZI_D 0, 0)>; 892 def : Pat<(nxv8bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; 893 def : Pat<(nxv4bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; 894 def : Pat<(nxv2bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; 895 896 // Duplicate Int immediate into all vector elements 897 def : Pat<(nxv16i8 (splat_vector (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))), 898 (DUP_ZI_B $a, $b)>; 899 def : Pat<(nxv8i16 (splat_vector (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)))), 900 (DUP_ZI_H $a, $b)>; 901 def : Pat<(nxv4i32 (splat_vector (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)))), 902 (DUP_ZI_S $a, $b)>; 903 def : Pat<(nxv2i64 (splat_vector (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))), 904 (DUP_ZI_D $a, $b)>; 905 906 // Duplicate immediate FP into all vector elements. 907 def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))), 908 (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; 909 def : Pat<(nxv4f16 (splat_vector (f16 fpimm:$val))), 910 (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; 911 def : Pat<(nxv8f16 (splat_vector (f16 fpimm:$val))), 912 (DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>; 913 def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))), 914 (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; 915 def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))), 916 (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; 917 def : Pat<(nxv2f64 (splat_vector (f64 fpimm:$val))), 918 (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; 919 920 // Duplicate FP immediate into all vector elements 921 let AddedComplexity = 2 in { 922 def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)), 923 (FDUP_ZI_H fpimm16:$imm8)>; 924 def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)), 925 (FDUP_ZI_H fpimm16:$imm8)>; 926 def : Pat<(nxv2f16 (splat_vector fpimm16:$imm8)), 927 (FDUP_ZI_H fpimm16:$imm8)>; 928 def : Pat<(nxv4f32 (splat_vector fpimm32:$imm8)), 929 (FDUP_ZI_S fpimm32:$imm8)>; 930 def : Pat<(nxv2f32 (splat_vector fpimm32:$imm8)), 931 (FDUP_ZI_S fpimm32:$imm8)>; 932 def : Pat<(nxv2f64 (splat_vector fpimm64:$imm8)), 933 (FDUP_ZI_D fpimm64:$imm8)>; 934 } 935 936 // Select elements from either vector (predicated) 937 defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; 938 939 defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; 940} // End HasSVE_or_SME 941 942// COMPACT - word and doubleword 943let Predicates = [HasNonStreamingSVE_or_SME2p2] in { 944 defm COMPACT_ZPZ : sve_int_perm_compact_sd<"compact", int_aarch64_sve_compact>; 945} 946 947let Predicates = [HasSVE_or_SME] in { 948 defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; 949 defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; 950 defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; 951 952 defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>; 953 defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>; 954 defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", AArch64revh_mt>; 955 defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", AArch64revw_mt>; 956 957 defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse, int_aarch64_sve_rev_b16, int_aarch64_sve_rev_b32, int_aarch64_sve_rev_b64>; 958 defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>; 959 960 defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; 961 defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>; 962 defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>; 963 defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>; 964 965 defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>; 966 defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>; 967 968 // Define pattern for `nxv1i1 splat_vector(1)`. 969 // We do this here instead of in ISelLowering such that PatFrag's can still 970 // recognize a splat. 971 def : Pat<(nxv1i1 immAllOnesV), (PUNPKLO_PP (PTRUE_D 31))>; 972 973 defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; 974 defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; 975 def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; 976} // End HasSVE_or_SME 977 978let Predicates = [HasNonStreamingSVE_or_SME2p2] in { 979 defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>; 980} // End HasSVE 981 982let Predicates = [HasSVE_or_SME] in { 983 defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>; 984 defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>; 985 defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>; 986 defm BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs", null_frag>; 987 988 defm BRKN_PPzP : sve_int_brkn<0b0, "brkn", int_aarch64_sve_brkn_z>; 989 defm BRKNS_PPzP : sve_int_brkn<0b1, "brkns", null_frag>; 990 991 defm BRKA_PPzP : sve_int_break_z<0b000, "brka", int_aarch64_sve_brka_z>; 992 defm BRKA_PPmP : sve_int_break_m<0b001, "brka", int_aarch64_sve_brka>; 993 defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas", null_frag>; 994 defm BRKB_PPzP : sve_int_break_z<0b100, "brkb", int_aarch64_sve_brkb_z>; 995 defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>; 996 defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>; 997 998 defm PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest, AArch64ptest_any>; 999 defm PFALSE : sve_int_pfalse<0b000000, "pfalse">; 1000 defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>; 1001 defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>; 1002 1003 defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>; 1004 defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>; 1005 defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>; 1006 defm SEL_PPPP : sve_int_pred_log_v2<0b0011, "sel", vselect, or>; 1007 defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>; 1008 defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>; 1009 defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>; 1010 defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>; 1011 defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>; 1012 defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>; 1013 defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>; 1014 defm ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs", null_frag>; 1015 defm ORNS_PPzPP : sve_int_pred_log<0b1101, "orns", null_frag>; 1016 defm NORS_PPzPP : sve_int_pred_log<0b1110, "nors", null_frag>; 1017 defm NANDS_PPzPP : sve_int_pred_log<0b1111, "nands", null_frag>; 1018 1019 defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta", AArch64clasta_n>; 1020 defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb", AArch64clastb_n>; 1021 defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta", AArch64clasta_n>; 1022 defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb", AArch64clastb_n>; 1023 defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>; 1024 defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>; 1025 1026 defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>; 1027 defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>; 1028 defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>; 1029 defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>; 1030 1031 // continuous load with reg+immediate 1032 defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>; 1033 defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>; 1034 defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>; 1035 defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>; 1036 defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>; 1037 defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>; 1038 defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>; 1039 defm LD1H_D_IMM : sve_mem_cld_si<0b0111, "ld1h", Z_d, ZPR64>; 1040 defm LD1SH_D_IMM : sve_mem_cld_si<0b1000, "ld1sh", Z_d, ZPR64>; 1041 defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>; 1042 defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>; 1043 defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>; 1044 let Predicates = [HasSVE2p1] in { 1045 defm LD1W_Q_IMM : sve_mem_128b_cld_si<0b10, "ld1w">; 1046 } 1047 defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>; 1048 defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>; 1049 defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>; 1050 defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>; 1051 let Predicates = [HasSVE2p1] in { 1052 defm LD1D_Q_IMM : sve_mem_128b_cld_si<0b11, "ld1d">; 1053 } 1054 1055 // LD1R loads (splat scalar to vector) 1056 defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>; 1057 defm LD1RB_H_IMM : sve_mem_ld_dup<0b00, 0b01, "ld1rb", Z_h, ZPR16, uimm6s1>; 1058 defm LD1RB_S_IMM : sve_mem_ld_dup<0b00, 0b10, "ld1rb", Z_s, ZPR32, uimm6s1>; 1059 defm LD1RB_D_IMM : sve_mem_ld_dup<0b00, 0b11, "ld1rb", Z_d, ZPR64, uimm6s1>; 1060 defm LD1RSW_IMM : sve_mem_ld_dup<0b01, 0b00, "ld1rsw", Z_d, ZPR64, uimm6s4>; 1061 defm LD1RH_IMM : sve_mem_ld_dup<0b01, 0b01, "ld1rh", Z_h, ZPR16, uimm6s2>; 1062 defm LD1RH_S_IMM : sve_mem_ld_dup<0b01, 0b10, "ld1rh", Z_s, ZPR32, uimm6s2>; 1063 defm LD1RH_D_IMM : sve_mem_ld_dup<0b01, 0b11, "ld1rh", Z_d, ZPR64, uimm6s2>; 1064 defm LD1RSH_D_IMM : sve_mem_ld_dup<0b10, 0b00, "ld1rsh", Z_d, ZPR64, uimm6s2>; 1065 defm LD1RSH_S_IMM : sve_mem_ld_dup<0b10, 0b01, "ld1rsh", Z_s, ZPR32, uimm6s2>; 1066 defm LD1RW_IMM : sve_mem_ld_dup<0b10, 0b10, "ld1rw", Z_s, ZPR32, uimm6s4>; 1067 defm LD1RW_D_IMM : sve_mem_ld_dup<0b10, 0b11, "ld1rw", Z_d, ZPR64, uimm6s4>; 1068 defm LD1RSB_D_IMM : sve_mem_ld_dup<0b11, 0b00, "ld1rsb", Z_d, ZPR64, uimm6s1>; 1069 defm LD1RSB_S_IMM : sve_mem_ld_dup<0b11, 0b01, "ld1rsb", Z_s, ZPR32, uimm6s1>; 1070 defm LD1RSB_H_IMM : sve_mem_ld_dup<0b11, 0b10, "ld1rsb", Z_h, ZPR16, uimm6s1>; 1071 defm LD1RD_IMM : sve_mem_ld_dup<0b11, 0b11, "ld1rd", Z_d, ZPR64, uimm6s8>; 1072 1073 // LD1RQ loads (load quadword-vector and splat to scalable vector) 1074 defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>; 1075 defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, ZPR16>; 1076 defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>; 1077 defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>; 1078 defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>; 1079 defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>; 1080 defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>; 1081 defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>; 1082 1083 multiclass sve_ld1rq_duplane_pat<ValueType vt1, ValueType vt2, SDPatternOperator op, Instruction load_instr_imm, Instruction ptrue, Instruction load_instr_scalar, ComplexPattern AddrCP> { 1084 def : Pat<(vt1 (op (vt1 (vector_insert_subvec (vt1 undef), (vt2 (load GPR64sp:$Xn)), (i64 0))), (i64 0))), 1085 (load_instr_imm (ptrue 31), GPR64sp:$Xn, 0)>; 1086 let AddedComplexity = 2 in { 1087 def : Pat<(vt1 (op (vt1 (vector_insert_subvec (vt1 undef), (vt2 (load (add GPR64sp:$Xn, simm4s16:$imm))), (i64 0))), (i64 0))), 1088 (load_instr_imm (ptrue 31), GPR64sp:$Xn, simm4s16:$imm)>; 1089 } 1090 def : Pat<(vt1 (op (vt1 (vector_insert_subvec (vt1 undef), (vt2 (load (AddrCP GPR64sp:$Xn, GPR64sp:$idx))), (i64 0))), (i64 0))), 1091 (load_instr_scalar (ptrue 31), GPR64sp:$Xn, $idx)>; 1092 } 1093 defm : sve_ld1rq_duplane_pat<nxv16i8, v16i8, AArch64duplane128, LD1RQ_B_IMM, PTRUE_B, LD1RQ_B, am_sve_regreg_lsl0>; 1094 defm : sve_ld1rq_duplane_pat<nxv8i16, v8i16, AArch64duplane128, LD1RQ_H_IMM, PTRUE_H, LD1RQ_H, am_sve_regreg_lsl1>; 1095 defm : sve_ld1rq_duplane_pat<nxv4i32, v4i32, AArch64duplane128, LD1RQ_W_IMM, PTRUE_S, LD1RQ_W, am_sve_regreg_lsl2>; 1096 defm : sve_ld1rq_duplane_pat<nxv2i64, v2i64, AArch64duplane128, LD1RQ_D_IMM, PTRUE_D, LD1RQ_D, am_sve_regreg_lsl3>; 1097 1098 // continuous load with reg+reg addressing. 1099 defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>; 1100 defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>; 1101 defm LD1B_S : sve_mem_cld_ss<0b0010, "ld1b", Z_s, ZPR32, GPR64NoXZRshifted8>; 1102 defm LD1B_D : sve_mem_cld_ss<0b0011, "ld1b", Z_d, ZPR64, GPR64NoXZRshifted8>; 1103 defm LD1SW_D : sve_mem_cld_ss<0b0100, "ld1sw", Z_d, ZPR64, GPR64NoXZRshifted32>; 1104 defm LD1H : sve_mem_cld_ss<0b0101, "ld1h", Z_h, ZPR16, GPR64NoXZRshifted16>; 1105 defm LD1H_S : sve_mem_cld_ss<0b0110, "ld1h", Z_s, ZPR32, GPR64NoXZRshifted16>; 1106 defm LD1H_D : sve_mem_cld_ss<0b0111, "ld1h", Z_d, ZPR64, GPR64NoXZRshifted16>; 1107 defm LD1SH_D : sve_mem_cld_ss<0b1000, "ld1sh", Z_d, ZPR64, GPR64NoXZRshifted16>; 1108 defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>; 1109 defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>; 1110 defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>; 1111 let Predicates = [HasSVE2p1] in { 1112 defm LD1W_Q : sve_mem_128b_cld_ss<0b10, "ld1w", GPR64NoXZRshifted32>; 1113 } 1114 defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>; 1115 defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; 1116 defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; 1117 defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; 1118 let Predicates = [HasSVE2p1] in { 1119 defm LD1D_Q : sve_mem_128b_cld_ss<0b11, "ld1d", GPR64NoXZRshifted64>; 1120 } 1121} // End HasSVE_or_SME 1122 1123let Predicates = [HasSVE] in { 1124 // non-faulting continuous load with reg+immediate 1125 defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>; 1126 defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>; 1127 defm LDNF1B_S_IMM : sve_mem_cldnf_si<0b0010, "ldnf1b", Z_s, ZPR32>; 1128 defm LDNF1B_D_IMM : sve_mem_cldnf_si<0b0011, "ldnf1b", Z_d, ZPR64>; 1129 defm LDNF1SW_D_IMM : sve_mem_cldnf_si<0b0100, "ldnf1sw", Z_d, ZPR64>; 1130 defm LDNF1H_IMM : sve_mem_cldnf_si<0b0101, "ldnf1h", Z_h, ZPR16>; 1131 defm LDNF1H_S_IMM : sve_mem_cldnf_si<0b0110, "ldnf1h", Z_s, ZPR32>; 1132 defm LDNF1H_D_IMM : sve_mem_cldnf_si<0b0111, "ldnf1h", Z_d, ZPR64>; 1133 defm LDNF1SH_D_IMM : sve_mem_cldnf_si<0b1000, "ldnf1sh", Z_d, ZPR64>; 1134 defm LDNF1SH_S_IMM : sve_mem_cldnf_si<0b1001, "ldnf1sh", Z_s, ZPR32>; 1135 defm LDNF1W_IMM : sve_mem_cldnf_si<0b1010, "ldnf1w", Z_s, ZPR32>; 1136 defm LDNF1W_D_IMM : sve_mem_cldnf_si<0b1011, "ldnf1w", Z_d, ZPR64>; 1137 defm LDNF1SB_D_IMM : sve_mem_cldnf_si<0b1100, "ldnf1sb", Z_d, ZPR64>; 1138 defm LDNF1SB_S_IMM : sve_mem_cldnf_si<0b1101, "ldnf1sb", Z_s, ZPR32>; 1139 defm LDNF1SB_H_IMM : sve_mem_cldnf_si<0b1110, "ldnf1sb", Z_h, ZPR16>; 1140 defm LDNF1D_IMM : sve_mem_cldnf_si<0b1111, "ldnf1d", Z_d, ZPR64>; 1141 1142 // First-faulting loads with reg+reg addressing. 1143 defm LDFF1B : sve_mem_cldff_ss<0b0000, "ldff1b", Z_b, ZPR8, GPR64shifted8>; 1144 defm LDFF1B_H : sve_mem_cldff_ss<0b0001, "ldff1b", Z_h, ZPR16, GPR64shifted8>; 1145 defm LDFF1B_S : sve_mem_cldff_ss<0b0010, "ldff1b", Z_s, ZPR32, GPR64shifted8>; 1146 defm LDFF1B_D : sve_mem_cldff_ss<0b0011, "ldff1b", Z_d, ZPR64, GPR64shifted8>; 1147 defm LDFF1SW_D : sve_mem_cldff_ss<0b0100, "ldff1sw", Z_d, ZPR64, GPR64shifted32>; 1148 defm LDFF1H : sve_mem_cldff_ss<0b0101, "ldff1h", Z_h, ZPR16, GPR64shifted16>; 1149 defm LDFF1H_S : sve_mem_cldff_ss<0b0110, "ldff1h", Z_s, ZPR32, GPR64shifted16>; 1150 defm LDFF1H_D : sve_mem_cldff_ss<0b0111, "ldff1h", Z_d, ZPR64, GPR64shifted16>; 1151 defm LDFF1SH_D : sve_mem_cldff_ss<0b1000, "ldff1sh", Z_d, ZPR64, GPR64shifted16>; 1152 defm LDFF1SH_S : sve_mem_cldff_ss<0b1001, "ldff1sh", Z_s, ZPR32, GPR64shifted16>; 1153 defm LDFF1W : sve_mem_cldff_ss<0b1010, "ldff1w", Z_s, ZPR32, GPR64shifted32>; 1154 defm LDFF1W_D : sve_mem_cldff_ss<0b1011, "ldff1w", Z_d, ZPR64, GPR64shifted32>; 1155 defm LDFF1SB_D : sve_mem_cldff_ss<0b1100, "ldff1sb", Z_d, ZPR64, GPR64shifted8>; 1156 defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>; 1157 defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>; 1158 defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>; 1159} // End HasSVE 1160 1161let Predicates = [HasSVE_or_SME] in { 1162 // LD(2|3|4) structured loads with reg+immediate 1163 defm LD2B_IMM : sve_mem_eld_si<0b00, 0b001, ZZ_b, "ld2b", simm4s2>; 1164 defm LD3B_IMM : sve_mem_eld_si<0b00, 0b010, ZZZ_b, "ld3b", simm4s3>; 1165 defm LD4B_IMM : sve_mem_eld_si<0b00, 0b011, ZZZZ_b, "ld4b", simm4s4>; 1166 defm LD2H_IMM : sve_mem_eld_si<0b01, 0b001, ZZ_h, "ld2h", simm4s2>; 1167 defm LD3H_IMM : sve_mem_eld_si<0b01, 0b010, ZZZ_h, "ld3h", simm4s3>; 1168 defm LD4H_IMM : sve_mem_eld_si<0b01, 0b011, ZZZZ_h, "ld4h", simm4s4>; 1169 defm LD2W_IMM : sve_mem_eld_si<0b10, 0b001, ZZ_s, "ld2w", simm4s2>; 1170 defm LD3W_IMM : sve_mem_eld_si<0b10, 0b010, ZZZ_s, "ld3w", simm4s3>; 1171 defm LD4W_IMM : sve_mem_eld_si<0b10, 0b011, ZZZZ_s, "ld4w", simm4s4>; 1172 defm LD2D_IMM : sve_mem_eld_si<0b11, 0b001, ZZ_d, "ld2d", simm4s2>; 1173 defm LD3D_IMM : sve_mem_eld_si<0b11, 0b010, ZZZ_d, "ld3d", simm4s3>; 1174 defm LD4D_IMM : sve_mem_eld_si<0b11, 0b011, ZZZZ_d, "ld4d", simm4s4>; 1175 let Predicates = [HasSVE2p1_or_SME2p1] in { 1176 defm LD2Q_IMM : sve_mem_eld_si<0b01, 0b100, ZZ_q, "ld2q", simm4s2>; 1177 defm LD3Q_IMM : sve_mem_eld_si<0b10, 0b100, ZZZ_q, "ld3q", simm4s3>; 1178 defm LD4Q_IMM : sve_mem_eld_si<0b11, 0b100, ZZZZ_q, "ld4q", simm4s4>; 1179 } 1180 1181 // LD(2|3|4) structured loads (register + register) 1182 def LD2B : sve_mem_eld_ss<0b00, 0b101, ZZ_b, "ld2b", GPR64NoXZRshifted8>; 1183 def LD3B : sve_mem_eld_ss<0b00, 0b110, ZZZ_b, "ld3b", GPR64NoXZRshifted8>; 1184 def LD4B : sve_mem_eld_ss<0b00, 0b111, ZZZZ_b, "ld4b", GPR64NoXZRshifted8>; 1185 def LD2H : sve_mem_eld_ss<0b01, 0b101, ZZ_h, "ld2h", GPR64NoXZRshifted16>; 1186 def LD3H : sve_mem_eld_ss<0b01, 0b110, ZZZ_h, "ld3h", GPR64NoXZRshifted16>; 1187 def LD4H : sve_mem_eld_ss<0b01, 0b111, ZZZZ_h, "ld4h", GPR64NoXZRshifted16>; 1188 def LD2W : sve_mem_eld_ss<0b10, 0b101, ZZ_s, "ld2w", GPR64NoXZRshifted32>; 1189 def LD3W : sve_mem_eld_ss<0b10, 0b110, ZZZ_s, "ld3w", GPR64NoXZRshifted32>; 1190 def LD4W : sve_mem_eld_ss<0b10, 0b111, ZZZZ_s, "ld4w", GPR64NoXZRshifted32>; 1191 def LD2D : sve_mem_eld_ss<0b11, 0b101, ZZ_d, "ld2d", GPR64NoXZRshifted64>; 1192 def LD3D : sve_mem_eld_ss<0b11, 0b110, ZZZ_d, "ld3d", GPR64NoXZRshifted64>; 1193 def LD4D : sve_mem_eld_ss<0b11, 0b111, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>; 1194 let Predicates = [HasSVE2p1_or_SME2p1] in { 1195 def LD2Q : sve_mem_eld_ss<0b01, 0b001, ZZ_q, "ld2q", GPR64NoXZRshifted128>; 1196 def LD3Q : sve_mem_eld_ss<0b10, 0b001, ZZZ_q, "ld3q", GPR64NoXZRshifted128>; 1197 def LD4Q : sve_mem_eld_ss<0b11, 0b001, ZZZZ_q, "ld4q", GPR64NoXZRshifted128>; 1198 } 1199} // End HasSVE_or_SME 1200 1201let Predicates = [HasSVE] in { 1202 // Gathers using unscaled 32-bit offsets, e.g. 1203 // ld1h z0.s, p0/z, [x0, z0.s, uxtw] 1204 defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; 1205 defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; 1206 defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; 1207 defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; 1208 defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; 1209 defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; 1210 defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; 1211 defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; 1212 defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; 1213 defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; 1214 1215 // Gathers using scaled 32-bit offsets, e.g. 1216 // ld1h z0.s, p0/z, [x0, z0.s, uxtw #1] 1217 defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; 1218 defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; 1219 defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; 1220 defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; 1221 defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; 1222 defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; 1223 1224 // Gathers using 32-bit pointers with scaled offset, e.g. 1225 // ld1h z0.s, p0/z, [z0.s, #16] 1226 defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv4i8>; 1227 defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv4i8>; 1228 defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv4i8>; 1229 defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv4i8>; 1230 defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv4i16>; 1231 defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv4i16>; 1232 defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv4i16>; 1233 defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv4i16>; 1234 defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv4i32>; 1235 defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv4i32>; 1236 1237 // Gathers using 64-bit pointers with scaled offset, e.g. 1238 // ld1h z0.d, p0/z, [z0.d, #16] 1239 defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv2i8>; 1240 defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv2i8>; 1241 defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv2i8>; 1242 defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv2i8>; 1243 defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv2i16>; 1244 defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv2i16>; 1245 defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv2i16>; 1246 defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv2i16>; 1247 defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm_z, nxv2i32>; 1248 defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm_z, nxv2i32>; 1249 defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv2i32>; 1250 defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv2i32>; 1251 defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm_z, nxv2i64>; 1252 defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm_z, nxv2i64>; 1253 1254 // Gathers using unscaled 64-bit offsets, e.g. 1255 // ld1h z0.d, p0/z, [x0, z0.d] 1256 defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_z, nxv2i8>; 1257 defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_z, nxv2i8>; 1258 defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather_z, nxv2i8>; 1259 defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_z, nxv2i8>; 1260 defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_z, nxv2i16>; 1261 defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_z, nxv2i16>; 1262 defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather_z, nxv2i16>; 1263 defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_z, nxv2i16>; 1264 defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_z, nxv2i32>; 1265 defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_z, nxv2i32>; 1266 defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather_z, nxv2i32>; 1267 defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_z, nxv2i32>; 1268 defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>; 1269 defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>; 1270 let Predicates = [HasSVE2p1] in { 1271 defm GLD1Q : sve_mem_128b_gld_64_unscaled<"ld1q", AArch64ld1q_gather_z>; 1272 } 1273 1274 // Gathers using scaled 64-bit offsets, e.g. 1275 // ld1h z0.d, p0/z, [x0, z0.d, lsl #1] 1276 defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; 1277 defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; 1278 defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; 1279 defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; 1280 defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; 1281 defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; 1282 defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; 1283 defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; 1284 defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>; 1285 defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>; 1286 1287 // Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g. 1288 // ld1h z0.d, p0/z, [x0, z0.d, uxtw] 1289 defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; 1290 defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; 1291 defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; 1292 defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; 1293 defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; 1294 defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; 1295 defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; 1296 defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; 1297 defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; 1298 defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; 1299 defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; 1300 defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; 1301 defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; 1302 defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; 1303 1304 // Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g. 1305 // ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] 1306 defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; 1307 defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; 1308 defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; 1309 defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; 1310 defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; 1311 defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; 1312 defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; 1313 defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; 1314 defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; 1315 defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; 1316 1317 multiclass sve_masked_gather_x2_scaled<ValueType Ty, SDPatternOperator Load, string Inst> { 1318 // base + vector of scaled offsets 1319 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, GPR64:$base, nxv2i64:$offs)), 1320 (!cast<Instruction>(Inst # _SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; 1321 // base + vector of signed 32bit scaled offsets 1322 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, GPR64:$base, (sext_inreg nxv2i64:$offs, nxv2i32))), 1323 (!cast<Instruction>(Inst # _SXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; 1324 // base + vector of unsigned 32bit scaled offsets 1325 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, GPR64:$base, (and nxv2i64:$offs, (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), 1326 (!cast<Instruction>(Inst # _UXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; 1327 } 1328 1329 multiclass sve_masked_gather_x2_unscaled<ValueType Ty, SDPatternOperator Load, string Inst, Operand ImmTy> { 1330 // vector of pointers + immediate offset (includes zero) 1331 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, (i64 ImmTy:$imm), nxv2i64:$ptrs)), 1332 (!cast<Instruction>(Inst # _IMM) PPR:$gp, ZPR:$ptrs, ImmTy:$imm)>; 1333 // base + vector of offsets 1334 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, GPR64:$base, nxv2i64:$offs)), 1335 (!cast<Instruction>(Inst) PPR:$gp, GPR64:$base, ZPR:$offs)>; 1336 // base + vector of signed 32bit offsets 1337 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, GPR64:$base, (sext_inreg nxv2i64:$offs, nxv2i32))), 1338 (!cast<Instruction>(Inst # _SXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>; 1339 // base + vector of unsigned 32bit offsets 1340 def : Pat<(Ty (Load (SVEDup0Undef), nxv2i1:$gp, GPR64:$base, (and nxv2i64:$offs, (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), 1341 (!cast<Instruction>(Inst # _UXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>; 1342 } 1343 1344 multiclass sve_masked_gather_x4<ValueType Ty, SDPatternOperator Load, Instruction Inst> { 1345 def : Pat<(Ty (Load (SVEDup0Undef), nxv4i1:$gp, GPR64:$base, nxv4i32:$offs)), 1346 (Inst PPR:$gp, GPR64:$base, ZPR:$offs)>; 1347 } 1348 1349 defm : sve_masked_gather_x2_scaled<nxv2i64, azext_masked_gather_i16_signed_scaled, "GLD1H_D">; 1350 defm : sve_masked_gather_x2_scaled<nxv2i64, sext_masked_gather_i16_signed_scaled, "GLD1SH_D">; 1351 defm : sve_masked_gather_x2_scaled<nxv2i64, azext_masked_gather_i32_signed_scaled, "GLD1W_D">; 1352 defm : sve_masked_gather_x2_scaled<nxv2i64, sext_masked_gather_i32_signed_scaled, "GLD1SW_D">; 1353 defm : sve_masked_gather_x2_scaled<nxv2i64, nonext_masked_gather_signed_scaled, "GLD1D">; 1354 defm : sve_masked_gather_x2_scaled<nxv2f16, nonext_masked_gather_signed_scaled, "GLD1H_D">; 1355 defm : sve_masked_gather_x2_scaled<nxv2f32, nonext_masked_gather_signed_scaled, "GLD1W_D">; 1356 defm : sve_masked_gather_x2_scaled<nxv2f64, nonext_masked_gather_signed_scaled, "GLD1D">; 1357 defm : sve_masked_gather_x2_scaled<nxv2bf16, nonext_masked_gather_signed_scaled, "GLD1H_D">; 1358 1359 defm : sve_masked_gather_x2_unscaled<nxv2i64, azext_masked_gather_i8_signed_unscaled, "GLD1B_D" , imm0_31>; 1360 defm : sve_masked_gather_x2_unscaled<nxv2i64, sext_masked_gather_i8_signed_unscaled, "GLD1SB_D", imm0_31>; 1361 defm : sve_masked_gather_x2_unscaled<nxv2i64, azext_masked_gather_i16_signed_unscaled, "GLD1H_D", uimm5s2>; 1362 defm : sve_masked_gather_x2_unscaled<nxv2i64, sext_masked_gather_i16_signed_unscaled, "GLD1SH_D", uimm5s2>; 1363 defm : sve_masked_gather_x2_unscaled<nxv2i64, azext_masked_gather_i32_signed_unscaled, "GLD1W_D", uimm5s4>; 1364 defm : sve_masked_gather_x2_unscaled<nxv2i64, sext_masked_gather_i32_signed_unscaled, "GLD1SW_D", uimm5s4>; 1365 defm : sve_masked_gather_x2_unscaled<nxv2i64, nonext_masked_gather_signed_unscaled, "GLD1D", uimm5s8>; 1366 defm : sve_masked_gather_x2_unscaled<nxv2f16, nonext_masked_gather_signed_unscaled, "GLD1H_D", uimm5s2>; 1367 defm : sve_masked_gather_x2_unscaled<nxv2f32, nonext_masked_gather_signed_unscaled, "GLD1W_D", uimm5s4>; 1368 defm : sve_masked_gather_x2_unscaled<nxv2f64, nonext_masked_gather_signed_unscaled, "GLD1D", uimm5s8>; 1369 defm : sve_masked_gather_x2_unscaled<nxv2bf16, nonext_masked_gather_signed_unscaled, "GLD1H_D", uimm5s2>; 1370 1371 defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_signed_scaled, GLD1H_S_SXTW_SCALED>; 1372 defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_signed_scaled, GLD1SH_S_SXTW_SCALED>; 1373 defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_signed_scaled, GLD1W_SXTW_SCALED>; 1374 defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_signed_scaled, GLD1H_S_SXTW_SCALED>; 1375 defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_signed_scaled, GLD1W_SXTW_SCALED>; 1376 defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_signed_scaled, GLD1H_S_SXTW_SCALED>; 1377 1378 defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i8_signed_unscaled, GLD1B_S_SXTW>; 1379 defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i8_signed_unscaled, GLD1SB_S_SXTW>; 1380 defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_signed_unscaled, GLD1H_S_SXTW>; 1381 defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_signed_unscaled, GLD1SH_S_SXTW>; 1382 defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_signed_unscaled, GLD1W_SXTW>; 1383 defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_signed_unscaled, GLD1H_S_SXTW>; 1384 defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_signed_unscaled, GLD1W_SXTW>; 1385 defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_signed_unscaled, GLD1H_S_SXTW>; 1386 1387 defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_unsigned_scaled, GLD1H_S_UXTW_SCALED>; 1388 defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_unsigned_scaled, GLD1SH_S_UXTW_SCALED>; 1389 defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_unsigned_scaled, GLD1W_UXTW_SCALED>; 1390 defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_unsigned_scaled, GLD1H_S_UXTW_SCALED>; 1391 defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_unsigned_scaled, GLD1W_UXTW_SCALED>; 1392 defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_unsigned_scaled, GLD1H_S_UXTW_SCALED>; 1393 1394 defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i8_unsigned_unscaled, GLD1B_S_UXTW>; 1395 defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i8_unsigned_unscaled, GLD1SB_S_UXTW>; 1396 defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_unsigned_unscaled, GLD1H_S_UXTW>; 1397 defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_unsigned_unscaled, GLD1SH_S_UXTW>; 1398 defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_unsigned_unscaled, GLD1W_UXTW>; 1399 defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_unsigned_unscaled, GLD1H_S_UXTW>; 1400 defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_unsigned_unscaled, GLD1W_UXTW>; 1401 defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_unsigned_unscaled, GLD1H_S_UXTW>; 1402} // End HasSVE 1403 1404let Predicates = [HasSVE_or_SME] in { 1405 // Non-temporal contiguous loads (register + immediate) 1406 defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; 1407 defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>; 1408 defm LDNT1W_ZRI : sve_mem_cldnt_si<0b10, "ldnt1w", Z_s, ZPR32>; 1409 defm LDNT1D_ZRI : sve_mem_cldnt_si<0b11, "ldnt1d", Z_d, ZPR64>; 1410 1411 // Non-temporal contiguous loads (register + register) 1412 defm LDNT1B_ZRR : sve_mem_cldnt_ss<0b00, "ldnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; 1413 defm LDNT1H_ZRR : sve_mem_cldnt_ss<0b01, "ldnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; 1414 defm LDNT1W_ZRR : sve_mem_cldnt_ss<0b10, "ldnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; 1415 defm LDNT1D_ZRR : sve_mem_cldnt_ss<0b11, "ldnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; 1416 1417 // contiguous store with immediates 1418 defm ST1B_IMM : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>; 1419 defm ST1B_H_IMM : sve_mem_cst_si<0b00, 0b01, "st1b", Z_h, ZPR16>; 1420 defm ST1B_S_IMM : sve_mem_cst_si<0b00, 0b10, "st1b", Z_s, ZPR32>; 1421 defm ST1B_D_IMM : sve_mem_cst_si<0b00, 0b11, "st1b", Z_d, ZPR64>; 1422 defm ST1H_IMM : sve_mem_cst_si<0b01, 0b01, "st1h", Z_h, ZPR16>; 1423 defm ST1H_S_IMM : sve_mem_cst_si<0b01, 0b10, "st1h", Z_s, ZPR32>; 1424 defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>; 1425 defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>; 1426 defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>; 1427 let Predicates = [HasSVE2p1] in { 1428 defm ST1W_Q_IMM : sve_mem_cst_si<0b10, 0b00, "st1w", Z_q, ZPR128>; 1429 } 1430 defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>; 1431 let Predicates = [HasSVE2p1] in { 1432 defm ST1D_Q_IMM : sve_mem_cst_si<0b11, 0b10, "st1d", Z_q, ZPR128>; 1433 } 1434 1435 // contiguous store with reg+reg addressing. 1436 defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>; 1437 defm ST1B_H : sve_mem_cst_ss<0b0001, "st1b", Z_h, ZPR16, GPR64NoXZRshifted8>; 1438 defm ST1B_S : sve_mem_cst_ss<0b0010, "st1b", Z_s, ZPR32, GPR64NoXZRshifted8>; 1439 defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>; 1440 defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>; 1441 defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>; 1442 defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>; 1443 defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; 1444 defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; 1445 let Predicates = [HasSVE2p1] in { 1446 defm ST1W_Q : sve_mem_cst_ss<0b1000, "st1w", Z_q, ZPR128, GPR64NoXZRshifted32>; 1447 } 1448 defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; 1449 let Predicates = [HasSVE2p1] in { 1450 defm ST1D_Q : sve_mem_cst_ss<0b1110, "st1d", Z_q, ZPR128, GPR64NoXZRshifted64>; 1451 } 1452 1453 multiclass sve_ld1q_pat<ValueType Ty, ValueType PredTy, SDPatternOperator Load1qOp, Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> { 1454 let AddedComplexity = 2 in { 1455 def _reg_imm : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$imm))), 1456 (RegImmInst PPR3bAny:$Pg, GPR64sp:$base, simm4s1:$imm)>; 1457 } 1458 1459 let AddedComplexity = 1 in { 1460 def _reg_reg : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (AddrCP GPR64sp:$base, GPR64:$offset))), 1461 (RegRegInst PPR3bAny:$Pg, GPR64sp:$base, GPR64:$offset)>; 1462 } 1463 1464 def _default : Pat<(Ty (Load1qOp (PredTy PPR3bAny:$Pg), (i64 GPR64sp:$base))), 1465 (RegImmInst PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>; 1466 } 1467 1468 multiclass sve_st1q_pat<ValueType DataType, ValueType PredTy, SDPatternOperator Store1qOp, Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> { 1469 let AddedComplexity = 2 in { 1470 def _reg_imm : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$imm)), 1471 (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, simm4s1:$imm)>; 1472 } 1473 1474 let AddedComplexity = 1 in { 1475 def _reg_reg : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (AddrCP GPR64sp:$base, GPR64:$offset)), 1476 (RegRegInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, GPR64:$offset)>; 1477 } 1478 1479 def _default : Pat<(Store1qOp (DataType ZPR128:$Zt), (PredTy PPR3bAny:$Pg), (i64 GPR64sp:$base)), 1480 (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>; 1481 } 1482 1483 // ld1quw/st1qw 1484 defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; 1485 defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; 1486 defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; 1487 defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; 1488 1489 // ld1qud/st1qd 1490 defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; 1491 defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; 1492 defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; 1493 defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; 1494 1495} // End HasSVE_or_SME 1496 1497let Predicates = [HasSVE] in { 1498 // Scatters using unpacked, unscaled 32-bit offsets, e.g. 1499 // st1h z0.d, p0, [x0, z0.d, uxtw] 1500 defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; 1501 defm SST1H_D : sve_mem_64b_sst_sv_32_unscaled<0b010, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; 1502 defm SST1W_D : sve_mem_64b_sst_sv_32_unscaled<0b100, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; 1503 defm SST1D : sve_mem_64b_sst_sv_32_unscaled<0b110, "st1d", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; 1504 1505 // Scatters using packed, unscaled 32-bit offsets, e.g. 1506 // st1h z0.s, p0, [x0, z0.s, uxtw] 1507 defm SST1B_S : sve_mem_32b_sst_sv_32_unscaled<0b001, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; 1508 defm SST1H_S : sve_mem_32b_sst_sv_32_unscaled<0b011, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; 1509 defm SST1W : sve_mem_32b_sst_sv_32_unscaled<0b101, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; 1510 1511 // Scatters using packed, scaled 32-bit offsets, e.g. 1512 // st1h z0.s, p0, [x0, z0.s, uxtw #1] 1513 defm SST1H_S : sve_mem_32b_sst_sv_32_scaled<0b011, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; 1514 defm SST1W : sve_mem_32b_sst_sv_32_scaled<0b101, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; 1515 1516 // Scatters using unpacked, scaled 32-bit offsets, e.g. 1517 // st1h z0.d, p0, [x0, z0.d, uxtw #1] 1518 defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; 1519 defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; 1520 defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; 1521 1522 // Scatters using 32/64-bit pointers with offset, e.g. 1523 // st1h z0.s, p0, [z0.s, #16] 1524 defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>; 1525 defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>; 1526 defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>; 1527 1528 // Scatters using 32/64-bit pointers with offset, e.g. 1529 // st1h z0.d, p0, [z0.d, #16] 1530 defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>; 1531 defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>; 1532 defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>; 1533 defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>; 1534 1535 // Scatters using unscaled 64-bit offsets, e.g. 1536 // st1h z0.d, p0, [x0, z0.d] 1537 defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>; 1538 defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>; 1539 defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>; 1540 defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>; 1541 let Predicates = [HasSVE2p1] in { 1542 defm SST1Q : sve_mem_sst_128b_64_unscaled<"st1q", AArch64st1q_scatter>; 1543 } 1544 1545 // Scatters using scaled 64-bit offsets, e.g. 1546 // st1h z0.d, p0, [x0, z0.d, lsl #1] 1547 defm SST1H_D : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>; 1548 defm SST1W_D : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>; 1549 defm SST1D : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>; 1550 1551 multiclass sve_masked_scatter_x2_scaled<ValueType Ty, SDPatternOperator Store, string Inst> { 1552 // base + vector of scaled offsets 1553 def : Pat<(Store Ty:$data, nxv2i1:$gp, GPR64:$base, nxv2i64:$offs), 1554 (!cast<Instruction>(Inst # _SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1555 // base + vector of signed 32bit scaled offsets 1556 def : Pat<(Store Ty:$data, nxv2i1:$gp, GPR64:$base, (sext_inreg nxv2i64:$offs, nxv2i32)), 1557 (!cast<Instruction>(Inst # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1558 // base + vector of unsigned 32bit scaled offsets 1559 def : Pat<(Store Ty:$data, nxv2i1:$gp, GPR64:$base, (and nxv2i64:$offs, (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))), 1560 (!cast<Instruction>(Inst # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1561 } 1562 1563 multiclass sve_masked_scatter_x2_unscaled<ValueType Ty, SDPatternOperator Store, string Inst, Operand ImmTy> { 1564 // vector of pointers + immediate offset (includes zero) 1565 def : Pat<(Store Ty:$data, nxv2i1:$gp, (i64 ImmTy:$imm), nxv2i64:$ptrs), 1566 (!cast<Instruction>(Inst # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, ImmTy:$imm)>; 1567 // base + vector of offsets 1568 def : Pat<(Store Ty:$data, nxv2i1:$gp, GPR64:$base, nxv2i64:$offs), 1569 (!cast<Instruction>(Inst) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1570 // base + vector of signed 32bit offsets 1571 def : Pat<(Store Ty:$data, nxv2i1:$gp, GPR64:$base, (sext_inreg nxv2i64:$offs, nxv2i32)), 1572 (!cast<Instruction>(Inst # _SXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1573 // base + vector of unsigned 32bit offsets 1574 def : Pat<(Store Ty:$data, nxv2i1:$gp, GPR64:$base, (and nxv2i64:$offs, (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))), 1575 (!cast<Instruction>(Inst # _UXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1576 } 1577 1578 multiclass sve_masked_scatter_x4<ValueType Ty, SDPatternOperator Store, Instruction Inst> { 1579 def : Pat<(Store Ty:$data, nxv4i1:$gp, GPR64:$base, nxv4i32:$offs), 1580 (Inst ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; 1581 } 1582 1583 defm : sve_masked_scatter_x2_scaled<nxv2i64, trunc_masked_scatter_i16_signed_scaled, "SST1H_D">; 1584 defm : sve_masked_scatter_x2_scaled<nxv2i64, trunc_masked_scatter_i32_signed_scaled, "SST1W_D">; 1585 defm : sve_masked_scatter_x2_scaled<nxv2i64, nontrunc_masked_scatter_signed_scaled, "SST1D">; 1586 defm : sve_masked_scatter_x2_scaled<nxv2f16, nontrunc_masked_scatter_signed_scaled, "SST1H_D">; 1587 defm : sve_masked_scatter_x2_scaled<nxv2f32, nontrunc_masked_scatter_signed_scaled, "SST1W_D">; 1588 defm : sve_masked_scatter_x2_scaled<nxv2f64, nontrunc_masked_scatter_signed_scaled, "SST1D">; 1589 defm : sve_masked_scatter_x2_scaled<nxv2bf16, nontrunc_masked_scatter_signed_scaled, "SST1H_D">; 1590 1591 defm : sve_masked_scatter_x2_unscaled<nxv2i64, trunc_masked_scatter_i8_signed_unscaled, "SST1B_D" , imm0_31>; 1592 defm : sve_masked_scatter_x2_unscaled<nxv2i64, trunc_masked_scatter_i16_signed_unscaled, "SST1H_D", uimm5s2>; 1593 defm : sve_masked_scatter_x2_unscaled<nxv2i64, trunc_masked_scatter_i32_signed_unscaled, "SST1W_D", uimm5s4>; 1594 defm : sve_masked_scatter_x2_unscaled<nxv2i64, nontrunc_masked_scatter_signed_unscaled, "SST1D", uimm5s8>; 1595 defm : sve_masked_scatter_x2_unscaled<nxv2f16, nontrunc_masked_scatter_signed_unscaled, "SST1H_D", uimm5s2>; 1596 defm : sve_masked_scatter_x2_unscaled<nxv2f32, nontrunc_masked_scatter_signed_unscaled, "SST1W_D", uimm5s4>; 1597 defm : sve_masked_scatter_x2_unscaled<nxv2f64, nontrunc_masked_scatter_signed_unscaled, "SST1D", uimm5s8>; 1598 defm : sve_masked_scatter_x2_unscaled<nxv2bf16, nontrunc_masked_scatter_signed_unscaled, "SST1H_D", uimm5s2>; 1599 1600 defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_signed_scaled, SST1H_S_SXTW_SCALED>; 1601 defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_signed_scaled, SST1W_SXTW_SCALED>; 1602 defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_signed_scaled, SST1H_S_SXTW_SCALED>; 1603 defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_signed_scaled, SST1W_SXTW_SCALED>; 1604 defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_signed_scaled, SST1H_S_SXTW_SCALED>; 1605 1606 defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i8_signed_unscaled, SST1B_S_SXTW>; 1607 defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_signed_unscaled, SST1H_S_SXTW>; 1608 defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_signed_unscaled, SST1W_SXTW>; 1609 defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_signed_unscaled, SST1H_S_SXTW>; 1610 defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_signed_unscaled, SST1W_SXTW>; 1611 defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_signed_unscaled, SST1H_S_SXTW>; 1612 1613 defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_unsigned_scaled, SST1H_S_UXTW_SCALED>; 1614 defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_unsigned_scaled, SST1W_UXTW_SCALED>; 1615 defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_unsigned_scaled, SST1H_S_UXTW_SCALED>; 1616 defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_unsigned_scaled, SST1W_UXTW_SCALED>; 1617 defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_unsigned_scaled, SST1H_S_UXTW_SCALED>; 1618 1619 defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i8_unsigned_unscaled, SST1B_S_UXTW>; 1620 defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_unsigned_unscaled, SST1H_S_UXTW>; 1621 defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_unsigned_unscaled, SST1W_UXTW>; 1622 defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_unsigned_unscaled, SST1H_S_UXTW>; 1623 defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_unsigned_unscaled, SST1W_UXTW>; 1624 defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_unsigned_unscaled, SST1H_S_UXTW>; 1625} // End HasSVE 1626 1627let Predicates = [HasSVE_or_SME] in { 1628 // ST(2|3|4) structured stores (register + immediate) 1629 defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>; 1630 defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>; 1631 defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>; 1632 defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>; 1633 defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>; 1634 defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>; 1635 defm ST2W_IMM : sve_mem_est_si<0b10, 0b01, ZZ_s, "st2w", simm4s2>; 1636 defm ST3W_IMM : sve_mem_est_si<0b10, 0b10, ZZZ_s, "st3w", simm4s3>; 1637 defm ST4W_IMM : sve_mem_est_si<0b10, 0b11, ZZZZ_s, "st4w", simm4s4>; 1638 defm ST2D_IMM : sve_mem_est_si<0b11, 0b01, ZZ_d, "st2d", simm4s2>; 1639 defm ST3D_IMM : sve_mem_est_si<0b11, 0b10, ZZZ_d, "st3d", simm4s3>; 1640 defm ST4D_IMM : sve_mem_est_si<0b11, 0b11, ZZZZ_d, "st4d", simm4s4>; 1641 let Predicates = [HasSVE2p1_or_SME2p1] in { 1642 defm ST2Q_IMM : sve_mem_128b_est_si<0b01, ZZ_q, "st2q", simm4s2>; 1643 defm ST3Q_IMM : sve_mem_128b_est_si<0b10, ZZZ_q, "st3q", simm4s3>; 1644 defm ST4Q_IMM : sve_mem_128b_est_si<0b11, ZZZZ_q, "st4q", simm4s4>; 1645 } 1646 1647 // ST(2|3|4) structured stores (register + register) 1648 def ST2B : sve_mem_est_ss<0b00, 0b01, ZZ_b, "st2b", GPR64NoXZRshifted8>; 1649 def ST3B : sve_mem_est_ss<0b00, 0b10, ZZZ_b, "st3b", GPR64NoXZRshifted8>; 1650 def ST4B : sve_mem_est_ss<0b00, 0b11, ZZZZ_b, "st4b", GPR64NoXZRshifted8>; 1651 def ST2H : sve_mem_est_ss<0b01, 0b01, ZZ_h, "st2h", GPR64NoXZRshifted16>; 1652 def ST3H : sve_mem_est_ss<0b01, 0b10, ZZZ_h, "st3h", GPR64NoXZRshifted16>; 1653 def ST4H : sve_mem_est_ss<0b01, 0b11, ZZZZ_h, "st4h", GPR64NoXZRshifted16>; 1654 def ST2W : sve_mem_est_ss<0b10, 0b01, ZZ_s, "st2w", GPR64NoXZRshifted32>; 1655 def ST3W : sve_mem_est_ss<0b10, 0b10, ZZZ_s, "st3w", GPR64NoXZRshifted32>; 1656 def ST4W : sve_mem_est_ss<0b10, 0b11, ZZZZ_s, "st4w", GPR64NoXZRshifted32>; 1657 def ST2D : sve_mem_est_ss<0b11, 0b01, ZZ_d, "st2d", GPR64NoXZRshifted64>; 1658 def ST3D : sve_mem_est_ss<0b11, 0b10, ZZZ_d, "st3d", GPR64NoXZRshifted64>; 1659 def ST4D : sve_mem_est_ss<0b11, 0b11, ZZZZ_d, "st4d", GPR64NoXZRshifted64>; 1660 let Predicates = [HasSVE2p1_or_SME2p1] in { 1661 def ST2Q : sve_mem_128b_est_ss<0b01, ZZ_q, "st2q", GPR64NoXZRshifted128>; 1662 def ST3Q : sve_mem_128b_est_ss<0b10, ZZZ_q, "st3q", GPR64NoXZRshifted128>; 1663 def ST4Q : sve_mem_128b_est_ss<0b11, ZZZZ_q, "st4q", GPR64NoXZRshifted128>; 1664 } 1665 // Non-temporal contiguous stores (register + immediate) 1666 defm STNT1B_ZRI : sve_mem_cstnt_si<0b00, "stnt1b", Z_b, ZPR8>; 1667 defm STNT1H_ZRI : sve_mem_cstnt_si<0b01, "stnt1h", Z_h, ZPR16>; 1668 defm STNT1W_ZRI : sve_mem_cstnt_si<0b10, "stnt1w", Z_s, ZPR32>; 1669 defm STNT1D_ZRI : sve_mem_cstnt_si<0b11, "stnt1d", Z_d, ZPR64>; 1670 1671 // Non-temporal contiguous stores (register + register) 1672 defm STNT1B_ZRR : sve_mem_cstnt_ss<0b00, "stnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; 1673 defm STNT1H_ZRR : sve_mem_cstnt_ss<0b01, "stnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; 1674 defm STNT1W_ZRR : sve_mem_cstnt_ss<0b10, "stnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; 1675 defm STNT1D_ZRR : sve_mem_cstnt_ss<0b11, "stnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; 1676 1677 // Fill/Spill 1678 defm LDR_ZXI : sve_mem_z_fill<"ldr">; 1679 defm LDR_PXI : sve_mem_p_fill<"ldr">; 1680 defm STR_ZXI : sve_mem_z_spill<"str">; 1681 defm STR_PXI : sve_mem_p_spill<"str">; 1682 1683 // Contiguous prefetch (register + immediate) 1684 defm PRFB_PRI : sve_mem_prfm_si<0b00, "prfb">; 1685 defm PRFH_PRI : sve_mem_prfm_si<0b01, "prfh">; 1686 defm PRFW_PRI : sve_mem_prfm_si<0b10, "prfw">; 1687 defm PRFD_PRI : sve_mem_prfm_si<0b11, "prfd">; 1688 1689 // Contiguous prefetch (register + register) 1690 def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>; 1691 def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>; 1692 def PRFW_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; 1693 def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>; 1694 1695 multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, ComplexPattern AddrCP> { 1696 // reg + imm 1697 let AddedComplexity = 2 in { 1698 def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)), 1699 (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, simm6s1:$offset)>; 1700 } 1701 1702 // reg + reg 1703 let AddedComplexity = 1 in { 1704 def _reg_reg : Pat<(prefetch (PredTy PPR_3b:$gp), (AddrCP GPR64sp:$base, GPR64:$index), (i32 sve_prfop:$prfop)), 1705 (RegRegInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, GPR64:$index)>; 1706 } 1707 1708 // default fallback 1709 def _default : Pat<(prefetch (PredTy PPR_3b:$gp), GPR64:$base, (i32 sve_prfop:$prfop)), 1710 (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, (i64 0))>; 1711 } 1712 1713 defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, am_sve_regreg_lsl0>; 1714 defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, am_sve_regreg_lsl1>; 1715 defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFW_PRR, am_sve_regreg_lsl2>; 1716 defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, am_sve_regreg_lsl3>; 1717} // End HasSVE_or_SME 1718 1719let Predicates = [HasSVE] in { 1720 // Gather prefetch using scaled 32-bit offsets, e.g. 1721 // prfh pldl1keep, p0, [x0, z0.s, uxtw #1] 1722 defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>; 1723 defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16, int_aarch64_sve_prfh_gather_sxtw_index, int_aarch64_sve_prfh_gather_uxtw_index>; 1724 defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32, int_aarch64_sve_prfw_gather_sxtw_index, int_aarch64_sve_prfw_gather_uxtw_index>; 1725 defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64, int_aarch64_sve_prfd_gather_sxtw_index, int_aarch64_sve_prfd_gather_uxtw_index>; 1726 1727 // Gather prefetch using unpacked, scaled 32-bit offsets, e.g. 1728 // prfh pldl1keep, p0, [x0, z0.d, uxtw #1] 1729 defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>; 1730 defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16, int_aarch64_sve_prfh_gather_sxtw_index, int_aarch64_sve_prfh_gather_uxtw_index>; 1731 defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32, int_aarch64_sve_prfw_gather_sxtw_index, int_aarch64_sve_prfw_gather_uxtw_index>; 1732 defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64, int_aarch64_sve_prfd_gather_sxtw_index, int_aarch64_sve_prfd_gather_uxtw_index>; 1733 1734 // Gather prefetch using scaled 64-bit offsets, e.g. 1735 // prfh pldl1keep, p0, [x0, z0.d, lsl #1] 1736 defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8, int_aarch64_sve_prfb_gather_index>; 1737 defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16, int_aarch64_sve_prfh_gather_index>; 1738 defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32, int_aarch64_sve_prfw_gather_index>; 1739 defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64, int_aarch64_sve_prfd_gather_index>; 1740 1741 // Gather prefetch using 32/64-bit pointers with offset, e.g. 1742 // prfh pldl1keep, p0, [z0.s, #16] 1743 // prfh pldl1keep, p0, [z0.d, #16] 1744 defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_prfb_gather_scalar_offset>; 1745 defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_prfh_gather_scalar_offset>; 1746 defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_prfw_gather_scalar_offset>; 1747 defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_prfd_gather_scalar_offset>; 1748 1749 defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_prfb_gather_scalar_offset>; 1750 defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_prfh_gather_scalar_offset>; 1751 defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_prfw_gather_scalar_offset>; 1752 defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_prfd_gather_scalar_offset>; 1753 1754 defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">; 1755 defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">; 1756 defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">; 1757 defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">; 1758 1759 def : Pat<(nxv4i32 (int_aarch64_sve_adrb nxv4i32:$Op1, nxv4i32:$Op2)), 1760 (ADR_LSL_ZZZ_S_0 $Op1, $Op2)>; 1761 def : Pat<(nxv4i32 (int_aarch64_sve_adrh nxv4i32:$Op1, nxv4i32:$Op2)), 1762 (ADR_LSL_ZZZ_S_1 $Op1, $Op2)>; 1763 def : Pat<(nxv4i32 (int_aarch64_sve_adrw nxv4i32:$Op1, nxv4i32:$Op2)), 1764 (ADR_LSL_ZZZ_S_2 $Op1, $Op2)>; 1765 def : Pat<(nxv4i32 (int_aarch64_sve_adrd nxv4i32:$Op1, nxv4i32:$Op2)), 1766 (ADR_LSL_ZZZ_S_3 $Op1, $Op2)>; 1767 1768 def : Pat<(nxv2i64 (int_aarch64_sve_adrb nxv2i64:$Op1, nxv2i64:$Op2)), 1769 (ADR_LSL_ZZZ_D_0 $Op1, $Op2)>; 1770 def : Pat<(nxv2i64 (int_aarch64_sve_adrh nxv2i64:$Op1, nxv2i64:$Op2)), 1771 (ADR_LSL_ZZZ_D_1 $Op1, $Op2)>; 1772 def : Pat<(nxv2i64 (int_aarch64_sve_adrw nxv2i64:$Op1, nxv2i64:$Op2)), 1773 (ADR_LSL_ZZZ_D_2 $Op1, $Op2)>; 1774 def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)), 1775 (ADR_LSL_ZZZ_D_3 $Op1, $Op2)>; 1776 1777 // Patterns to generate adr instruction. 1778 // adr z0.d, [z0.d, z0.d, uxtw] 1779 def : Pat<(add nxv2i64:$Op1, 1780 (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), 1781 (ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>; 1782 // adr z0.d, [z0.d, z0.d, sxtw] 1783 def : Pat<(add nxv2i64:$Op1, 1784 (nxv2i64 (sext_inreg nxv2i64:$Op2, nxv2i32))), 1785 (ADR_SXTW_ZZZ_D_0 $Op1, $Op2)>; 1786 1787 // adr z0.s, [z0.s, z0.s, lsl #<shift>] 1788 // adr z0.d, [z0.d, z0.d, lsl #<shift>] 1789 multiclass adrShiftPat<ValueType Ty, ValueType PredTy, ValueType ShiftTy, Instruction DestAdrIns, int ShiftAmt> { 1790 def : Pat<(add Ty:$Op1, 1791 (Ty (AArch64lsl_p (PredTy (SVEAllActive)), 1792 Ty:$Op2, 1793 (Ty (splat_vector (ShiftTy ShiftAmt)))))), 1794 (DestAdrIns $Op1, $Op2)>; 1795 } 1796 defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_1, 1>; 1797 defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_2, 2>; 1798 defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_3, 3>; 1799 defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_1, 1>; 1800 defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_2, 2>; 1801 defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_3, 3>; 1802 1803 // adr z0.d, [z0.d, z0.d, uxtw #<shift>] 1804 // adr z0.d, [z0.d, z0.d, sxtw #<shift>] 1805 multiclass adrXtwShiftPat<ValueType Ty, ValueType PredTy, int ShiftAmt> { 1806 def : Pat<(add Ty:$Op1, 1807 (Ty (AArch64lsl_p (PredTy (SVEAllActive)), 1808 (Ty (and Ty:$Op2, (Ty (splat_vector (i64 0xFFFFFFFF))))), 1809 (Ty (splat_vector (i64 ShiftAmt)))))), 1810 (!cast<Instruction>("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; 1811 1812 def : Pat<(add Ty:$Op1, 1813 (Ty (AArch64lsl_p (PredTy (SVEAllActive)), 1814 (Ty (sext_inreg Ty:$Op2, nxv2i32)), 1815 (Ty (splat_vector (i64 ShiftAmt)))))), 1816 (!cast<Instruction>("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; 1817 } 1818 defm : adrXtwShiftPat<nxv2i64, nxv2i1, 1>; 1819 defm : adrXtwShiftPat<nxv2i64, nxv2i1, 2>; 1820 defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>; 1821} // End HasSVE 1822 1823let Predicates = [HasSVE_or_SME] in { 1824 defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>; 1825 1826 defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>; 1827 defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>; 1828 defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>; 1829 defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2", AArch64uzp2>; 1830 defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>; 1831 defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>; 1832 1833 defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1, int_aarch64_sve_zip1_b16, int_aarch64_sve_zip1_b32, int_aarch64_sve_zip1_b64>; 1834 defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2, int_aarch64_sve_zip2_b16, int_aarch64_sve_zip2_b32, int_aarch64_sve_zip2_b64>; 1835 defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1, int_aarch64_sve_uzp1_b16, int_aarch64_sve_uzp1_b32, int_aarch64_sve_uzp1_b64>; 1836 defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2, int_aarch64_sve_uzp2_b16, int_aarch64_sve_uzp2_b32, int_aarch64_sve_uzp2_b64>; 1837 defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1, int_aarch64_sve_trn1_b16, int_aarch64_sve_trn1_b32, int_aarch64_sve_trn1_b64>; 1838 defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2, int_aarch64_sve_trn2_b16, int_aarch64_sve_trn2_b32, int_aarch64_sve_trn2_b64>; 1839 1840 // Extract lo/hi halves of legal predicate types. 1841 def : Pat<(nxv1i1 (extract_subvector nxv2i1:$Ps, (i64 0))), 1842 (PUNPKLO_PP PPR:$Ps)>; 1843 def : Pat<(nxv1i1 (extract_subvector nxv2i1:$Ps, (i64 1))), 1844 (PUNPKHI_PP PPR:$Ps)>; 1845 def : Pat<(nxv2i1 (extract_subvector nxv4i1:$Ps, (i64 0))), 1846 (PUNPKLO_PP PPR:$Ps)>; 1847 def : Pat<(nxv2i1 (extract_subvector nxv4i1:$Ps, (i64 2))), 1848 (PUNPKHI_PP PPR:$Ps)>; 1849 def : Pat<(nxv4i1 (extract_subvector nxv8i1:$Ps, (i64 0))), 1850 (PUNPKLO_PP PPR:$Ps)>; 1851 def : Pat<(nxv4i1 (extract_subvector nxv8i1:$Ps, (i64 4))), 1852 (PUNPKHI_PP PPR:$Ps)>; 1853 def : Pat<(nxv8i1 (extract_subvector nxv16i1:$Ps, (i64 0))), 1854 (PUNPKLO_PP PPR:$Ps)>; 1855 def : Pat<(nxv8i1 (extract_subvector nxv16i1:$Ps, (i64 8))), 1856 (PUNPKHI_PP PPR:$Ps)>; 1857 1858 def : Pat<(nxv1i1 (extract_subvector nxv4i1:$Ps, (i64 0))), 1859 (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; 1860 def : Pat<(nxv1i1 (extract_subvector nxv4i1:$Ps, (i64 1))), 1861 (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>; 1862 def : Pat<(nxv1i1 (extract_subvector nxv4i1:$Ps, (i64 2))), 1863 (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; 1864 def : Pat<(nxv1i1 (extract_subvector nxv4i1:$Ps, (i64 3))), 1865 (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; 1866 def : Pat<(nxv2i1 (extract_subvector nxv8i1:$Ps, (i64 0))), 1867 (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; 1868 def : Pat<(nxv2i1 (extract_subvector nxv8i1:$Ps, (i64 2))), 1869 (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>; 1870 def : Pat<(nxv2i1 (extract_subvector nxv8i1:$Ps, (i64 4))), 1871 (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; 1872 def : Pat<(nxv2i1 (extract_subvector nxv8i1:$Ps, (i64 6))), 1873 (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; 1874 def : Pat<(nxv4i1 (extract_subvector nxv16i1:$Ps, (i64 0))), 1875 (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>; 1876 def : Pat<(nxv4i1 (extract_subvector nxv16i1:$Ps, (i64 4))), 1877 (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>; 1878 def : Pat<(nxv4i1 (extract_subvector nxv16i1:$Ps, (i64 8))), 1879 (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>; 1880 def : Pat<(nxv4i1 (extract_subvector nxv16i1:$Ps, (i64 12))), 1881 (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>; 1882 1883 1884 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 0))), 1885 (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; 1886 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 1))), 1887 (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; 1888 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 2))), 1889 (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; 1890 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 3))), 1891 (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; 1892 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 4))), 1893 (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; 1894 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 5))), 1895 (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; 1896 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 6))), 1897 (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; 1898 def : Pat<(nxv1i1 (extract_subvector nxv8i1:$Ps, (i64 7))), 1899 (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; 1900 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 0))), 1901 (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; 1902 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 2))), 1903 (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>; 1904 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 4))), 1905 (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; 1906 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 6))), 1907 (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>; 1908 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 8))), 1909 (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; 1910 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 10))), 1911 (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>; 1912 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 12))), 1913 (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; 1914 def : Pat<(nxv2i1 (extract_subvector nxv16i1:$Ps, (i64 14))), 1915 (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>; 1916 1917 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 0))), 1918 (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; 1919 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 1))), 1920 (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; 1921 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 2))), 1922 (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; 1923 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 3))), 1924 (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))))>; 1925 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 4))), 1926 (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; 1927 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 5))), 1928 (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; 1929 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 6))), 1930 (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; 1931 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 7))), 1932 (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))))>; 1933 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 8))), 1934 (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; 1935 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 9))), 1936 (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; 1937 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 10))), 1938 (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; 1939 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 11))), 1940 (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))))>; 1941 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 12))), 1942 (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; 1943 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 13))), 1944 (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; 1945 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 14))), 1946 (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; 1947 def : Pat<(nxv1i1 (extract_subvector nxv16i1:$Ps, (i64 15))), 1948 (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))))>; 1949 1950 // Extract subvectors from FP SVE vectors 1951 def : Pat<(nxv2f16 (extract_subvector nxv4f16:$Zs, (i64 0))), 1952 (UUNPKLO_ZZ_D ZPR:$Zs)>; 1953 def : Pat<(nxv2f16 (extract_subvector nxv4f16:$Zs, (i64 2))), 1954 (UUNPKHI_ZZ_D ZPR:$Zs)>; 1955 def : Pat<(nxv4f16 (extract_subvector nxv8f16:$Zs, (i64 0))), 1956 (UUNPKLO_ZZ_S ZPR:$Zs)>; 1957 def : Pat<(nxv4f16 (extract_subvector nxv8f16:$Zs, (i64 4))), 1958 (UUNPKHI_ZZ_S ZPR:$Zs)>; 1959 def : Pat<(nxv2f32 (extract_subvector nxv4f32:$Zs, (i64 0))), 1960 (UUNPKLO_ZZ_D ZPR:$Zs)>; 1961 def : Pat<(nxv2f32 (extract_subvector nxv4f32:$Zs, (i64 2))), 1962 (UUNPKHI_ZZ_D ZPR:$Zs)>; 1963 1964 def : Pat<(nxv2bf16 (extract_subvector nxv4bf16:$Zs, (i64 0))), 1965 (UUNPKLO_ZZ_D ZPR:$Zs)>; 1966 def : Pat<(nxv2bf16 (extract_subvector nxv4bf16:$Zs, (i64 2))), 1967 (UUNPKHI_ZZ_D ZPR:$Zs)>; 1968 def : Pat<(nxv4bf16 (extract_subvector nxv8bf16:$Zs, (i64 0))), 1969 (UUNPKLO_ZZ_S ZPR:$Zs)>; 1970 def : Pat<(nxv4bf16 (extract_subvector nxv8bf16:$Zs, (i64 4))), 1971 (UUNPKHI_ZZ_S ZPR:$Zs)>; 1972 1973 def : Pat<(nxv2f16 (extract_subvector nxv8f16:$Zs, (i64 0))), 1974 (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; 1975 def : Pat<(nxv2f16 (extract_subvector nxv8f16:$Zs, (i64 2))), 1976 (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; 1977 def : Pat<(nxv2f16 (extract_subvector nxv8f16:$Zs, (i64 4))), 1978 (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; 1979 def : Pat<(nxv2f16 (extract_subvector nxv8f16:$Zs, (i64 6))), 1980 (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; 1981 1982 def : Pat<(nxv2bf16 (extract_subvector nxv8bf16:$Zs, (i64 0))), 1983 (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; 1984 def : Pat<(nxv2bf16 (extract_subvector nxv8bf16:$Zs, (i64 2))), 1985 (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>; 1986 def : Pat<(nxv2bf16 (extract_subvector nxv8bf16:$Zs, (i64 4))), 1987 (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; 1988 def : Pat<(nxv2bf16 (extract_subvector nxv8bf16:$Zs, (i64 6))), 1989 (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>; 1990 1991 // Insert subvectors into FP SVE vectors. 1992 foreach VT = [nxv4f16, nxv4f32, nxv4bf16] in 1993 foreach idx = [0, 2] in 1994 def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.HalfLength:$src, (i64 idx))), 1995 (UZP1_ZZZ_S $src, $src)>; 1996 1997 foreach VT = [nxv8f16, nxv8bf16] in { 1998 foreach idx = [0, 4] in 1999 def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.HalfLength:$src, (i64 idx))), 2000 (UZP1_ZZZ_H $src, $src)>; 2001 2002 foreach idx = [0, 2, 4, 6] in 2003 def : Pat<(VT (vector_insert_subvec undef, SVEType<VT>.QuarterLength:$src, (i64 idx))), 2004 (UZP1_ZZZ_H (UZP1_ZZZ_H $src, $src), (UZP1_ZZZ_H $src, $src))>; 2005 } 2006 2007 // extract/insert 64-bit fixed length vector from/into a scalable vector 2008 foreach VT = [v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16] in { 2009 def : Pat<(VT (vector_extract_subvec NEONType<VT>.SVEContainer:$Zs, (i64 0))), 2010 (EXTRACT_SUBREG ZPR:$Zs, dsub)>; 2011 def : Pat<(NEONType<VT>.SVEContainer (vector_insert_subvec undef, (VT V64:$src), (i64 0))), 2012 (INSERT_SUBREG (IMPLICIT_DEF), $src, dsub)>; 2013 } 2014 2015 // extract/insert 128-bit fixed length vector from/into a scalable vector 2016 foreach VT = [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16] in { 2017 def : Pat<(VT (vector_extract_subvec NEONType<VT>.SVEContainer:$Zs, (i64 0))), 2018 (EXTRACT_SUBREG ZPR:$Zs, zsub)>; 2019 def : Pat<(NEONType<VT>.SVEContainer (vector_insert_subvec undef, (VT V128:$src), (i64 0))), 2020 (INSERT_SUBREG (IMPLICIT_DEF), $src, zsub)>; 2021 } 2022 2023 // Concatenate two predicates. 2024 def : Pat<(nxv2i1 (concat_vectors nxv1i1:$p1, nxv1i1:$p2)), 2025 (UZP1_PPP_D $p1, $p2)>; 2026 def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), 2027 (UZP1_PPP_S $p1, $p2)>; 2028 def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)), 2029 (UZP1_PPP_H $p1, $p2)>; 2030 def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)), 2031 (UZP1_PPP_B $p1, $p2)>; 2032 2033 // Concatenate two floating point vectors. 2034 def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)), 2035 (UZP1_ZZZ_S $v1, $v2)>; 2036 def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)), 2037 (UZP1_ZZZ_H $v1, $v2)>; 2038 def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)), 2039 (UZP1_ZZZ_S $v1, $v2)>; 2040 def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)), 2041 (UZP1_ZZZ_S $v1, $v2)>; 2042 def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)), 2043 (UZP1_ZZZ_H $v1, $v2)>; 2044 2045 // Splice with lane equal to -1 2046 def : Pat<(nxv16i8 (vector_splice nxv16i8:$Z1, nxv16i8:$Z2, (i64 -1))), 2047 (INSR_ZV_B ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), 2048 (LASTB_VPZ_B (PTRUE_B 31), ZPR:$Z1), bsub))>; 2049 def : Pat<(nxv8i16 (vector_splice nxv8i16:$Z1, nxv8i16:$Z2, (i64 -1))), 2050 (INSR_ZV_H ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), 2051 (LASTB_VPZ_H (PTRUE_H 31), ZPR:$Z1), hsub))>; 2052 def : Pat<(nxv4i32 (vector_splice nxv4i32:$Z1, nxv4i32:$Z2, (i64 -1))), 2053 (INSR_ZV_S ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), 2054 (LASTB_VPZ_S (PTRUE_S 31), ZPR:$Z1), ssub))>; 2055 def : Pat<(nxv2i64 (vector_splice nxv2i64:$Z1, nxv2i64:$Z2, (i64 -1))), 2056 (INSR_ZV_D ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF), 2057 (LASTB_VPZ_D (PTRUE_D 31), ZPR:$Z1), dsub))>; 2058 2059 // Splice with lane bigger or equal to 0 2060 foreach VT = [nxv16i8] in 2061 def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))), 2062 (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; 2063 2064 foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in 2065 def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))), 2066 (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; 2067 2068 foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in 2069 def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))), 2070 (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; 2071 2072 foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in 2073 def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))), 2074 (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>; 2075 2076 defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; 2077 defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; 2078 defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; 2079 defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", SETGT, SETLT>; 2080 defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", SETEQ, SETEQ>; 2081 defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", SETNE, SETNE>; 2082 2083 defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq", int_aarch64_sve_cmpeq_wide>; 2084 defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne", int_aarch64_sve_cmpne_wide>; 2085 defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge", int_aarch64_sve_cmpge_wide>; 2086 defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt", int_aarch64_sve_cmpgt_wide>; 2087 defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt", int_aarch64_sve_cmplt_wide>; 2088 defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple", int_aarch64_sve_cmple_wide>; 2089 defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs", int_aarch64_sve_cmphs_wide>; 2090 defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi", int_aarch64_sve_cmphi_wide>; 2091 defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo", int_aarch64_sve_cmplo_wide>; 2092 defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls", int_aarch64_sve_cmpls_wide>; 2093 2094 defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, SETLE>; 2095 defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, SETLT>; 2096 defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, SETGT>; 2097 defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, SETGE>; 2098 defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, SETEQ>; 2099 defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, SETEQ>; 2100 defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, SETULE>; 2101 defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, SETULT>; 2102 defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>; 2103 defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>; 2104 2105 defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; 2106 defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; 2107 defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; 2108 defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETUNE, SETNE, SETUNE, SETNE>; 2109 defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>; 2110 defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>; 2111 defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>; 2112 2113 defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>; 2114 defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>; 2115 defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>; 2116 defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>; 2117 defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>; 2118 defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>; 2119 2120 defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>; 2121 defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, null_frag>; 2122 defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>; 2123 defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, null_frag>; 2124 2125 defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>; 2126 defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, null_frag>; 2127 defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>; 2128 defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, null_frag>; 2129 2130 def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; 2131 def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; 2132 def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>; 2133 def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>; 2134 2135 def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; 2136 def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; 2137 def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; 2138 2139 defm CNTB_XPiI : sve_int_count<0b000, "cntb", int_aarch64_sve_cntb>; 2140 defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>; 2141 defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>; 2142 defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>; 2143 defm CNTP_XPP : sve_int_pcount_pred<0b000, "cntp", int_aarch64_sve_cntp>; 2144 2145 def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)), 2146 (CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1), 2147 (BRKB_PPzP (PTRUE_B 31), PPR:$Op1))>; 2148 2149 def : Pat<(i64 (AArch64CttzElts nxv8i1:$Op1)), 2150 (CNTP_XPP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op1), 2151 (BRKB_PPzP (PTRUE_H 31), PPR:$Op1))>; 2152 2153 def : Pat<(i64 (AArch64CttzElts nxv4i1:$Op1)), 2154 (CNTP_XPP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op1), 2155 (BRKB_PPzP (PTRUE_S 31), PPR:$Op1))>; 2156 2157 def : Pat<(i64 (AArch64CttzElts nxv2i1:$Op1)), 2158 (CNTP_XPP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op1), 2159 (BRKB_PPzP (PTRUE_D 31), PPR:$Op1))>; 2160} 2161 2162 defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>; 2163 defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>; 2164 defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>; 2165 defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>; 2166 defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>; 2167 defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>; 2168 defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>; 2169 defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>; 2170 2171let Predicates = [HasSVE_or_SME] in { 2172 defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>; 2173 defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>; 2174 defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>; 2175 defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb", int_aarch64_sve_uqdecb_n32>; 2176 defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb", int_aarch64_sve_sqincb_n64>; 2177 defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb", int_aarch64_sve_uqincb_n64>; 2178 defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb", int_aarch64_sve_sqdecb_n64>; 2179 defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb", int_aarch64_sve_uqdecb_n64>; 2180 2181 defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch", int_aarch64_sve_sqinch_n32>; 2182 defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch", int_aarch64_sve_uqinch_n32>; 2183 defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech", int_aarch64_sve_sqdech_n32>; 2184 defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech", int_aarch64_sve_uqdech_n32>; 2185 defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch", int_aarch64_sve_sqinch_n64>; 2186 defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch", int_aarch64_sve_uqinch_n64>; 2187 defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech", int_aarch64_sve_sqdech_n64>; 2188 defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech", int_aarch64_sve_uqdech_n64>; 2189 2190 defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw", int_aarch64_sve_sqincw_n32>; 2191 defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw", int_aarch64_sve_uqincw_n32>; 2192 defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw", int_aarch64_sve_sqdecw_n32>; 2193 defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw", int_aarch64_sve_uqdecw_n32>; 2194 defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw", int_aarch64_sve_sqincw_n64>; 2195 defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw", int_aarch64_sve_uqincw_n64>; 2196 defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw", int_aarch64_sve_sqdecw_n64>; 2197 defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw", int_aarch64_sve_uqdecw_n64>; 2198 2199 defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd", int_aarch64_sve_sqincd_n32>; 2200 defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd", int_aarch64_sve_uqincd_n32>; 2201 defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd", int_aarch64_sve_sqdecd_n32>; 2202 defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd", int_aarch64_sve_uqdecd_n32>; 2203 defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd", int_aarch64_sve_sqincd_n64>; 2204 defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd", int_aarch64_sve_uqincd_n64>; 2205 defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd", int_aarch64_sve_sqdecd_n64>; 2206 defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd", int_aarch64_sve_uqdecd_n64>; 2207 2208 defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16, int_aarch64_sve_sqinch, nxv8i16>; 2209 defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16, int_aarch64_sve_uqinch, nxv8i16>; 2210 defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16, int_aarch64_sve_sqdech, nxv8i16>; 2211 defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16, int_aarch64_sve_uqdech, nxv8i16>; 2212 defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>; 2213 defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>; 2214 defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32, int_aarch64_sve_sqincw, nxv4i32>; 2215 defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32, int_aarch64_sve_uqincw, nxv4i32>; 2216 defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32, int_aarch64_sve_sqdecw, nxv4i32>; 2217 defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32, int_aarch64_sve_uqdecw, nxv4i32>; 2218 defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>; 2219 defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>; 2220 defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64, int_aarch64_sve_sqincd, nxv2i64>; 2221 defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64, int_aarch64_sve_uqincd, nxv2i64>; 2222 defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64, int_aarch64_sve_sqdecd, nxv2i64>; 2223 defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64, int_aarch64_sve_uqdecd, nxv2i64>; 2224 defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>; 2225 defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>; 2226 2227 defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp", int_aarch64_sve_sqincp_n32>; 2228 defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp", int_aarch64_sve_sqincp_n64>; 2229 defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp", int_aarch64_sve_uqincp_n32>; 2230 defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp", int_aarch64_sve_uqincp_n64>; 2231 defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp", int_aarch64_sve_sqdecp_n32>; 2232 defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp", int_aarch64_sve_sqdecp_n64>; 2233 defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp", int_aarch64_sve_uqdecp_n32>; 2234 defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp", int_aarch64_sve_uqdecp_n64>; 2235 defm INCP_XP : sve_int_count_r_x64<0b10000, "incp", null_frag, add>; 2236 defm DECP_XP : sve_int_count_r_x64<0b10100, "decp", null_frag, sub>; 2237 2238 defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp", int_aarch64_sve_sqincp>; 2239 defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp", int_aarch64_sve_uqincp>; 2240 defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp", int_aarch64_sve_sqdecp>; 2241 defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp", int_aarch64_sve_uqdecp>; 2242 defm INCP_ZP : sve_int_count_v<0b10000, "incp">; 2243 defm DECP_ZP : sve_int_count_v<0b10100, "decp">; 2244 2245 def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv16i1:$Op2)))), 2246 (INCP_XP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op2), GPR64:$Op1)>; 2247 2248 def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv16i1:$Op2))))), 2249 (EXTRACT_SUBREG (INCP_XP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op2), 2250 (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)), 2251 sub_32)>; 2252 2253 def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv8i1:$Op2)))), 2254 (INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2), GPR64:$Op1)>; 2255 2256 def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv8i1:$Op2))))), 2257 (EXTRACT_SUBREG (INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2), 2258 (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)), 2259 sub_32)>; 2260 2261 def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv4i1:$Op2)))), 2262 (INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2), GPR64:$Op1)>; 2263 2264 def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv4i1:$Op2))))), 2265 (EXTRACT_SUBREG (INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2), 2266 (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)), 2267 sub_32)>; 2268 2269 def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv2i1:$Op2)))), 2270 (INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2), GPR64:$Op1)>; 2271 2272 def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv2i1:$Op2))))), 2273 (EXTRACT_SUBREG (INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2), 2274 (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)), 2275 sub_32)>; 2276 2277 defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>; 2278 defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>; 2279 defm INDEX_RI : sve_int_index_ri<"index">; 2280 defm INDEX_II : sve_int_index_ii<"index">; 2281 2282 // Unpredicated shifts 2283 defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>; 2284 defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>; 2285 defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>; 2286 2287 defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr", int_aarch64_sve_asr_wide>; 2288 defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr", int_aarch64_sve_lsr_wide>; 2289 defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl", int_aarch64_sve_lsl_wide>; 2290 2291 // Predicated shifts 2292 defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>; 2293 defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; 2294 defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; 2295 defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>; 2296 2297 defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; 2298 defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; 2299 defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>; 2300} // End HasSVE_or_SME 2301 2302let Predicates = [HasSVE_or_SME, UseExperimentalZeroingPseudos] in { 2303 defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>; 2304 defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>; 2305 defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>; 2306 defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>; 2307 2308 defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; 2309 defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; 2310 defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>; 2311} // End HasSVE_or_SME, UseExperimentalZeroingPseudos 2312 2313let Predicates = [HasSVE_or_SME] in { 2314 defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; 2315 defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; 2316 defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; 2317 defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>; 2318 defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>; 2319 defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>; 2320 2321 defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>; 2322 defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>; 2323 defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>; 2324 2325 defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>; 2326 defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>; 2327 defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; 2328 2329 defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; 2330 defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; 2331 defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; 2332 defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; 2333 defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; 2334 defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; 2335 defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; 2336 defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; 2337 defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; 2338 defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; 2339 defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; 2340 defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; 2341 defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; 2342 defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; 2343 defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; 2344 defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, null_frag, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; 2345 defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; 2346 defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; 2347 defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; 2348 defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; 2349 defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; 2350 defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; 2351 defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; 2352 defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; 2353 defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; 2354 defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; 2355 defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; 2356 defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; 2357 defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; 2358 defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; 2359 defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; 2360 defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; 2361 defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; 2362 defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; 2363 2364 //These patterns exist to improve the code quality of conversions on unpacked types. 2365 def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive:$Pg)), nxv2f16:$Zs, nxv2f32:$Zd)), 2366 (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2367 2368 // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. 2369 // This is ignored by the pattern below where it is matched by (i64 timm0_1) 2370 def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive:$Pg)), nxv2f32:$Zs, (i64 timm0_1), nxv2f16:$Zd)), 2371 (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2372 2373 def : Pat<(nxv4f32 (fpextend nxv4bf16:$op)), 2374 (LSL_ZZI_S $op, (i32 16))>; 2375 def : Pat<(nxv2f32 (fpextend nxv2bf16:$op)), 2376 (LSL_ZZI_S $op, (i32 16))>; 2377 2378 // Signed integer -> Floating-point 2379 def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), 2380 (sext_inreg nxv2i64:$Zs, nxv2i16), nxv2f16:$Zd)), 2381 (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2382 2383 def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg), 2384 (sext_inreg nxv4i32:$Zs, nxv4i16), nxv4f16:$Zd)), 2385 (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2386 2387 def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), 2388 (sext_inreg nxv2i64:$Zs, nxv2i32), nxv2f16:$Zd)), 2389 (SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2390 2391 def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), 2392 (sext_inreg nxv2i64:$Zs, nxv2i32), nxv2f32:$Zd)), 2393 (SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2394 2395 def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), 2396 (sext_inreg nxv2i64:$Zs, nxv2i32), nxv2f64:$Zd)), 2397 (SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2398 2399 // Unsigned integer -> Floating-point 2400 def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive:$Pg)), 2401 (and nxv2i64:$Zs, 2402 (nxv2i64 (splat_vector (i64 0xFFFF)))), nxv2f16:$Zd)), 2403 (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2404 2405 def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive:$Pg)), 2406 (and nxv2i64:$Zs, 2407 (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), nxv2f16:$Zd)), 2408 (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2409 2410 def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive:$Pg)), 2411 (and nxv4i32:$Zs, 2412 (nxv4i32 (splat_vector (i32 0xFFFF)))), nxv4f16:$Zd)), 2413 (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2414 2415 def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive:$Pg)), 2416 (and nxv2i64:$Zs, 2417 (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), nxv2f32:$Zd)), 2418 (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2419 2420 def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive:$Pg)), 2421 (and nxv2i64:$Zs, 2422 (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), nxv2f64:$Zd)), 2423 (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 2424 2425 defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; 2426 defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; 2427 defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>; 2428 defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>; 2429 defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>; 2430 defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>; 2431 defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>; 2432 defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>; 2433 defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; 2434} // End HasSVE_or_SME 2435 2436let Predicates = [HasBF16, HasSVE_or_SME] in { 2437 defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>; 2438 defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>; 2439} // End HasBF16, HasSVE_or_SME 2440 2441let Predicates = [HasBF16, HasSVE] in { 2442 defm BFMMLA_ZZZ : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>; 2443} // End HasBF16, HasSVE 2444 2445let Predicates = [HasBF16, HasSVE_or_SME] in { 2446 defm BFMLALB_ZZZ : sve2_fp_mla_long<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb>; 2447 defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>; 2448 defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>; 2449 defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>; 2450 2451 defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>; 2452 defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>; 2453} // End HasBF16, HasSVE_or_SME 2454 2455let Predicates = [HasSVE_or_SME] in { 2456 // InstAliases 2457 def : InstAlias<"mov $Zd, $Zn", 2458 (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; 2459 def : InstAlias<"mov $Pd, $Pg/m, $Pn", 2460 (SEL_PPPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pd), 1>; 2461 def : InstAlias<"mov $Pd, $Pn", 2462 (ORR_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; 2463 def : InstAlias<"mov $Pd, $Pg/z, $Pn", 2464 (AND_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; 2465 2466 def : InstAlias<"movs $Pd, $Pn", 2467 (ORRS_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; 2468 def : InstAlias<"movs $Pd, $Pg/z, $Pn", 2469 (ANDS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; 2470 2471 def : InstAlias<"not $Pd, $Pg/z, $Pn", 2472 (EOR_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; 2473 2474 def : InstAlias<"nots $Pd, $Pg/z, $Pn", 2475 (EORS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; 2476 2477 def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", 2478 (CMPGE_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; 2479 def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", 2480 (CMPGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2481 def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", 2482 (CMPGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2483 def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", 2484 (CMPGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2485 2486 def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", 2487 (CMPHI_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; 2488 def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", 2489 (CMPHI_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2490 def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", 2491 (CMPHI_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2492 def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", 2493 (CMPHI_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2494 2495 def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", 2496 (CMPHS_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; 2497 def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", 2498 (CMPHS_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2499 def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", 2500 (CMPHS_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2501 def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", 2502 (CMPHS_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2503 2504 def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", 2505 (CMPGT_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; 2506 def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", 2507 (CMPGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2508 def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", 2509 (CMPGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2510 def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", 2511 (CMPGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2512 2513 def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", 2514 (FACGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2515 def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", 2516 (FACGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2517 def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", 2518 (FACGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2519 2520 def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", 2521 (FACGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2522 def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", 2523 (FACGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2524 def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", 2525 (FACGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2526 2527 def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", 2528 (FCMGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2529 def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", 2530 (FCMGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2531 def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", 2532 (FCMGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2533 2534 def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", 2535 (FCMGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; 2536 def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", 2537 (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; 2538 def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", 2539 (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; 2540 2541 // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4. 2542 // These get expanded to individual LDR_ZXI/STR_ZXI instructions in 2543 // AArch64ExpandPseudoInsts. 2544 let mayLoad = 1, hasSideEffects = 0 in { 2545 def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2546 def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2547 def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2548 def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2549 } 2550 let mayStore = 1, hasSideEffects = 0 in { 2551 def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2552 def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2553 def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2554 def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; 2555 } 2556 2557 let AddedComplexity = 1 in { 2558 multiclass LD1RPat<ValueType vt, SDPatternOperator operator, 2559 Instruction load, Instruction ptrue, ValueType index_vt, ComplexPattern CP, Operand immtype> { 2560 def : Pat<(vt (splat_vector (index_vt (operator (CP GPR64:$base, immtype:$offset))))), 2561 (load (ptrue 31), GPR64:$base, $offset)>; 2562 def : Pat<(vt (AArch64dup_mt PPR:$pg, (index_vt (operator (CP GPR64:$base, immtype:$offset))), (SVEDup0Undef))), 2563 (load $pg, GPR64:$base, $offset)>; 2564 } 2565 } 2566 2567 // LDR1 of 8-bit data 2568 defm : LD1RPat<nxv16i8, extloadi8, LD1RB_IMM, PTRUE_B, i32, am_indexed8_6b, uimm6s1>; 2569 defm : LD1RPat<nxv8i16, zextloadi8, LD1RB_H_IMM, PTRUE_H, i32, am_indexed8_6b, uimm6s1>; 2570 defm : LD1RPat<nxv4i32, zextloadi8, LD1RB_S_IMM, PTRUE_S, i32, am_indexed8_6b, uimm6s1>; 2571 defm : LD1RPat<nxv2i64, zextloadi8, LD1RB_D_IMM, PTRUE_D, i64, am_indexed8_6b, uimm6s1>; 2572 defm : LD1RPat<nxv8i16, sextloadi8, LD1RSB_H_IMM, PTRUE_H, i32, am_indexed8_6b, uimm6s1>; 2573 defm : LD1RPat<nxv4i32, sextloadi8, LD1RSB_S_IMM, PTRUE_S, i32, am_indexed8_6b, uimm6s1>; 2574 defm : LD1RPat<nxv2i64, sextloadi8, LD1RSB_D_IMM, PTRUE_D, i64, am_indexed8_6b, uimm6s1>; 2575 2576 // LDR1 of 16-bit data 2577 defm : LD1RPat<nxv8i16, extloadi16, LD1RH_IMM, PTRUE_H, i32, am_indexed16_6b, uimm6s2>; 2578 defm : LD1RPat<nxv4i32, zextloadi16, LD1RH_S_IMM, PTRUE_S, i32, am_indexed16_6b, uimm6s2>; 2579 defm : LD1RPat<nxv2i64, zextloadi16, LD1RH_D_IMM, PTRUE_D, i64, am_indexed16_6b, uimm6s2>; 2580 defm : LD1RPat<nxv4i32, sextloadi16, LD1RSH_S_IMM, PTRUE_S, i32, am_indexed16_6b, uimm6s2>; 2581 defm : LD1RPat<nxv2i64, sextloadi16, LD1RSH_D_IMM, PTRUE_D, i64, am_indexed16_6b, uimm6s2>; 2582 2583 // LDR1 of 32-bit data 2584 defm : LD1RPat<nxv4i32, load, LD1RW_IMM, PTRUE_S, i32, am_indexed32_6b, uimm6s4>; 2585 defm : LD1RPat<nxv2i64, zextloadi32, LD1RW_D_IMM, PTRUE_D, i64, am_indexed32_6b, uimm6s4>; 2586 defm : LD1RPat<nxv2i64, sextloadi32, LD1RSW_IMM, PTRUE_D, i64, am_indexed32_6b, uimm6s4>; 2587 2588 // LDR1 of 64-bit data 2589 defm : LD1RPat<nxv2i64, load, LD1RD_IMM, PTRUE_D, i64, am_indexed64_6b, uimm6s8>; 2590 2591 let Predicates = [HasSVE_or_SME, UseSVEFPLD1R] in { 2592 // LD1R of FP data 2593 defm : LD1RPat<nxv8f16, load, LD1RH_IMM, PTRUE_H, f16, am_indexed16_6b, uimm6s2>; 2594 defm : LD1RPat<nxv4f16, load, LD1RH_S_IMM, PTRUE_S, f16, am_indexed16_6b, uimm6s2>; 2595 defm : LD1RPat<nxv2f16, load, LD1RH_D_IMM, PTRUE_D, f16, am_indexed16_6b, uimm6s2>; 2596 defm : LD1RPat<nxv4f32, load, LD1RW_IMM, PTRUE_S, f32, am_indexed32_6b, uimm6s4>; 2597 defm : LD1RPat<nxv2f32, load, LD1RW_D_IMM, PTRUE_D, f32, am_indexed32_6b, uimm6s4>; 2598 defm : LD1RPat<nxv2f64, load, LD1RD_IMM, PTRUE_D, f64, am_indexed64_6b, uimm6s8>; 2599 } 2600 2601// LD1R of 128-bit masked data 2602 multiclass ld1rq_pat<ValueType vt1, SDPatternOperator op, Instruction load_instr, ComplexPattern AddrCP>{ 2603 def : Pat<(vt1 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), 2604 (!cast<Instruction>(load_instr # _IMM) $gp, $base, (i64 0))>; 2605 let AddedComplexity = 2 in { 2606 def : Pat<(vt1 (op PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), 2607 (!cast<Instruction>(load_instr # _IMM) $gp, $base, simm4s16:$imm)>; 2608 } 2609 def : Pat<(vt1 (op PPR:$gp, (AddrCP GPR64:$base, GPR64:$idx))), 2610 (load_instr $gp, $base, $idx)>; 2611 } 2612 2613 defm : ld1rq_pat<nxv16i8, AArch64ld1rq_z, LD1RQ_B, am_sve_regreg_lsl0>; 2614 defm : ld1rq_pat<nxv8i16, AArch64ld1rq_z, LD1RQ_H, am_sve_regreg_lsl1>; 2615 defm : ld1rq_pat<nxv4i32, AArch64ld1rq_z, LD1RQ_W, am_sve_regreg_lsl2>; 2616 defm : ld1rq_pat<nxv2i64, AArch64ld1rq_z, LD1RQ_D, am_sve_regreg_lsl3>; 2617 2618 def : Pat<(sext_inreg nxv2i64:$Zs, nxv2i32), (SXTW_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; 2619 def : Pat<(sext_inreg nxv2i64:$Zs, nxv2i16), (SXTH_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; 2620 def : Pat<(sext_inreg nxv2i64:$Zs, nxv2i8), (SXTB_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; 2621 def : Pat<(sext_inreg nxv4i32:$Zs, nxv4i16), (SXTH_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; 2622 def : Pat<(sext_inreg nxv4i32:$Zs, nxv4i8), (SXTB_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>; 2623 def : Pat<(sext_inreg nxv8i16:$Zs, nxv8i8), (SXTB_ZPmZ_H_UNDEF (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>; 2624 2625 // General case that we ideally never want to match. 2626 def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>; 2627 2628 let AddedComplexity = 5 in { 2629 def : Pat<(vscale (i64 1)), (UBFMXri (RDVLI_XI 1), 4, 63)>; 2630 def : Pat<(vscale (i64 -1)), (SBFMXri (RDVLI_XI -1), 4, 63)>; 2631 2632 def : Pat<(vscale (sve_rdvl_imm i32:$imm)), (RDVLI_XI $imm)>; 2633 def : Pat<(vscale (sve_cnth_imm i32:$imm)), (CNTH_XPiI 31, $imm)>; 2634 def : Pat<(vscale (sve_cntw_imm i32:$imm)), (CNTW_XPiI 31, $imm)>; 2635 def : Pat<(vscale (sve_cntd_imm i32:$imm)), (CNTD_XPiI 31, $imm)>; 2636 2637 def : Pat<(vscale (sve_cnth_imm_neg i32:$imm)), (SUBXrs XZR, (CNTH_XPiI 31, $imm), 0)>; 2638 def : Pat<(vscale (sve_cntw_imm_neg i32:$imm)), (SUBXrs XZR, (CNTW_XPiI 31, $imm), 0)>; 2639 def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>; 2640 } 2641 2642 // Add NoUseScalarIncVL to avoid affecting for patterns with UseScalarIncVL 2643 let Predicates = [HasSVE_or_SME, NoUseScalarIncVL] in { 2644 def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))), 2645 (SUBXrs GPR64:$op, (CNTH_XPiI 31, $imm), 0)>; 2646 def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))), 2647 (SUBXrs GPR64:$op, (CNTW_XPiI 31, $imm), 0)>; 2648 def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))), 2649 (SUBXrs GPR64:$op, (CNTD_XPiI 31, $imm), 0)>; 2650 2651 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))), 2652 (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTH_XPiI 31, $imm), sub_32))>; 2653 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))), 2654 (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTW_XPiI 31, $imm), sub_32))>; 2655 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))), 2656 (SUBSWrr GPR32:$op, (EXTRACT_SUBREG (CNTD_XPiI 31, $imm), sub_32))>; 2657 } 2658 2659 let AddedComplexity = 5 in { 2660 def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), 2661 (INCH_ZPiI ZPR:$op, 31, $imm)>; 2662 def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), 2663 (INCW_ZPiI ZPR:$op, 31, $imm)>; 2664 def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))), 2665 (INCD_ZPiI ZPR:$op, 31, $imm)>; 2666 2667 def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), 2668 (DECH_ZPiI ZPR:$op, 31, $imm)>; 2669 def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), 2670 (DECW_ZPiI ZPR:$op, 31, $imm)>; 2671 def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))), 2672 (DECD_ZPiI ZPR:$op, 31, $imm)>; 2673 } 2674 2675 let Predicates = [HasSVE_or_SME, UseScalarIncVL], AddedComplexity = 5 in { 2676 def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))), 2677 (ADDVL_XXI GPR64:$op, $imm)>; 2678 2679 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))), 2680 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (IMPLICIT_DEF), 2681 GPR32:$op, sub_32), $imm), 2682 sub_32)>; 2683 2684 def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))), 2685 (INCH_XPiI GPR64:$op, 31, $imm)>; 2686 def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))), 2687 (INCW_XPiI GPR64:$op, 31, $imm)>; 2688 def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))), 2689 (INCD_XPiI GPR64:$op, 31, $imm)>; 2690 2691 def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))), 2692 (DECH_XPiI GPR64:$op, 31, $imm)>; 2693 def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))), 2694 (DECW_XPiI GPR64:$op, 31, $imm)>; 2695 def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))), 2696 (DECD_XPiI GPR64:$op, 31, $imm)>; 2697 2698 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))), 2699 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (IMPLICIT_DEF), 2700 GPR32:$op, sub_32), 31, $imm), 2701 sub_32)>; 2702 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))), 2703 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (IMPLICIT_DEF), 2704 GPR32:$op, sub_32), 31, $imm), 2705 sub_32)>; 2706 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))), 2707 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (IMPLICIT_DEF), 2708 GPR32:$op, sub_32), 31, $imm), 2709 sub_32)>; 2710 2711 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))), 2712 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (IMPLICIT_DEF), 2713 GPR32:$op, sub_32), 31, $imm), 2714 sub_32)>; 2715 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))), 2716 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (IMPLICIT_DEF), 2717 GPR32:$op, sub_32), 31, $imm), 2718 sub_32)>; 2719 def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))), 2720 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (IMPLICIT_DEF), 2721 GPR32:$op, sub_32), 31, $imm), 2722 sub_32)>; 2723 } 2724 2725 // For big endian, only BITCASTs involving same sized vector types with same 2726 // size vector elements can be isel'd directly. 2727 let Predicates = [IsLE] in 2728 foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in 2729 foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in 2730 if !ne(VT,VT2) then 2731 def : Pat<(VT (bitconvert (VT2 ZPR:$src))), (VT ZPR:$src)>; 2732 2733 def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; 2734 def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; 2735 2736 def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; 2737 def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; 2738 2739 def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; 2740 def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>; 2741 2742 def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>; 2743 def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; 2744 2745 def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; 2746 def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>; 2747 2748 def : Pat<(nxv4bf16 (bitconvert (nxv4f16 ZPR:$src))), (nxv4bf16 ZPR:$src)>; 2749 def : Pat<(nxv4f16 (bitconvert (nxv4bf16 ZPR:$src))), (nxv4f16 ZPR:$src)>; 2750 2751 def : Pat<(nxv2bf16 (bitconvert (nxv2f16 ZPR:$src))), (nxv2bf16 ZPR:$src)>; 2752 def : Pat<(nxv2f16 (bitconvert (nxv2bf16 ZPR:$src))), (nxv2f16 ZPR:$src)>; 2753 2754 def : Pat<(nxv16i1 (bitconvert (aarch64svcount PNR:$src))), (nxv16i1 PPR:$src)>; 2755 def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PNR:$src)>; 2756 2757 // These allow nop casting between predicate vector types. 2758 foreach VT = [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ] in 2759 foreach VT2 = [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ] in 2760 def : Pat<(VT (reinterpret_cast (VT2 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; 2761 2762 // These allow nop casting between half vector types. 2763 foreach VT = [ nxv2f16, nxv4f16, nxv8f16 ] in 2764 foreach VT2 = [ nxv2f16, nxv4f16, nxv8f16 ] in 2765 def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; 2766 2767 // These allow nop casting between float vector types. 2768 foreach VT = [ nxv2f32, nxv4f32 ] in 2769 foreach VT2 = [ nxv2f32, nxv4f32 ] in 2770 def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; 2771 2772 // These allow nop casting between bfloat vector types. 2773 foreach VT = [ nxv2bf16, nxv4bf16, nxv8bf16 ] in 2774 foreach VT2 = [ nxv2bf16, nxv4bf16, nxv8bf16 ] in 2775 def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; 2776 2777 // These allow nop casting between all packed vector types. 2778 foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in 2779 foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in 2780 def : Pat<(VT (AArch64NvCast (VT2 ZPR:$src))), (VT ZPR:$src)>; 2781 2782 def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)), 2783 (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>; 2784 def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)), 2785 (AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>; 2786 def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)), 2787 (AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>; 2788 def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)), 2789 (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>; 2790 // Emulate .Q operation using a PTRUE_D when the other lanes don't matter. 2791 def : Pat<(nxv1i1 (and PPR:$Ps1, PPR:$Ps2)), 2792 (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>; 2793 2794 // Add more complex addressing modes here as required 2795 multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load, 2796 Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> { 2797 let AddedComplexity = 1 in { 2798 def _reg_reg_z : Pat<(Ty (Load (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), (SVEDup0Undef))), 2799 (RegRegInst PPR:$gp, GPR64:$base, GPR64:$offset)>; 2800 } 2801 let AddedComplexity = 2 in { 2802 def _reg_imm_z : Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp), (SVEDup0Undef))), 2803 (RegImmInst PPR:$gp, GPR64:$base, simm4s1:$offset)>; 2804 } 2805 def _default_z : Pat<(Ty (Load GPR64:$base, (PredTy PPR:$gp), (SVEDup0Undef))), 2806 (RegImmInst PPR:$gp, GPR64:$base, (i64 0))>; 2807 } 2808 2809 // 2-element contiguous loads 2810 defm : pred_load<nxv2i64, nxv2i1, azext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>; 2811 defm : pred_load<nxv2i64, nxv2i1, sext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>; 2812 defm : pred_load<nxv2i64, nxv2i1, azext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>; 2813 defm : pred_load<nxv2i64, nxv2i1, sext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>; 2814 defm : pred_load<nxv2i64, nxv2i1, azext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>; 2815 defm : pred_load<nxv2i64, nxv2i1, sext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>; 2816 defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>; 2817 defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>; 2818 defm : pred_load<nxv2bf16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>; 2819 defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>; 2820 defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>; 2821 2822 // 4-element contiguous loads 2823 defm : pred_load<nxv4i32, nxv4i1, azext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>; 2824 defm : pred_load<nxv4i32, nxv4i1, sext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>; 2825 defm : pred_load<nxv4i32, nxv4i1, azext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>; 2826 defm : pred_load<nxv4i32, nxv4i1, sext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>; 2827 defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>; 2828 defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>; 2829 defm : pred_load<nxv4bf16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>; 2830 defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>; 2831 2832 // 8-element contiguous loads 2833 defm : pred_load<nxv8i16, nxv8i1, azext_masked_load_i8, LD1B_H, LD1B_H_IMM, am_sve_regreg_lsl0>; 2834 defm : pred_load<nxv8i16, nxv8i1, sext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>; 2835 defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; 2836 defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; 2837 defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; 2838 2839 // 16-element contiguous loads 2840 defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B, LD1B_IMM, am_sve_regreg_lsl0>; 2841 2842 multiclass pred_store<ValueType Ty, ValueType PredTy, SDPatternOperator Store, 2843 Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> { 2844 let AddedComplexity = 1 in { 2845 def _reg_reg : Pat<(Store Ty:$vec, (AddrCP GPR64:$base, GPR64:$offset), PredTy:$gp), 2846 (RegRegInst ZPR:$vec, PPR:$gp, GPR64:$base, GPR64:$offset)>; 2847 } 2848 let AddedComplexity = 2 in { 2849 def _reg_imm : Pat<(Store Ty:$vec, (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), PredTy:$gp), 2850 (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, simm4s1:$offset)>; 2851 } 2852 def _default : Pat<(Store Ty:$vec, GPR64:$base, PredTy:$gp), 2853 (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>; 2854 } 2855 2856 // 2-element contiguous stores 2857 defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D, ST1B_D_IMM, am_sve_regreg_lsl0>; 2858 defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>; 2859 defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>; 2860 defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>; 2861 defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>; 2862 defm : pred_store<nxv2bf16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>; 2863 defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>; 2864 defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>; 2865 2866 // 4-element contiguous stores 2867 defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S, ST1B_S_IMM, am_sve_regreg_lsl0>; 2868 defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>; 2869 defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>; 2870 defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>; 2871 defm : pred_store<nxv4bf16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>; 2872 defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>; 2873 2874 // 8-element contiguous stores 2875 defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>; 2876 defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; 2877 defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; 2878 defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; 2879 2880 // 16-element contiguous stores 2881 defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B, ST1B_IMM, am_sve_regreg_lsl0>; 2882 2883 defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRR, LDNT1B_ZRI, am_sve_regreg_lsl0>; 2884 defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRR, LDNT1H_ZRI, am_sve_regreg_lsl1>; 2885 defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRR, LDNT1W_ZRI, am_sve_regreg_lsl2>; 2886 defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRR, LDNT1D_ZRI, am_sve_regreg_lsl3>; 2887 2888 defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRR, STNT1B_ZRI, am_sve_regreg_lsl0>; 2889 defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRR, STNT1H_ZRI, am_sve_regreg_lsl1>; 2890 defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRR, STNT1W_ZRI, am_sve_regreg_lsl2>; 2891 defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRR, STNT1D_ZRI, am_sve_regreg_lsl3>; 2892 2893 multiclass unpred_store<PatFrag Store, ValueType Ty, Instruction RegRegInst, 2894 Instruction RegImmInst, Instruction PTrue, 2895 ComplexPattern AddrCP> { 2896 let AddedComplexity = 1 in { 2897 def _reg : Pat<(Store Ty:$val, (AddrCP GPR64sp:$base, GPR64:$offset)), 2898 (RegRegInst ZPR:$val, (PTrue 31), GPR64sp:$base, GPR64:$offset)>; 2899 } 2900 let AddedComplexity = 2 in { 2901 def _imm : Pat<(Store Ty:$val, (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)), 2902 (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; 2903 } 2904 2905 def : Pat<(Store Ty:$val, GPR64:$base), 2906 (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; 2907 } 2908 2909 defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>; 2910 defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; 2911 defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; 2912 defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; 2913 defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; 2914 defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2915 defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2916 defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; 2917 defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; 2918 defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; 2919 defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; 2920 defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; 2921 defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2922 defm : unpred_store< store, nxv4bf16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2923 defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2924 defm : unpred_store< store, nxv2bf16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2925 defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; 2926 defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; 2927 defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; 2928 2929 multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegRegInst, 2930 Instruction RegImmInst, Instruction PTrue, 2931 ComplexPattern AddrCP> { 2932 let AddedComplexity = 1 in { 2933 def _reg: Pat<(Ty (Load (AddrCP GPR64sp:$base, GPR64:$offset))), 2934 (RegRegInst (PTrue 31), GPR64sp:$base, GPR64:$offset)>; 2935 } 2936 let AddedComplexity = 2 in { 2937 def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))), 2938 (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; 2939 } 2940 2941 def : Pat<(Ty (Load GPR64:$base)), 2942 (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>; 2943 } 2944 2945 defm : unpred_load< load, nxv16i8, LD1B, LD1B_IMM, PTRUE_B, am_sve_regreg_lsl0>; 2946 defm : unpred_load< zextloadvi8, nxv8i16, LD1B_H, LD1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; 2947 defm : unpred_load< zextloadvi8, nxv4i32, LD1B_S, LD1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; 2948 defm : unpred_load< zextloadvi8, nxv2i64, LD1B_D, LD1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; 2949 defm : unpred_load< extloadvi8, nxv8i16, LD1B_H, LD1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; 2950 defm : unpred_load< extloadvi8, nxv4i32, LD1B_S, LD1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; 2951 defm : unpred_load< extloadvi8, nxv2i64, LD1B_D, LD1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; 2952 defm : unpred_load< sextloadvi8, nxv8i16, LD1SB_H, LD1SB_H_IMM, PTRUE_H, am_sve_regreg_lsl0>; 2953 defm : unpred_load< sextloadvi8, nxv4i32, LD1SB_S, LD1SB_S_IMM, PTRUE_S, am_sve_regreg_lsl0>; 2954 defm : unpred_load< sextloadvi8, nxv2i64, LD1SB_D, LD1SB_D_IMM, PTRUE_D, am_sve_regreg_lsl0>; 2955 defm : unpred_load< load, nxv8i16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; 2956 defm : unpred_load<zextloadvi16, nxv4i32, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2957 defm : unpred_load<zextloadvi16, nxv2i64, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2958 defm : unpred_load< extloadvi16, nxv4i32, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2959 defm : unpred_load< extloadvi16, nxv2i64, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2960 defm : unpred_load<sextloadvi16, nxv4i32, LD1SH_S, LD1SH_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2961 defm : unpred_load<sextloadvi16, nxv2i64, LD1SH_D, LD1SH_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2962 defm : unpred_load< load, nxv4i32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; 2963 defm : unpred_load<zextloadvi32, nxv2i64, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; 2964 defm : unpred_load< extloadvi32, nxv2i64, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; 2965 defm : unpred_load<sextloadvi32, nxv2i64, LD1SW_D, LD1SW_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; 2966 defm : unpred_load< load, nxv2i64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; 2967 defm : unpred_load< load, nxv8f16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; 2968 defm : unpred_load< load, nxv8bf16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>; 2969 defm : unpred_load< load, nxv4f16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2970 defm : unpred_load< load, nxv4bf16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>; 2971 defm : unpred_load< load, nxv2f16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2972 defm : unpred_load< load, nxv2bf16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>; 2973 defm : unpred_load< load, nxv4f32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>; 2974 defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>; 2975 defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>; 2976 2977 // Allow using the reg+reg form of ld1b/st1b for memory accesses with the 2978 // same width as nxv16i8. This saves an add in cases where we would 2979 // otherwise compute the address separately. 2980 multiclass unpred_loadstore_bitcast<ValueType Ty> { 2981 let Predicates = [IsLE] in { 2982 def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))), 2983 (LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>; 2984 def : Pat<(store Ty:$val, (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)), 2985 (ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>; 2986 } 2987 } 2988 defm : unpred_loadstore_bitcast<nxv8i16>; 2989 defm : unpred_loadstore_bitcast<nxv8f16>; 2990 defm : unpred_loadstore_bitcast<nxv8bf16>; 2991 defm : unpred_loadstore_bitcast<nxv4f32>; 2992 defm : unpred_loadstore_bitcast<nxv4i32>; 2993 defm : unpred_loadstore_bitcast<nxv2i64>; 2994 defm : unpred_loadstore_bitcast<nxv2f64>; 2995 2996 multiclass unpred_store_predicate<ValueType Ty, Instruction Store> { 2997 def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)), 2998 (Store PPR:$val, GPR64sp:$base, simm9:$offset)>; 2999 3000 def _default : Pat<(store (Ty PPR:$Val), GPR64:$base), 3001 (Store PPR:$Val, GPR64:$base, (i64 0))>; 3002 } 3003 3004 defm Pat_Store_P16 : unpred_store_predicate<nxv16i1, STR_PXI>; 3005 3006 multiclass unpred_load_predicate<ValueType Ty, Instruction Load> { 3007 def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))), 3008 (Load GPR64sp:$base, simm9:$offset)>; 3009 3010 def _default : Pat<(Ty (load GPR64:$base)), 3011 (Load GPR64:$base, (i64 0))>; 3012 } 3013 3014 defm Pat_Load_P16 : unpred_load_predicate<nxv16i1, LDR_PXI>; 3015 3016 multiclass ld1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty, 3017 SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> { 3018 // reg + reg 3019 let AddedComplexity = 1 in { 3020 def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)), 3021 (RegRegInst PPR:$gp, GPR64sp:$base, GPR64:$offset)>; 3022 } 3023 3024 // scalar + immediate (mul vl) 3025 let AddedComplexity = 2 in { 3026 def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)), 3027 (RegImmInst PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; 3028 } 3029 3030 // base 3031 def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), 3032 (RegImmInst PPR:$gp, GPR64sp:$base, (i64 0))>; 3033 } 3034 3035 // 2-element contiguous loads 3036 defm : ld1<LD1B_D, LD1B_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>; 3037 defm : ld1<LD1SB_D, LD1SB_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>; 3038 defm : ld1<LD1H_D, LD1H_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>; 3039 defm : ld1<LD1SH_D, LD1SH_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>; 3040 defm : ld1<LD1W_D, LD1W_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>; 3041 defm : ld1<LD1SW_D, LD1SW_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>; 3042 defm : ld1<LD1D, LD1D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i64, am_sve_regreg_lsl3>; 3043 defm : ld1<LD1D, LD1D_IMM, nxv2f64, AArch64ld1_z, nxv2i1, nxv2f64, am_sve_regreg_lsl3>; 3044 3045 // 4-element contiguous loads 3046 defm : ld1<LD1B_S, LD1B_S_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>; 3047 defm : ld1<LD1SB_S, LD1SB_S_IMM, nxv4i32, AArch64ld1s_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>; 3048 defm : ld1<LD1H_S, LD1H_S_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>; 3049 defm : ld1<LD1SH_S, LD1SH_S_IMM, nxv4i32, AArch64ld1s_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>; 3050 defm : ld1<LD1W, LD1W_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i32, am_sve_regreg_lsl2>; 3051 defm : ld1<LD1W, LD1W_IMM, nxv4f32, AArch64ld1_z, nxv4i1, nxv4f32, am_sve_regreg_lsl2>; 3052 3053 // 8-element contiguous loads 3054 defm : ld1<LD1B_H, LD1B_H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; 3055 defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; 3056 defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>; 3057 defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>; 3058 defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>; 3059 3060 // 16-element contiguous loads 3061 defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>; 3062} // End HasSVE_or_SME 3063 3064let Predicates = [HasSVE] in { 3065 multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> { 3066 // scalar + immediate (mul vl) 3067 let AddedComplexity = 1 in { 3068 def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)), 3069 (I PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; 3070 } 3071 3072 // base 3073 def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), 3074 (I PPR:$gp, GPR64sp:$base, (i64 0))>; 3075 } 3076 3077 // 2-element contiguous non-faulting loads 3078 defm : ldnf1<LDNF1B_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i8>; 3079 defm : ldnf1<LDNF1SB_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i8>; 3080 defm : ldnf1<LDNF1H_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i16>; 3081 defm : ldnf1<LDNF1SH_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i16>; 3082 defm : ldnf1<LDNF1W_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i32>; 3083 defm : ldnf1<LDNF1SW_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i32>; 3084 defm : ldnf1<LDNF1D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i64>; 3085 defm : ldnf1<LDNF1D_IMM, nxv2f64, AArch64ldnf1_z, nxv2i1, nxv2f64>; 3086 3087 // 4-element contiguous non-faulting loads 3088 defm : ldnf1<LDNF1B_S_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i8>; 3089 defm : ldnf1<LDNF1SB_S_IMM, nxv4i32, AArch64ldnf1s_z, nxv4i1, nxv4i8>; 3090 defm : ldnf1<LDNF1H_S_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i16>; 3091 defm : ldnf1<LDNF1SH_S_IMM, nxv4i32, AArch64ldnf1s_z, nxv4i1, nxv4i16>; 3092 defm : ldnf1<LDNF1W_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i32>; 3093 defm : ldnf1<LDNF1W_IMM, nxv4f32, AArch64ldnf1_z, nxv4i1, nxv4f32>; 3094 3095 // 8-element contiguous non-faulting loads 3096 defm : ldnf1<LDNF1B_H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i8>; 3097 defm : ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s_z, nxv8i1, nxv8i8>; 3098 defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i16>; 3099 defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1_z, nxv8i1, nxv8f16>; 3100 defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>; 3101 3102 // 16-element contiguous non-faulting loads 3103 defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1_z, nxv16i1, nxv16i8>; 3104 3105 multiclass ldff1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> { 3106 // reg + reg 3107 let AddedComplexity = 1 in { 3108 def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)), 3109 (I PPR:$gp, GPR64sp:$base, GPR64:$offset)>; 3110 } 3111 3112 // Base 3113 def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)), 3114 (I PPR:$gp, GPR64sp:$base, XZR)>; 3115 } 3116 3117 // 2-element contiguous first faulting loads 3118 defm : ldff1<LDFF1B_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>; 3119 defm : ldff1<LDFF1SB_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>; 3120 defm : ldff1<LDFF1H_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>; 3121 defm : ldff1<LDFF1SH_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>; 3122 defm : ldff1<LDFF1W_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>; 3123 defm : ldff1<LDFF1SW_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>; 3124 defm : ldff1<LDFF1D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i64, am_sve_regreg_lsl3>; 3125 defm : ldff1<LDFF1W_D, nxv2f32, AArch64ldff1_z, nxv2i1, nxv2f32, am_sve_regreg_lsl2>; 3126 defm : ldff1<LDFF1D, nxv2f64, AArch64ldff1_z, nxv2i1, nxv2f64, am_sve_regreg_lsl3>; 3127 3128 // 4-element contiguous first faulting loads 3129 defm : ldff1<LDFF1B_S, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>; 3130 defm : ldff1<LDFF1SB_S, nxv4i32, AArch64ldff1s_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>; 3131 defm : ldff1<LDFF1H_S, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>; 3132 defm : ldff1<LDFF1SH_S, nxv4i32, AArch64ldff1s_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>; 3133 defm : ldff1<LDFF1W, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i32, am_sve_regreg_lsl2>; 3134 defm : ldff1<LDFF1W, nxv4f32, AArch64ldff1_z, nxv4i1, nxv4f32, am_sve_regreg_lsl2>; 3135 3136 // 8-element contiguous first faulting loads 3137 defm : ldff1<LDFF1B_H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; 3138 defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; 3139 defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>; 3140 defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>; 3141 defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>; 3142 3143 // 16-element contiguous first faulting loads 3144 defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>; 3145} // End HasSVE 3146 3147let Predicates = [HasSVE_or_SME] in { 3148 multiclass st1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty, 3149 SDPatternOperator Store, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> { 3150 // reg + reg 3151 let AddedComplexity = 1 in { 3152 def : Pat<(Store Ty:$vec, (AddrCP GPR64:$base, GPR64:$offset), PredTy:$gp, MemVT), 3153 (RegRegInst ZPR:$vec, PPR:$gp, GPR64sp:$base, GPR64:$offset)>; 3154 } 3155 3156 // scalar + immediate (mul vl) 3157 let AddedComplexity = 2 in { 3158 def : Pat<(Store Ty:$vec, (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), PredTy:$gp, MemVT), 3159 (RegImmInst ZPR:$vec, PPR:$gp, GPR64sp:$base, simm4s1:$offset)>; 3160 } 3161 3162 // base 3163 def : Pat<(Store Ty:$vec, GPR64:$base, (PredTy PPR:$gp), MemVT), 3164 (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>; 3165 } 3166 3167 // 2-element contiguous store 3168 defm : st1<ST1B_D, ST1B_D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i8, am_sve_regreg_lsl0>; 3169 defm : st1<ST1H_D, ST1H_D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i16, am_sve_regreg_lsl1>; 3170 defm : st1<ST1W_D, ST1W_D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i32, am_sve_regreg_lsl2>; 3171 defm : st1<ST1D, ST1D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i64, am_sve_regreg_lsl3>; 3172 3173 // 4-element contiguous store 3174 defm : st1<ST1B_S, ST1B_S_IMM, nxv4i32, AArch64st1, nxv4i1, nxv4i8, am_sve_regreg_lsl0>; 3175 defm : st1<ST1H_S, ST1H_S_IMM, nxv4i32, AArch64st1, nxv4i1, nxv4i16, am_sve_regreg_lsl1>; 3176 defm : st1<ST1W, ST1W_IMM, nxv4i32, AArch64st1, nxv4i1, nxv4i32, am_sve_regreg_lsl2>; 3177 3178 // 8-element contiguous store 3179 defm : st1<ST1B_H, ST1B_H_IMM, nxv8i16, AArch64st1, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; 3180 defm : st1<ST1H, ST1H_IMM, nxv8i16, AArch64st1, nxv8i1, nxv8i16, am_sve_regreg_lsl1>; 3181 3182 // 16-element contiguous store 3183 defm : st1<ST1B, ST1B_IMM, nxv16i8, AArch64st1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>; 3184 3185 // Insert scalar into undef[0] 3186 def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)), 3187 (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>; 3188 def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)), 3189 (INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>; 3190 def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)), 3191 (INSERT_SUBREG (nxv4i32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; 3192 def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)), 3193 (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 3194 3195 def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)), 3196 (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; 3197 def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)), 3198 (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; 3199 def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)), 3200 (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; 3201 def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 (undef)), (bf16 FPR16:$src), 0)), 3202 (INSERT_SUBREG (nxv8bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; 3203 def : Pat<(nxv4bf16 (vector_insert (nxv4bf16 (undef)), (bf16 FPR16:$src), 0)), 3204 (INSERT_SUBREG (nxv4bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; 3205 def : Pat<(nxv2bf16 (vector_insert (nxv2bf16 (undef)), (bf16 FPR16:$src), 0)), 3206 (INSERT_SUBREG (nxv2bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; 3207 def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)), 3208 (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; 3209 def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)), 3210 (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; 3211 def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)), 3212 (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 3213 3214 // Insert scalar into vector[0] 3215 def : Pat<(nxv16i8 (vector_insert nxv16i8:$vec, (i32 GPR32:$src), 0)), 3216 (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>; 3217 def : Pat<(nxv8i16 (vector_insert nxv8i16:$vec, (i32 GPR32:$src), 0)), 3218 (CPY_ZPmR_H ZPR:$vec, (PTRUE_H 1), GPR32:$src)>; 3219 def : Pat<(nxv4i32 (vector_insert nxv4i32:$vec, (i32 GPR32:$src), 0)), 3220 (CPY_ZPmR_S ZPR:$vec, (PTRUE_S 1), GPR32:$src)>; 3221 def : Pat<(nxv2i64 (vector_insert nxv2i64:$vec, (i64 GPR64:$src), 0)), 3222 (CPY_ZPmR_D ZPR:$vec, (PTRUE_D 1), GPR64:$src)>; 3223 3224 def : Pat<(nxv8f16 (vector_insert nxv8f16:$vec, (f16 FPR16:$src), 0)), 3225 (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>; 3226 def : Pat<(nxv8bf16 (vector_insert nxv8bf16:$vec, (bf16 FPR16:$src), 0)), 3227 (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>; 3228 def : Pat<(nxv4f32 (vector_insert nxv4f32:$vec, (f32 FPR32:$src), 0)), 3229 (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>; 3230 def : Pat<(nxv2f64 (vector_insert nxv2f64:$vec, (f64 FPR64:$src), 0)), 3231 (SEL_ZPZZ_D (PTRUE_D 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), ZPR:$vec)>; 3232 3233 // Insert scalar into vector with scalar index 3234 def : Pat<(nxv16i8 (vector_insert nxv16i8:$vec, GPR32:$src, GPR64:$index)), 3235 (CPY_ZPmR_B ZPR:$vec, 3236 (CMPEQ_PPzZZ_B (PTRUE_B 31), 3237 (INDEX_II_B 0, 1), 3238 (DUP_ZR_B (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3239 GPR32:$src)>; 3240 def : Pat<(nxv8i16 (vector_insert nxv8i16:$vec, GPR32:$src, GPR64:$index)), 3241 (CPY_ZPmR_H ZPR:$vec, 3242 (CMPEQ_PPzZZ_H (PTRUE_H 31), 3243 (INDEX_II_H 0, 1), 3244 (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3245 GPR32:$src)>; 3246 def : Pat<(nxv4i32 (vector_insert nxv4i32:$vec, GPR32:$src, GPR64:$index)), 3247 (CPY_ZPmR_S ZPR:$vec, 3248 (CMPEQ_PPzZZ_S (PTRUE_S 31), 3249 (INDEX_II_S 0, 1), 3250 (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3251 GPR32:$src)>; 3252 def : Pat<(nxv2i64 (vector_insert nxv2i64:$vec, GPR64:$src, GPR64:$index)), 3253 (CPY_ZPmR_D ZPR:$vec, 3254 (CMPEQ_PPzZZ_D (PTRUE_D 31), 3255 (INDEX_II_D 0, 1), 3256 (DUP_ZR_D GPR64:$index)), 3257 GPR64:$src)>; 3258 3259 // Insert FP scalar into vector with scalar index 3260 def : Pat<(nxv2f16 (vector_insert nxv2f16:$vec, (f16 FPR16:$src), GPR64:$index)), 3261 (CPY_ZPmV_H ZPR:$vec, 3262 (CMPEQ_PPzZZ_D (PTRUE_D 31), 3263 (INDEX_II_D 0, 1), 3264 (DUP_ZR_D GPR64:$index)), 3265 $src)>; 3266 def : Pat<(nxv4f16 (vector_insert nxv4f16:$vec, (f16 FPR16:$src), GPR64:$index)), 3267 (CPY_ZPmV_H ZPR:$vec, 3268 (CMPEQ_PPzZZ_S (PTRUE_S 31), 3269 (INDEX_II_S 0, 1), 3270 (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3271 $src)>; 3272 def : Pat<(nxv8f16 (vector_insert nxv8f16:$vec, (f16 FPR16:$src), GPR64:$index)), 3273 (CPY_ZPmV_H ZPR:$vec, 3274 (CMPEQ_PPzZZ_H (PTRUE_H 31), 3275 (INDEX_II_H 0, 1), 3276 (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3277 $src)>; 3278 def : Pat<(nxv2bf16 (vector_insert nxv2bf16:$vec, (bf16 FPR16:$src), GPR64:$index)), 3279 (CPY_ZPmV_H ZPR:$vec, 3280 (CMPEQ_PPzZZ_D (PTRUE_D 31), 3281 (INDEX_II_D 0, 1), 3282 (DUP_ZR_D GPR64:$index)), 3283 $src)>; 3284 def : Pat<(nxv4bf16 (vector_insert nxv4bf16:$vec, (bf16 FPR16:$src), GPR64:$index)), 3285 (CPY_ZPmV_H ZPR:$vec, 3286 (CMPEQ_PPzZZ_S (PTRUE_S 31), 3287 (INDEX_II_S 0, 1), 3288 (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3289 $src)>; 3290 def : Pat<(nxv8bf16 (vector_insert nxv8bf16:$vec, (bf16 FPR16:$src), GPR64:$index)), 3291 (CPY_ZPmV_H ZPR:$vec, 3292 (CMPEQ_PPzZZ_H (PTRUE_H 31), 3293 (INDEX_II_H 0, 1), 3294 (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3295 $src)>; 3296 def : Pat<(nxv2f32 (vector_insert nxv2f32:$vec, (f32 FPR32:$src), GPR64:$index)), 3297 (CPY_ZPmV_S ZPR:$vec, 3298 (CMPEQ_PPzZZ_D (PTRUE_D 31), 3299 (INDEX_II_D 0, 1), 3300 (DUP_ZR_D GPR64:$index)), 3301 $src) >; 3302 def : Pat<(nxv4f32 (vector_insert nxv4f32:$vec, (f32 FPR32:$src), GPR64:$index)), 3303 (CPY_ZPmV_S ZPR:$vec, 3304 (CMPEQ_PPzZZ_S (PTRUE_S 31), 3305 (INDEX_II_S 0, 1), 3306 (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))), 3307 $src)>; 3308 def : Pat<(nxv2f64 (vector_insert nxv2f64:$vec, (f64 FPR64:$src), GPR64:$index)), 3309 (CPY_ZPmV_D ZPR:$vec, 3310 (CMPEQ_PPzZZ_D (PTRUE_D 31), 3311 (INDEX_II_D 0, 1), 3312 (DUP_ZR_D $index)), 3313 $src)>; 3314 3315 // Extract element from vector with scalar index 3316 def : Pat<(i32 (vector_extract nxv16i8:$vec, GPR64:$index)), 3317 (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>; 3318 def : Pat<(i32 (vector_extract nxv8i16:$vec, GPR64:$index)), 3319 (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; 3320 def : Pat<(i32 (vector_extract nxv4i32:$vec, GPR64:$index)), 3321 (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; 3322 def : Pat<(i64 (vector_extract nxv2i64:$vec, GPR64:$index)), 3323 (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; 3324 def : Pat<(f16 (vector_extract nxv8f16:$vec, GPR64:$index)), 3325 (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; 3326 def : Pat<(f16 (vector_extract nxv4f16:$vec, GPR64:$index)), 3327 (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; 3328 def : Pat<(f16 (vector_extract nxv2f16:$vec, GPR64:$index)), 3329 (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; 3330 def : Pat<(bf16 (vector_extract nxv8bf16:$vec, GPR64:$index)), 3331 (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; 3332 def : Pat<(bf16 (vector_extract nxv4bf16:$vec, GPR64:$index)), 3333 (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; 3334 def : Pat<(bf16 (vector_extract nxv2bf16:$vec, GPR64:$index)), 3335 (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; 3336 def : Pat<(f32 (vector_extract nxv4f32:$vec, GPR64:$index)), 3337 (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; 3338 def : Pat<(f32 (vector_extract nxv2f32:$vec, GPR64:$index)), 3339 (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; 3340 def : Pat<(f64 (vector_extract nxv2f64:$vec, GPR64:$index)), 3341 (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; 3342 3343 // Extract element from vector with immediate index 3344 def : Pat<(i32 (vector_extract nxv16i8:$vec, sve_elm_idx_extdup_b:$index)), 3345 (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>; 3346 def : Pat<(i32 (vector_extract nxv8i16:$vec, sve_elm_idx_extdup_h:$index)), 3347 (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), ssub)>; 3348 def : Pat<(i32 (vector_extract nxv4i32:$vec, sve_elm_idx_extdup_s:$index)), 3349 (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>; 3350 def : Pat<(i64 (vector_extract nxv2i64:$vec, sve_elm_idx_extdup_d:$index)), 3351 (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; 3352 def : Pat<(f16 (vector_extract nxv8f16:$vec, sve_elm_idx_extdup_h:$index)), 3353 (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>; 3354 def : Pat<(f16 (vector_extract nxv4f16:$vec, sve_elm_idx_extdup_s:$index)), 3355 (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>; 3356 def : Pat<(f16 (vector_extract nxv2f16:$vec, sve_elm_idx_extdup_d:$index)), 3357 (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>; 3358 def : Pat<(bf16 (vector_extract nxv8bf16:$vec, sve_elm_idx_extdup_h:$index)), 3359 (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>; 3360 def : Pat<(bf16 (vector_extract nxv4bf16:$vec, sve_elm_idx_extdup_s:$index)), 3361 (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>; 3362 def : Pat<(bf16 (vector_extract nxv2bf16:$vec, sve_elm_idx_extdup_d:$index)), 3363 (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>; 3364 def : Pat<(f32 (vector_extract nxv4f32:$vec, sve_elm_idx_extdup_s:$index)), 3365 (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>; 3366 def : Pat<(f32 (vector_extract nxv2f32:$vec, sve_elm_idx_extdup_d:$index)), 3367 (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>; 3368 def : Pat<(f64 (vector_extract nxv2f64:$vec, sve_elm_idx_extdup_d:$index)), 3369 (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; 3370 3371 // Extract element from vector with immediate index that's within the bottom 128-bits. 3372 let Predicates = [HasNEON], AddedComplexity = 1 in { 3373 def : Pat<(i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)), 3374 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>; 3375 def : Pat<(i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)), 3376 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>; 3377 def : Pat<(i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index)), 3378 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>; 3379 def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)), 3380 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>; 3381 3382 // Move element from the bottom 128-bits of a scalable vector to a single-element vector. 3383 // Alternative case where insertelement is just scalar_to_vector rather than vector_insert. 3384 def : Pat<(v1f64 (scalar_to_vector 3385 (f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))), 3386 (EXTRACT_SUBREG 3387 (INSvi64lane (IMPLICIT_DEF), (i64 0), 3388 (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index), 3389 dsub)>; 3390 def : Pat<(v1i64 (scalar_to_vector 3391 (i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))), 3392 (EXTRACT_SUBREG 3393 (INSvi64lane (IMPLICIT_DEF), (i64 0), 3394 (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index), 3395 dsub)>; 3396 } // End HasNEON 3397 3398 let Predicates = [HasNEON] in { 3399 def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8), 3400 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>; 3401 def : Pat<(sext_inreg (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index))), i8), 3402 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>; 3403 3404 def : Pat<(sext_inreg (vector_extract nxv8i16:$vec, VectorIndexH:$index), i16), 3405 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>; 3406 def : Pat<(sext_inreg (anyext (i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index))), i16), 3407 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>; 3408 3409 def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))), 3410 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>; 3411 } // End HasNEON 3412 3413 // Extract first element from vector. 3414 let AddedComplexity = 2 in { 3415 def : Pat<(i32 (vector_extract nxv16i8:$Zs, (i64 0))), 3416 (EXTRACT_SUBREG ZPR:$Zs, ssub)>; 3417 def : Pat<(i32 (vector_extract nxv8i16:$Zs, (i64 0))), 3418 (EXTRACT_SUBREG ZPR:$Zs, ssub)>; 3419 def : Pat<(i32 (vector_extract nxv4i32:$Zs, (i64 0))), 3420 (EXTRACT_SUBREG ZPR:$Zs, ssub)>; 3421 def : Pat<(i64 (vector_extract nxv2i64:$Zs, (i64 0))), 3422 (EXTRACT_SUBREG ZPR:$Zs, dsub)>; 3423 def : Pat<(f16 (vector_extract nxv8f16:$Zs, (i64 0))), 3424 (EXTRACT_SUBREG ZPR:$Zs, hsub)>; 3425 def : Pat<(f16 (vector_extract nxv4f16:$Zs, (i64 0))), 3426 (EXTRACT_SUBREG ZPR:$Zs, hsub)>; 3427 def : Pat<(f16 (vector_extract nxv2f16:$Zs, (i64 0))), 3428 (EXTRACT_SUBREG ZPR:$Zs, hsub)>; 3429 def : Pat<(bf16 (vector_extract nxv8bf16:$Zs, (i64 0))), 3430 (EXTRACT_SUBREG ZPR:$Zs, hsub)>; 3431 def : Pat<(bf16 (vector_extract nxv4bf16:$Zs, (i64 0))), 3432 (EXTRACT_SUBREG ZPR:$Zs, hsub)>; 3433 def : Pat<(bf16 (vector_extract nxv2bf16:$Zs, (i64 0))), 3434 (EXTRACT_SUBREG ZPR:$Zs, hsub)>; 3435 def : Pat<(f32 (vector_extract nxv4f32:$Zs, (i64 0))), 3436 (EXTRACT_SUBREG ZPR:$Zs, ssub)>; 3437 def : Pat<(f32 (vector_extract nxv2f32:$Zs, (i64 0))), 3438 (EXTRACT_SUBREG ZPR:$Zs, ssub)>; 3439 def : Pat<(f64 (vector_extract nxv2f64:$Zs, (i64 0))), 3440 (EXTRACT_SUBREG ZPR:$Zs, dsub)>; 3441 } 3442 3443 multiclass sve_predicated_add<SDNode extend, int value> { 3444 def : Pat<(nxv16i8 (add ZPR:$op, (extend nxv16i1:$pred))), 3445 (ADD_ZPmZ_B PPR:$pred, ZPR:$op, (DUP_ZI_B value, 0))>; 3446 def : Pat<(nxv8i16 (add ZPR:$op, (extend nxv8i1:$pred))), 3447 (ADD_ZPmZ_H PPR:$pred, ZPR:$op, (DUP_ZI_H value, 0))>; 3448 def : Pat<(nxv4i32 (add ZPR:$op, (extend nxv4i1:$pred))), 3449 (ADD_ZPmZ_S PPR:$pred, ZPR:$op, (DUP_ZI_S value, 0))>; 3450 def : Pat<(nxv2i64 (add ZPR:$op, (extend nxv2i1:$pred))), 3451 (ADD_ZPmZ_D PPR:$pred, ZPR:$op, (DUP_ZI_D value, 0))>; 3452 } 3453 3454 defm : sve_predicated_add<zext, 1>; 3455 defm : sve_predicated_add<sext, 255>; 3456 3457 def : Pat<(nxv16i8 (sub ZPR:$op, (sext nxv16i1:$pred))), 3458 (SUB_ZPmZ_B PPR:$pred, ZPR:$op, (DUP_ZI_B 255, 0))>; 3459 def : Pat<(nxv8i16 (sub ZPR:$op, (sext nxv8i1:$pred))), 3460 (SUB_ZPmZ_H PPR:$pred, ZPR:$op, (DUP_ZI_H 255, 0))>; 3461 def : Pat<(nxv4i32 (sub ZPR:$op, (sext nxv4i1:$pred))), 3462 (SUB_ZPmZ_S PPR:$pred, ZPR:$op, (DUP_ZI_S 255, 0))>; 3463 def : Pat<(nxv2i64 (sub ZPR:$op, (sext nxv2i1:$pred))), 3464 (SUB_ZPmZ_D PPR:$pred, ZPR:$op, (DUP_ZI_D 255, 0))>; 3465} // End HasSVE_or_SME 3466 3467let Predicates = [HasSVE, HasMatMulInt8] in { 3468 defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>; 3469 defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>; 3470 defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>; 3471} // End HasSVE, HasMatMulInt8 3472 3473let Predicates = [HasSVE_or_SME, HasMatMulInt8] in { 3474 defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", AArch64usdot>; 3475 defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>; 3476 defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>; 3477} // End HasSVE_or_SME, HasMatMulInt8 3478 3479let Predicates = [HasSVE, HasMatMulFP32] in { 3480 defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>; 3481} // End HasSVE, HasMatMulFP32 3482 3483let Predicates = [HasSVE_F16F32MM] in { 3484 def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>; 3485} // End HasSVE_F16F32MM 3486 3487let Predicates = [HasSVE, HasMatMulFP64] in { 3488 defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>; 3489 defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>; 3490 defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>; 3491 defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>; 3492 defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro_z>; 3493 defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro_z, am_sve_regreg_lsl0>; 3494 defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>; 3495 defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>; 3496 defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>; 3497} // End HasSVE, HasMatMulFP64 3498 3499let Predicates = [HasSVE_or_SME, HasMatMulFP64] in { 3500 defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>; 3501 defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>; 3502 defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>; 3503 defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>; 3504 defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>; 3505 defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>; 3506} // End HasSVE_or_SME, HasMatMulFP64 3507 3508let Predicates = [HasSVE2_or_SME] in { 3509 // SVE2 integer multiply-add (indexed) 3510 defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>; 3511 defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>; 3512 3513 // SVE2 saturating multiply-add high (indexed) 3514 defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah_lane>; 3515 defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh_lane>; 3516 3517 // SVE2 saturating multiply-add high (vectors, unpredicated) 3518 defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah>; 3519 defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh>; 3520 3521 // SVE2 integer multiply (indexed) 3522 defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul", int_aarch64_sve_mul_lane>; 3523 3524 // SVE2 saturating multiply high (indexed) 3525 defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh", int_aarch64_sve_sqdmulh_lane>; 3526 defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh", int_aarch64_sve_sqrdmulh_lane>; 3527 3528 // SVE2 signed saturating doubling multiply high (unpredicated) 3529 defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh", int_aarch64_sve_sqdmulh>; 3530 defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>; 3531 3532 // SVE2 integer multiply vectors (unpredicated) 3533 defm MUL_ZZZ : sve2_int_mul<0b000, "mul", AArch64mul>; 3534 defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", AArch64smulh>; 3535 defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", AArch64umulh>; 3536 defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>; 3537 3538 // SVE2 complex integer dot product (indexed) 3539 defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot", int_aarch64_sve_cdot_lane>; 3540 3541 // SVE2 complex integer dot product 3542 defm CDOT_ZZZ : sve2_cintx_dot<"cdot", int_aarch64_sve_cdot>; 3543 3544 // SVE2 complex integer multiply-add (indexed) 3545 defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla", int_aarch64_sve_cmla_lane_x>; 3546 // SVE2 complex saturating multiply-add (indexed) 3547 defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_lane_x>; 3548 3549 // SVE2 complex integer multiply-add 3550 defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla", int_aarch64_sve_cmla_x>; 3551 defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>; 3552 3553 // SVE2 integer multiply long (indexed) 3554 defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb", int_aarch64_sve_smullb_lane>; 3555 defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt", int_aarch64_sve_smullt_lane>; 3556 defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb", int_aarch64_sve_umullb_lane>; 3557 defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt", int_aarch64_sve_umullt_lane>; 3558 3559 // SVE2 saturating multiply (indexed) 3560 defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb", int_aarch64_sve_sqdmullb_lane>; 3561 defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt", int_aarch64_sve_sqdmullt_lane>; 3562 3563 // SVE2 integer multiply-add long (indexed) 3564 defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb_lane>; 3565 defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt", int_aarch64_sve_smlalt_lane>; 3566 defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb", int_aarch64_sve_umlalb_lane>; 3567 defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt", int_aarch64_sve_umlalt_lane>; 3568 defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb", int_aarch64_sve_smlslb_lane>; 3569 defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt", int_aarch64_sve_smlslt_lane>; 3570 defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb", int_aarch64_sve_umlslb_lane>; 3571 defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt", int_aarch64_sve_umlslt_lane>; 3572 3573 // SVE2 integer multiply-add long (vectors, unpredicated) 3574 defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb", int_aarch64_sve_smlalb>; 3575 defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt", int_aarch64_sve_smlalt>; 3576 defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb", int_aarch64_sve_umlalb>; 3577 defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt", int_aarch64_sve_umlalt>; 3578 defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb", int_aarch64_sve_smlslb>; 3579 defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt", int_aarch64_sve_smlslt>; 3580 defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb", int_aarch64_sve_umlslb>; 3581 defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt", int_aarch64_sve_umlslt>; 3582 3583 // SVE2 saturating multiply-add long (indexed) 3584 defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", int_aarch64_sve_sqdmlalb_lane>; 3585 defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt", int_aarch64_sve_sqdmlalt_lane>; 3586 defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb", int_aarch64_sve_sqdmlslb_lane>; 3587 defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt", int_aarch64_sve_sqdmlslt_lane>; 3588 3589 // SVE2 saturating multiply-add long (vectors, unpredicated) 3590 defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb", int_aarch64_sve_sqdmlalb>; 3591 defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt", int_aarch64_sve_sqdmlalt>; 3592 defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb", int_aarch64_sve_sqdmlslb>; 3593 defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt", int_aarch64_sve_sqdmlslt>; 3594 3595 // SVE2 saturating multiply-add interleaved long 3596 defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt", int_aarch64_sve_sqdmlalbt>; 3597 defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt", int_aarch64_sve_sqdmlslbt>; 3598 3599 // SVE2 integer halving add/subtract (predicated) 3600 defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", AArch64shadd>; 3601 defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", AArch64uhadd>; 3602 defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>; 3603 defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>; 3604 defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", AArch64srhadd>; 3605 defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", AArch64urhadd>; 3606 defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>; 3607 defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>; 3608 3609 // SVE2 integer pairwise add and accumulate long 3610 defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>; 3611 defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp", int_aarch64_sve_uadalp>; 3612 3613 // SVE2 integer pairwise arithmetic 3614 defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp", int_aarch64_sve_addp>; 3615 defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp", int_aarch64_sve_smaxp>; 3616 defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp", int_aarch64_sve_umaxp>; 3617 defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp", int_aarch64_sve_sminp>; 3618 defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>; 3619 3620 // SVE2 integer unary operations (predicated) 3621 defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b00, "urecpe", int_aarch64_sve_urecpe>; 3622 defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b01, "ursqrte", int_aarch64_sve_ursqrte>; 3623 defm SQABS_ZPmZ : sve2_int_un_pred_arit< 0b10, "sqabs", int_aarch64_sve_sqabs>; 3624 defm SQNEG_ZPmZ : sve2_int_un_pred_arit< 0b11, "sqneg", int_aarch64_sve_sqneg>; 3625 3626 // SVE2 saturating add/subtract 3627 defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>; 3628 defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", int_aarch64_sve_uqadd>; 3629 defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", int_aarch64_sve_sqsub>; 3630 defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", int_aarch64_sve_uqsub>; 3631 defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", int_aarch64_sve_suqadd>; 3632 defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", int_aarch64_sve_usqadd>; 3633 defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", int_aarch64_sve_sqsubr>; 3634 defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>; 3635 3636 // SVE2 saturating/rounding bitwise shift left (predicated) 3637 defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl, "SRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SRSHLR_ZPmZ">; 3638 defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl, "URSHL_ZPZZ", DestructiveBinaryCommWithRev, "URSHLR_ZPmZ">; 3639 defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag, "SRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SRSHL_ZPmZ", /*isReverseInstr*/ 1>; 3640 defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag, "URSHLR_ZPZZ", DestructiveBinaryCommWithRev, "URSHL_ZPmZ", /*isReverseInstr*/ 1>; 3641 defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl, "SQSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQSHLR_ZPmZ">; 3642 defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl, "UQSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQSHLR_ZPmZ">; 3643 defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl, "SQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHLR_ZPmZ">; 3644 defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl, "UQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHLR_ZPmZ">; 3645 defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag, "SQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQSHL_ZPmZ", /*isReverseInstr*/ 1>; 3646 defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag, "UQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQSHL_ZPmZ", /*isReverseInstr*/ 1>; 3647 defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>; 3648 defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>; 3649 3650 defm SRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_srshl>; 3651 defm URSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_urshl>; 3652 defm SQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqshl>; 3653 defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>; 3654 defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>; 3655 defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>; 3656} // End HasSVE2_or_SME 3657 3658let Predicates = [HasSVE2_or_SME, UseExperimentalZeroingPseudos] in { 3659 defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>; 3660 defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>; 3661 defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>; 3662 defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>; 3663 defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>; 3664} // End HasSVE2_or_SME, UseExperimentalZeroingPseudos 3665 3666let Predicates = [HasSVE2_or_SME] in { 3667 // SVE2 predicated shifts 3668 defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>; 3669 defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>; 3670 defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right< 0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; 3671 defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right< 0b1101, "urshr", "URSHR_ZPZI", AArch64urshri_p>; 3672 defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; 3673 3674 // SVE2 integer add/subtract long 3675 defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>; 3676 defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>; 3677 defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb", int_aarch64_sve_uaddlb>; 3678 defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt", int_aarch64_sve_uaddlt>; 3679 defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb", int_aarch64_sve_ssublb>; 3680 defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt", int_aarch64_sve_ssublt>; 3681 defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb", int_aarch64_sve_usublb>; 3682 defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt", int_aarch64_sve_usublt>; 3683 defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb", int_aarch64_sve_sabdlb>; 3684 defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt", int_aarch64_sve_sabdlt>; 3685 defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb", int_aarch64_sve_uabdlb>; 3686 defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>; 3687 3688 // SVE2 integer add/subtract wide 3689 defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", AArch64saddwb>; 3690 defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", AArch64saddwt>; 3691 defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", AArch64uaddwb>; 3692 defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", AArch64uaddwt>; 3693 defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>; 3694 defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>; 3695 defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>; 3696 defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt", int_aarch64_sve_usubwt>; 3697 3698 // SVE2 integer multiply long 3699 defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>; 3700 defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt", int_aarch64_sve_sqdmullt>; 3701 defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb", int_aarch64_sve_smullb>; 3702 defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt", int_aarch64_sve_smullt>; 3703 defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb", int_aarch64_sve_umullb>; 3704 defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt", int_aarch64_sve_umullt>; 3705 defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb", int_aarch64_sve_pmullb_pair>; 3706 defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt", int_aarch64_sve_pmullt_pair>; 3707 3708 // SVE2 bitwise shift and insert 3709 defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", AArch64vsri>; 3710 defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", AArch64vsli>; 3711 3712 // SVE2 bitwise shift right and accumulate 3713 defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", AArch64ssra>; 3714 defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", AArch64usra>; 3715 defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra, int_aarch64_sve_srshr>; 3716 defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra, AArch64urshri_p>; 3717 3718 // SVE2 complex integer add 3719 defm CADD_ZZI : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>; 3720 defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>; 3721 3722 // SVE2 integer absolute difference and accumulate 3723 defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", AArch64saba>; 3724 defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", AArch64uaba>; 3725 3726 // SVE2 integer absolute difference and accumulate long 3727 defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>; 3728 defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt", int_aarch64_sve_sabalt>; 3729 defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb", int_aarch64_sve_uabalb>; 3730 defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt", int_aarch64_sve_uabalt>; 3731 3732 // SVE2 integer add/subtract long with carry 3733 defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb", int_aarch64_sve_adclb>; 3734 defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt", int_aarch64_sve_adclt>; 3735 defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb", int_aarch64_sve_sbclb>; 3736 defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt", int_aarch64_sve_sbclt>; 3737 3738 // SVE2 bitwise shift right narrow (bottom) 3739 defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>; 3740 defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>; 3741 defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb", int_aarch64_sve_shrnb>; 3742 defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", AArch64rshrnb_pf>; 3743 defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb", int_aarch64_sve_sqshrnb>; 3744 defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb", int_aarch64_sve_sqrshrnb>; 3745 defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb", int_aarch64_sve_uqshrnb>; 3746 defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb", int_aarch64_sve_uqrshrnb>; 3747 3748 // SVE2 bitwise shift right narrow (top) 3749 defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt", int_aarch64_sve_sqshrunt>; 3750 defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>; 3751 defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt", int_aarch64_sve_shrnt>; 3752 defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt", int_aarch64_sve_rshrnt>; 3753 defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt", int_aarch64_sve_sqshrnt>; 3754 defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt", int_aarch64_sve_sqrshrnt>; 3755 defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt", int_aarch64_sve_uqshrnt>; 3756 defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt", int_aarch64_sve_uqrshrnt>; 3757 3758 // SVE2 integer add/subtract narrow high part (bottom) 3759 defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>; 3760 defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb", int_aarch64_sve_raddhnb>; 3761 defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb", int_aarch64_sve_subhnb>; 3762 defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb", int_aarch64_sve_rsubhnb>; 3763 3764 // SVE2 integer add/subtract narrow high part (top) 3765 defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt", int_aarch64_sve_addhnt>; 3766 defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt", int_aarch64_sve_raddhnt>; 3767 defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt", int_aarch64_sve_subhnt>; 3768 defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt", int_aarch64_sve_rsubhnt>; 3769 3770 // SVE2 saturating extract narrow (bottom) 3771 defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb", int_aarch64_sve_sqxtnb>; 3772 defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb", int_aarch64_sve_uqxtnb>; 3773 defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb", int_aarch64_sve_sqxtunb>; 3774 3775 // SVE2 saturating extract narrow (top) 3776 defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>; 3777 defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>; 3778 defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>; 3779} // End HasSVE2_or_SME 3780 3781let Predicates = [HasSVE2] in { 3782 // SVE2 character match 3783 defm MATCH_PPzZZ : sve2_char_match<0b0, "match", int_aarch64_sve_match>; 3784 defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>; 3785} // End HasSVE2 3786 3787let Predicates = [HasSVE2_or_SME] in { 3788 // SVE2 bitwise exclusive-or interleaved 3789 defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>; 3790 defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>; 3791 3792 // SVE2 bitwise shift left long 3793 defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb", int_aarch64_sve_sshllb>; 3794 defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt", int_aarch64_sve_sshllt>; 3795 defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb", int_aarch64_sve_ushllb>; 3796 defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt", int_aarch64_sve_ushllt>; 3797 3798 // SVE2 integer add/subtract interleaved long 3799 defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>; 3800 defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>; 3801 defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>; 3802} // End HasSVE2_or_SME 3803 3804let Predicates = [HasSVE2] in { 3805 // SVE2 histogram generation (segment) 3806 def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>; 3807 3808 // SVE2 histogram generation (vector) 3809 defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>; 3810} // End HasSVE2 3811 3812let Predicates = [HasSVE2_or_SME] in { 3813 // SVE2 floating-point base 2 logarithm as integer 3814 defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", "FLOGB_ZPZZ", int_aarch64_sve_flogb>; 3815} 3816 3817let Predicates = [HasSVE2_or_SME, UseExperimentalZeroingPseudos] in { 3818 defm FLOGB_ZPZZ : sve2_fp_un_pred_zeroing_hsd<int_aarch64_sve_flogb>; 3819} // End HasSVE2_or_SME, UseExperimentalZeroingPseudos 3820 3821let Predicates = [HasSVE2_or_SME] in { 3822 // SVE2 floating-point convert precision 3823 defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">; 3824 defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx", AArch64fcvtx_mt>; 3825 defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">; 3826 defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">; 3827 3828 // SVE2 floating-point pairwise operations 3829 defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp", int_aarch64_sve_faddp>; 3830 defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp", int_aarch64_sve_fmaxnmp>; 3831 defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp", int_aarch64_sve_fminnmp>; 3832 defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp", int_aarch64_sve_fmaxp>; 3833 defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp", int_aarch64_sve_fminp>; 3834 3835 // SVE2 floating-point multiply-add long (indexed) 3836 defm FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b000, "fmlalb", nxv4f32, nxv8f16, int_aarch64_sve_fmlalb_lane>; 3837 defm FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b001, "fmlalt", nxv4f32, nxv8f16, int_aarch64_sve_fmlalt_lane>; 3838 defm FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b010, "fmlslb", nxv4f32, nxv8f16, int_aarch64_sve_fmlslb_lane>; 3839 defm FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b011, "fmlslt", nxv4f32, nxv8f16, int_aarch64_sve_fmlslt_lane>; 3840 3841 // SVE2 floating-point multiply-add long 3842 defm FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b000, "fmlalb", nxv4f32, nxv8f16, int_aarch64_sve_fmlalb>; 3843 defm FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b001, "fmlalt", nxv4f32, nxv8f16, int_aarch64_sve_fmlalt>; 3844 defm FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b010, "fmlslb", nxv4f32, nxv8f16, int_aarch64_sve_fmlslb>; 3845 defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b011, "fmlslt", nxv4f32, nxv8f16, int_aarch64_sve_fmlslt>; 3846 3847 // SVE2 bitwise ternary operations 3848 defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", AArch64eor3>; 3849 defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", AArch64bcax>; 3850 defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", AArch64bsl>; 3851 defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>; 3852 defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>; 3853 defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", AArch64nbsl>; 3854 3855 // SVE2 bitwise xor and rotate right by immediate 3856 defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>; 3857 3858 // SVE2 extract vector (immediate offset, constructive) 3859 def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; 3860 let AddedComplexity = 2 in { 3861 def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))), 3862 (EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>; 3863 } 3864} // End HasSVE2_or_SME 3865 3866let Predicates = [HasSVE2] in { 3867 // SVE2 non-temporal gather loads 3868 defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>; 3869 defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>; 3870 defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv4i16>; 3871 defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather_z, nxv4i16>; 3872 defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather_z, nxv4i32>; 3873 3874 defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv2i8>; 3875 defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather_z, nxv2i8>; 3876 defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv2i16>; 3877 defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather_z, nxv2i16>; 3878 defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>; 3879 defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>; 3880 defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>; 3881} // End HasSVE2 3882 3883let Predicates = [HasSVE2_or_SME] in { 3884 // SVE2 vector splice (constructive) 3885 defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice", AArch64splice>; 3886} // End HasSVE2_or_SME 3887 3888let Predicates = [HasSVE2] in { 3889 // SVE2 non-temporal scatter stores 3890 defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>; 3891 defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>; 3892 defm STNT1W_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b101, "stnt1w", AArch64stnt1_scatter, nxv4i32>; 3893 3894 defm STNT1B_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b000, "stnt1b", AArch64stnt1_scatter, nxv2i8>; 3895 defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>; 3896 defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>; 3897 defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>; 3898} // End HasSVE2 3899 3900let Predicates = [HasSVE2_or_SME] in { 3901 // SVE2 table lookup (three sources) 3902 defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>; 3903 defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", 0b01, int_aarch64_sve_tbx>; 3904 3905 // SVE2 integer compare scalar count and limit 3906 defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, null_frag>; 3907 defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>; 3908 defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, null_frag>; 3909 defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>; 3910 3911 defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, null_frag>; 3912 defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>; 3913 defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, null_frag>; 3914 defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>; 3915 3916 // SVE2 pointer conflict compare 3917 defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">; 3918 defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">; 3919} // End HasSVE2_or_SME 3920 3921let Predicates = [HasSVEAES, HasNonStreamingSVE2_or_SSVE_AES] in { 3922 // SVE2 crypto destructive binary operations 3923 defm AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8, int_aarch64_sve_aese, nxv16i8>; 3924 defm AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8, int_aarch64_sve_aesd, nxv16i8>; 3925 3926 // SVE2 crypto unary operations 3927 defm AESMC_ZZ_B : sve2_crypto_unary_op<0b0, "aesmc", int_aarch64_sve_aesmc>; 3928 defm AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc", int_aarch64_sve_aesimc>; 3929 3930 // PMULLB and PMULLT instructions which operate with 64-bit source and 3931 // 128-bit destination elements are enabled with crypto extensions, similar 3932 // to NEON PMULL2 instruction. 3933 defm PMULLB_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11010, "pmullb", int_aarch64_sve_pmullb_pair>; 3934 defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>; 3935} 3936 3937let Predicates = [HasSVE2SM4] in { 3938 // SVE2 crypto constructive binary operations 3939 defm SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32, int_aarch64_sve_sm4ekey, nxv4i32>; 3940 // SVE2 crypto destructive binary operations 3941 defm SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32, int_aarch64_sve_sm4e, nxv4i32>; 3942} // End HasSVE2SM4 3943 3944let Predicates = [HasSVE2SHA3] in { 3945 // SVE2 crypto constructive binary operations 3946 defm RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64, int_aarch64_sve_rax1, nxv2i64>; 3947} // End HasSVE2SHA3 3948 3949let Predicates = [HasSVEBitPerm, HasNonStreamingSVE2_or_SSVE_BitPerm] in { 3950 // SVE2 bitwise permute 3951 defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext", int_aarch64_sve_bext_x>; 3952 defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep", int_aarch64_sve_bdep_x>; 3953 defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp", int_aarch64_sve_bgrp_x>; 3954} 3955 3956let Predicates = [HasSVEAES2, HasNonStreamingSVE2p1_or_SSVE_AES] in { 3957 // SVE_AES2 multi-vector instructions (x2) 3958 def AESE_2ZZI_B : sve_crypto_binary_multi2<0b000, "aese">; 3959 def AESD_2ZZI_B : sve_crypto_binary_multi2<0b010, "aesd">; 3960 def AESEMC_2ZZI_B : sve_crypto_binary_multi2<0b100, "aesemc">; 3961 def AESDMIC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">; 3962 // SVE_AES2 multi-vector instructions (x4) 3963 def AESE_4ZZI_B : sve_crypto_binary_multi4<0b0000, "aese">; 3964 def AESD_4ZZI_B : sve_crypto_binary_multi4<0b0100, "aesd">; 3965 def AESEMC_4ZZI_B : sve_crypto_binary_multi4<0b1000, "aesemc">; 3966 def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">; 3967 3968 // SVE_AES2 multi-vector polynomial multiply 3969 def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">; 3970 def PMULL_2ZZZ_Q : sve_crypto_pmull_multi<"pmull">; 3971} 3972 3973//===----------------------------------------------------------------------===// 3974// SME or SVE2.1 instructions 3975//===----------------------------------------------------------------------===// 3976 3977let Predicates = [HasSVE2p1_or_SME] in { 3978defm REVD_ZPmZ : sve2_int_perm_revd<"revd", AArch64revd_mt>; 3979 3980defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, AArch64sclamp>; 3981defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, AArch64uclamp>; 3982 3983defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>; 3984} // End HasSVE2p1_or_SME 3985 3986//===----------------------------------------------------------------------===// 3987// SME2 or SVE2.1 instructions 3988//===----------------------------------------------------------------------===// 3989 3990let Predicates = [HasSVE2p1_or_SME2] in { 3991defm FCLAMP_ZZZ : sve_fp_clamp<"fclamp", AArch64fclamp>; 3992 3993defm FDOT_ZZZ_S : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>; 3994defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>; 3995 3996defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>; 3997defm BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt>; 3998defm BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb_lane>; 3999defm BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt_lane>; 4000 4001defm SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>; 4002defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>; 4003defm SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_lane_x2>; 4004defm UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>; 4005 4006defm CNTP_XCI : sve2p1_pcount_pn<"cntp", 0b000>; 4007defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext", int_aarch64_sve_pext>; 4008defm PEXT_2PCI : sve2p1_pred_as_ctr_to_mask_pair<"pext">; 4009defm PTRUE_C : sve2p1_ptrue_pn<"ptrue">; 4010 4011defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>; 4012defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>; 4013defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>; 4014defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>; 4015defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>; 4016defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>; 4017 4018// Load to two registers 4019defm LD1B_2Z : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8, ZZ_b_strided_and_contiguous>; 4020defm LD1H_2Z : sve2p1_mem_cld_ss_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r, GPR64shifted16, ZZ_h_strided_and_contiguous>; 4021defm LD1W_2Z : sve2p1_mem_cld_ss_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r, GPR64shifted32, ZZ_s_strided_and_contiguous>; 4022defm LD1D_2Z : sve2p1_mem_cld_ss_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r, GPR64shifted64, ZZ_d_strided_and_contiguous>; 4023defm LD1B_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, ZZ_b_strided_and_contiguous>; 4024defm LD1H_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r, ZZ_h_strided_and_contiguous>; 4025defm LD1W_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r, ZZ_s_strided_and_contiguous>; 4026defm LD1D_2Z_IMM : sve2p1_mem_cld_si_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r, ZZ_d_strided_and_contiguous>; 4027defm LDNT1B_2Z : sve2p1_mem_cld_ss_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r, GPR64shifted8, ZZ_b_strided_and_contiguous>; 4028defm LDNT1H_2Z : sve2p1_mem_cld_ss_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r, GPR64shifted16, ZZ_h_strided_and_contiguous>; 4029defm LDNT1W_2Z : sve2p1_mem_cld_ss_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r, GPR64shifted32, ZZ_s_strided_and_contiguous>; 4030defm LDNT1D_2Z : sve2p1_mem_cld_ss_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r, GPR64shifted64, ZZ_d_strided_and_contiguous>; 4031defm LDNT1B_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r, ZZ_b_strided_and_contiguous>; 4032defm LDNT1H_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r, ZZ_h_strided_and_contiguous>; 4033defm LDNT1W_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r, ZZ_s_strided_and_contiguous>; 4034defm LDNT1D_2Z_IMM : sve2p1_mem_cld_si_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r, ZZ_d_strided_and_contiguous>; 4035 4036// Load to four registers 4037defm LD1B_4Z : sve2p1_mem_cld_ss_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r, GPR64shifted8, ZZZZ_b_strided_and_contiguous>; 4038defm LD1H_4Z : sve2p1_mem_cld_ss_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r, GPR64shifted16, ZZZZ_h_strided_and_contiguous>; 4039defm LD1W_4Z : sve2p1_mem_cld_ss_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r, GPR64shifted32, ZZZZ_s_strided_and_contiguous>; 4040defm LD1D_4Z : sve2p1_mem_cld_ss_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r, GPR64shifted64, ZZZZ_d_strided_and_contiguous>; 4041defm LD1B_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r, ZZZZ_b_strided_and_contiguous>; 4042defm LD1H_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r, ZZZZ_h_strided_and_contiguous>; 4043defm LD1W_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r, ZZZZ_s_strided_and_contiguous>; 4044defm LD1D_4Z_IMM : sve2p1_mem_cld_si_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r, ZZZZ_d_strided_and_contiguous>; 4045defm LDNT1B_4Z : sve2p1_mem_cld_ss_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r, GPR64shifted8, ZZZZ_b_strided_and_contiguous>; 4046defm LDNT1H_4Z : sve2p1_mem_cld_ss_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r, GPR64shifted16, ZZZZ_h_strided_and_contiguous>; 4047defm LDNT1W_4Z : sve2p1_mem_cld_ss_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r, GPR64shifted32, ZZZZ_s_strided_and_contiguous>; 4048defm LDNT1D_4Z : sve2p1_mem_cld_ss_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r, GPR64shifted64, ZZZZ_d_strided_and_contiguous>; 4049defm LDNT1B_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r, ZZZZ_b_strided_and_contiguous>; 4050defm LDNT1H_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r, ZZZZ_h_strided_and_contiguous>; 4051defm LDNT1W_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r, ZZZZ_s_strided_and_contiguous>; 4052defm LDNT1D_4Z_IMM : sve2p1_mem_cld_si_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r, ZZZZ_d_strided_and_contiguous>; 4053 4054// Stores of two registers 4055def ST1B_2Z : sve2p1_mem_cst_ss_2z<"st1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>; 4056def ST1H_2Z : sve2p1_mem_cst_ss_2z<"st1h", 0b01, 0b0, ZZ_h_mul_r, GPR64shifted16>; 4057def ST1W_2Z : sve2p1_mem_cst_ss_2z<"st1w", 0b10, 0b0, ZZ_s_mul_r, GPR64shifted32>; 4058def ST1D_2Z : sve2p1_mem_cst_ss_2z<"st1d", 0b11, 0b0, ZZ_d_mul_r, GPR64shifted64>; 4059defm ST1B_2Z_IMM : sve2p1_mem_cst_si_2z<"st1b", 0b00, 0b0, ZZ_b_mul_r>; 4060defm ST1H_2Z_IMM : sve2p1_mem_cst_si_2z<"st1h", 0b01, 0b0, ZZ_h_mul_r>; 4061defm ST1W_2Z_IMM : sve2p1_mem_cst_si_2z<"st1w", 0b10, 0b0, ZZ_s_mul_r>; 4062defm ST1D_2Z_IMM : sve2p1_mem_cst_si_2z<"st1d", 0b11, 0b0, ZZ_d_mul_r>; 4063def STNT1B_2Z : sve2p1_mem_cst_ss_2z<"stnt1b", 0b00, 0b1, ZZ_b_mul_r, GPR64shifted8>; 4064def STNT1H_2Z : sve2p1_mem_cst_ss_2z<"stnt1h", 0b01, 0b1, ZZ_h_mul_r, GPR64shifted16>; 4065def STNT1W_2Z : sve2p1_mem_cst_ss_2z<"stnt1w", 0b10, 0b1, ZZ_s_mul_r, GPR64shifted32>; 4066def STNT1D_2Z : sve2p1_mem_cst_ss_2z<"stnt1d", 0b11, 0b1, ZZ_d_mul_r, GPR64shifted64>; 4067defm STNT1B_2Z_IMM : sve2p1_mem_cst_si_2z<"stnt1b", 0b00, 0b1, ZZ_b_mul_r>; 4068defm STNT1H_2Z_IMM : sve2p1_mem_cst_si_2z<"stnt1h", 0b01, 0b1, ZZ_h_mul_r>; 4069defm STNT1W_2Z_IMM : sve2p1_mem_cst_si_2z<"stnt1w", 0b10, 0b1, ZZ_s_mul_r>; 4070defm STNT1D_2Z_IMM : sve2p1_mem_cst_si_2z<"stnt1d", 0b11, 0b1, ZZ_d_mul_r>; 4071 4072// Stores of four registers 4073def ST1B_4Z : sve2p1_mem_cst_ss_4z<"st1b", 0b00, 0b0, ZZZZ_b_mul_r, GPR64shifted8>; 4074def ST1H_4Z : sve2p1_mem_cst_ss_4z<"st1h", 0b01, 0b0, ZZZZ_h_mul_r, GPR64shifted16>; 4075def ST1W_4Z : sve2p1_mem_cst_ss_4z<"st1w", 0b10, 0b0, ZZZZ_s_mul_r, GPR64shifted32>; 4076def ST1D_4Z : sve2p1_mem_cst_ss_4z<"st1d", 0b11, 0b0, ZZZZ_d_mul_r, GPR64shifted64>; 4077defm ST1B_4Z_IMM : sve2p1_mem_cst_si_4z<"st1b", 0b00, 0b0, ZZZZ_b_mul_r>; 4078defm ST1H_4Z_IMM : sve2p1_mem_cst_si_4z<"st1h", 0b01, 0b0, ZZZZ_h_mul_r>; 4079defm ST1W_4Z_IMM : sve2p1_mem_cst_si_4z<"st1w", 0b10, 0b0, ZZZZ_s_mul_r>; 4080defm ST1D_4Z_IMM : sve2p1_mem_cst_si_4z<"st1d", 0b11, 0b0, ZZZZ_d_mul_r>; 4081def STNT1B_4Z : sve2p1_mem_cst_ss_4z<"stnt1b", 0b00, 0b1, ZZZZ_b_mul_r, GPR64shifted8>; 4082def STNT1H_4Z : sve2p1_mem_cst_ss_4z<"stnt1h", 0b01, 0b1, ZZZZ_h_mul_r, GPR64shifted16>; 4083def STNT1W_4Z : sve2p1_mem_cst_ss_4z<"stnt1w", 0b10, 0b1, ZZZZ_s_mul_r, GPR64shifted32>; 4084def STNT1D_4Z : sve2p1_mem_cst_ss_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r, GPR64shifted64>; 4085defm STNT1B_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1b", 0b00, 0b1, ZZZZ_b_mul_r>; 4086defm STNT1H_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1h", 0b01, 0b1, ZZZZ_h_mul_r>; 4087defm STNT1W_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1w", 0b10, 0b1, ZZZZ_s_mul_r>; 4088defm STNT1D_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r>; 4089 4090multiclass store_pn_x2<ValueType Ty, SDPatternOperator Store, 4091 Instruction RegImmInst> { 4092 def : Pat<(Store Ty:$vec0, Ty:$vec1, aarch64svcount:$PNg, GPR64:$base), 4093 (RegImmInst (REG_SEQUENCE ZPR2Mul2, Ty:$vec0, zsub0, Ty:$vec1, zsub1), 4094 PNR:$PNg, GPR64:$base, (i64 0))>; 4095} 4096 4097// Stores of 2 consecutive vectors 4098defm : store_pn_x2<nxv16i8, int_aarch64_sve_st1_pn_x2, ST1B_2Z_IMM>; 4099defm : store_pn_x2<nxv8i16, int_aarch64_sve_st1_pn_x2, ST1H_2Z_IMM>; 4100defm : store_pn_x2<nxv4i32, int_aarch64_sve_st1_pn_x2, ST1W_2Z_IMM>; 4101defm : store_pn_x2<nxv2i64, int_aarch64_sve_st1_pn_x2, ST1D_2Z_IMM>; 4102defm : store_pn_x2<nxv16i8, int_aarch64_sve_stnt1_pn_x2, STNT1B_2Z_IMM>; 4103defm : store_pn_x2<nxv8i16, int_aarch64_sve_stnt1_pn_x2, STNT1H_2Z_IMM>; 4104defm : store_pn_x2<nxv4i32, int_aarch64_sve_stnt1_pn_x2, STNT1W_2Z_IMM>; 4105defm : store_pn_x2<nxv2i64, int_aarch64_sve_stnt1_pn_x2, STNT1D_2Z_IMM>; 4106defm : store_pn_x2<nxv8f16, int_aarch64_sve_st1_pn_x2, ST1H_2Z_IMM>; 4107defm : store_pn_x2<nxv8bf16, int_aarch64_sve_st1_pn_x2, ST1H_2Z_IMM>; 4108defm : store_pn_x2<nxv4f32, int_aarch64_sve_st1_pn_x2, ST1W_2Z_IMM>; 4109defm : store_pn_x2<nxv2f64, int_aarch64_sve_st1_pn_x2, ST1D_2Z_IMM>; 4110defm : store_pn_x2<nxv8f16, int_aarch64_sve_stnt1_pn_x2, STNT1H_2Z_IMM>; 4111defm : store_pn_x2<nxv8bf16, int_aarch64_sve_stnt1_pn_x2, STNT1H_2Z_IMM>; 4112defm : store_pn_x2<nxv4f32, int_aarch64_sve_stnt1_pn_x2, STNT1W_2Z_IMM>; 4113defm : store_pn_x2<nxv2f64, int_aarch64_sve_stnt1_pn_x2, STNT1D_2Z_IMM>; 4114 4115multiclass store_pn_x4<ValueType Ty, SDPatternOperator Store, 4116 Instruction RegImmInst> { 4117 def : Pat<(Store Ty:$vec0, Ty:$vec1, Ty:$vec2, Ty:$vec3, aarch64svcount:$PNg, GPR64:$base), 4118 (RegImmInst (REG_SEQUENCE ZPR4Mul4, Ty:$vec0, zsub0, Ty:$vec1, zsub1, 4119 Ty:$vec2, zsub2, Ty:$vec3, zsub3), 4120 PNR:$PNg, GPR64:$base, (i64 0))>; 4121} 4122 4123// Stores of 4 consecutive vectors 4124defm : store_pn_x4<nxv16i8, int_aarch64_sve_st1_pn_x4, ST1B_4Z_IMM>; 4125defm : store_pn_x4<nxv8i16, int_aarch64_sve_st1_pn_x4, ST1H_4Z_IMM>; 4126defm : store_pn_x4<nxv4i32, int_aarch64_sve_st1_pn_x4, ST1W_4Z_IMM>; 4127defm : store_pn_x4<nxv2i64, int_aarch64_sve_st1_pn_x4, ST1D_4Z_IMM>; 4128defm : store_pn_x4<nxv16i8, int_aarch64_sve_stnt1_pn_x4, STNT1B_4Z_IMM>; 4129defm : store_pn_x4<nxv8i16, int_aarch64_sve_stnt1_pn_x4, STNT1H_4Z_IMM>; 4130defm : store_pn_x4<nxv4i32, int_aarch64_sve_stnt1_pn_x4, STNT1W_4Z_IMM>; 4131defm : store_pn_x4<nxv2i64, int_aarch64_sve_stnt1_pn_x4, STNT1D_4Z_IMM>; 4132defm : store_pn_x4<nxv8f16, int_aarch64_sve_st1_pn_x4, ST1H_4Z_IMM>; 4133defm : store_pn_x4<nxv8bf16, int_aarch64_sve_st1_pn_x4, ST1H_4Z_IMM>; 4134defm : store_pn_x4<nxv4f32, int_aarch64_sve_st1_pn_x4, ST1W_4Z_IMM>; 4135defm : store_pn_x4<nxv2f64, int_aarch64_sve_st1_pn_x4, ST1D_4Z_IMM>; 4136defm : store_pn_x4<nxv8f16, int_aarch64_sve_stnt1_pn_x4, STNT1H_4Z_IMM>; 4137defm : store_pn_x4<nxv8bf16, int_aarch64_sve_stnt1_pn_x4, STNT1H_4Z_IMM>; 4138defm : store_pn_x4<nxv4f32, int_aarch64_sve_stnt1_pn_x4, STNT1W_4Z_IMM>; 4139defm : store_pn_x4<nxv2f64, int_aarch64_sve_stnt1_pn_x4, STNT1D_4Z_IMM>; 4140 4141defm WHILEGE_2PXX : sve2p1_int_while_rr_pair<"whilege", 0b000>; 4142defm WHILEGT_2PXX : sve2p1_int_while_rr_pair<"whilegt", 0b001>; 4143defm WHILELT_2PXX : sve2p1_int_while_rr_pair<"whilelt", 0b010>; 4144defm WHILELE_2PXX : sve2p1_int_while_rr_pair<"whilele", 0b011>; 4145defm WHILEHS_2PXX : sve2p1_int_while_rr_pair<"whilehs", 0b100>; 4146defm WHILEHI_2PXX : sve2p1_int_while_rr_pair<"whilehi", 0b101>; 4147defm WHILELO_2PXX : sve2p1_int_while_rr_pair<"whilelo", 0b110>; 4148defm WHILELS_2PXX : sve2p1_int_while_rr_pair<"whilels", 0b111>; 4149defm WHILEGE_CXX : sve2p1_int_while_rr_pn<"whilege", 0b000>; 4150defm WHILEGT_CXX : sve2p1_int_while_rr_pn<"whilegt", 0b001>; 4151defm WHILELT_CXX : sve2p1_int_while_rr_pn<"whilelt", 0b010>; 4152defm WHILELE_CXX : sve2p1_int_while_rr_pn<"whilele", 0b011>; 4153defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; 4154defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; 4155defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; 4156defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; 4157} // End HasSVE2p1_or_SME2 4158 4159let Predicates = [HasSVE_or_SME] in { 4160 4161// Aliases for existing SVE instructions for which predicate-as-counter are 4162// accepted as an operand to the instruction 4163def : InstAlias<"mov $Pd, $Pn", 4164 (ORR_PPzPP PPRorPNR8:$Pd, PPRorPNR8:$Pn, PPRorPNR8:$Pn, PPRorPNR8:$Pn), 0>; 4165 4166def : InstAlias<"pfalse\t$Pd", (PFALSE PPRorPNR8:$Pd), 0>; 4167 4168} 4169 4170//===----------------------------------------------------------------------===// 4171// Non-widening BFloat16 to BFloat16 instructions 4172//===----------------------------------------------------------------------===// 4173 4174let Predicates = [HasSVEB16B16] in { 4175defm BFADD_ZZZ : sve_fp_3op_u_zd_bfloat<0b000, "bfadd", AArch64fadd>; 4176defm BFSUB_ZZZ : sve_fp_3op_u_zd_bfloat<0b001, "bfsub", AArch64fsub>; 4177defm BFMUL_ZZZ : sve_fp_3op_u_zd_bfloat<0b010, "bfmul", AArch64fmul>; 4178 4179defm BFADD_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0000, "bfadd", "BFADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>; 4180defm BFSUB_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0001, "bfsub", "BFSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryComm>; 4181defm BFMUL_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0010, "bfmul", "BFMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>; 4182defm BFMAXNM_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0100, "bfmaxnm", "BFMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>; 4183defm BFMINNM_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0101, "bfminnm", "BFMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>; 4184defm BFMAX_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0110, "bfmax", "BFMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>; 4185defm BFMIN_ZPmZZ : sve_fp_2op_p_zds_bfloat<0b0111, "bfmin", "BFMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>; 4186 4187defm BFADD_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fadd_p>; 4188defm BFSUB_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fsub_p>; 4189defm BFMUL_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fmul_p>; 4190defm BFMAXNM_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fmaxnm_p>; 4191defm BFMINNM_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fminnm_p>; 4192defm BFMAX_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fmax_p>; 4193defm BFMIN_ZPZZ : sve_fp_bin_pred_bfloat<AArch64fmin_p>; 4194 4195defm BFMLA_ZPmZZ : sve_fp_3op_p_zds_a_bfloat<0b00, "bfmla", "BFMLA_ZPZZZ", AArch64fmla_m1>; 4196defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bfloat<0b01, "bfmls", "BFMLS_ZPZZZ", AArch64fmls_m1>; 4197 4198defm BFMLA_ZPZZZ : sve_fp_3op_pred_bfloat<AArch64fmla_p>; 4199defm BFMLS_ZPZZZ : sve_fp_3op_pred_bfloat<AArch64fmls_p>; 4200 4201defm BFMLA_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmla", 0b10, int_aarch64_sve_fmla_lane>; 4202defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, int_aarch64_sve_fmls_lane>; 4203 4204defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", int_aarch64_sve_fmul_lane>; 4205 4206defm BFCLAMP_ZZZ : sve_fp_clamp_bfloat<"bfclamp", AArch64fclamp>; 4207} // End HasSVEB16B16 4208 4209let Predicates = [HasSVEB16B16, UseExperimentalZeroingPseudos] in { 4210defm BFADD_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fadd>; 4211defm BFSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fsub>; 4212defm BFMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmul>; 4213defm BFMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmaxnm>; 4214defm BFMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fminnm>; 4215defm BFMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmin>; 4216defm BFMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmax>; 4217} // HasSVEB16B16, UseExperimentalZeroingPseudos 4218 4219let Predicates = [HasSVEBFSCALE] in { 4220 def BFSCALE_ZPZZ : sve_fp_2op_p_zds_bfscale<0b1001, "bfscale", DestructiveBinary>; 4221} // HasSVEBFSCALE 4222//===----------------------------------------------------------------------===// 4223// SME2.1 or SVE2.1 instructions 4224//===----------------------------------------------------------------------===// 4225let Predicates = [HasSVE2p1_or_SME2p1] in { 4226defm FADDQV : sve2p1_fp_reduction_q<0b000, "faddqv", int_aarch64_sve_faddqv>; 4227defm FMAXNMQV : sve2p1_fp_reduction_q<0b100, "fmaxnmqv", int_aarch64_sve_fmaxnmqv>; 4228defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmqv>; 4229defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>; 4230defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>; 4231 4232defm DUPQ_ZZI : sve2p1_dupq<"dupq", int_aarch64_sve_dup_laneq>; 4233defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>; 4234 4235defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>; 4236defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>; 4237 4238defm ORQV_VPZ : sve2p1_int_reduce_q<0b1100, "orqv", int_aarch64_sve_orqv>; 4239defm EORQV_VPZ : sve2p1_int_reduce_q<0b1101, "eorqv", int_aarch64_sve_eorqv>; 4240defm ANDQV_VPZ : sve2p1_int_reduce_q<0b1110, "andqv", int_aarch64_sve_andqv>; 4241defm ADDQV_VPZ : sve2p1_int_reduce_q<0b0001, "addqv", int_aarch64_sve_addqv>; 4242defm SMAXQV_VPZ : sve2p1_int_reduce_q<0b0100, "smaxqv", int_aarch64_sve_smaxqv>; 4243defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv", int_aarch64_sve_umaxqv>; 4244defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv", int_aarch64_sve_sminqv>; 4245defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv", int_aarch64_sve_uminqv>; 4246 4247defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1", int_aarch64_sve_zipq1>; 4248defm ZIPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b001, "zipq2", int_aarch64_sve_zipq2>; 4249defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1", int_aarch64_sve_uzpq1>; 4250defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2", int_aarch64_sve_uzpq2>; 4251defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, int_aarch64_sve_tbxq>; 4252defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>; 4253} // End HasSVE2p1_or_SME2p1 4254 4255 4256//===----------------------------------------------------------------------===// 4257// SME2.2 or SVE2.2 instructions 4258//===----------------------------------------------------------------------===// 4259let Predicates = [HasSVE2p2_or_SME2p2] in { 4260 // SVE Floating-point convert precision, zeroing predicate 4261 defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">; 4262 4263 // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate 4264 defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">; 4265 def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>; 4266 // Placing even 4267 defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>; 4268 4269 // SVE2p2 floating-point convert precision up, zeroing predicate 4270 defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">; 4271 4272 // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate 4273 def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>; 4274 defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>; 4275 4276 // Floating-point convert to integer, zeroing predicate 4277 defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", "int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>; 4278 defm FCVTZU_ZPzZ : sve_fp_z2op_p_zd_d<0b1, "fcvtzu", "int_aarch64_sve_fcvtzu", AArch64fcvtzu_mt>; 4279 // Integer convert to floating-point, zeroing predicate 4280 defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf", "int_aarch64_sve_scvtf", AArch64scvtf_mt>; 4281 defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf", "int_aarch64_sve_ucvtf", AArch64ucvtf_mt>; 4282 // Signed integer base 2 logarithm of fp value, zeroing predicate 4283 defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb", int_aarch64_sve_flogb>; 4284 4285 // SVE2 integer unary operations, zeroing predicate 4286 defm URECPE_ZPzZ : sve2_int_un_pred_arit_z_S<0b00, "urecpe", int_aarch64_sve_urecpe>; 4287 defm URSQRTE_ZPzZ : sve2_int_un_pred_arit_z_S<0b01, "ursqrte", int_aarch64_sve_ursqrte>; 4288 defm SQABS_ZPzZ : sve2_int_un_pred_arit_z< 0b10, "sqabs", int_aarch64_sve_sqabs>; 4289 defm SQNEG_ZPzZ : sve2_int_un_pred_arit_z< 0b11, "sqneg", int_aarch64_sve_sqneg>; 4290 4291 // Floating point round to integral fp value in integer size range 4292 // Merging 4293 defm FRINT32Z_ZPmZ : sve_fp_2op_p_zd_frint<0b00, "frint32z">; 4294 defm FRINT32X_ZPmZ : sve_fp_2op_p_zd_frint<0b01, "frint32x">; 4295 defm FRINT64X_ZPmZ : sve_fp_2op_p_zd_frint<0b10, "frint64z">; 4296 defm FRINT64Z_ZPmZ : sve_fp_2op_p_zd_frint<0b11, "frint64x">; 4297 // Zeroing 4298 defm FRINT32Z_ZPzZ : sve_fp_z2op_p_zd_frint<0b00, "frint32z">; 4299 defm FRINT32X_ZPzZ : sve_fp_z2op_p_zd_frint<0b01, "frint32x">; 4300 defm FRINT64Z_ZPzZ : sve_fp_z2op_p_zd_frint<0b10, "frint64z">; 4301 defm FRINT64X_ZPzZ : sve_fp_z2op_p_zd_frint<0b11, "frint64x">; 4302 4303 // Floating-point round to integral fp value, zeroing predicate 4304 defm FRINTN_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00000, "frintn", AArch64frintn_mt>; 4305 defm FRINTP_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00001, "frintp", AArch64frintp_mt>; 4306 defm FRINTM_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00010, "frintm", AArch64frintm_mt>; 4307 defm FRINTZ_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00011, "frintz", AArch64frintz_mt>; 4308 defm FRINTA_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00100, "frinta", AArch64frinta_mt>; 4309 defm FRINTX_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00110, "frintx", AArch64frintx_mt>; 4310 defm FRINTI_ZPzZ : sve_fp_z2op_p_zd_hsd<0b00111, "frinti", AArch64frinti_mt>; 4311 // Floating-point invert exponent, zeroing predicate 4312 defm FRECPX_ZPzZ : sve_fp_z2op_p_zd_hsd<0b01100, "frecpx", AArch64frecpx_mt>; 4313 // Floating-point square root, zeroing predicate 4314 defm FSQRT_ZPZz : sve_fp_z2op_p_zd_hsd<0b01101, "fsqrt", AArch64fsqrt_mt>; 4315 4316 // SVE2p2 integer unary arithmetic (bitwise), zeroing predicate 4317 defm CLS_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b000, "cls", AArch64cls_mt>; 4318 defm CLZ_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b001, "clz", AArch64clz_mt>; 4319 defm CNT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b010, "cnt", AArch64cnt_mt>; 4320 defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot", AArch64cnot_mt>; 4321 defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, "not", AArch64not_mt>; 4322 4323 // floating point 4324 defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs", AArch64fabs_mt>; 4325 defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg", AArch64fneg_mt>; 4326 4327 // SVE2p2 integer unary arithmetic, zeroing predicate 4328 defm SXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b000, "sxtb", AArch64sxt_mt>; 4329 defm UXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b001, "uxtb", AArch64uxt_mt>; 4330 defm SXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b010, "sxth", AArch64sxt_mt>; 4331 defm UXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b011, "uxth", AArch64uxt_mt>; 4332 defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs", AArch64abs_mt>; 4333 defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg", AArch64neg_mt>; 4334 defm SXTW_ZPzZ : sve_int_un_pred_arit_d_z<0b100, "sxtw", AArch64sxt_mt>; 4335 defm UXTW_ZPzZ : sve_int_un_pred_arit_d_z<0b101, "uxtw", AArch64uxt_mt>; 4336 4337 // SVE predicate count 4338 defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">; 4339 defm LASTP_XPP : sve_int_pcount_pred_tmp<0b010, "lastp">; 4340 4341 // SVE reverse within elements, zeroing predicate 4342 defm RBIT_ZPzZ : sve_int_perm_rev_rbit_z<"rbit", AArch64rbit_mt>; 4343 defm REVB_ZPzZ : sve_int_perm_rev_revb_z<"revb", AArch64revb_mt>; 4344 defm REVH_ZPzZ : sve_int_perm_rev_revh_z<"revh", AArch64revh_mt>; 4345 defm REVW_ZPzZ : sve_int_perm_rev_revw_z<"revw", AArch64revw_mt>; 4346 defm REVD_ZPzZ : sve_int_perm_rev_revd_z<"revd", AArch64revd_mt>; 4347} // End HasSME2p2orSVE2p2 4348 4349//===----------------------------------------------------------------------===// 4350// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2 4351//===----------------------------------------------------------------------===// 4352let Predicates = [HasNonStreamingSVE2p2_or_SME2p2] in { 4353 // SVE2 EXPAND 4354 defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">; 4355 // SVE COMPACT - byte and halfword 4356 defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact">; 4357} 4358 4359//===----------------------------------------------------------------------===// 4360// SVE2 FP8 instructions 4361//===----------------------------------------------------------------------===// 4362let Predicates = [HasSVE2_or_SME2, HasFP8] in { 4363// FP8 upconvert 4364defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt", nxv8f16, int_aarch64_sve_fp8_cvt1>; 4365defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt", nxv8f16, int_aarch64_sve_fp8_cvt2>; 4366defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt", nxv8bf16, int_aarch64_sve_fp8_cvt1>; 4367defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt", nxv8bf16, int_aarch64_sve_fp8_cvt2>; 4368defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt1>; 4369defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt", nxv8f16, int_aarch64_sve_fp8_cvtlt2>; 4370defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt1>; 4371defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>; 4372 4373// FP8 downconvert 4374defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r, nxv8f16, int_aarch64_sve_fp8_cvtn>; 4375defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnb>; 4376defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r, nxv8bf16, int_aarch64_sve_fp8_cvtn>; 4377 4378defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single_top<0b11, "fcvtnt", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnt>; 4379} // End HasSVE2_or_SME2, HasFP8 4380 4381let Predicates = [HasSVE2_or_SME2, HasFAMINMAX] in { 4382defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>; 4383defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>; 4384 4385defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>; 4386defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>; 4387} // End HasSVE2_or_SME2, HasFAMINMAX 4388 4389let Predicates = [HasSSVE_FP8FMA] in { 4390// FP8 Widening Multiply-Add Long - Indexed Group 4391defm FMLALB_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b0, "fmlalb", int_aarch64_sve_fp8_fmlalb_lane>; 4392defm FMLALT_ZZZI : sve2_fp8_mla_long_by_indexed_elem<0b1, "fmlalt", int_aarch64_sve_fp8_fmlalt_lane>; 4393// FP8 Widening Multiply-Add Long Group 4394defm FMLALB_ZZZ : sve2_fp8_mla<0b100, ZPR16, "fmlalb", nxv8f16, int_aarch64_sve_fp8_fmlalb>; 4395defm FMLALT_ZZZ : sve2_fp8_mla<0b101, ZPR16, "fmlalt", nxv8f16, int_aarch64_sve_fp8_fmlalt>; 4396// FP8 Widening Multiply-Add Long Long - Indexed Group 4397defm FMLALLBB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b00, "fmlallbb", int_aarch64_sve_fp8_fmlallbb_lane>; 4398defm FMLALLBT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b01, "fmlallbt", int_aarch64_sve_fp8_fmlallbt_lane>; 4399defm FMLALLTB_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b10, "fmlalltb", int_aarch64_sve_fp8_fmlalltb_lane>; 4400defm FMLALLTT_ZZZI : sve2_fp8_mla_long_long_by_indexed_elem<0b11, "fmlalltt", int_aarch64_sve_fp8_fmlalltt_lane>; 4401// FP8 Widening Multiply-Add Long Long Group 4402defm FMLALLBB_ZZZ : sve2_fp8_mla<0b000, ZPR32, "fmlallbb", nxv4f32, int_aarch64_sve_fp8_fmlallbb>; 4403defm FMLALLBT_ZZZ : sve2_fp8_mla<0b001, ZPR32, "fmlallbt", nxv4f32, int_aarch64_sve_fp8_fmlallbt>; 4404defm FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb", nxv4f32, int_aarch64_sve_fp8_fmlalltb>; 4405defm FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt", nxv4f32, int_aarch64_sve_fp8_fmlalltt>; 4406} // End HasSSVE_FP8FMA 4407 4408let Predicates = [HasSVE2, HasF8F32MM] in { 4409 def FMMLA_ZZZ_BtoS : sve2_fp8_mmla<0b0, ZPR32, "fmmla">; 4410} 4411 4412let Predicates = [HasSVE2, HasF8F16MM] in { 4413 def FMMLA_ZZZ_BtoH : sve2_fp8_mmla<0b1, ZPR16, "fmmla">; 4414} 4415 4416let Predicates = [HasSSVE_FP8DOT2] in { 4417// FP8 Widening Dot-Product - Indexed Group 4418defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed_h<"fdot", int_aarch64_sve_fp8_fdot_lane>; 4419// FP8 Widening Dot-Product - Group 4420defm FDOT_ZZZ_BtoH : sve_fp8_dot<0b0, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fp8_fdot>; 4421} 4422 4423// TODO: Replace nxv16i8 by nxv16f8 4424let Predicates = [HasSSVE_FP8DOT4] in { 4425// FP8 Widening Dot-Product - Indexed Group 4426defm FDOT_ZZZI_BtoS : sve2_fp8_dot_indexed_s<"fdot", int_aarch64_sve_fp8_fdot_lane>; 4427// FP8 Widening Dot-Product - Group 4428defm FDOT_ZZZ_BtoS : sve_fp8_dot<0b1, ZPR32, "fdot", nxv4f32, int_aarch64_sve_fp8_fdot>; 4429} 4430 4431let Predicates = [HasSVE2_or_SME2, HasLUT] in { 4432// LUTI2 4433 defm LUTI2_ZZZI : sve2_luti2_vector_index<"luti2">; 4434// LUTI4 4435 defm LUTI4_ZZZI : sve2_luti4_vector_index<"luti4">; 4436// LUTI4 (two contiguous registers) 4437 defm LUTI4_Z2ZZI : sve2_luti4_vector_vg2_index<"luti4">; 4438} // End HasSVE2_or_SME2, HasLUT 4439 4440//===----------------------------------------------------------------------===// 4441// Checked Pointer Arithmetic (FEAT_CPA) 4442//===----------------------------------------------------------------------===// 4443let Predicates = [HasSVE, HasCPA] in { 4444 // Add/subtract (vectors, unpredicated) 4445 def ADD_ZZZ_CPA : sve_int_bin_cons_arit_0<0b11, 0b010, "addpt", ZPR64>; 4446 def SUB_ZZZ_CPA : sve_int_bin_cons_arit_0<0b11, 0b011, "subpt", ZPR64>; 4447 4448 // Add/subtract (vectors, predicated) 4449 let DestructiveInstType = DestructiveBinaryComm in { 4450 def ADD_ZPmZ_CPA : sve_int_bin_pred_arit_log<0b11, 0b00, 0b100, "addpt", ZPR64>; 4451 def SUB_ZPmZ_CPA : sve_int_bin_pred_arit_log<0b11, 0b00, 0b101, "subpt", ZPR64>; 4452 } 4453 4454 // Multiply-add vectors, writing multiplicand 4455 def MAD_CPA : sve_int_mad_cpa<"madpt">; 4456 4457 // Multiply-add vectors, writing addend 4458 def MLA_CPA : sve_int_mla_cpa<"mlapt">; 4459} 4460