//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX10 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// VNNI FP16
let ExeDomain = SSEPackedSingle in
defm VDPPHPS
    : avx512_dpf16ps_sizes<0x52, "vdpphps", X86dpfp16ps, avx512vl_f16_info,
                           [HasAVX10_2], [HasAVX10_2_512]>,
      T8, PS, EVEX_CD8<32, CD8VF>;

// VNNI INT8
// Each pair shares one prefix; the plain form uses opcode 0x50 and the
// "s"-suffixed form uses 0x51.
defm VPDPBSSD
    : VNNI_common<0x50, "vpdpbssd", X86vpdpbssd, SchedWriteVecIMul, 1,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      XD;
defm VPDPBSSDS
    : VNNI_common<0x51, "vpdpbssds", X86vpdpbssds, SchedWriteVecIMul, 1,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      XD;
defm VPDPBSUD
    : VNNI_common<0x50, "vpdpbsud", X86vpdpbsud, SchedWriteVecIMul, 0,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      XS;
defm VPDPBSUDS
    : VNNI_common<0x51, "vpdpbsuds", X86vpdpbsuds, SchedWriteVecIMul, 0,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      XS;
defm VPDPBUUD
    : VNNI_common<0x50, "vpdpbuud", X86vpdpbuud, SchedWriteVecIMul, 1,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      PS;
defm VPDPBUUDS
    : VNNI_common<0x51, "vpdpbuuds", X86vpdpbuuds, SchedWriteVecIMul, 1,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      PS;

// VNNI INT16
// Same layout as above with opcodes 0xd2 (plain) and 0xd3 ("s"-suffixed).
defm VPDPWSUD
    : VNNI_common<0xd2, "vpdpwsud", X86vpdpwsud, SchedWriteVecIMul, 0,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      XS;
defm VPDPWSUDS
    : VNNI_common<0xd3, "vpdpwsuds", X86vpdpwsuds, SchedWriteVecIMul, 0,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      XS;
defm VPDPWUSD
    : VNNI_common<0xd2, "vpdpwusd", X86vpdpwusd, SchedWriteVecIMul, 0,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      PD;
defm VPDPWUSDS
    : VNNI_common<0xd3, "vpdpwusds", X86vpdpwusds, SchedWriteVecIMul, 0,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      PD;
defm VPDPWUUD
    : VNNI_common<0xd2, "vpdpwuud", X86vpdpwuud, SchedWriteVecIMul, 1,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      PS;
defm VPDPWUUDS
    : VNNI_common<0xd3, "vpdpwuuds", X86vpdpwuuds, SchedWriteVecIMul, 1,
                  [HasAVX10_2], [HasAVX10_2_512]>,
      PS;

// VMPSADBW
defm VMPSADBW
    : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW,
                                avx512vl_i16_info, avx512vl_i8_info,
                                HasAVX10_2>,
      XS, EVEX_CD8<32, CD8VF>;

// YMM Rounding
//
// The multiclasses below instantiate only the 256-bit (Z256) records of the
// corresponding avx512_* helper multiclasses, using the YMM member of the
// scheduling class that is passed in.

// PH/PS/PD forms of a packed FP binary op built on avx512_fp_round_packed.
multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PHZ256
      : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM, v16f16x_info>,
        T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM, v8f32x_info>,
        TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM, v4f64x_info>,
        TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD forms of a packed FP binary op built on avx512_fp_sae_packed.
multiclass avx256_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PHZ256
      : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM, v16f16x_info>,
        T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM, v8f32x_info>,
        TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM, v4f64x_info>,
        TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD compare forms built on avx512_vcmp_sae.
multiclass avx256_vcmp_p_sae<X86SchedWriteWidths sched> {
  defm PHZ256
      : avx512_vcmp_sae<sched.YMM, v16f16x_info>,
        AVX512PSIi8Base, EVEX_CD8<16, CD8VF>, TA;
  defm PSZ256
      : avx512_vcmp_sae<sched.YMM, v8f32x_info>,
        AVX512PSIi8Base, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_vcmp_sae<sched.YMM, v4f64x_info>,
        AVX512PDIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
}

// PS/PD fixupimm forms; the second VT info is the integer vector type of the
// same element width.
multiclass avx256_fixupimm_packed_all<bits<8> opc, string OpcodeStr,
                                      X86SchedWriteWidths sched> {
  defm PSZ256
      : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM, v8f32x_info, v8i32x_info>,
        EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM, v4f64x_info, v4i64x_info>,
        EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD forms built on avx512_fp28_p_sae; the type suffix is appended to
// OpcodeStr here.
multiclass avx256_vgetexp<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
                          X86SchedWriteWidths sched> {
  defm PHZ256
      : avx512_fp28_p_sae<opc, !strconcat(OpcodeStr, "ph"), v16f16x_info, OpNodeSAE,
                          sched.YMM>,
        T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fp28_p_sae<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info, OpNodeSAE,
                          sched.YMM>,
        T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fp28_p_sae<opc, !strconcat(OpcodeStr, "pd"), v4f64x_info, OpNodeSAE,
                          sched.YMM>,
        T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// Unary FP op with immediate. The PH and PS forms both use opcPs; only the PD
// form uses opcPd.
multiclass avx256_unary_fp_sae<string OpcodeStr, bits<8> opcPs, bits<8> opcPd,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PHZ256
      : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE, sched.YMM,
                                       v16f16x_info>,
        AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE, sched.YMM,
                                       v8f32x_info>,
        AVX512AIi8Base, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_unary_fp_sae_packed_imm<opcPd, OpcodeStr, OpNodeSAE, sched.YMM,
                                       v4f64x_info>,
        AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
}

// PS/PD forms built on avx512_fp_sae_packed_imm; the type suffix is appended
// to OpcodeStr here.
multiclass avx256_common_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
                                           X86SchedWriteWidths sched> {
  defm PSZ256
      : avx512_fp_sae_packed_imm<opc, !strconcat(OpcodeStr, "ps"), OpNodeSAE, sched.YMM,
                                 v8f32x_info>,
        EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fp_sae_packed_imm<opc, !strconcat(OpcodeStr, "pd"), OpNodeSAE, sched.YMM,
                                 v4f64x_info>,
        EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD forms built on avx512_fp_round_packed with a single (non-sized)
// scheduling class.
multiclass avx256_fp_scalef_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                  X86SchedWriteWidths sched> {
  defm PHZ256
      : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM, v16f16x_info>,
        T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM, v8f32x_info>,
        T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM, v4f64x_info>,
        T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD forms built on avx512_sqrt_packed_round.
multiclass avx256_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PHZ256
      : avx512_sqrt_packed_round<opc, OpcodeStr#"ph", sched.PH.YMM, v16f16x_info>,
        T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", sched.PS.YMM, v8f32x_info>,
        TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", sched.PD.YMM, v4f64x_info>,
        TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// v16i16 -> v16f16 conversion (opcode 0x7D). The opcode map and prefix are
// supplied at the instantiation site.
multiclass avx256_vcvtw_rc<string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256
      : avx512_vcvt_fp_rc<0x7D, OpcodeStr, v16f16x_info, v16i16x_info, OpNodeRnd,
                          SchedWriteCvtPD2DQ.YMM>,
        EVEX_CD8<16, CD8VF>;
}

// v8i32 -> v8f16 / v8f32 conversions (PS prefix).
multiclass avx256_cvtdq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                              X86SchedWriteWidths sched> {
  defm PHZ256
      : avx512_vcvt_fp_rc<opc, OpcodeStr#"ph", v8f16x_info, v8i32x_info, OpNodeRnd,
                          sched.YMM>,
        T_MAP5, PS, EVEX_CD8<32, CD8VF>;
  defm PSZ256
      : avx512_vcvt_fp_rc<opc, OpcodeStr#"ps", v8f32x_info, v8i32x_info, OpNodeRnd,
                          sched.YMM>,
        TB, PS, EVEX_CD8<32, CD8VF>;
}

// Same shape as avx256_cvtdq2fp_rc but with the XD prefix.
multiclass avx256_cvtudq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                               X86SchedWriteWidths sched> {
  defm PHZ256
      : avx512_vcvt_fp_rc<opc, OpcodeStr#"ph", v8f16x_info, v8i32x_info, OpNodeRnd,
                          sched.YMM>,
        T_MAP5, XD, EVEX_CD8<32, CD8VF>;
  defm PSZ256
      : avx512_vcvt_fp_rc<opc, OpcodeStr#"ps", v8f32x_info, v8i32x_info, OpNodeRnd,
                          sched.YMM>,
        TB, XD, EVEX_CD8<32, CD8VF>;
}
// Conversions from _Src to PH/PS/PD using X86VSintToFpRnd. The PH and PS forms
// use opcode 0x5B, the PD form uses 0xE6. No EVEX_CD8 is attached here; it is
// expected from the instantiation site.
multiclass avx256_cvtqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256
      : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"ph", v8f16x_info, _Src,
                          X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
        T_MAP5, PS;
  defm PSZ256
      : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"ps", v4f32x_info, _Src,
                          X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
        TB, PS;
  defm PDZ256
      : avx512_vcvt_fp_rc<0xE6, OpcodeStr#"pd", v4f64x_info, _Src,
                          X86VSintToFpRnd, SchedWriteCvtDQ2PD.YMM>,
        TB, XS;
}

// Conversions from _Src to PH/PS/PD using X86VUintToFpRnd; all three forms use
// opcode 0x7A and differ in opcode map / prefix.
multiclass avx256_cvtuqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256
      : avx512_vcvt_fp_rc<0x7A, OpcodeStr#"ph", v8f16x_info, _Src,
                          X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
        T_MAP5, XD;
  defm PSZ256
      : avx512_vcvt_fp_rc<0x7A, OpcodeStr#"ps", v4f32x_info, _Src,
                          X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
        TB, XD;
  defm PDZ256
      : avx512_vcvt_fp_rc<0x7A, OpcodeStr#"pd", v4f64x_info, _Src,
                          X86VUintToFpRnd, SchedWriteCvtDQ2PD.YMM>,
        TB, XS;
}

// Conversions from _Src to narrower FP types (ph, ps) and to signed/unsigned
// integer vectors (dq, qq, udq, uqq).
multiclass avx256_vcvt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
  // FP -> narrower FP.
  defm PHZ256
      : avx512_vcvt_fp_rc<0x5A, OpcodeStr#"ph", v8f16x_info, _Src,
                          X86vfproundRnd, SchedWriteCvtPD2PS.YMM>,
        T_MAP5, PD;
  defm PSZ256
      : avx512_vcvt_fp_rc<0x5A, OpcodeStr#"ps", v4f32x_info, _Src,
                          X86vfproundRnd, SchedWriteCvtPD2PS.YMM>,
        TB, PD;
  // FP -> signed integer.
  defm DQZ256
      : avx512_vcvt_fp_rc<0xE6, OpcodeStr#"dq", v4i32x_info, _Src,
                          X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>,
        TB, XD;
  defm QQZ256
      : avx512_vcvt_fp_rc<0x7B, OpcodeStr#"qq", v4i64x_info, _Src,
                          X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>,
        TB, PD;
  // FP -> unsigned integer.
  defm UDQZ256
      : avx512_vcvt_fp_rc<0x79, OpcodeStr#"udq", v4i32x_info, _Src,
                          X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>,
        TB, PS;
  defm UQQZ256
      : avx512_vcvt_fp_rc<0x79, OpcodeStr#"uqq", v4i64x_info, _Src,
                          X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>,
        TB, PD;
}
// Conversions whose source is a packed-single vector.
multiclass avx256_vcvt_ps2<string OpcodeStr> {
  defm PHZ256
      : avx512_cvtps2ph_sae<v8i16x_info, v8f32x_info, WriteCvtPS2PHZ>,
        EVEX_CD8<32, CD8VH>;
  defm PHXZ256
      : avx512_vcvt_fp_rc<0x1D, OpcodeStr#"phx", v8f16x_info, v8f32x_info,
                          X86vfproundRnd, SchedWriteCvtPD2PS.YMM>,
        T_MAP5, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_vcvt_fp_sae<0x5A, OpcodeStr#"pd", v4f64x_info, v4f32x_info,
                           X86vfpextSAE, SchedWriteCvtPS2PD.YMM>,
        TB, PS, EVEX_CD8<32, CD8VF>;
  defm DQZ256
      : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"dq", v8i32x_info, v8f32x_info,
                          X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
        TB, PD, EVEX_CD8<32, CD8VF>;
  defm QQZ256
      : avx512_vcvt_fp_rc<0x7B, OpcodeStr#"qq", v4i64x_info, v4f32x_info,
                          X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
        TB, PD, EVEX_CD8<32, CD8VF>;
  defm UDQZ256
      : avx512_vcvt_fp_rc<0x79, OpcodeStr#"udq", v8i32x_info, v8f32x_info,
                          X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
        TB, PS, EVEX_CD8<32, CD8VF>;
  defm UQQZ256
      : avx512_vcvt_fp_rc<0x79, OpcodeStr#"uqq", v4i64x_info, v4f32x_info,
                          X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
        TB, PD, EVEX_CD8<32, CD8VF>;
}

// Conversions whose source is a packed-half vector.
multiclass avx256_vcvt_ph2<string OpcodeStr> {
  defm PSZ256
      : avx512_cvtph2ps_sae<v8f32x_info, v8i16x_info, WriteCvtPH2PSZ>,
        EVEX_CD8<32, CD8VH>;
  defm PSXZ256
      : avx512_vcvt_fp_sae<0x13, OpcodeStr#"psx", v8f32x_info, v8f16x_info,
                           X86vfpextSAE, SchedWriteCvtPS2PD.YMM>,
        T_MAP6, PD, EVEX_CD8<16, CD8VH>;
  defm PDZ256
      : avx512_vcvt_fp_sae<0x5A, OpcodeStr#"pd", v4f64x_info, v8f16x_info,
                           X86vfpextSAE, SchedWriteCvtPS2PD.YMM>,
        T_MAP5, PS, EVEX_CD8<16, CD8VQ>;
  defm WZ256
      : avx512_vcvt_fp_rc<0x7D, OpcodeStr#"w", v16i16x_info, v16f16x_info,
                          X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  defm DQZ256
      : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"dq", v8i32x_info, v8f16x_info,
                          X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VH>;
  defm QQZ256
      : avx512_vcvt_fp_rc<0x7B, OpcodeStr#"qq", v4i64x_info, v8f16x_info,
                          X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
  defm UWZ256
      : avx512_vcvt_fp_rc<0x7D, OpcodeStr#"uw", v16i16x_info, v16f16x_info,
                          X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>,
        T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm UDQZ256
      : avx512_vcvt_fp_rc<0x79, OpcodeStr#"udq", v8i32x_info, v8f16x_info,
                          X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PS, EVEX_CD8<16, CD8VH>;
  defm UQQZ256
      : avx512_vcvt_fp_rc<0x79, OpcodeStr#"uqq", v4i64x_info, v8f16x_info,
                          X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
}

// Truncating conversions from _Src to integer vectors. Only the prefix is set
// here; the opcode map and EVEX_CD8 come from the instantiation site.
multiclass avx256_vcvtt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
  defm DQZ256
      : avx512_vcvt_fp_sae<0xE6, OpcodeStr#"dq", v4i32x_info, _Src,
                           X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>,
        PD;
  defm QQZ256
      : avx512_vcvt_fp_sae<0x7A, OpcodeStr#"qq", v4i64x_info, _Src,
                           X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>,
        PD;
  defm UDQZ256
      : avx512_vcvt_fp_sae<0x78, OpcodeStr#"udq", v4i32x_info, _Src,
                           X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>,
        PS;
  defm UQQZ256
      : avx512_vcvt_fp_sae<0x78, OpcodeStr#"uqq", v4i64x_info, _Src,
                           X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>,
        PD;
}

// Truncating conversions from packed single to integer vectors. The opcode map
// comes from the instantiation site.
multiclass avx256_vcvtt_ps2<string OpcodeStr> {
  defm DQZ256
      : avx512_vcvt_fp_sae<0x5B, OpcodeStr#"dq", v8i32x_info, v8f32x_info,
                           X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
        XS, EVEX_CD8<32, CD8VF>;
  defm QQZ256
      : avx512_vcvt_fp_sae<0x7A, OpcodeStr#"qq", v4i64x_info, v4f32x_info,
                           X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
        PD, EVEX_CD8<32, CD8VH>;
  defm UDQZ256
      : avx512_vcvt_fp_sae<0x78, OpcodeStr#"udq", v8i32x_info, v8f32x_info,
                           X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
        PS, EVEX_CD8<32, CD8VF>;
  defm UQQZ256
      : avx512_vcvt_fp_sae<0x78, OpcodeStr#"uqq", v4i64x_info, v4f32x_info,
                           X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
        PD, EVEX_CD8<32, CD8VH>;
}

// Truncating conversions from packed half to integer vectors.
multiclass avx256_vcvtt_ph2<string OpcodeStr> {
  defm WZ256
      : avx512_vcvt_fp_sae<0x7C, OpcodeStr#"w", v16i16x_info, v16f16x_info,
                           X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  defm DQZ256
      : avx512_vcvt_fp_sae<0x5B, OpcodeStr#"dq", v8i32x_info, v8f16x_info,
                           X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, XS, EVEX_CD8<16, CD8VH>;
  defm QQZ256
      : avx512_vcvt_fp_sae<0x7A, OpcodeStr#"qq", v4i64x_info, v8f16x_info,
                           X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
  defm UWZ256
      : avx512_vcvt_fp_sae<0x7C, OpcodeStr#"uw", v16i16x_info, v16f16x_info,
                           X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>,
        T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm UDQZ256
      : avx512_vcvt_fp_sae<0x78, OpcodeStr#"udq", v8i32x_info, v8f16x_info,
                           X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PS, EVEX_CD8<16, CD8VH>;
  defm UQQZ256
      : avx512_vcvt_fp_sae<0x78, OpcodeStr#"uqq", v4i64x_info, v8f16x_info,
                           X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
        T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
}

// PH/PS/PD forms of the 132 FMA operand ordering.
multiclass avx256_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256
      : avx512_fma3_132_round<opc, OpcodeStr#"ph", OpNodeRnd,
                              SchedWriteFMA.YMM, v16f16x_info>,
        T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fma3_132_round<opc, OpcodeStr#"ps", OpNodeRnd,
                              SchedWriteFMA.YMM, v8f32x_info>,
        T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fma3_132_round<opc, OpcodeStr#"pd", OpNodeRnd,
                              SchedWriteFMA.YMM, v4f64x_info>,
        T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD forms of the 213 FMA operand ordering.
multiclass avx256_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256
      : avx512_fma3_213_round<opc, OpcodeStr#"ph", OpNodeRnd,
                              SchedWriteFMA.YMM, v16f16x_info>,
        T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fma3_213_round<opc, OpcodeStr#"ps", OpNodeRnd,
                              SchedWriteFMA.YMM, v8f32x_info>,
        T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fma3_213_round<opc, OpcodeStr#"pd", OpNodeRnd,
                              SchedWriteFMA.YMM, v4f64x_info>,
        T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD forms of the 231 FMA operand ordering.
multiclass avx256_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256
      : avx512_fma3_231_round<opc, OpcodeStr#"ph", OpNodeRnd,
                              SchedWriteFMA.YMM, v16f16x_info>,
        T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256
      : avx512_fma3_231_round<opc, OpcodeStr#"ps", OpNodeRnd,
                              SchedWriteFMA.YMM, v8f32x_info>,
        T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256
      : avx512_fma3_231_round<opc, OpcodeStr#"pd", OpNodeRnd,
                              SchedWriteFMA.YMM, v4f64x_info>,
        T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// Bundles the three operand orderings of one FMA mnemonic; the ordering digits
// are appended to both the record name and the mnemonic.
multiclass avx256_fma3_round3<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                              string OpcodeStr, SDNode OpNodeRnd> {
  defm NAME#132 : avx256_fma3_132_round<opc132, OpcodeStr#"132", OpNodeRnd>;
  defm NAME#213 : avx256_fma3_213_round<opc213, OpcodeStr#"213", OpNodeRnd>;
  defm NAME#231 : avx256_fma3_231_round<opc231, OpcodeStr#"231", OpNodeRnd>;
}

// Instantiate the YMM multiclasses. Every record in this block is predicated
// on HasAVX10_2 and carries hasEVEX_U = 1 and OpEnc = EncEVEX.
let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in {
  // Arithmetic and compare.
  defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
  defm VMUL : avx256_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
  defm VSUB : avx256_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
  defm VDIV : avx256_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
  defm VMIN : avx256_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
  defm VMAX : avx256_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
  defm VCMP : avx256_vcmp_p_sae<SchedWriteFCmp>, EVEX, VVVV;

  // Fixup / exponent / mantissa / range / scale / sqrt.
  defm VFIXUPIMM
      : avx256_fixupimm_packed_all<0x54, "vfixupimm", SchedWriteFAdd>,
        AVX512AIi8Base, EVEX, VVVV;
  defm VGETEXP : avx256_vgetexp<0x42, "vgetexp", X86fgetexpSAE, SchedWriteFRnd>;
  defm VREDUCE
      : avx256_unary_fp_sae<"vreduce", 0x56, 0x56, X86VReduceSAE, SchedWriteFRnd>;
  defm VRNDSCALE
      : avx256_unary_fp_sae<"vrndscale", 0x08, 0x09, X86VRndScaleSAE, SchedWriteFRnd>;
  defm VGETMANT
      : avx256_unary_fp_sae<"vgetmant", 0x26, 0x26, X86VGetMantSAE, SchedWriteFRnd>;
  defm VRANGE
      : avx256_common_fp_sae_packed_imm<0x50, "vrange", X86VRangeSAE, SchedWriteFAdd>,
        AVX512AIi8Base, EVEX, VVVV;
  defm VSCALEF
      : avx256_fp_scalef_round<0x2C, "vscalef", X86scalefRnd, SchedWriteFAdd>;
  defm VSQRT : avx256_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

  // Integer -> FP conversions.
  defm VCVTW2 : avx256_vcvtw_rc<"vcvtw2ph", X86VSintToFpRnd>, T_MAP5, XS;
  defm VCVTDQ2
      : avx256_cvtdq2fp_rc<0x5B, "vcvtdq2", X86VSintToFpRnd, SchedWriteCvtDQ2PS>;
  defm VCVTQQ2
      : avx256_cvtqq2fp_rc<"vcvtqq2", v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTUW2 : avx256_vcvtw_rc<"vcvtuw2ph", X86VUintToFpRnd>, T_MAP5, XD;
  defm VCVTUDQ2
      : avx256_cvtudq2fp_rc<0x7A, "vcvtudq2", X86VUintToFpRnd, SchedWriteCvtDQ2PS>;
  defm VCVTUQQ2
      : avx256_cvtuqq2fp_rc<"vcvtuqq2", v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;

  // FP -> FP / integer conversions.
  defm VCVTPD2
      : avx256_vcvt_pd2<"vcvtpd2", v4f64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTPS2 : avx256_vcvt_ps2<"vcvtps2">;
  defm VCVTPH2 : avx256_vcvt_ph2<"vcvtph2">;
  defm VCVTTPD2
      : avx256_vcvtt_pd2<"vcvttpd2", v4f64x_info>, EVEX_CD8<64, CD8VF>, TB, REX_W;
  defm VCVTTPS2 : avx256_vcvtt_ps2<"vcvttps2">, TB;
  defm VCVTTPH2 : avx256_vcvtt_ph2<"vcvttph2">;

  // FMA families.
  defm VFMADD : avx256_fma3_round3<0x98, 0xA8, 0xB8, "vfmadd", X86FmaddRnd>;
  defm VFMSUB : avx256_fma3_round3<0x9A, 0xAA, 0xBA, "vfmsub", X86FmsubRnd>;
  defm VFMADDSUB
      : avx256_fma3_round3<0x96, 0xA6, 0xB6, "vfmaddsub", X86FmaddsubRnd>;
  defm VFMSUBADD
      : avx256_fma3_round3<0x97, 0xA7, 0xB7, "vfmsubadd", X86FmsubaddRnd>;
  defm VFNMADD : avx256_fma3_round3<0x9C, 0xAC, 0xBC, "vfnmadd", X86FnmaddRnd>;
  defm VFNMSUB : avx256_fma3_round3<0x9E, 0xAE, 0xBE, "vfnmsub", X86FnmsubRnd>;

  // Complex FP16 multiply / multiply-add. These are described with
  // v8f32x_info; the multiply forms pass an "@earlyclobber $dst" constraint.
  defm VFMULCPHZ256
      : avx512_fp_round_packed<0xD6, "vfmulcph", x86vfmulcRnd, SchedWriteFMA.YMM,
                               v8f32x_info, "", "@earlyclobber $dst">,
        T_MAP6, XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPHZ256
      : avx512_fp_round_packed<0xD6, "vfcmulcph", x86vfcmulcRnd, SchedWriteFMA.YMM,
                               v8f32x_info, "", "@earlyclobber $dst">,
        T_MAP6, XD, EVEX_CD8<32, CD8VF>;
  defm VFMADDCPHZ256
      : avx512_cfmaop_round<0x56, "vfmaddcph", x86vfmaddcRnd, v8f32x_info>,
        T_MAP6, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
  defm VFCMADDCPHZ256
      : avx512_cfmaop_round<0x56, "vfcmaddcph", x86vfcmaddcRnd, v8f32x_info>,
        T_MAP6, XD, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
}

//-------------------------------------------------
// AVX10 MINMAX instructions
//-------------------------------------------------

// Register, memory and broadcast-memory forms (opcode 0x52) for one vector
// width. All three are marked as reading MXCSR and possibly raising FP
// exceptions.
multiclass avx10_minmax_packed_base<string OpStr, X86VectorVTInfo VTI, SDNode OpNode> {
  let ExeDomain = VTI.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // reg, reg, imm
    defm rri
        : AVX512_maskable<0x52, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                          (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3), OpStr,
                          "$src3, $src2, $src1", "$src1, $src2, $src3",
                          (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                          (i32 timm:$src3)))>,
          EVEX, VVVV, Sched<[WriteFMAX]>;
    // reg, full-vector load, imm
    defm rmi
        : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                          (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3), OpStr,
                          "$src3, $src2, $src1", "$src1, $src2, $src3",
                          (VTI.VT (OpNode VTI.RC:$src1, (VTI.LdFrag addr:$src2),
                                          (i32 timm:$src3)))>,
          EVEX, VVVV,
          Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    // reg, broadcast scalar load, imm
    defm rmbi
        : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                          (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
                          OpStr,
                          !strconcat("$src3, ${src2}", VTI.BroadcastStr, ", $src1"),
                          !strconcat("$src1, ${src2}", VTI.BroadcastStr, ", $src3"),
                          (VTI.VT (OpNode VTI.RC:$src1,
                                          (VTI.BroadcastLdFrag addr:$src2),
                                          (i32 timm:$src3)))>,
          EVEX, VVVV, EVEX_B,
          Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
  }
}

// {sae} register forms for the 512-bit and 256-bit widths. These clear Uses
// and mayRaiseFPException; the 256-bit form additionally sets hasEVEX_U.
multiclass avx10_minmax_packed_sae<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
  let Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Zrrib
        : AVX512_maskable<0x52, MRMSrcReg, VTI.info512, (outs VTI.info512.RC:$dst),
                          (ins VTI.info512.RC:$src1, VTI.info512.RC:$src2,
                               i32u8imm:$src3),
                          OpStr,
                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                          (VTI.info512.VT (OpNode (VTI.info512.VT VTI.info512.RC:$src1),
                                                  (VTI.info512.VT VTI.info512.RC:$src2),
                                                  (i32 timm:$src3)))>,
          EVEX, VVVV, EVEX_B, EVEX_V512, Sched<[WriteFMAX]>;
    let hasEVEX_U = 1 in {
      defm Z256rrib
          : AVX512_maskable<0x52, MRMSrcReg, VTI.info256, (outs VTI.info256.RC:$dst),
                            (ins VTI.info256.RC:$src1, VTI.info256.RC:$src2,
                                 i32u8imm:$src3),
                            OpStr,
                            "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                            (VTI.info256.VT (OpNode (VTI.info256.VT VTI.info256.RC:$src1),
                                                    (VTI.info256.VT VTI.info256.RC:$src2),
                                                    (i32 timm:$src3)))>,
            EVEX, VVVV, EVEX_B, EVEX_V256, Sched<[WriteFMAX]>;
    }
  }
}

// Instantiates avx10_minmax_packed_base for all three vector widths; the
// 512-bit form requires HasAVX10_2_512, the others HasAVX10_2.
multiclass avx10_minmax_packed<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx10_minmax_packed_base<OpStr, VTI.info512, OpNode>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_minmax_packed_base<OpStr, VTI.info256, OpNode>, EVEX_V256;
    defm Z128 : avx10_minmax_packed_base<OpStr, VTI.info128, OpNode>, EVEX_V128;
  }
}

// Scalar forms (opcode 0x53): codegen-only FRC records, maskable "_Int"
// records on VR128X, and a {sae} register form.
multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
                               SDNode OpNodeSAE> {
  let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
    let mayRaiseFPException = 1 in {
      // Codegen-only records operating on the scalar FP register class.
      let isCodeGenOnly = 1 in {
        def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                            OpStr#"\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2,
                                                      (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX]>;

        def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                            OpStr#"\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                                      (_.ScalarLdFrag addr:$src2),
                                                      (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
      }
      // Maskable vector-register records; "_Int" is passed as the record-name
      // suffix argument.
      defm rri
          : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                            OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                          (i32 timm:$src3))),
                            0, 0, 0, vselect_mask, "", "_Int">,
            Sched<[WriteFMAX]>;

      defm rmi
          : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
                            (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                            OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (OpNode (_.VT _.RC:$src1),
                                          (_.ScalarIntMemFrags addr:$src2),
                                          (i32 timm:$src3))),
                            0, 0, 0, vselect_mask, "", "_Int">,
            Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    }
    // {sae} form: Uses is cleared and mayRaiseFPException is 0.
    let Uses = []<Register>, mayRaiseFPException = 0 in {
      defm rrib
          : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                            OpStr, "$src3, {sae}, $src2, $src1",
                            "$src1, $src2, {sae}, $src3",
                            (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                             (i32 timm:$src3))),
                            0, 0, 0, vselect_mask, "", "_Int">,
            Sched<[WriteFMAX]>, EVEX_B;
    }
  }
}


// The BF16 form has no {sae} variant and is marked as not raising FP
// exceptions.
let mayRaiseFPException = 0 in
defm VMINMAXBF16
    : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info, X86vminmax>,
      AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;

defm VMINMAXPD
    : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info, X86vminmax>,
      avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info, X86vminmaxSae>,
      AVX512PDIi8Base, REX_W, TA, EVEX_CD8<64, CD8VF>;

defm VMINMAXPH
    : avx10_minmax_packed<"vminmaxph", avx512vl_f16_info, X86vminmax>,
      avx10_minmax_packed_sae<"vminmaxph", avx512vl_f16_info, X86vminmaxSae>,
      AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;

defm VMINMAXPS
    : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>,
      avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
      AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;

defm VMINMAXSD
    : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VMINMAXSH
    : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
      AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
defm VMINMAXSS
    : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

//-------------------------------------------------
// AVX10 SATCVT instructions
//-------------------------------------------------

// Register, memory and broadcast-memory forms of a unary conversion for one
// vector width.
multiclass avx10_sat_cvt_rmb<bits<8> Opc, string OpStr, X86FoldableSchedWrite sched,
                             X86VectorVTInfo DestInfo,
                             X86VectorVTInfo SrcInfo,
                             SDNode MaskNode> {
  defm rr
      : AVX512_maskable<Opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.RC:$src), OpStr, "$src", "$src",
                        (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src)))>,
        Sched<[sched]>;
  defm rm
      : AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.MemOp:$src), OpStr, "$src", "$src",
                        (DestInfo.VT (MaskNode (SrcInfo.VT
                                                (SrcInfo.LdFrag addr:$src))))>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb
      : AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.ScalarMemOp:$src), OpStr,
                        !strconcat("${src}", SrcInfo.BroadcastStr),
                        !strconcat("${src}", SrcInfo.BroadcastStr),
                        (DestInfo.VT (MaskNode (SrcInfo.VT
                                                (SrcInfo.BroadcastLdFrag addr:$src))))>,
        EVEX_B,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Conversion with rounding control (RC)
// 512-bit form under HasAVX10_2_512; 256-bit form under HasAVX10_2 with
// hasEVEX_U set.
multiclass avx10_sat_cvt_rc<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
                            SDNode MaskNode> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
    defm Zrrb
        : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                          (outs DestInfo.info512.RC:$dst),
                          (ins SrcInfo.info512.RC:$src, AVX512RC:$rc),
                          OpStr, "$rc, $src", "$src, $rc",
                          (DestInfo.info512.VT
                            (MaskNode (SrcInfo.info512.VT SrcInfo.info512.RC:$src),
                                      (i32 timm:$rc)))>,
          Sched<[sched.ZMM]>, EVEX, EVEX_RC, EVEX_B;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in
  defm Z256rrb
      : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info256,
                        (outs DestInfo.info256.RC:$dst),
                        (ins SrcInfo.info256.RC:$src, AVX512RC:$rc),
                        OpStr, "$rc, $src", "$src, $rc",
                        (DestInfo.info256.VT
                          (MaskNode (SrcInfo.info256.VT SrcInfo.info256.RC:$src),
                                    (i32 timm:$rc)))>,
        Sched<[sched.YMM]>, EVEX, EVEX_RC, EVEX_B;
}

// Conversion with SAE
// Same predicate split as avx10_sat_cvt_rc, without the $rc operand.
multiclass avx10_sat_cvt_sae<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
                             SDNode Node> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
    defm Zrrb
        : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                          (outs DestInfo.info512.RC:$dst),
                          (ins SrcInfo.info512.RC:$src),
                          OpStr, "{sae}, $src", "$src, {sae}",
                          (DestInfo.info512.VT
                            (Node (SrcInfo.info512.VT SrcInfo.info512.RC:$src)))>,
          Sched<[sched.ZMM]>, EVEX, EVEX_B;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in
  defm Z256rrb
      : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info256,
                        (outs DestInfo.info256.RC:$dst),
                        (ins SrcInfo.info256.RC:$src),
                        OpStr, "{sae}, $src", "$src, {sae}",
                        (DestInfo.info256.VT
                          (Node (SrcInfo.info256.VT SrcInfo.info256.RC:$src)))>,
        Sched<[sched.YMM]>, EVEX, EVEX_B;
}

// Instantiates avx10_sat_cvt_rmb for the 512/256/128-bit widths.
multiclass avx10_sat_cvt_base<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                              SDNode MaskNode, AVX512VLVectorVTInfo DestInfo,
                              AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx10_sat_cvt_rmb<Opc, OpStr, sched.ZMM,
                               DestInfo.info512, SrcInfo.info512, MaskNode>,
             EVEX, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_sat_cvt_rmb<Opc, OpStr, sched.YMM,
                                  DestInfo.info256, SrcInfo.info256, MaskNode>,
                EVEX, EVEX_V256;
    defm Z128 : avx10_sat_cvt_rmb<Opc, OpStr, sched.XMM,
                                  DestInfo.info128, SrcInfo.info128, MaskNode>,
                EVEX, EVEX_V128;
  }
}

// Non-truncating forms: opcode 0x69 (ibs) / 0x6b (iubs). The BF16 records have
// no rounding-control variant.
defm VCVTBF162IBS
    : avx10_sat_cvt_base<0x69, "vcvtbf162ibs", SchedWriteVecIMul, X86vcvtp2ibs,
                         avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTBF162IUBS
    : avx10_sat_cvt_base<0x6b, "vcvtbf162iubs", SchedWriteVecIMul, X86vcvtp2iubs,
                         avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPH2IBS
    : avx10_sat_cvt_base<0x69, "vcvtph2ibs", SchedWriteVecIMul, X86vcvtp2ibs,
                         avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_rc<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                       avx512vl_i16_info, avx512vl_f16_info, X86vcvtp2ibsRnd>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2IUBS
    : avx10_sat_cvt_base<0x6b, "vcvtph2iubs", SchedWriteVecIMul, X86vcvtp2iubs,
                         avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_rc<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                       avx512vl_i16_info, avx512vl_f16_info, X86vcvtp2iubsRnd>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPS2IBS
    : avx10_sat_cvt_base<0x69, "vcvtps2ibs", SchedWriteVecIMul, X86vcvtp2ibs,
                         avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_rc<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                       avx512vl_i32_info, avx512vl_f32_info, X86vcvtp2ibsRnd>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTPS2IUBS
    : avx10_sat_cvt_base<0x6b, "vcvtps2iubs", SchedWriteVecIMul, X86vcvtp2iubs,
                         avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_rc<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                       avx512vl_i32_info, avx512vl_f32_info, X86vcvtp2iubsRnd>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

// Truncating forms: opcode 0x68 (ibs) / 0x6a (iubs). The PH/PS records add an
// {sae} variant instead of rounding control.
defm VCVTTBF162IBS
    : avx10_sat_cvt_base<0x68, "vcvttbf162ibs", SchedWriteVecIMul, X86vcvttp2ibs,
                         avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTBF162IUBS
    : avx10_sat_cvt_base<0x6a, "vcvttbf162iubs", SchedWriteVecIMul, X86vcvttp2iubs,
                         avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPH2IBS
    : avx10_sat_cvt_base<0x68, "vcvttph2ibs", SchedWriteVecIMul, X86vcvttp2ibs,
                         avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_sae<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                        avx512vl_i16_info, avx512vl_f16_info, X86vcvttp2ibsSAE>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2IUBS
    : avx10_sat_cvt_base<0x6a, "vcvttph2iubs", SchedWriteVecIMul, X86vcvttp2iubs,
                         avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_sae<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                        avx512vl_i16_info, avx512vl_f16_info, X86vcvttp2iubsSAE>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPS2IBS
    : avx10_sat_cvt_base<0x68, "vcvttps2ibs", SchedWriteVecIMul, X86vcvttp2ibs,
                         avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_sae<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                        avx512vl_i32_info, avx512vl_f32_info, X86vcvttp2ibsSAE>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2IUBS
    : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul, X86vcvttp2iubs,
                         avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_sae<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                        avx512vl_i32_info, avx512vl_f32_info, X86vcvttp2iubsSAE>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

//-------------------------------------------------
// AVX10 SATCVT-DS instructions
//-------------------------------------------------

// Convert Double to Signed/Unsigned Doubleword with truncation.
multiclass avx10_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               f128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNodeSAE,
                                   sched.YMM>, EVEX_V256;
  }


  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst|$dst, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrb") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}}|$dst {${mask}}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
720 "$dst {${mask}} {z}, ${src}{1to4}}", 721 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 722 VK4WM:$mask, f64mem:$src), 0, "att">; 723} 724 725// Convert Double to Signed/Unsigned Quardword with truncation saturationn enabled 726multiclass avx10_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 727 SDNode MaskOpNode, SDNode OpNodeRnd, 728 X86SchedWriteWidths sched> { 729 let Predicates = [HasAVX10_2_512] in { 730 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 731 MaskOpNode, sched.ZMM>, 732 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 733 OpNodeRnd, sched.ZMM>, EVEX_V512; 734 } 735 let Predicates = [HasAVX10_2] in { 736 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 737 MaskOpNode, sched.XMM>, EVEX_V128; 738 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 739 MaskOpNode, sched.YMM>, EVEX_V256; 740 } 741 let Predicates = [HasAVX10_2], hasEVEX_U=1 in { 742 defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info, 743 OpNodeRnd, sched.YMM>, EVEX_V256; 744 } 745} 746 747// Convert Float to Signed/Unsigned Quardword with truncation 748multiclass avx10_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 749 SDNode MaskOpNode, SDNode OpNodeRnd, 750 X86SchedWriteWidths sched> { 751 let Predicates = [HasAVX10_2_512] in { 752 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 753 MaskOpNode, sched.ZMM>, 754 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 755 OpNodeRnd, sched.ZMM>, EVEX_V512; 756 } 757 let Predicates = [HasAVX10_2] in { 758 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 759 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 760 (v2i64 (OpNode (bc_v4f32 (v2f64 761 (scalar_to_vector (loadf64 addr:$src)))))), 762 (v2i64 (MaskOpNode (bc_v4f32 (v2f64 763 (scalar_to_vector (loadf64 addr:$src))))))>, 764 EVEX_V128; 765 defm Z256 : 
avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 766 MaskOpNode, sched.YMM>, EVEX_V256; 767 } 768 769 let Predicates = [HasAVX10_2], hasEVEX_U=1 in { 770 defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNodeRnd, 771 sched.YMM>, EVEX_V256; 772 } 773} 774 775// Convert Float to Signed/Unsigned Doubleword with truncation 776multiclass avx10_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 777 SDNode MaskOpNode, 778 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 779 let Predicates = [HasAVX10_2_512] in { 780 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 781 MaskOpNode, sched.ZMM>, 782 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 783 OpNodeSAE, sched.ZMM>, EVEX_V512; 784 } 785 786 let Predicates = [HasAVX10_2] in { 787 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 788 MaskOpNode, sched.XMM>, EVEX_V128; 789 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 790 MaskOpNode, sched.YMM>, EVEX_V256; 791 } 792 793 let Predicates = [HasAVX10_2], hasEVEX_U=1 in { 794 defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f32x_info, 795 OpNodeSAE, sched.YMM>, EVEX_V256; 796 } 797} 798 799defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis, 800 X86cvttp2sis, X86cvttp2sisSAE, 801 SchedWriteCvtPD2DQ>, 802 PD, REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>; 803defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis, 804 X86cvttp2uis, X86cvttp2uisSAE, 805 SchedWriteCvtPD2DQ>, 806 REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>; 807defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis, 808 X86cvttp2sis, X86cvttp2sisSAE, 809 SchedWriteCvtPS2DQ>, T_MAP5,PS, 810 EVEX_CD8<32, CD8VF>; 811defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis, 812 X86cvttp2uis, X86cvttp2uisSAE, 813 SchedWriteCvtPS2DQ>, T_MAP5,PS, 814 EVEX_CD8<32, CD8VF>; 815defm VCVTTPD2QQS : 
avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis, 816 X86cvttp2sis, X86cvttp2sisSAE, 817 SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD, 818 EVEX_CD8<64, CD8VF>; 819defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis, 820 X86cvttp2sis, X86cvttp2sisSAE, 821 SchedWriteCvtPS2DQ>, T_MAP5,PD, 822 EVEX_CD8<32, CD8VH>; 823defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis, 824 X86cvttp2uis, X86cvttp2uisSAE, 825 SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD, 826 EVEX_CD8<64, CD8VF>; 827defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis, 828 X86cvttp2uis, X86cvttp2uisSAE, 829 SchedWriteCvtPS2DQ>, T_MAP5,PD, 830 EVEX_CD8<32, CD8VH>; 831 832let Predicates = [HasAVX10_2] in { 833// Special patterns to allow use of X86mcvttp2si for masking. Instruction 834// patterns have been disabled with null_frag. 835// Patterns VCVTTPD2DQSZ128 836 837// VCVTTPD2DQS 838def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))), 839 (VCVTTPD2DQSZ128rr VR128X:$src)>; 840def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)), 841 (VCVTTPD2DQSZ256rr VR256X:$src)>; 842def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)), 843 (VCVTTPD2DQSZrr VR512:$src)>; 844 845// VCVTTPD2QQS 846def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)), 847 (VCVTTPD2QQSZ128rr VR128X:$src)>; 848def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)), 849 (VCVTTPD2QQSZ256rr VR256X:$src)>; 850def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)), 851 (VCVTTPD2QQSZrr VR512:$src)>; 852 853// VCVTTPD2UDQS 854def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))), 855 (VCVTTPD2UDQSZ128rr VR128X:$src)>; 856def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)), 857 (VCVTTPD2UDQSZ256rr VR256X:$src)>; 858def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)), 859 (VCVTTPD2UDQSZrr VR512:$src)>; 860 861// VCVTTPD2UQQS 862def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)), 863 (VCVTTPD2UQQSZ128rr VR128X:$src)>; 864def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), 
i64)), 865 (VCVTTPD2UQQSZ256rr VR256X:$src)>; 866def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)), 867 (VCVTTPD2UQQSZrr VR512:$src)>; 868 869// VCVTTPS2DQS 870def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)), 871 (VCVTTPS2DQSZ128rr VR128X:$src)>; 872def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)), 873 (VCVTTPS2DQSZ256rr VR256X:$src)>; 874def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)), 875 (VCVTTPS2DQSZrr VR512:$src)>; 876 877// VCVTTPS2QQS 878def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))), 879 (VCVTTPS2QQSZ128rr VR128X:$src)>; 880def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)), 881 (VCVTTPS2QQSZ256rr VR128X:$src)>; 882def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)), 883 (VCVTTPS2QQSZrr VR256X:$src)>; 884 885// VCVTTPS2UDQS 886def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)), 887 (VCVTTPS2UDQSZ128rr VR128X:$src)>; 888def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)), 889 (VCVTTPS2UDQSZ256rr VR256X:$src)>; 890def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)), 891 (VCVTTPS2UDQSZrr VR512:$src)>; 892 893// VCVTTPS2UQQS 894def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))), 895 (VCVTTPS2UQQSZ128rr VR128X:$src)>; 896def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)), 897 (VCVTTPS2UQQSZ256rr VR128X:$src)>; 898def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)), 899 (VCVTTPS2UQQSZrr VR256X:$src)>; 900 901def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))), 902 (VCVTTPD2DQSZ128rr VR128X:$src)>; 903def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))), 904 (VCVTTPD2DQSZ128rm addr:$src)>; 905def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))), 906 (VCVTTPD2DQSZ128rmb addr:$src)>; 907def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 908 VK2WM:$mask), 909 (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 910def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 911 VK2WM:$mask), 912 (VCVTTPD2DQSZ128rrkz 
VK2WM:$mask, VR128X:$src)>; 913def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 914 VK2WM:$mask), 915 (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 916def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 917 VK2WM:$mask), 918 (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>; 919def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)), 920 (v4i32 VR128X:$src0), VK2WM:$mask), 921 (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 922def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)), 923 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 924 (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>; 925 926// Patterns VCVTTPD2UDQSZ128 927def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))), 928 (VCVTTPD2UDQSZ128rmb addr:$src)>; 929def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))), 930 (VCVTTPD2UDQSZ128rr VR128X:$src)>; 931def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))), 932 (VCVTTPD2UDQSZ128rmb addr:$src)>; 933def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 934 VK2WM:$mask), 935 (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 936def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 937 VK2WM:$mask), 938 (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>; 939def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 940 VK2WM:$mask), 941 (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 942def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 943 VK2WM:$mask), 944 (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>; 945def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)), 946 (v4i32 VR128X:$src0), VK2WM:$mask), 947 (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 948def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)), 949 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 950 (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>; 951} 952 953// Convert scalar 
// float/double to signed/unsigned int 32/64 with truncation and saturation.
//
// rr/rm take the scalar FP register class / scalar memory operand and are
// isCodeGenOnly; the *_Int forms take the full vector register class. rrb_Int
// is the {sae} form.
multiclass avx10_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                          X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                          SDNode OpNodeInt, SDNode OpNodeSAE,
                          X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.FRC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                        (OpNode _SrcRC.FRC:$src, _DstRC.EltVT))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.ScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                        (OpNode (_SrcRC.ScalarLdFrag addr:$src),
                                _DstRC.EltVT))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
               SIMD_EXC;
    }
    def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.RC:$src),
                        !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                        [(set _DstRC.RC:$dst,
                          (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                         (ins _SrcRC.RC:$src),
                         !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                         [(set _DstRC.RC:$dst,
                           (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
    def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                        [(set _DstRC.RC:$dst,
                          (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  }
}

// Signed scalar conversions (opcode 0x6D). The *64S variants add REX_W and use
// i64x_info for the destination.
defm VCVTTSS2SIS : avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
                                  fp_to_sint_sat, X86cvttss2Int,
                                  X86cvttss2IntSAE, WriteCvtSS2I>,
                   T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64S : avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
                                    fp_to_sint_sat, X86cvttss2Int,
                                    X86cvttss2IntSAE, WriteCvtSS2I>,
                     REX_W, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIS : avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
                                  fp_to_sint_sat, X86cvttss2Int,
                                  X86cvttss2IntSAE, WriteCvtSD2I>,
                   T_MAP5, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64S : avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
                                    fp_to_sint_sat, X86cvttss2Int,
                                    X86cvttss2IntSAE, WriteCvtSD2I>,
                     REX_W, T_MAP5, XD, EVEX_CD8<64, CD8VT1>;

// Unsigned scalar conversions (opcode 0x6C).
defm VCVTTSS2USIS : avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
                                   fp_to_uint_sat, X86cvttss2UInt,
                                   X86cvttss2UIntSAE, WriteCvtSS2I>,
                    T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64S : avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
                                     fp_to_uint_sat, X86cvttss2UInt,
                                     X86cvttss2UIntSAE, WriteCvtSS2I>,
                      T_MAP5, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIS : avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
                                   fp_to_uint_sat, X86cvttss2UInt,
                                   X86cvttss2UIntSAE, WriteCvtSD2I>,
                    T_MAP5, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64S : avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
                                     fp_to_uint_sat, X86cvttss2UInt,
                                     X86cvttss2UIntSAE, WriteCvtSD2I>,
                      T_MAP5, XD, REX_W, EVEX_CD8<64, CD8VT1>;

//-------------------------------------------------
// AVX10 CONVERT instructions
//-------------------------------------------------

// Register-register form with an explicit rounding-control operand ($rc).
multiclass avx10_cvt2ps2ph_rc<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _Src, X86VectorVTInfo _,
                              SDNode OpNodeRnd> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc),
                             OpcodeStr,
                             "$rc, $src2, $src1", "$src1, $src2, $rc",
                             (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
                                              (_Src.VT _Src.RC:$src2),
                                              (i32 timm:$rc)))>,
             EVEX, VVVV, EVEX_B, EVEX_RC, PD, Sched<[sched]>;
}

//TODO: Merge into avx512_binop_all, difference is rounding control added here.
multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched,
                           AVX512VLVectorVTInfo _SrcVTInfo,
                           AVX512VLVectorVTInfo _DstVTInfo,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
    defm Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                              _SrcVTInfo.info512, _DstVTInfo.info512,
                              _SrcVTInfo.info512>,
             avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.ZMM,
                                _SrcVTInfo.info512, _DstVTInfo.info512,
                                OpNodeRnd>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                 _SrcVTInfo.info256, _DstVTInfo.info256,
                                 _SrcVTInfo.info256>,
                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                 _SrcVTInfo.info128, _DstVTInfo.info128,
                                 _SrcVTInfo.info128>,
                EVEX_V128, EVEX_CD8<32, CD8VF>;
  }

  // 256-bit rounding-control form, defined with hasEVEX_U set.
  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in {
    defm Z256 : avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.YMM,
                                   _SrcVTInfo.info256, _DstVTInfo.info256,
                                   OpNodeRnd>;
  }
}

defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",
                                   SchedWriteCvtPD2PS,
                                   avx512vl_f32_info, avx512vl_f16_info,
                                   X86vfpround2, X86vfpround2Rnd>,
                   T8;

defm VCVT2PH2BF8 : avx512_binop_all<0x74, "vcvt2ph2bf8", SchedWriteCvtPD2PS,
                                    avx512vl_f16_info, avx512vl_i8_info,
                                    X86vcvt2ph2bf8, [HasAVX10_2_512],
                                    [HasAVX10_2]>,
                   EVEX_CD8<16, CD8VF>, T8, XD;
defm VCVT2PH2BF8S : avx512_binop_all<0x74, "vcvt2ph2bf8s", SchedWriteCvtPD2PS,
                                     avx512vl_f16_info, avx512vl_i8_info,
                                     X86vcvt2ph2bf8s, [HasAVX10_2_512],
                                     [HasAVX10_2]>,
                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;
defm VCVT2PH2HF8 : avx512_binop_all<0x18, "vcvt2ph2hf8", SchedWriteCvtPD2PS,
                                    avx512vl_f16_info, avx512vl_i8_info,
                                    X86vcvt2ph2hf8, [HasAVX10_2_512],
                                    [HasAVX10_2]>,
                   EVEX_CD8<16, CD8VF>, T_MAP5, XD;
defm VCVT2PH2HF8S : avx512_binop_all<0x1b, "vcvt2ph2hf8s", SchedWriteCvtPD2PS,
                                     avx512vl_f16_info, avx512vl_i8_info,
                                     X86vcvt2ph2hf8s, [HasAVX10_2_512],
                                     [HasAVX10_2]>,
                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;

//TODO: Merge into avx512_vcvt_fp, difference is one more source register here.
//
// Defines the rr, rm and rmb (broadcast) forms of a two-source conversion, each
// with merge-masked and zero-masked variants built from vselect_mask. The
// memory forms use the folded scheduling classes (sched.Folded /
// sched.ReadAfterFold) rather than the register-form write.
multiclass avx10_convert_3op_packed<bits<8> OpCode, string OpcodeStr,
           X86VectorVTInfo vt_dst, X86VectorVTInfo vt_src1,
           X86VectorVTInfo vt_src2, SDPatternOperator OpNode,
           SDPatternOperator MaskOpNode, X86FoldableSchedWrite sched,
           string Broadcast = vt_src2.BroadcastStr,
           X86MemOperand MemOp = vt_src2.MemOp,
           RegisterClass MaskRC = vt_src2.KRCWM,
           dag LdDAG = (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
                                  (vt_src2.VT (vt_src2.LdFrag addr:$src2)))),
           dag MaskLdDAG = (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                      (vt_src2.VT (vt_src2.LdFrag addr:$src2))))> {
  defm rr : AVX512_maskable_cvt<OpCode, MRMSrcReg, vt_dst,
                                (outs vt_dst.RC:$dst),
                                (ins vt_src1.RC:$src1, vt_src2.RC:$src2),
                                (ins vt_dst.RC:$src0, MaskRC:$mask,
                                     vt_src1.RC:$src1, vt_src2.RC:$src2),
                                (ins MaskRC:$mask, vt_src1.RC:$src1,
                                     vt_src2.RC:$src2),
                                OpcodeStr, "$src2, $src1", "$src1, $src2",
                                (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
                                                   (vt_src2.VT vt_src2.RC:$src2))),
                                (vselect_mask MaskRC:$mask,
                                  (vt_dst.VT (MaskOpNode
                                               (vt_src1.VT vt_src1.RC:$src1),
                                               (vt_src2.VT vt_src2.RC:$src2))),
                                  vt_dst.RC:$src0),
                                (vselect_mask MaskRC:$mask,
                                  (vt_dst.VT (MaskOpNode
                                               (vt_src1.VT vt_src1.RC:$src1),
                                               (vt_src2.VT vt_src2.RC:$src2))),
                                  vt_dst.ImmAllZerosV)>,
            EVEX, VVVV, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst,
                                (outs vt_dst.RC:$dst),
                                (ins vt_src1.RC:$src1, MemOp:$src2),
                                (ins vt_dst.RC:$src0, MaskRC:$mask,
                                     vt_src1.RC:$src1, MemOp:$src2),
                                (ins MaskRC:$mask, vt_src1.RC:$src1,
                                     MemOp:$src2),
                                OpcodeStr, "$src2, $src1", "$src1, $src2",
                                LdDAG,
                                (vselect_mask MaskRC:$mask, MaskLdDAG,
                                              vt_dst.RC:$src0),
                                (vselect_mask MaskRC:$mask, MaskLdDAG,
                                              vt_dst.ImmAllZerosV)>,
            EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;

  let mayLoad = 1 in
  defm rmb : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst,
                                 (outs vt_dst.RC:$dst),
                                 (ins vt_src1.RC:$src1,
                                      vt_src2.ScalarMemOp:$src2),
                                 (ins vt_dst.RC:$src0, MaskRC:$mask,
                                      vt_src1.RC:$src1,
                                      vt_src2.ScalarMemOp:$src2),
                                 (ins MaskRC:$mask, vt_src1.RC:$src1,
                                      vt_src2.ScalarMemOp:$src2),
                                 OpcodeStr,
                                 "${src2}"#Broadcast#", $src1",
                                 "$src1, ${src2}"#Broadcast,
                                 (vt_dst.VT (OpNode
                                   (vt_src1.VT vt_src1.RC:$src1),
                                   (vt_src2.VT
                                     (vt_src2.BroadcastLdFrag addr:$src2)))),
                                 (vselect_mask MaskRC:$mask,
                                   (vt_dst.VT (MaskOpNode
                                     (vt_src1.VT vt_src1.RC:$src1),
                                     (vt_src2.VT
                                       (vt_src2.BroadcastLdFrag addr:$src2)))),
                                   vt_dst.RC:$src0),
                                 (vselect_mask MaskRC:$mask,
                                   (vt_dst.VT (MaskOpNode
                                     (vt_src1.VT vt_src1.RC:$src1),
                                     (vt_src2.VT
                                       (vt_src2.BroadcastLdFrag addr:$src2)))),
                                   vt_dst.ImmAllZerosV)>,
             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

//TODO: Merge into avx512_cvt_trunc
//
// Z and Z256 pass OpNode for both the unmasked and masked operators. Z128 is
// instantiated with null_frag and gets its patterns from the explicit Pat
// records below, which use MaskOpNode for the masked forms.
multiclass avx10_convert_3op<bits<8> OpCode, string OpcodeStr,
           AVX512VLVectorVTInfo vt_dst, AVX512VLVectorVTInfo vt_src,
           X86SchedWriteWidths sched,
           SDPatternOperator OpNode,
           SDPatternOperator MaskOpNode,
           PatFrag bcast128 = vt_src.info128.BroadcastLdFrag,
           PatFrag loadVT128 = vt_src.info128.LdFrag,
           RegisterClass maskRC128 = vt_src.info128.KRCWM> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info256,
                                    vt_dst.info512, vt_src.info512,
                                    OpNode, OpNode, sched.ZMM>,
           EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
                                         vt_dst.info256, vt_src.info256,
                                         OpNode, OpNode, sched.YMM>,
                EVEX_V256, EVEX_CD8<16, CD8VF>;
    defm Z128 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
                                         vt_dst.info128, vt_src.info128,
                                         null_frag, null_frag, sched.XMM>,
                EVEX_V128, EVEX_CD8<16, CD8VF>;
    // Special patterns to allow use of MaskOpNode for masking 128 version.
    // Instruction patterns have been disabled with null_frag.

    // Register source.
    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
                                         (vt_src.info128.VT VR128X:$src2))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src1, VR128X:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT VR128X:$src2),
                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0,
                                                    maskRC128:$mask,
                                                    VR128X:$src1, VR128X:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT VR128X:$src2),
                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask,
                                                     VR128X:$src1, VR128X:$src2)>;

    // Full-vector load source.
    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
                                         (loadVT128 addr:$src2))),
              (!cast<Instruction>(NAME # "Z128rm") VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (loadVT128 addr:$src2),
                          (vt_dst.info128.VT VR128X:$src0),
                          maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0,
                                                    maskRC128:$mask,
                                                    VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (loadVT128 addr:$src2),
                          vt_dst.info128.ImmAllZerosV,
                          maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask,
                                                     VR128X:$src1, addr:$src2)>;

    // Broadcast load source.
    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
                                         (vt_src.info128.VT
                                           (bcast128 addr:$src2)))),
              (!cast<Instruction>(NAME # "Z128rmb") VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT (bcast128 addr:$src2)),
                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0,
                                                     maskRC128:$mask,
                                                     VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT (bcast128 addr:$src2)),
                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask,
                                                      VR128X:$src1, addr:$src2)>;
  }
}

defm VCVTBIASPH2BF8 : avx10_convert_3op<0x74, "vcvtbiasph2bf8",
                                        avx512vl_i8_info, avx512vl_f16_info,
                                        SchedWriteCvtPD2PS,
                                        X86vcvtbiasph2bf8, X86vmcvtbiasph2bf8>,
                      T8, PS;
defm VCVTBIASPH2BF8S : avx10_convert_3op<0x74, "vcvtbiasph2bf8s",
                                         avx512vl_i8_info, avx512vl_f16_info,
                                         SchedWriteCvtPD2PS,
                                         X86vcvtbiasph2bf8s, X86vmcvtbiasph2bf8s>,
                       T_MAP5, PS;
defm VCVTBIASPH2HF8 : avx10_convert_3op<0x18, "vcvtbiasph2hf8",
                                        avx512vl_i8_info, avx512vl_f16_info,
                                        SchedWriteCvtPD2PS,
                                        X86vcvtbiasph2hf8, X86vmcvtbiasph2hf8>,
                      T_MAP5, PS;
defm VCVTBIASPH2HF8S : avx10_convert_3op<0x1b, "vcvtbiasph2hf8s",
                                         avx512vl_i8_info, avx512vl_f16_info,
                                         SchedWriteCvtPD2PS,
                                         X86vcvtbiasph2hf8s, X86vmcvtbiasph2hf8s>,
                       T_MAP5, PS;

defm VCVTPH2BF8 : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8", avx512vl_i8_info,
                                      avx512vl_f16_info, SchedWriteCvtPD2PS,
                                      X86vcvtph2bf8, X86vmcvtph2bf8,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                  T8, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2BF8S : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8s", avx512vl_i8_info,
                                       avx512vl_f16_info, SchedWriteCvtPD2PS,
                                       X86vcvtph2bf8s, X86vmcvtph2bf8s,
                                       [HasAVX10_2], [HasAVX10_2_512]>,
                   T_MAP5, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2HF8 : avx512_cvt_trunc_ne<0x18, "vcvtph2hf8", avx512vl_i8_info,
                                      avx512vl_f16_info, SchedWriteCvtPD2PS,
                                      X86vcvtph2hf8, X86vmcvtph2hf8,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                  T_MAP5, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2HF8S : avx512_cvt_trunc_ne<0x1b, "vcvtph2hf8s", avx512vl_i8_info,
                                       avx512vl_f16_info, SchedWriteCvtPD2PS,
                                       X86vcvtph2hf8s, X86vmcvtph2hf8s,
                                       [HasAVX10_2], [HasAVX10_2_512]>,
                   T_MAP5, XS, EVEX_CD8<16, CD8VF>;

// One-source conversion with register (rr) and memory (rm) forms only; no
// broadcast form is defined here.
multiclass avx10_convert_2op_nomb_packed<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _dest,
                                         X86VectorVTInfo _src,
                                         SDNode OpNode,
                                         X86MemOperand x86memop,
                                         X86FoldableSchedWrite sched,
                                         dag ld_dag = (load addr:$src)> {
  let ExeDomain = _dest.ExeDomain in {
    defm rr : AVX512_maskable_split<opc, MRMSrcReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src), OpcodeStr,
                                    "$src", "$src",
                                    (OpNode (_src.VT _src.RC:$src)),
                                    (OpNode (_src.VT _src.RC:$src))>,
              Sched<[sched]>;
    defm rm : AVX512_maskable_split<opc, MRMSrcMem, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins x86memop:$src), OpcodeStr,
                                    "$src", "$src",
                                    (OpNode (_src.VT ld_dag)),
                                    (OpNode (_src.VT ld_dag))>,
              Sched<[sched.Folded]>;
  }
}

// The source info is half the destination width for Z and Z256 (info256 ->
// info512, info128 -> info256); Z128 uses info128 for both with an f64mem
// operand.
multiclass avx10_convert_2op_nomb<string OpcodeStr,
                                  AVX512VLVectorVTInfo _dest,
                                  AVX512VLVectorVTInfo _src, bits<8> opc,
                                  SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info512,
                                         _src.info256, OpNode, f256mem,
                                         WriteCvtPH2PSZ>,
           EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info128,
                                              _src.info128, OpNode, f64mem,
                                              WriteCvtPH2PSZ>,
                EVEX_V128;
    defm Z256 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info256,
                                              _src.info128, OpNode, f128mem,
                                              WriteCvtPH2PSZ>,
                EVEX_V256;
  }
}

defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
                                         avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
                  AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;

//-------------------------------------------------
// AVX10 BF16 instructions
//-------------------------------------------------

// VADDBF16, VSUBBF16, VMULBF16, VDIVBF16, VMAXBF16, VMINBF16

// Binary op whose pattern operator is the intrinsic looked up by name:
// "int_x86_avx10_" # OpcodeStr # "bf16" # <width>.
multiclass avx10_fp_binop_int_bf16<bits<8> opc, string OpcodeStr,
                                   X86SchedWriteSizes sched,
                                   bit IsCommutable = 0> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_packed<opc, OpcodeStr,
                            !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
                            !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
                            v32bf16_info, sched.PH.ZMM, IsCommutable>,
           EVEX_V512, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_packed<opc, OpcodeStr,
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
                                 v8bf16x_info, sched.PH.XMM, IsCommutable>,
                EVEX_V128, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fp_packed<opc, OpcodeStr,
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
                                 v16bf16x_info, sched.PH.YMM, IsCommutable>,
                EVEX_V256, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  }
}

// Binary op whose pattern operator is passed in as OpNode; MaskOpNode defaults
// to OpNode.
multiclass avx10_fp_binop_bf16<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               X86SchedWriteSizes sched,
                               bit IsCommutable = 0,
                               SDPatternOperator MaskOpNode = OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                            v32bf16_info, sched.PH.ZMM, IsCommutable>,
           EVEX_V512, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                                 v8bf16x_info, sched.PH.XMM, IsCommutable>,
                EVEX_V128, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                                 v16bf16x_info, sched.PH.YMM, IsCommutable>,
                EVEX_V256, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  }
}

// Defined with an empty Uses list and mayRaiseFPException cleared.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VADDBF16 : avx10_fp_binop_bf16<0x58, "vadd", fadd, SchedWriteFAddSizes, 1>;
defm VSUBBF16 : avx10_fp_binop_bf16<0x5C, "vsub", fsub, SchedWriteFAddSizes, 0>;
defm VMULBF16 : avx10_fp_binop_bf16<0x59, "vmul", fmul, SchedWriteFMulSizes, 1>;
defm VDIVBF16 : avx10_fp_binop_bf16<0x5E, "vdiv", fdiv, SchedWriteFDivSizes, 0>;
defm VMINBF16 : avx10_fp_binop_int_bf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
defm VMAXBF16 : avx10_fp_binop_int_bf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
}

// VCOMISBF16
let Uses = []<Register>, mayRaiseFPException = 0,
    Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
  //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`
  //which may require extend supports on BFR16X, loadbf16, ...
  defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem,
                                   loadf16, "comisbf16", SSEPackedSingle>,
                     T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

  let isCodeGenOnly = 1 in {
    defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
                                         sse_load_bf16, "comisbf16",
                                         SSEPackedSingle>,
                       T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

// VCMPBF16
// Compare into a mask register: register (rri), memory (rmi) and broadcast
// (rmbi) forms, each taking an 8-bit condition-code immediate.
multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _> {
  let mayRaiseFPException = 0 in {
    defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                                   (outs _.KRC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                                   "vcmp"#_.Suffix,
                                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                                   (X86cmpm (_.VT _.RC:$src1),
                                            (_.VT _.RC:$src2), timm:$cc),
                                   (X86cmpm_su (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src2), timm:$cc),
                                   1>,
               Sched<[sched]>;

    defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                                   (outs _.KRC:$dst),
                                   (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                                   "vcmp"#_.Suffix,
                                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                                   (X86cmpm (_.VT _.RC:$src1),
                                            (_.VT (_.LdFrag addr:$src2)),
                                            timm:$cc),
                                   (X86cmpm_su (_.VT _.RC:$src1),
                                               (_.VT (_.LdFrag addr:$src2)),
                                               timm:$cc)>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                                    (outs _.KRC:$dst),
                                    (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                         u8imm:$cc),
                                    "vcmp"#_.Suffix,
                                    "$cc, ${src2}"#_.BroadcastStr#", $src1",
                                    "$src1, ${src2}"#_.BroadcastStr#", $cc",
                                    (X86cmpm (_.VT _.RC:$src1),
                                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                                             timm:$cc),
                                    (X86cmpm_su (_.VT _.RC:$src1),
                                                (_.VT (_.BroadcastLdFrag addr:$src2)),
                                                timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched,
                           AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
  }
}

defm VCMPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
                AVX512XDIi8Base, EVEX, VVVV,
                EVEX_CD8<16, CD8VF>, TA;


// VSQRTBF16
// Appends "bf16" to OpcodeStr and instantiates avx512_sqrt_packed per width.
multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
                              sched.PH.ZMM, v32bf16_info>,
           EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
                                   sched.PH.XMM, v8bf16x_info>,
                EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
                                   sched.PH.YMM, v16bf16x_info>,
                EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
}

// Instantiate the packed BF16 square root (opcode 0x51) with an empty
// implicit-use list and mayRaiseFPException = 0.
let Uses = []<Register>, mayRaiseFPException = 0 in
defm VSQRTBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrt", SchedWriteFSqrtSizes>;

// VRSQRTBF16, VRCPBF16, VGETEXPBF16
// (VSQRTBF16 is instantiated separately, just above.)
//
// Packed BF16 unary operation built on avx512_fp14_p.
//
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; "bf16" is appended before it is forwarded.
//   OpNode    - selection node forwarded to avx512_fp14_p.
//   sched     - scheduling widths; the ZMM / XMM / YMM members are read.
//
// The sub-definition names themselves start with "BF16" (BF16Z, BF16Z128,
// BF16Z256), so the instantiating defm supplies only the stem, e.g.
// `defm VRSQRT` yields records prefixed VRSQRTBF16Z*. Opcode map, prefix and
// CD8 mixins are supplied at the instantiation site, not here.
multiclass avx10_fp14_bf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm BF16Z : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
                             OpNode, sched.ZMM, v32bf16_info>,
                             EVEX_V512;
  let Predicates = [HasAVX10_2] in {
  defm BF16Z128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
                                OpNode, sched.XMM, v8bf16x_info>,
                                EVEX_V128;
  defm BF16Z256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
                                OpNode, sched.YMM, v16bf16x_info>,
                                EVEX_V256;
  }
}

// NOTE(review): unlike the neighbouring BF16 defms, these three are not
// wrapped in `let Uses = []<Register>, mayRaiseFPException = 0` - confirm
// that avx512_fp14_p already provides the intended values.
// VRSQRT and VRCP use T_MAP6 with the PS prefix class; VGETEXP uses T_MAP5
// and names no prefix class.
defm VRSQRT : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
                              T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VRCP : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
                            T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
                               T_MAP5, EVEX_CD8<16, CD8VF>;

// VSCALEFBF16
// Packed BF16 scalef: instantiates avx512_fp_scalef_p with the X86scalef node
// for v32bf16_info (HasAVX10_2_512) and v8bf16x_info / v16bf16x_info
// (HasAVX10_2). All three variants carry the same mixins
// (T_MAP6, PS, EVEX_CD8<16, CD8VF>); only their textual order differs.
multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
                                EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
    defm Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in
defm VSCALEFBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;

// VREDUCEBF16, VRNDSCALEBF16, VGETMANTBF16
// Packed BF16 unary-with-immediate operation built on
// avx512_unary_fp_packed_imm.
//
//   OpcodeStr  - mnemonic stem forwarded unchanged.
//   _          - vector type family; info512 / info128 / info256 are used.
//   opc        - opcode byte.
//   OpNode     - first pattern operator forwarded.
//   MaskOpNode - second pattern operator forwarded.
//   sched      - scheduling widths; the ZMM / XMM / YMM members are read.
//
// Only the vector-length mixin is attached here; the remaining encoding
// mixins come from the instantiation site.
multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
    AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
    SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

// VREDUCE and VGETMANT pass the same node in both operator positions;
// VRNDSCALE passes X86any_VRndScale first and X86VRndScale second.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VREDUCEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreduce", avx512vl_bf16_info, 0x56,
                                    X86VReduce, X86VReduce, SchedWriteFRnd>,
                                    AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm VRNDSCALEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscale", avx512vl_bf16_info, 0x08,
                                      X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
                                      AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm VGETMANTBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
                                     X86VGetMant, X86VGetMant, SchedWriteFRnd>,
                                     AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
}

// VFPCLASSBF16
// Packed BF16 fpclass: instantiates avx512_vector_fpclass for the three
// members of avx512vl_bf16_info, passing the string "z" / "x" / "y" for the
// 512 / 128 / 256 variants and an empty register list as the last argument.
// NOTE(review): the string is presumably a memory-size mnemonic suffix and
// the list the implicit uses - confirm against avx512_vector_fpclass.
multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
                                 X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
                                   avx512vl_bf16_info.info512, "z",
                                   []<Register>>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
                                      avx512vl_bf16_info.info128, "x",
                                      []<Register>>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
                                      avx512vl_bf16_info.info256, "y",
                                      []<Register>>, EVEX_V256;
  }
}

defm VFPCLASSBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
                                          AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;

// VF[,N]M[ADD,SUB][132,213,231]BF16
// The three multiclasses below are structurally identical and differ only in
// the helper they instantiate (avx512_fma3p_{213,231,132}_rm). Each takes the
// opcode, the full mnemonic (already including the "bf16" suffix), a pattern
// operator, a mask pattern node and scheduling widths, and emits Z
// (v32bf16_info, HasAVX10_2_512) plus Z128 / Z256 (v8bf16x_info /
// v16bf16x_info, HasAVX10_2) with T_MAP6, PS, EVEX_CD8<16, CD8VF>.

// 213 operand-order form.
multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                                 EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

// 213 forms: opcodes 0xA8 / 0xAA / 0xAC / 0xAE. The first operator is the
// any_* / X86any_* node and the mask operator is the plain node.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD213BF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB213BF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD213BF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB213BF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

// 231 operand-order form.
multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                                 EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

// 231 forms: opcodes 0xB8 / 0xBA / 0xBC / 0xBE.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD231BF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB231BF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD231BF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB231BF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

// 132 operand-order form.
multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                                 EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

// 132 forms: opcodes 0x98 / 0x9A / 0x9C / 0x9E.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD132BF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB132BF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD132BF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB132BF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
// Scalar-register compare that sets EFLAGS, in register (rr) and
// load-folding (rm) forms.
//
//   Opc       - opcode byte.
//   RC / VT   - scalar register class and value type of the operands.
//   OpNode    - comparison node whose result is written to EFLAGS.
//   OpcodeStr - mnemonic.
//   x86memop  - memory operand used by the rm form.
//   ld_frag   - load fragment used in the rm pattern.
//   d         - execution domain.
//   sched     - scheduling class (default WriteFComX); the rm form uses its
//               Folded / ReadAfterFold members.
//
// Both forms are isCodeGenOnly and have mayRaiseFPException = 1.
multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
                        SDPatternOperator OpNode, string OpcodeStr,
                        X86MemOperand x86memop, PatFrag ld_frag,
                        Domain d, X86FoldableSchedWrite sched = WriteFComX>{
  let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
    def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                    [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
                    EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                      [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
                      EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }
}

// Vector-register ("_Int") compare that sets EFLAGS.
//
//   Opc       - opcode byte.
//   _         - vector type info supplying RC, VT, ScalarMemOp and LdFrag.
//   OpNode    - comparison node whose result is written to EFLAGS.
//   OpcodeStr - mnemonic.
//   d         - execution domain.
//   sched     - scheduling class (default WriteFComX).
//
// Defines:
//   rr_Int  - register/register form.
//   rm_Int  - register/memory form (mayLoad = 1).
//   rrb_Int - register/register form printed with {sae}; carries EVEX_B and
//             has no selection pattern.
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
                            string OpcodeStr,
                            Domain d,
                            X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, mayRaiseFPException = 1 in {
    def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                        !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                        [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
                        EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      // The load-folding form is scheduled with the folded write and the
      // read-after-fold resource, matching avx10_com_ef::rm above, rather
      // than with the register-form write.
      def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                          !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                          [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
                          EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
    def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                         !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                         []>,
                         EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
  }
}

// All COMEF records define EFLAGS, use MXCSR and require HasAVX10_2.
// The VUCOMX* prefixes are instantiated twice: once through avx10_com_ef
// (scalar FR* registers, rr/rm) and once through avx10_com_ef_int (vector
// registers, rr_Int/rm_Int/rrb_Int). VCOMX* has only the _Int family.
// Opcode 0x2e is used with X86ucomi512 and 0x2f with X86comi512.
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
  defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
                                "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
                                TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
                                "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
                                T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
                                "vucomxss", f32mem, loadf32, SSEPackedSingle>,
                                TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
                                   "vcomxsd", SSEPackedDouble>,
                                   TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
                                   "vcomxsh", SSEPackedSingle>,
                                   T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
                                   "vcomxss", SSEPackedSingle>,
                                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
                                    "vucomxsd", SSEPackedDouble>,
                                    TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
                                    "vucomxsh", SSEPackedSingle>,
                                    T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
                                    "vucomxss", SSEPackedSingle>,
                                    TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}

//-------------------------------------------------
// AVX10 MOVZXC (COPY) instructions
//-------------------------------------------------
// Each brace-less `let ... in` below applies only to the single def that
// follows it; the InstAlias records that come after are outside those lets
// but still inside the enclosing Predicates block.
let Predicates = [HasAVX10_2] in {
  // vmovd xmm, xmm: selected for X86vzmovl on v4i32.
  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
                                    (v4i32 VR128X:$src))))]>, EVEX,
                                    Sched<[WriteVecMoveFromGpr]>;

  // Load form of the same opcode; pattern-less, kept for the disassembler.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                                    (ins i32mem:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<32, CD8VT1>,
                                    Sched<[WriteVecLoad]>;

  // Store form (opcode 0xD6); pattern-less, kept for the disassembler.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
                                    (ins i32mem:$dst, VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<32, CD8VT1>,
                                    Sched<[WriteVecStore]>;

  // Register/register form on opcode 0xD6, reachable from assembly through
  // the "vmovd.s" alias below.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
                                     (ins VR128X:$src),
                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                     Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;

  // vmovw xmm, xmm: selected for X86vzmovl on v8i16.
  def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v8i16 (X86vzmovl
                                    (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
                                    Sched<[WriteVecMoveFromGpr]>;

  // Load form of the same opcode; pattern-less, kept for the disassembler.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                                    (ins i16mem:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
                                    Sched<[WriteVecLoad]>;

  // Store form (opcode 0x7E); pattern-less, kept for the disassembler.
  // NOTE(review): the destination is declared i32mem although the load form
  // uses i16mem and the CD8 element size is 16 - looks like a copy from the
  // vmovd store; confirm before changing, since it affects printed operand
  // size.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
                                    (ins i32mem:$dst, VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
                                    Sched<[WriteVecStore]>;

  // Register/register form on opcode 0x7E, reachable from assembly through
  // the "vmovw.s" alias below.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                     (ins VR128X:$src),
                                     "vmovw\t{$src, $dst|$dst, $src}",
                                     []>, EVEX, T_MAP5,
                                     Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}

// MOVRS
// Memory-source maskable load for one vector type `_`. The selection pattern
// is the intrinsic named "int_x86_avx10_" # OpStr # _.Size applied to the
// address operand.
multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src), OpStr, "$src", "$src",
                            (_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
                                   addr:$src))>, EVEX;
  }
}

// Instantiates vmovrs_p for the three members of `_Vec`. Every variant
// requires HasMOVRS and In64BitMode; the 512 variant additionally requires
// HasAVX10_2_512 and the 128 / 256 variants HasAVX10_2.
multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in
    defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
  let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
    defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
    defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
  }
}

// All four element sizes share opcode 0x6f in T_MAP5. B/W use the XD prefix
// class and D/Q use XS; W and Q additionally set REX_W.
defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
                           T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
                           T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
                           T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
                           T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;

// SM4(EVEX)
// EVEX-encoded SM4 variants built on SM4_Base. The 128 / 256 variants require
// HasSM4 and HasAVX10_2 and are given AddedComplexity = 1; the 512 variant
// requires HasSM4 and HasAVX10_2_512.
multiclass avx10_sm4_base<string OpStr> {
  // SM4_Base is in X86InstrSSE.td.
  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
  }
  let Predicates = [HasSM4, HasAVX10_2_512] in
    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
}

defm VSM4KEY4  : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;