//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX10 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// VNNI FP16
let ExeDomain = SSEPackedSingle in
defm VDPPHPS : avx512_dpf16ps_sizes<0x52, "vdpphps", X86dpfp16ps, avx512vl_f16_info,
                                    [HasAVX10_2], [HasAVX10_2_512]>,
                    T8, PS, EVEX_CD8<32, CD8VF>;

// VNNI INT8
// The signed/signed, signed/unsigned and unsigned/unsigned families share
// opcodes 0x50 (plain) / 0x51 ("s" suffix) and are told apart by the trailing
// prefix class (XD / XS / PS).
defm VPDPBSSD  : VNNI_common<0x50, "vpdpbssd", X86vpdpbssd, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSSDS : VNNI_common<0x51, "vpdpbssds", X86vpdpbssds, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSUD  : VNNI_common<0x50, "vpdpbsud", X86vpdpbsud, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBSUDS : VNNI_common<0x51, "vpdpbsuds", X86vpdpbsuds, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBUUD  : VNNI_common<0x50, "vpdpbuud", X86vpdpbuud, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPBUUDS : VNNI_common<0x51, "vpdpbuuds", X86vpdpbuuds, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;

// VNNI INT16
// Opcodes 0xd2 (plain) / 0xd3 ("s" suffix); the three families are told apart
// by the trailing prefix class (XS / PD / PS).
defm VPDPWSUD  : VNNI_common<0xd2, "vpdpwsud", X86vpdpwsud, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWSUDS : VNNI_common<0xd3, "vpdpwsuds", X86vpdpwsuds, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWUSD  : VNNI_common<0xd2, "vpdpwusd", X86vpdpwusd, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUSDS : VNNI_common<0xd3, "vpdpwusds", X86vpdpwusds, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUUD  : VNNI_common<0xd2, "vpdpwuud", X86vpdpwuud, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPWUUDS : VNNI_common<0xd3, "vpdpwuuds", X86vpdpwuuds, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;

// VMPSADBW
defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW,
                                          avx512vl_i16_info, avx512vl_i8_info,
                                          HasAVX10_2>,
                    XS, EVEX_CD8<32, CD8VF>;

// YMM Rounding
// Each multiclass below expands an existing avx512_* multiclass with the
// 256-bit vector infos (v16f16x / v8f32x / v4f64x, where applicable) and the
// YMM scheduling class, producing records suffixed PHZ256 / PSZ256 / PDZ256.

// Packed FP binary op built on avx512_fp_round_packed, one record per
// element type; scheduling comes from the per-type X86SchedWriteSizes entry.
multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM,
                                       v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM,
                                       v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM,
                                       v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// Same shape as avx256_fp_binop_p_round, but built on avx512_fp_sae_packed.
multiclass avx256_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PHZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM,
                                     v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM,
                                     v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM,
                                     v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// Packed compare records built on avx512_vcmp_sae.
multiclass avx256_vcmp_p_sae<X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_vcmp_sae<sched.YMM, v16f16x_info>, AVX512PSIi8Base, EVEX_CD8<16, CD8VF>, TA;
  defm PSZ256 : avx512_vcmp_sae<sched.YMM, v8f32x_info>, AVX512PSIi8Base, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_vcmp_sae<sched.YMM, v4f64x_info>, AVX512PDIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
}

// PS/PD records built on avx512_fixupimm_packed_sae; each FP vector info is
// paired with the integer vector info of the same element width.
multiclass avx256_fixupimm_packed_all<bits<8> opc, string OpcodeStr,
                                      X86SchedWriteWidths sched> {
  defm PSZ256 : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM, v8f32x_info,
                                           v8i32x_info>, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM, v4f64x_info,
                                           v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD records built on avx512_fp28_p_sae; the element suffix is appended
// to OpcodeStr here.
multiclass avx256_vgetexp<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
                          X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_fp28_p_sae<opc, OpcodeStr#"ph", v16f16x_info, OpNodeSAE,
                                  sched.YMM>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp28_p_sae<opc, OpcodeStr#"ps", v8f32x_info, OpNodeSAE,
                                  sched.YMM>, T8,PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp28_p_sae<opc, OpcodeStr#"pd", v4f64x_info, OpNodeSAE,
                                  sched.YMM>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// Unary op with immediate, built on avx512_unary_fp_sae_packed_imm.
// PH and PS both use opcPs; only PD uses opcPd.
multiclass avx256_unary_fp_sae<string OpcodeStr, bits<8> opcPs, bits<8> opcPd,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE, sched.YMM,
                                               v16f16x_info>, AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE, sched.YMM,
                                               v8f32x_info>, AVX512AIi8Base, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_unary_fp_sae_packed_imm<opcPd, OpcodeStr, OpNodeSAE, sched.YMM,
                                               v4f64x_info>, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
}

// PS/PD records built on avx512_fp_sae_packed_imm; the element suffix is
// appended to OpcodeStr here.
multiclass avx256_common_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
                                           X86SchedWriteWidths sched> {
  defm PSZ256 : avx512_fp_sae_packed_imm<opc, OpcodeStr#"ps", OpNodeSAE, sched.YMM,
                                         v8f32x_info>, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_sae_packed_imm<opc, OpcodeStr#"pd", OpNodeSAE, sched.YMM,
                                         v4f64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
}

// Like avx256_fp_binop_p_round, but with a single X86SchedWriteWidths
// scheduling class and the T_MAP6 / T8 opcode maps.
multiclass avx256_fp_scalef_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                  X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM,
                                       v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM,
                                       v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM,
                                       v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// PH/PS/PD records built on avx512_sqrt_packed_round.
multiclass avx256_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PHZ256 : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                         sched.PH.YMM, v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                         sched.PS.YMM, v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                         sched.PD.YMM, v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// v16i16 -> v16f16 conversion record (opcode 0x7D) built on avx512_vcvt_fp_rc.
// The opcode map / prefix is supplied by the instantiating defm.
multiclass avx256_vcvtw_rc<string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x7D, OpcodeStr, v16f16x_info, v16i16x_info, OpNodeRnd,
                                  SchedWriteCvtPD2DQ.YMM>, EVEX_CD8<16, CD8VF>;
}

// Conversion multiclasses. Each record is built on avx512_vcvt_fp_rc or
// avx512_vcvt_fp_sae with an explicit (destination info, source info) pair;
// the record name suffix names the non-parameterised side of the conversion.

// v8i32 -> v8f16 / v8f32 (signed-source variant; prefix class PS).
multiclass avx256_cvtdq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                              X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ph"), v8f16x_info,
                                  v8i32x_info, OpNodeRnd, sched.YMM>, T_MAP5,PS, EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
                                  v8i32x_info, OpNodeRnd, sched.YMM>, TB, PS, EVEX_CD8<32, CD8VF>;
}

// Same as avx256_cvtdq2fp_rc but with prefix class XD.
multiclass avx256_cvtudq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                               X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ph"), v8f16x_info,
                                  v8i32x_info, OpNodeRnd, sched.YMM>, T_MAP5,XD, EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
                                  v8i32x_info, OpNodeRnd, sched.YMM>, TB, XD, EVEX_CD8<32, CD8VF>;
}

// _Src -> f16 / f32 / f64 using X86VSintToFpRnd. EVEX_CD8 and REX_W are
// supplied by the instantiating defm.
multiclass avx256_cvtqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "ph"), v8f16x_info,
                                  _Src, X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>, T_MAP5,PS;
  defm PSZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                  _Src, X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>, TB, PS;
  defm PDZ256 : avx512_vcvt_fp_rc<0xE6, !strconcat(OpcodeStr, "pd"), v4f64x_info,
                                  _Src, X86VSintToFpRnd, SchedWriteCvtDQ2PD.YMM>, TB, XS;
}

// _Src -> f16 / f32 / f64 using X86VUintToFpRnd (all on opcode 0x7A).
multiclass avx256_cvtuqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x7A, !strconcat(OpcodeStr, "ph"), v8f16x_info,
                                  _Src, X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>, T_MAP5,XD;
  defm PSZ256 : avx512_vcvt_fp_rc<0x7A, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                  _Src, X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>, TB, XD;
  defm PDZ256 : avx512_vcvt_fp_rc<0x7A, !strconcat(OpcodeStr, "pd"), v4f64x_info,
                                  _Src, X86VUintToFpRnd, SchedWriteCvtDQ2PD.YMM>, TB, XS;
}

// _Src -> narrower FP (ph/ps) and -> signed/unsigned integer (dq/qq/udq/uqq).
// EVEX_CD8 and REX_W are supplied by the instantiating defm.
multiclass avx256_vcvt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x5A, !strconcat(OpcodeStr, "ph"), v8f16x_info,
                                  _Src, X86vfproundRnd, SchedWriteCvtPD2PS.YMM>, T_MAP5,PD;
  defm PSZ256 : avx512_vcvt_fp_rc<0x5A, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                  _Src, X86vfproundRnd, SchedWriteCvtPD2PS.YMM>, TB, PD;
  defm DQZ256 : avx512_vcvt_fp_rc<0xE6, !strconcat(OpcodeStr, "dq"), v4i32x_info,
                                  _Src, X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>, TB, XD;
  defm QQZ256 : avx512_vcvt_fp_rc<0x7B, !strconcat(OpcodeStr, "qq"), v4i64x_info,
                                  _Src, X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>, TB, PD;
  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "udq"), v4i32x_info,
                                   _Src, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>, TB, PS;
  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "uqq"), v4i64x_info,
                                   _Src, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>, TB, PD;
}

// f32 source conversions. PHZ256 reuses avx512_cvtps2ph_sae (i16-typed
// destination); PHXZ256 is the variant with an f16-typed destination.
multiclass avx256_vcvt_ps2<string OpcodeStr> {
  defm PHZ256 : avx512_cvtps2ph_sae<v8i16x_info, v8f32x_info, WriteCvtPS2PHZ>, EVEX_CD8<32, CD8VH>;
  defm PHXZ256 : avx512_vcvt_fp_rc<0x1D, !strconcat(OpcodeStr, "phx"), v8f16x_info, v8f32x_info,
                                   X86vfproundRnd, SchedWriteCvtPD2PS.YMM>, T_MAP5,PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_vcvt_fp_sae<0x5A, !strconcat(OpcodeStr, "pd"), v4f64x_info, v4f32x_info,
                                   X86vfpextSAE, SchedWriteCvtPS2PD.YMM>, TB, PS, EVEX_CD8<32, CD8VF>;
  defm DQZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f32x_info,
                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PD, EVEX_CD8<32, CD8VF>;
  defm QQZ256 : avx512_vcvt_fp_rc<0x7B, !strconcat(OpcodeStr, "qq"), v4i64x_info, v4f32x_info,
                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PD, EVEX_CD8<32, CD8VF>;
  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f32x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PS, EVEX_CD8<32, CD8VF>;
  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v4f32x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PD, EVEX_CD8<32, CD8VF>;
}

// f16 source conversions. PSZ256 reuses avx512_cvtph2ps_sae (i16-typed
// source); PSXZ256 is the variant with an f16-typed source.
multiclass avx256_vcvt_ph2<string OpcodeStr> {
  defm PSZ256 : avx512_cvtph2ps_sae<v8f32x_info, v8i16x_info, WriteCvtPH2PSZ>, EVEX_CD8<32, CD8VH>;
  defm PSXZ256 : avx512_vcvt_fp_sae<0x13, !strconcat(OpcodeStr, "psx"), v8f32x_info, v8f16x_info,
                                    X86vfpextSAE, SchedWriteCvtPS2PD.YMM>, T_MAP6,PD, EVEX_CD8<16, CD8VH>;
  defm PDZ256 : avx512_vcvt_fp_sae<0x5A, !strconcat(OpcodeStr, "pd"), v4f64x_info, v8f16x_info,
                                   X86vfpextSAE, SchedWriteCvtPS2PD.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VQ>;
  defm WZ256 : avx512_vcvt_fp_rc<0x7D, !strconcat(OpcodeStr, "w"), v16i16x_info, v16f16x_info,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VF>;
  defm DQZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f16x_info,
                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VH>;
  defm QQZ256 : avx512_vcvt_fp_rc<0x7B, !strconcat(OpcodeStr, "qq"), v4i64x_info, v8f16x_info,
                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
  defm UWZ256 : avx512_vcvt_fp_rc<0x7D, !strconcat(OpcodeStr, "uw"), v16i16x_info, v16f16x_info,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f16x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VH>;
  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v8f16x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
}

// "vcvtt*" counterparts: all records are built on avx512_vcvt_fp_sae with the
// X86cvttp2siSAE / X86cvttp2uiSAE nodes. For the pd2/ps2 variants the opcode
// map (TB) is supplied by the instantiating defm.
multiclass avx256_vcvtt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
  defm DQZ256 : avx512_vcvt_fp_sae<0xE6, !strconcat(OpcodeStr, "dq"), v4i32x_info,
                                   _Src, X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>, PD;
  defm QQZ256 : avx512_vcvt_fp_sae<0x7A, !strconcat(OpcodeStr, "qq"), v4i64x_info,
                                   _Src, X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>, PD;
  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "udq"), v4i32x_info,
                                    _Src, X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>, PS;
  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "uqq"), v4i64x_info,
                                    _Src, X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>, PD;
}

multiclass avx256_vcvtt_ps2<string OpcodeStr> {
  defm DQZ256 : avx512_vcvt_fp_sae<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f32x_info,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, XS, EVEX_CD8<32, CD8VF>;
  defm QQZ256 : avx512_vcvt_fp_sae<0x7A, !strconcat(OpcodeStr, "qq"), v4i64x_info, v4f32x_info,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, PD, EVEX_CD8<32, CD8VH>;
  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f32x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, PS, EVEX_CD8<32, CD8VF>;
  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v4f32x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, PD, EVEX_CD8<32, CD8VH>;
}

multiclass avx256_vcvtt_ph2<string OpcodeStr> {
  defm WZ256 : avx512_vcvt_fp_sae<0x7C, !strconcat(OpcodeStr, "w"), v16i16x_info, v16f16x_info,
                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VF>;
  defm DQZ256 : avx512_vcvt_fp_sae<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f16x_info,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,XS, EVEX_CD8<16, CD8VH>;
  defm QQZ256 : avx512_vcvt_fp_sae<0x7A, !strconcat(OpcodeStr, "qq"), v4i64x_info, v8f16x_info,
                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
  defm UWZ256 : avx512_vcvt_fp_sae<0x7C, !strconcat(OpcodeStr, "uw"), v16i16x_info, v16f16x_info,
                                   X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f16x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VH>;
  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v8f16x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
}

// FMA3: one multiclass per operand-order form (132 / 213 / 231), each
// expanding the matching avx512_fma3_*_round multiclass for PH/PS/PD.
multiclass avx256_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256 : avx512_fma3_132_round<opc, !strconcat(OpcodeStr, "ph"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fma3_132_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fma3_132_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
}

multiclass avx256_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256 : avx512_fma3_213_round<opc, !strconcat(OpcodeStr, "ph"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fma3_213_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fma3_213_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
}

multiclass avx256_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256 : avx512_fma3_231_round<opc, !strconcat(OpcodeStr, "ph"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fma3_231_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fma3_231_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
                                      SchedWriteFMA.YMM, v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
}

// Bundles the three operand-order forms; the form number is appended both to
// the record name (NAME#132 ...) and to the mnemonic string.
multiclass avx256_fma3_round3<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                              string OpcodeStr, SDNode OpNodeRnd> {
  defm NAME#132 : avx256_fma3_132_round<opc132, !strconcat(OpcodeStr, "132"), OpNodeRnd>;
  defm NAME#213 : avx256_fma3_213_round<opc213, !strconcat(OpcodeStr, "213"), OpNodeRnd>;
  defm NAME#231 : avx256_fma3_231_round<opc231, !strconcat(OpcodeStr, "231"), OpNodeRnd>;
}

// Instantiations of the multiclasses above. Every record in this block is
// gated on HasAVX10_2 and carries hasEVEX_U = 1 with EVEX encoding.
let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in {
  defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
  defm VMUL : avx256_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
  defm VSUB : avx256_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
  defm VDIV : avx256_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
  defm VMIN : avx256_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
  defm VMAX : avx256_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
  defm VCMP : avx256_vcmp_p_sae<SchedWriteFCmp>, EVEX, VVVV;
  defm VFIXUPIMM : avx256_fixupimm_packed_all<0x54, "vfixupimm", SchedWriteFAdd>, AVX512AIi8Base, EVEX, VVVV;
  defm VGETEXP : avx256_vgetexp<0x42, "vgetexp", X86fgetexpSAE, SchedWriteFRnd>;
  defm VREDUCE : avx256_unary_fp_sae<"vreduce", 0x56, 0x56, X86VReduceSAE, SchedWriteFRnd>;
  defm VRNDSCALE : avx256_unary_fp_sae<"vrndscale", 0x08, 0x09, X86VRndScaleSAE, SchedWriteFRnd>;
  defm VGETMANT : avx256_unary_fp_sae<"vgetmant", 0x26, 0x26, X86VGetMantSAE, SchedWriteFRnd>;
  defm VRANGE : avx256_common_fp_sae_packed_imm<0x50, "vrange", X86VRangeSAE, SchedWriteFAdd>, AVX512AIi8Base, EVEX, VVVV;
  defm VSCALEF : avx256_fp_scalef_round<0x2C, "vscalef", X86scalefRnd, SchedWriteFAdd>;
  defm VSQRT : avx256_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
  defm VCVTW2 : avx256_vcvtw_rc<"vcvtw2ph", X86VSintToFpRnd>, T_MAP5, XS;
  defm VCVTDQ2 : avx256_cvtdq2fp_rc<0x5B, "vcvtdq2", X86VSintToFpRnd, SchedWriteCvtDQ2PS>;
  defm VCVTQQ2 : avx256_cvtqq2fp_rc<"vcvtqq2", v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTUW2 : avx256_vcvtw_rc<"vcvtuw2ph", X86VUintToFpRnd>, T_MAP5,XD;
  defm VCVTUDQ2 : avx256_cvtudq2fp_rc<0x7A, "vcvtudq2", X86VUintToFpRnd, SchedWriteCvtDQ2PS>;
  defm VCVTUQQ2 : avx256_cvtuqq2fp_rc<"vcvtuqq2", v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTPD2 : avx256_vcvt_pd2<"vcvtpd2", v4f64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTPS2 : avx256_vcvt_ps2<"vcvtps2">;
  defm VCVTPH2 : avx256_vcvt_ph2<"vcvtph2">;
  defm VCVTTPD2 : avx256_vcvtt_pd2<"vcvttpd2", v4f64x_info>, EVEX_CD8<64, CD8VF>, TB, REX_W;
  defm VCVTTPS2 : avx256_vcvtt_ps2<"vcvttps2">, TB;
  defm VCVTTPH2 : avx256_vcvtt_ph2<"vcvttph2">;
  defm VFMADD : avx256_fma3_round3<0x98, 0xA8, 0xB8, "vfmadd", X86FmaddRnd>;
  defm VFMSUB : avx256_fma3_round3<0x9A, 0xAA, 0xBA, "vfmsub", X86FmsubRnd>;
  defm VFMADDSUB : avx256_fma3_round3<0x96, 0xA6, 0xB6, "vfmaddsub", X86FmaddsubRnd>;
  defm VFMSUBADD : avx256_fma3_round3<0x97, 0xA7, 0xB7, "vfmsubadd", X86FmsubaddRnd>;
  defm VFNMADD : avx256_fma3_round3<0x9C, 0xAC, 0xBC, "vfnmadd", X86FnmaddRnd>;
  defm VFNMSUB : avx256_fma3_round3<0x9E, 0xAE, 0xBE, "vfnmsub", X86FnmsubRnd>;
  // The complex-multiply records use v8f32x_info and pass an
  // "@earlyclobber $dst" constraint string to avx512_fp_round_packed.
  defm VFMULCPHZ256 : avx512_fp_round_packed<0xD6, "vfmulcph", x86vfmulcRnd, SchedWriteFMA.YMM,
                                             v8f32x_info, "", "@earlyclobber $dst">, T_MAP6,XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPHZ256 : avx512_fp_round_packed<0xD6, "vfcmulcph", x86vfcmulcRnd, SchedWriteFMA.YMM,
                                              v8f32x_info, "", "@earlyclobber $dst">, T_MAP6,XD, EVEX_CD8<32, CD8VF>;
  defm VFMADDCPHZ256 : avx512_cfmaop_round<0x56, "vfmaddcph", x86vfmaddcRnd,
                                           v8f32x_info>, T_MAP6,XS, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
  defm VFCMADDCPHZ256 : avx512_cfmaop_round<0x56, "vfcmaddcph", x86vfcmaddcRnd,
                                            v8f32x_info>, T_MAP6,XD, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
}

//-------------------------------------------------
// AVX10 MINMAX instructions
//-------------------------------------------------

// Packed MINMAX (opcode 0x52) for a single vector width: register (rri),
// full-vector memory (rmi) and broadcast memory (rmbi, EVEX_B) source forms,
// each taking an 8-bit immediate as $src3. All three are marked as using MXCSR
// and as possibly raising FP exceptions.
multiclass avx10_minmax_packed_base<string OpStr, X86VectorVTInfo VTI, SDNode OpNode> {
  let ExeDomain = VTI.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<0x52, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3), OpStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                               (i32 timm:$src3)))>,
                               EVEX, VVVV, Sched<[WriteFMAX]>;
    defm rmi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3), OpStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (VTI.VT (OpNode VTI.RC:$src1, (VTI.LdFrag addr:$src2),
                                               (i32 timm:$src3)))>,
                               EVEX, VVVV,
                               Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
                                OpStr, "$src3, ${src2}"#VTI.BroadcastStr#", $src1",
                                "$src1, ${src2}"#VTI.BroadcastStr#", $src3",
                                (VTI.VT (OpNode VTI.RC:$src1, (VTI.BroadcastLdFrag addr:$src2),
                                                (i32 timm:$src3)))>,
                                EVEX, VVVV, EVEX_B,
                                Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
  }
}

// Register-only {sae} forms of packed MINMAX for the 512-bit and 256-bit
// vector infos. Both are defined with Uses cleared and
// mayRaiseFPException = 0; the 256-bit record additionally sets hasEVEX_U.
multiclass avx10_minmax_packed_sae<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
  let Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Zrrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info512, (outs VTI.info512.RC:$dst),
                                 (ins VTI.info512.RC:$src1, VTI.info512.RC:$src2, i32u8imm:$src3), OpStr,
                                 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                                 (VTI.info512.VT (OpNode (VTI.info512.VT VTI.info512.RC:$src1),
                                                         (VTI.info512.VT VTI.info512.RC:$src2),
                                                         (i32 timm:$src3)))>,
                                 EVEX, VVVV, EVEX_B, EVEX_V512, Sched<[WriteFMAX]>;
    let hasEVEX_U = 1 in
    defm Z256rrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info256, (outs VTI.info256.RC:$dst),
                                    (ins VTI.info256.RC:$src1, VTI.info256.RC:$src2, i32u8imm:$src3), OpStr,
                                    "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                                    (VTI.info256.VT (OpNode (VTI.info256.VT VTI.info256.RC:$src1),
                                                            (VTI.info256.VT VTI.info256.RC:$src2),
                                                            (i32 timm:$src3)))>,
                                    EVEX, VVVV, EVEX_B, EVEX_V256, Sched<[WriteFMAX]>;
  }
}

// Expands avx10_minmax_packed_base for all three vector widths: the 512-bit
// form is gated on HasAVX10_2_512, the 256/128-bit forms on HasAVX10_2.
multiclass avx10_minmax_packed<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_minmax_packed_base<OpStr, VTI.info512, OpNode>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_minmax_packed_base<OpStr, VTI.info256, OpNode>, EVEX_V256;
    defm Z128 : avx10_minmax_packed_base<OpStr, VTI.info128, OpNode>, EVEX_V128;
  }
}

// Scalar MINMAX (opcode 0x53), gated on HasAVX10_2.
//  - `def rri` / `def rmi`: isCodeGenOnly records operating on the scalar
//    register class _.FRC.
//  - `defm rri` / `defm rmi`: maskable records on VR128X; "_Int" is passed as
//    the last AVX512_maskable argument.
//  - `defm rrib`: {sae} register form using OpNodeSAE, with Uses cleared,
//    mayRaiseFPException = 0, and EVEX_B set.
multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
                                SDNode OpNodeSAE> {
  let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
    let mayRaiseFPException = 1 in {
      let isCodeGenOnly = 1 in {
        def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                            !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX]>;

        def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                            !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
                                                      (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
      }
      defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                                 (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                                 OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                 (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                               (i32 timm:$src3))),
                                 0, 0, 0, vselect_mask, "", "_Int">,
                 Sched<[WriteFMAX]>;

      defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
                                 (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                                 OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                 (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                                               (i32 timm:$src3))),
                                 0, 0, 0, vselect_mask, "", "_Int">,
                 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    }
    let Uses = []<Register>, mayRaiseFPException = 0 in
    defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                                (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                                OpStr, "$src3, {sae}, $src2, $src1",
                                "$src1, $src2, {sae}, $src3",
                                (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                 (i32 timm:$src3))),
                                0, 0, 0, vselect_mask, "", "_Int">,
                Sched<[WriteFMAX]>, EVEX_B;
  }
}


// The BF16 form is wrapped in mayRaiseFPException = 0 and has no {sae}
// variant; PD/PH/PS additionally pull in avx10_minmax_packed_sae.
let mayRaiseFPException = 0 in
defm VMINMAXBF16 : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info, X86vminmax>,
                   AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;

defm VMINMAXPD : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info, X86vminmaxSae>,
                 AVX512PDIi8Base, REX_W, TA, EVEX_CD8<64, CD8VF>;

defm VMINMAXPH : avx10_minmax_packed<"vminmaxph", avx512vl_f16_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxph", avx512vl_f16_info, X86vminmaxSae>,
                 AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;

defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
                 AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;

defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

//-------------------------------------------------
// AVX10 SATCVT instructions
//-------------------------------------------------

// Unary maskable conversion from SrcInfo to DestInfo with three source forms:
// register (rr), full-vector memory (rm) and broadcast memory (rmb, EVEX_B).
multiclass avx10_sat_cvt_rmb<bits<8> Opc, string OpStr, X86FoldableSchedWrite sched,
                             X86VectorVTInfo DestInfo,
                             X86VectorVTInfo SrcInfo,
                             SDNode MaskNode> {
  defm rr: AVX512_maskable<Opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                           (ins SrcInfo.RC:$src), OpStr, "$src", "$src",
                           (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                           Sched<[sched]>;
  defm rm: AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                           (ins SrcInfo.MemOp:$src), OpStr, "$src", "$src",
                           (DestInfo.VT (MaskNode (SrcInfo.VT
                                         (SrcInfo.LdFrag addr:$src))))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.ScalarMemOp:$src), OpStr,
                            "${src}"#SrcInfo.BroadcastStr, "${src}"#SrcInfo.BroadcastStr,
                            (DestInfo.VT (MaskNode (SrcInfo.VT
                                          (SrcInfo.BroadcastLdFrag addr:$src))))>, EVEX_B,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Conversion with rounding
// control (RC)
// Register forms that take an explicit AVX512RC:$rc rounding-control operand:
// Zrrb under HasAVX10_2_512 and Z256rrb under HasAVX10_2 with hasEVEX_U = 1.
multiclass avx10_sat_cvt_rc<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
                            SDNode MaskNode> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Zrrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                              (outs DestInfo.info512.RC:$dst),
                              (ins SrcInfo.info512.RC:$src, AVX512RC:$rc),
                              OpStr, "$rc, $src", "$src, $rc",
                              (DestInfo.info512.VT
                                (MaskNode (SrcInfo.info512.VT SrcInfo.info512.RC:$src),
                                          (i32 timm:$rc)))>,
                              Sched<[sched.ZMM]>, EVEX, EVEX_RC, EVEX_B;
  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in {
    defm Z256rrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info256,
                                   (outs DestInfo.info256.RC:$dst),
                                   (ins SrcInfo.info256.RC:$src, AVX512RC:$rc),
                                   OpStr, "$rc, $src", "$src, $rc",
                                   (DestInfo.info256.VT
                                     (MaskNode (SrcInfo.info256.VT SrcInfo.info256.RC:$src),
                                               (i32 timm:$rc)))>,
                                   Sched<[sched.YMM]>, EVEX, EVEX_RC, EVEX_B;
  }
}

// Conversion with SAE
// Register forms printed with "{sae}": Zrrb under HasAVX10_2_512 and Z256rrb
// under HasAVX10_2 with hasEVEX_U = 1.
multiclass avx10_sat_cvt_sae<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
                             SDNode Node> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Zrrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                              (outs DestInfo.info512.RC:$dst),
                              (ins SrcInfo.info512.RC:$src),
                              OpStr, "{sae}, $src", "$src, {sae}",
                              (DestInfo.info512.VT
                                (Node (SrcInfo.info512.VT SrcInfo.info512.RC:$src)))>,
                              Sched<[sched.ZMM]>, EVEX, EVEX_B;
  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in {
    defm Z256rrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info256,
                                   (outs DestInfo.info256.RC:$dst),
                                   (ins SrcInfo.info256.RC:$src),
                                   OpStr, "{sae}, $src", "$src, {sae}",
                                   (DestInfo.info256.VT
                                     (Node (SrcInfo.info256.VT SrcInfo.info256.RC:$src)))>,
                                   Sched<[sched.YMM]>, EVEX, EVEX_B;
  }
}

// Instantiates avx10_sat_cvt_rmb (rr/rm/rmb) at 512 bits under HasAVX10_2_512
// and at 256/128 bits under HasAVX10_2.
multiclass avx10_sat_cvt_base<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                              SDNode MaskNode, AVX512VLVectorVTInfo DestInfo,
                              AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_sat_cvt_rmb<Opc, OpStr, sched.ZMM,
                             DestInfo.info512, SrcInfo.info512,
                             MaskNode>,
           EVEX, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z256
        : avx10_sat_cvt_rmb<Opc, OpStr, sched.YMM,
                            DestInfo.info256, SrcInfo.info256,
                            MaskNode>,
          EVEX, EVEX_V256;
    defm Z128
        : avx10_sat_cvt_rmb<Opc, OpStr, sched.XMM,
                            DestInfo.info128, SrcInfo.info128,
                            MaskNode>,
          EVEX, EVEX_V128;
  }
}

defm VCVTBF162IBS : avx10_sat_cvt_base<0x69, "vcvtbf162ibs",
                                       SchedWriteVecIMul, X86vcvtp2ibs,
                                       avx512vl_i16_info, avx512vl_bf16_info>,
                    AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTBF162IUBS : avx10_sat_cvt_base<0x6b, "vcvtbf162iubs",
                                        SchedWriteVecIMul, X86vcvtp2iubs,
                                        avx512vl_i16_info, avx512vl_bf16_info>,
                     AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPH2IBS : avx10_sat_cvt_base<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                                     X86vcvtp2ibs, avx512vl_i16_info,
                                     avx512vl_f16_info>,
                  avx10_sat_cvt_rc<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                                   avx512vl_i16_info, avx512vl_f16_info,
                                   X86vcvtp2ibsRnd>,
                  AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2IUBS : avx10_sat_cvt_base<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                                      X86vcvtp2iubs, avx512vl_i16_info,
                                      avx512vl_f16_info>,
                   avx10_sat_cvt_rc<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                                    avx512vl_i16_info, avx512vl_f16_info,
                                    X86vcvtp2iubsRnd>,
                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPS2IBS : avx10_sat_cvt_base<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                                     X86vcvtp2ibs, avx512vl_i32_info,
                                     avx512vl_f32_info>,
                  avx10_sat_cvt_rc<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                                   avx512vl_i32_info, avx512vl_f32_info,
                                   X86vcvtp2ibsRnd>,
                  AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTPS2IUBS : avx10_sat_cvt_base<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                                      X86vcvtp2iubs, avx512vl_i32_info,
                                      avx512vl_f32_info>,
                   avx10_sat_cvt_rc<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                                    avx512vl_i32_info, avx512vl_f32_info,
                                    X86vcvtp2iubsRnd>,
                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

defm VCVTTBF162IBS : avx10_sat_cvt_base<0x68, "vcvttbf162ibs",
                                        SchedWriteVecIMul, X86vcvttp2ibs,
                                        avx512vl_i16_info, avx512vl_bf16_info>,
                     AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTBF162IUBS : avx10_sat_cvt_base<0x6a, "vcvttbf162iubs",
                                         SchedWriteVecIMul, X86vcvttp2iubs,
                                         avx512vl_i16_info, avx512vl_bf16_info>,
                      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPH2IBS : avx10_sat_cvt_base<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                                      X86vcvttp2ibs, avx512vl_i16_info,
                                      avx512vl_f16_info>,
                   avx10_sat_cvt_sae<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                                     avx512vl_i16_info, avx512vl_f16_info,
                                     X86vcvttp2ibsSAE>,
                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2IUBS : avx10_sat_cvt_base<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                                       X86vcvttp2iubs, avx512vl_i16_info,
                                       avx512vl_f16_info>,
                    avx10_sat_cvt_sae<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                                      avx512vl_i16_info, avx512vl_f16_info,
                                      X86vcvttp2iubsSAE>,
                    AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPS2IBS : avx10_sat_cvt_base<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                                      X86vcvttp2ibs, avx512vl_i32_info,
                                      avx512vl_f32_info>,
                   avx10_sat_cvt_sae<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                                     avx512vl_i32_info, avx512vl_f32_info,
                                     X86vcvttp2ibsSAE>,
                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2IUBS : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                                       X86vcvttp2iubs, avx512vl_i32_info,
                                       avx512vl_f32_info>,
                    avx10_sat_cvt_sae<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                                      avx512vl_i32_info, avx512vl_f32_info,
                                      X86vcvttp2iubsSAE>,
                    AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

//-------------------------------------------------
// AVX10 SATCVT-DS instructions
//-------------------------------------------------

// Convert Double to Signed/Unsigned Doubleword with truncation.
// The Z128 form is instantiated with null_frag for both the unmasked and the
// masked node, so it carries no selection pattern of its own. The InstAliases
// below add AT&T-only "x"/"y"-suffixed spellings for the Z128 and Z256 forms.
multiclass avx10_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               f128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNodeSAE,
                                   sched.YMM>, EVEX_V256;
  }


  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst|$dst, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrb") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}}|$dst {${mask}}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword with truncation and saturation.
multiclass avx10_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                                   OpNodeRnd, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation.
// The Z128 form reads only 64 bits of memory (f64mem, "{1to2}" broadcast), so
// its load patterns are spelled out as a loadf64 placed into a vector.
multiclass avx10_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32 (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32 (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNodeRnd,
                                   sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx10_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                                   OpNodeSAE, sched.YMM>, EVEX_V256;
  }
}

// NOTE(review): this defm previously listed both PD and PS
// ("PD, REX_W, T_MAP5,PS"). Only PS is kept, matching VCVTTPD2UDQS below; the
// PD slot with the same opcode/W is used by VCVTTPD2QQS. Confirm the generated
// encoding is unchanged.
defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>,
                   REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>,
                    REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,PS,
                   EVEX_CD8<32, CD8VF>;
defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>, T_MAP5,PS,
                    EVEX_CD8<32, CD8VF>;
defm VCVTTPD2QQS : avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
                   EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,PD,
                   EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
                    EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>, T_MAP5,PD,
                    EVEX_CD8<32, CD8VH>;

let Predicates = [HasAVX10_2] in {
// Special patterns to allow use of X86mcvttp2sis/X86mcvttp2uis for masking.
// Instruction patterns have been disabled with null_frag.
// Register-form selection patterns mapping fp_to_sint_sat / fp_to_uint_sat
// (and X86fp2sisat / X86fp2uisat where the result has more elements than the
// source) onto the saturating truncating converts.

// VCVTTPD2DQS
def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2DQSZrr VR512:$src)>;

// VCVTTPD2QQS
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2QQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2QQSZrr VR512:$src)>;

// VCVTTPD2UDQS
def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2UDQSZrr VR512:$src)>;

// VCVTTPD2UQQS
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2UQQSZrr VR512:$src)>;

// VCVTTPS2DQS
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2DQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2DQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2DQSZrr VR512:$src)>;

// VCVTTPS2QQS
def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
          (VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2QQSZrr VR256X:$src)>;

// VCVTTPS2UDQS
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2UDQSZrr VR512:$src)>;

// VCVTTPS2UQQS
def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
          (VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2UQQSZrr VR256X:$src)>;

// Patterns VCVTTPD2DQSZ128
// Unmasked forms (register, full-vector load, broadcast load) followed by the
// merge-masked (k) and zero-masked (kz) forms selected from X86mcvttp2sis.
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
          (VCVTTPD2DQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2DQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
           (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>;

// Patterns VCVTTPD2UDQSZ128
// Unmasked forms: register, full-vector load, broadcast load. The load form
// replaces a second, identical copy of the broadcast pattern so that a plain
// v2f64 load can be folded into the instruction (rm), as for the masked forms.
def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))),
          (VCVTTPD2UDQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2UDQSZ128rmb addr:$src)>;
// Merge-masked (k) and zero-masked (kz) forms selected from X86mcvttp2uis.
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
           (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert scalar float/double to signed/unsigned int 32/64 with truncation and saturation.
multiclass avx10_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                          X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                          SDNode OpNodeInt, SDNode OpNodeSAE,
                          X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in {
    // Codegen-only forms: the source is the scalar FP register class (or a
    // scalar load) and OpNode is given the destination element type as its
    // second operand.
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg,
                      (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst,
                            (OpNode _SrcRC.FRC:$src, _DstRC.EltVT))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem,
                      (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst,
                            (OpNode (_SrcRC.ScalarLdFrag addr:$src),
                                    _DstRC.EltVT))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
               SIMD_EXC;
    }

    // Vector-register source form, selected from OpNodeInt.
    def rr_Int : AVX512<opc, MRMSrcReg,
                        (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    // {sae} form: sets EVEX_B, is selected from OpNodeSAE, and only declares
    // the MXCSR use (it does not inherit SIMD_EXC).
    let Uses = [MXCSR] in
    def rrb_Int : AVX512<opc, MRMSrcReg,
                         (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                         asm # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                         [(set _DstRC.RC:$dst,
                               (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;

    // Memory-source counterpart of rr_Int.
    def rm_Int : AVX512<opc, MRMSrcMem,
                        (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt
                                (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  }
}

// Signed conversions (opcode 0x6D); the *64S variants add REX_W.
defm VCVTTSS2SIS : avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
                                  fp_to_sint_sat, X86cvttss2Int,
                                  X86cvttss2IntSAE, WriteCvtSS2I>,
                   T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64S : avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
                                    fp_to_sint_sat, X86cvttss2Int,
                                    X86cvttss2IntSAE, WriteCvtSS2I>,
                     REX_W, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIS : avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
                                  fp_to_sint_sat, X86cvttss2Int,
                                  X86cvttss2IntSAE, WriteCvtSD2I>,
                   T_MAP5, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64S : avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
                                    fp_to_sint_sat, X86cvttss2Int,
                                    X86cvttss2IntSAE, WriteCvtSD2I>,
                     REX_W, T_MAP5, XD, EVEX_CD8<64, CD8VT1>;

// Unsigned conversions (opcode 0x6C); the *64S variants add REX_W.
defm VCVTTSS2USIS : avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
                                   fp_to_uint_sat, X86cvttss2UInt,
                                   X86cvttss2UIntSAE, WriteCvtSS2I>,
                    T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64S : avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
                                     fp_to_uint_sat, X86cvttss2UInt,
                                     X86cvttss2UIntSAE, WriteCvtSS2I>,
                      T_MAP5, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIS : avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
                                   fp_to_uint_sat, X86cvttss2UInt,
                                   X86cvttss2UIntSAE, WriteCvtSD2I>,
                    T_MAP5, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64S : avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
                                     fp_to_uint_sat, X86cvttss2UInt,
                                     X86cvttss2UIntSAE, WriteCvtSD2I>,
                      T_MAP5, XD, REX_W, EVEX_CD8<64, CD8VT1>;

//-------------------------------------------------
// AVX10 CONVERT instructions
//-------------------------------------------------

// Register-register form with an explicit rounding-control operand ($rc).
multiclass avx10_cvt2ps2ph_rc<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _Src, X86VectorVTInfo _,
                              SDNode OpNodeRnd> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc),
                             OpcodeStr,
                             "$rc, $src2, $src1", "$src1, $src2, $rc",
                             (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
                                              (_Src.VT _Src.RC:$src2),
                                              (i32 timm:$rc)))>,
             EVEX, VVVV, EVEX_B, EVEX_RC, PD, Sched<[sched]>;
}

//TODO: Merge into avx512_binop_all, difference is rounding control added here.
multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched,
                           AVX512VLVectorVTInfo _SrcVTInfo,
                           AVX512VLVectorVTInfo _DstVTInfo,
                           SDNode OpNode, SDNode OpNodeRnd> {
  // 512-bit: the plain binop forms and the rounding-control form share one
  // defm.
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                            _SrcVTInfo.info512, _DstVTInfo.info512,
                            _SrcVTInfo.info512>,
           avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.ZMM,
                              _SrcVTInfo.info512, _DstVTInfo.info512,
                              OpNodeRnd>,
           EVEX_V512, EVEX_CD8<32, CD8VF>;

  // 256-bit and 128-bit plain binop forms.
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                 _SrcVTInfo.info256, _DstVTInfo.info256,
                                 _SrcVTInfo.info256>,
                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                 _SrcVTInfo.info128, _DstVTInfo.info128,
                                 _SrcVTInfo.info128>,
                EVEX_V128, EVEX_CD8<32, CD8VF>;
  }

  // 256-bit rounding-control form, defined with hasEVEX_U set.
  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in
  defm Z256 : avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.YMM,
                                 _SrcVTInfo.info256, _DstVTInfo.info256,
                                 OpNodeRnd>;
}

defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",
                                   SchedWriteCvtPD2PS,
                                   avx512vl_f32_info, avx512vl_f16_info,
                                   X86vfpround2, X86vfpround2Rnd>,
                   T8;

// Two-source FP16 -> 8-bit conversions built on avx512_binop_all.
defm VCVT2PH2BF8 : avx512_binop_all<0x74, "vcvt2ph2bf8", SchedWriteCvtPD2PS,
                                    avx512vl_f16_info, avx512vl_i8_info,
                                    X86vcvt2ph2bf8,
                                    [HasAVX10_2_512], [HasAVX10_2]>,
                   EVEX_CD8<16, CD8VF>, T8, XD;
defm VCVT2PH2BF8S : avx512_binop_all<0x74, "vcvt2ph2bf8s", SchedWriteCvtPD2PS,
                                     avx512vl_f16_info, avx512vl_i8_info,
                                     X86vcvt2ph2bf8s,
                                     [HasAVX10_2_512], [HasAVX10_2]>,
                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;
defm VCVT2PH2HF8 : avx512_binop_all<0x18, "vcvt2ph2hf8", SchedWriteCvtPD2PS,
                                    avx512vl_f16_info, avx512vl_i8_info,
                                    X86vcvt2ph2hf8,
                                    [HasAVX10_2_512], [HasAVX10_2]>,
                   EVEX_CD8<16, CD8VF>, T_MAP5, XD;
defm VCVT2PH2HF8S : avx512_binop_all<0x1b, "vcvt2ph2hf8s", SchedWriteCvtPD2PS,
                                     avx512vl_f16_info, avx512vl_i8_info,
                                     X86vcvt2ph2hf8s,
                                     [HasAVX10_2_512], [HasAVX10_2]>,
                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;

//TODO: Merge into avx512_vcvt_fp, difference is one more source register here.
multiclass avx10_convert_3op_packed<bits<8> OpCode, string OpcodeStr,
    X86VectorVTInfo vt_dst, X86VectorVTInfo vt_src1,
    X86VectorVTInfo vt_src2, SDPatternOperator OpNode,
    SDPatternOperator MaskOpNode, X86FoldableSchedWrite sched,
    string Broadcast = vt_src2.BroadcastStr,
    X86MemOperand MemOp = vt_src2.MemOp,
    RegisterClass MaskRC = vt_src2.KRCWM,
    dag LdDAG =
      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
                         (vt_src2.VT (vt_src2.LdFrag addr:$src2)))),
    dag MaskLdDAG =
      (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                             (vt_src2.VT (vt_src2.LdFrag addr:$src2))))> {
  // Register-register form.
  defm rr : AVX512_maskable_cvt<OpCode, MRMSrcReg, vt_dst,
              (outs vt_dst.RC:$dst),
              (ins vt_src1.RC:$src1, vt_src2.RC:$src2),
              (ins vt_dst.RC:$src0, MaskRC:$mask,
                   vt_src1.RC:$src1, vt_src2.RC:$src2),
              (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
              OpcodeStr, "$src2, $src1", "$src1, $src2",
              (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
                                 (vt_src2.VT vt_src2.RC:$src2))),
              (vselect_mask MaskRC:$mask,
                (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                       (vt_src2.VT vt_src2.RC:$src2))),
                vt_dst.RC:$src0),
              (vselect_mask MaskRC:$mask,
                (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                       (vt_src2.VT vt_src2.RC:$src2))),
                vt_dst.ImmAllZerosV)>,
            EVEX, VVVV, Sched<[sched]>;

  let mayLoad = 1 in {
    // Full-vector memory form; the load DAGs come from the LdDAG and
    // MaskLdDAG template arguments.
    defm rm : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst,
                (outs vt_dst.RC:$dst),
                (ins vt_src1.RC:$src1, MemOp:$src2),
                (ins vt_dst.RC:$src0, MaskRC:$mask,
                     vt_src1.RC:$src1, MemOp:$src2),
                (ins MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
                OpcodeStr, "$src2, $src1", "$src1, $src2",
                LdDAG,
                (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.RC:$src0),
                (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.ImmAllZerosV)>,
              EVEX, VVVV, Sched<[sched]>;

    // Broadcast memory form (EVEX_B); the Broadcast string is appended to
    // the memory operand in the assembly text.
    defm rmb : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst,
                 (outs vt_dst.RC:$dst),
                 (ins vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
                 (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1,
                      vt_src2.ScalarMemOp:$src2),
                 (ins MaskRC:$mask, vt_src1.RC:$src1,
                      vt_src2.ScalarMemOp:$src2),
                 OpcodeStr,
                 "${src2}"#Broadcast#", $src1", "$src1, ${src2}"#Broadcast,
                 (vt_dst.VT
                   (OpNode (vt_src1.VT vt_src1.RC:$src1),
                           (vt_src2.VT
                             (vt_src2.BroadcastLdFrag addr:$src2)))),
                 (vselect_mask MaskRC:$mask,
                   (vt_dst.VT
                     (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                 (vt_src2.VT
                                   (vt_src2.BroadcastLdFrag addr:$src2)))),
                   vt_dst.RC:$src0),
                 (vselect_mask MaskRC:$mask,
                   (vt_dst.VT
                     (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                 (vt_src2.VT
                                   (vt_src2.BroadcastLdFrag addr:$src2)))),
                   vt_dst.ImmAllZerosV)>,
               EVEX, VVVV, EVEX_B, Sched<[sched]>;
  }
}

//TODO: Merge into avx512_cvt_trunc
multiclass avx10_convert_3op<bits<8> OpCode, string OpcodeStr,
    AVX512VLVectorVTInfo vt_dst, AVX512VLVectorVTInfo vt_src,
    X86SchedWriteWidths sched,
    SDPatternOperator OpNode,
    SDPatternOperator MaskOpNode,
    PatFrag bcast128 = vt_src.info128.BroadcastLdFrag,
    PatFrag loadVT128 = vt_src.info128.LdFrag,
    RegisterClass maskRC128 = vt_src.info128.KRCWM> {
  // 512-bit sources: the result type is vt_dst.info256 and OpNode is used for
  // both the unmasked and the masked patterns.
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info256,
                                    vt_dst.info512, vt_src.info512,
                                    OpNode, OpNode, sched.ZMM>,
           EVEX_V512, EVEX_CD8<16, CD8VF>;

  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
                                         vt_dst.info256, vt_src.info256,
                                         OpNode, OpNode, sched.YMM>,
                EVEX_V256, EVEX_CD8<16, CD8VF>;
    defm Z128 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
                                         vt_dst.info128, vt_src.info128,
                                         null_frag, null_frag, sched.XMM>,
                EVEX_V128, EVEX_CD8<16, CD8VF>;

    // Special patterns to allow use of MaskOpNode for masking 128 version.
    // Instruction patterns have been disabled with null_frag.

    // Register source.
    def : Pat<(vt_dst.info128.VT
                (OpNode (vt_dst.info128.VT VR128X:$src1),
                        (vt_src.info128.VT VR128X:$src2))),
              (!cast<Instruction>(NAME # "Z128rr")
                VR128X:$src1, VR128X:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT VR128X:$src2),
                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk")
                VR128X:$src0, maskRC128:$mask, VR128X:$src1, VR128X:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT VR128X:$src2),
                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz")
                maskRC128:$mask, VR128X:$src1, VR128X:$src2)>;

    // Full-vector load source.
    def : Pat<(vt_dst.info128.VT
                (OpNode (vt_dst.info128.VT VR128X:$src1),
                        (loadVT128 addr:$src2))),
              (!cast<Instruction>(NAME # "Z128rm")
                VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (loadVT128 addr:$src2),
                          (vt_dst.info128.VT VR128X:$src0),
                          maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk")
                VR128X:$src0, maskRC128:$mask, VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (loadVT128 addr:$src2),
                          vt_dst.info128.ImmAllZerosV,
                          maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz")
                maskRC128:$mask, VR128X:$src1, addr:$src2)>;

    // Broadcast load source.
    def : Pat<(vt_dst.info128.VT
                (OpNode (vt_dst.info128.VT VR128X:$src1),
                        (vt_src.info128.VT (bcast128 addr:$src2)))),
              (!cast<Instruction>(NAME # "Z128rmb")
                VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT (bcast128 addr:$src2)),
                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk")
                VR128X:$src0, maskRC128:$mask, VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT (bcast128 addr:$src2)),
                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz")
                maskRC128:$mask, VR128X:$src1, addr:$src2)>;
  }
}

defm VCVTBIASPH2BF8 : avx10_convert_3op<0x74, "vcvtbiasph2bf8",
                                        avx512vl_i8_info, avx512vl_f16_info,
                                        SchedWriteCvtPD2PS,
                                        X86vcvtbiasph2bf8, X86vmcvtbiasph2bf8>,
                      T8, PS;
defm VCVTBIASPH2BF8S : avx10_convert_3op<0x74, "vcvtbiasph2bf8s",
                                         avx512vl_i8_info, avx512vl_f16_info,
                                         SchedWriteCvtPD2PS,
                                         X86vcvtbiasph2bf8s, X86vmcvtbiasph2bf8s>,
                       T_MAP5, PS;
defm VCVTBIASPH2HF8 : avx10_convert_3op<0x18, "vcvtbiasph2hf8",
                                        avx512vl_i8_info, avx512vl_f16_info,
                                        SchedWriteCvtPD2PS,
                                        X86vcvtbiasph2hf8, X86vmcvtbiasph2hf8>,
                      T_MAP5, PS;
defm VCVTBIASPH2HF8S : avx10_convert_3op<0x1b, "vcvtbiasph2hf8s",
                                         avx512vl_i8_info, avx512vl_f16_info,
                                         SchedWriteCvtPD2PS,
                                         X86vcvtbiasph2hf8s, X86vmcvtbiasph2hf8s>,
                       T_MAP5, PS;

defm VCVTPH2BF8 : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8", avx512vl_i8_info,
                                      avx512vl_f16_info, SchedWriteCvtPD2PS,
                                      X86vcvtph2bf8, X86vmcvtph2bf8,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                  T8, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2BF8S : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8s", avx512vl_i8_info,
                                       avx512vl_f16_info, SchedWriteCvtPD2PS,
                                       X86vcvtph2bf8s, X86vmcvtph2bf8s,
                                       [HasAVX10_2], [HasAVX10_2_512]>,
                   T_MAP5, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2HF8 : avx512_cvt_trunc_ne<0x18, "vcvtph2hf8", avx512vl_i8_info,
                                      avx512vl_f16_info, SchedWriteCvtPD2PS,
                                      X86vcvtph2hf8, X86vmcvtph2hf8,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                  T_MAP5, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2HF8S : avx512_cvt_trunc_ne<0x1b, "vcvtph2hf8s", avx512vl_i8_info,
                                       avx512vl_f16_info, SchedWriteCvtPD2PS,
                                       X86vcvtph2hf8s, X86vmcvtph2hf8s,
                                       [HasAVX10_2], [HasAVX10_2_512]>,
                   T_MAP5, XS, EVEX_CD8<16, CD8VF>;

// Two-operand conversion with register and memory forms only (no broadcast
// form is defined here).
multiclass avx10_convert_2op_nomb_packed<bits<8> opc, string OpcodeStr,
    X86VectorVTInfo _dest, X86VectorVTInfo _src,
    SDNode OpNode, X86MemOperand x86memop,
    X86FoldableSchedWrite sched,
    dag ld_dag = (load addr:$src)> {
  let ExeDomain = _dest.ExeDomain in {
    defm rr : AVX512_maskable_split<opc, MRMSrcReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src),
                                    OpcodeStr, "$src", "$src",
                                    (OpNode (_src.VT _src.RC:$src)),
                                    (OpNode (_src.VT _src.RC:$src))>,
              Sched<[sched]>;
    defm rm : AVX512_maskable_split<opc, MRMSrcMem, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins x86memop:$src),
                                    OpcodeStr, "$src", "$src",
                                    (OpNode (_src.VT ld_dag)),
                                    (OpNode (_src.VT ld_dag))>,
              Sched<[sched.Folded]>;
  }
}

// Per-width instantiations; each width names its own source VT info and
// memory operand explicitly.
multiclass avx10_convert_2op_nomb<string OpcodeStr, AVX512VLVectorVTInfo _dest,
                                  AVX512VLVectorVTInfo _src, bits<8> opc,
                                  SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_convert_2op_nomb_packed<opc, OpcodeStr,
                                         _dest.info512, _src.info256,
                                         OpNode, f256mem, WriteCvtPH2PSZ>,
           EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx10_convert_2op_nomb_packed<opc, OpcodeStr,
                                              _dest.info128, _src.info128,
                                              OpNode, f64mem, WriteCvtPH2PSZ>,
                EVEX_V128;
    defm Z256 : avx10_convert_2op_nomb_packed<opc, OpcodeStr,
                                              _dest.info256, _src.info128,
                                              OpNode, f128mem, WriteCvtPH2PSZ>,
                EVEX_V256;
  }
}

defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
                                         avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
                  AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;

//-------------------------------------------------
// AVX10 BF16 instructions
//-------------------------------------------------

// VADDBF16, VSUBBF16, VMULBF16, VDIVBF16, VMAXBF16, VMINBF16
// Intrinsic-based variant: both the unmasked and the masked node are the
// intrinsic named "int_x86_avx10_<OpcodeStr>bf16<width>".
multiclass avx10_fp_binop_int_bf16<bits<8> opc, string OpcodeStr,
                                   X86SchedWriteSizes sched,
                                   bit IsCommutable = 0> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_packed<opc, OpcodeStr,
             !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
             !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
             v32bf16_info, sched.PH.ZMM, IsCommutable>,
           EVEX_V512, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_packed<opc, OpcodeStr,
                  !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
                  !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
                  v8bf16x_info, sched.PH.XMM, IsCommutable>,
                EVEX_V128, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fp_packed<opc, OpcodeStr,
                  !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
                  !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
                  v16bf16x_info, sched.PH.YMM, IsCommutable>,
                EVEX_V256, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  }
}

// SDNode-based variant; MaskOpNode defaults to OpNode.
multiclass avx10_fp_binop_bf16<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               X86SchedWriteSizes sched,
                               bit IsCommutable = 0,
                               SDPatternOperator MaskOpNode = OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                            v32bf16_info, sched.PH.ZMM, IsCommutable>,
           EVEX_V512, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                                 v8bf16x_info, sched.PH.XMM, IsCommutable>,
                EVEX_V128, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                                 v16bf16x_info, sched.PH.YMM, IsCommutable>,
                EVEX_V256, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  }
}

// These are defined with an empty Uses list and mayRaiseFPException = 0.
let Uses = []<Register>, mayRaiseFPException = 0 in {
  defm VADDBF16 : avx10_fp_binop_bf16<0x58, "vadd", fadd, SchedWriteFAddSizes, 1>;
  defm VSUBBF16 : avx10_fp_binop_bf16<0x5C, "vsub", fsub, SchedWriteFAddSizes, 0>;
  defm VMULBF16 : avx10_fp_binop_bf16<0x59, "vmul", fmul, SchedWriteFMulSizes, 1>;
  defm VDIVBF16 : avx10_fp_binop_bf16<0x5E, "vdiv", fdiv, SchedWriteFDivSizes, 0>;
  defm VMINBF16 : avx10_fp_binop_int_bf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
  defm VMAXBF16 : avx10_fp_binop_int_bf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
}

// VCOMISBF16
let Uses = []<Register>, mayRaiseFPException = 0,
    Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
  //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`
  //which may require extend supports on BFR16X, loadbf16, ...
  defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem,
                                   loadf16, "comisbf16", SSEPackedSingle>,
                     T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

  let isCodeGenOnly = 1 in
  defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
                                       sse_load_bf16, "comisbf16",
                                       SSEPackedSingle>,
                     T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

// VCMPBF16
// Compare-into-mask forms with an 8-bit condition-code immediate: register,
// full-vector memory and broadcast memory.
multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _> {
  let mayRaiseFPException = 0 in {
    defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                 1>,
               Sched<[sched]>;

    defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86cmpm (_.VT _.RC:$src1),
                          (_.VT (_.LdFrag addr:$src2)),
                          timm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1),
                             (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc)>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $cc",
                  (X86cmpm (_.VT _.RC:$src1),
                           (_.VT (_.BroadcastLdFrag addr:$src2)),
                           timm:$cc),
                  (X86cmpm_su (_.VT _.RC:$src1),
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
  }
}

defm VCMPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
                AVX512XDIi8Base, EVEX, VVVV,
                EVEX_CD8<16, CD8VF>, TA;


// VSQRTBF16
// The "bf16" suffix is appended to OpcodeStr here.
multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_sqrt_packed<opc, OpcodeStr#"bf16",
                              sched.PH.ZMM, v32bf16_info>,
           EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_sqrt_packed<opc, OpcodeStr#"bf16",
                                   sched.PH.XMM, v8bf16x_info>,
                EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_sqrt_packed<opc, OpcodeStr#"bf16",
                                   sched.PH.YMM, v16bf16x_info>,
                EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in
defm VSQRTBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrt", SchedWriteFSqrtSizes>;

// VRSQRTBF16, VRCPBF16, VGETEXPBF16
// The defm names carry the BF16 infix, so `defm VRCP` yields VRCPBF16Z*.
multiclass avx10_fp14_bf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm BF16Z : avx512_fp14_p<opc, OpcodeStr#"bf16",
                             OpNode, sched.ZMM, v32bf16_info>,
               EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm BF16Z128 : avx512_fp14_p<opc, OpcodeStr#"bf16",
                                  OpNode, sched.XMM, v8bf16x_info>,
                    EVEX_V128;
    defm BF16Z256 : avx512_fp14_p<opc, OpcodeStr#"bf16",
                                  OpNode, sched.YMM, v16bf16x_info>,
                    EVEX_V256;
  }
}

defm VRSQRT : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
              T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VRCP : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
            T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
               T_MAP5, EVEX_CD8<16, CD8VF>;

// VSCALEFBF16
// Packed BF16 scalef for all three vector lengths, built on
// avx512_fp_scalef_p with the X86scalef node.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string forwarded unchanged to avx512_fp_scalef_p.
//   sched     - per-width scheduling classes.
multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
             EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
                EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
    defm Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
                EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
  }
}

// No implicit register uses and no FP exception modelling for the BF16 form.
// NOTE(review): presumably because BF16 arithmetic ignores MXCSR - confirm.
let Uses = []<Register>, mayRaiseFPException = 0 in
defm VSCALEFBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;

// VREDUCEBF16, VRNDSCALEBF16, VGETMANTBF16
// Unary packed BF16 operations taking an 8-bit immediate, built on
// avx512_unary_fp_packed_imm.
//   OpcodeStr  - mnemonic string forwarded unchanged.
//   _          - VL type-info bundle (info128/info256/info512 selected below).
//   opc        - opcode byte.
//   OpNode     - node used for the unmasked pattern.
//   MaskOpNode - node used for the masked patterns.
//   sched      - per-width scheduling classes.
multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
              AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
              SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                        sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VREDUCEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreduce", avx512vl_bf16_info, 0x56,
                      X86VReduce, X86VReduce, SchedWriteFRnd>,
                      AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm VRNDSCALEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscale", avx512vl_bf16_info, 0x08,
                      X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
                      AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm VGETMANTBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
                      X86VGetMant, X86VGetMant, SchedWriteFRnd>,
                      AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
}

// VFPCLASSBF16
// Packed BF16 fpclass for all three vector lengths, built on
// avx512_vector_fpclass. The "x"/"y"/"z" strings and the empty register list
// are forwarded to that multiclass per vector length.
multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
                                 X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
                                   avx512vl_bf16_info.info512, "z",
                                   []<Register>>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
                                      avx512vl_bf16_info.info128, "x",
                                      []<Register>>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
                                      avx512vl_bf16_info.info256, "y",
                                      []<Register>>, EVEX_V256;
  }
}

defm VFPCLASSBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
                    AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;

// VF[,N]M[ADD,SUB][132,213,231]BF16
// The three multiclasses below are identical except for the operand-order
// helper they wrap (avx512_fma3p_{213,231,132}_rm).
//   opc        - opcode byte.
//   OpcodeStr  - full mnemonic (already carries the "bf16" suffix).
//   OpNode     - node used for the unmasked pattern.
//   MaskOpNode - node used for the masked patterns.
//   sched      - per-width scheduling classes.
multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                                 EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD213BF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB213BF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD213BF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB213BF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                                 EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD231BF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB231BF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD231BF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB231BF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                                 EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD132BF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB132BF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD132BF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB132BF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------

// Scalar-register compare that only writes EFLAGS (no register result).
// Both forms are isCodeGenOnly; the assembler-visible forms come from
// avx10_com_ef_int below.
//   Opc       - opcode byte.
//   RC / VT   - scalar register class and value type of the operands.
//   OpNode    - compare node whose result is assigned to EFLAGS.
//   OpcodeStr - mnemonic.
//   x86memop  - memory operand type of the rm form.
//   ld_frag   - load fragment used in the rm pattern.
//   d         - execution domain.
//   sched     - scheduling class; the rm form uses its folded-load variant.
multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
                        SDPatternOperator OpNode, string OpcodeStr,
                        X86MemOperand x86memop, PatFrag ld_frag,
                        Domain d, X86FoldableSchedWrite sched = WriteFComX>{
  let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
    def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                    [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
                    EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                      [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
                      EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }
}

// Vector-register ("_Int") flavour of the EFLAGS-only compare, plus an
// {sae} register form (rrb_Int) that carries EVEX_B and has no pattern.
//   Opc       - opcode byte.
//   _         - vector type info supplying RC, VT, ScalarMemOp and LdFrag.
//   OpNode    - compare node whose result is assigned to EFLAGS.
//   OpcodeStr - mnemonic.
//   d         - execution domain.
//   sched     - scheduling class; the rm form uses its folded-load variant.
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
                            string OpcodeStr,
                            Domain d,
                            X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, mayRaiseFPException = 1 in {
    def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                        !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                        [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
                        EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      // The load-folding form must be scheduled with the folded write and the
      // read-after-fold operand, exactly like avx10_com_ef::rm; using the
      // plain register-form class here mis-models the memory variant.
      def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                          !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                          [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
                          EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
    def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                         !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                         []>,
                         EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
  }
}

// Each VUCOMX* name is instantiated twice on purpose: once through
// avx10_com_ef (rr/rm) and once through avx10_com_ef_int
// (rr_Int/rm_Int/rrb_Int); the record suffixes keep them distinct.
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
  defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
                                "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
                                TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
                                "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
                                T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
                                "vucomxss", f32mem, loadf32, SSEPackedSingle>,
                                TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
                                   "vcomxsd", SSEPackedDouble>,
                                   TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
                                   "vcomxsh", SSEPackedSingle>,
                                   T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
                                   "vcomxss", SSEPackedSingle>,
                                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
                                    "vucomxsd", SSEPackedDouble>,
                                    TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
                                    "vucomxsh", SSEPackedSingle>,
                                    T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
                                    "vucomxss", SSEPackedSingle>,
                                    TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}

//-------------------------------------------------
// AVX10 MOVZXC (COPY) instructions
//-------------------------------------------------
// Register-to-register vmovd/vmovw forms selected for X86vzmovl, plus
// codegen-only/ForceDisassemble memory and alternate-encoding forms.
// Each brace-less `let ... in` below applies only to the single def that
// follows it; the InstAlias records are outside those lets.
let Predicates = [HasAVX10_2] in {
  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
                                    (v4i32 VR128X:$src))))]>, EVEX,
                                    Sched<[WriteVecMoveFromGpr]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                                    (ins i32mem:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<32, CD8VT1>,
                                    Sched<[WriteVecLoad]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
                                    (ins i32mem:$dst, VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<32, CD8VT1>,
                                    Sched<[WriteVecStore]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
                                     (ins VR128X:$src),
                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                     Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;

  def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v8i16 (X86vzmovl
                                    (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
                                    Sched<[WriteVecMoveFromGpr]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                                    (ins i16mem:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
                                    Sched<[WriteVecLoad]>;

  // The vmovw store uses a 16-bit memory operand: i16mem agrees with the
  // load form above and with this def's own EVEX_CD8<16, CD8VT1> scaling
  // (the previous i32mem advertised a 32-bit access).
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
                                    (ins i16mem:$dst, VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
                                    Sched<[WriteVecStore]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                     (ins VR128X:$src),
                                     "vmovw\t{$src, $dst|$dst, $src}",
                                     []>, EVEX, T_MAP5,
                                     Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}

// MOVRS
// Maskable memory-source load whose pattern is the intrinsic named
// "int_x86_avx10_" # OpStr # _.Size (looked up with !cast).
//   opc   - opcode byte.
//   OpStr - mnemonic; also the middle part of the intrinsic name.
//   _     - vector type info supplying RC, MemOp, VT, Size and ExeDomain.
multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src), OpStr, "$src", "$src",
                            (_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
                                   addr:$src))>, EVEX;
  }
}

// Vector-length wrapper around vmovrs_p. Every variant additionally requires
// HasMOVRS and In64BitMode.
multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in
    defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
  let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
    defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
    defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
  }
}

// All four element widths share opcode 0x6f in map 5; they are told apart by
// the XD/XS prefix and REX_W, as listed on each defm.
defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
               T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
               T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
               T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
               T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;

// SM4(EVEX)
// EVEX-encoded SM4 variants for 128/256/512-bit vectors.
//   OpStr - mnemonic forwarded to SM4_Base.
// The 128/256-bit variants carry AddedComplexity = 1; the 512-bit one does
// not. NOTE(review): presumably so the EVEX forms win over the VEX forms when
// both apply - confirm.
multiclass avx10_sm4_base<string OpStr> {
  // SM4_Base is in X86InstrSSE.td.
  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
  }
  let Predicates = [HasSM4, HasAVX10_2_512] in
    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
}

defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;