xref: /llvm-project/llvm/lib/Target/X86/X86InstrAVX10.td (revision ee2722fc882ed5dbc7609686bd998b023c6645b2)
1//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX10 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
// VNNI FP16
// VDPPHPS is expanded from avx512_dpf16ps_sizes with opcode 0x52, the
// avx512vl_f16_info type family and the two predicate lists [HasAVX10_2] and
// [HasAVX10_2_512]. All resulting records are placed in the SSEPackedSingle
// execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VDPPHPS : avx512_dpf16ps_sizes<0x52, "vdpphps", X86dpfp16ps,
                                      avx512vl_f16_info,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                 T8, PS, EVEX_CD8<32, CD8VF>;
}
20
// VNNI INT8
// Six byte dot-product instructions, all expanded from VNNI_common with the
// SchedWriteVecIMul scheduling class and the [HasAVX10_2] / [HasAVX10_2_512]
// predicate lists. Mnemonics ending in "s" use opcode 0x51, the others 0x50;
// the three operand-sign flavours are told apart by the trailing prefix class
// (XD / XS / PS).
// NOTE(review): the 0/1 argument is presumably VNNI_common's commutability
// flag -- confirm against its definition.
defm VPDPBSSD  : VNNI_common<0x50, "vpdpbssd", X86vpdpbssd,
                             SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSSDS : VNNI_common<0x51, "vpdpbssds", X86vpdpbssds,
                             SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSUD  : VNNI_common<0x50, "vpdpbsud", X86vpdpbsud,
                             SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBSUDS : VNNI_common<0x51, "vpdpbsuds", X86vpdpbsuds,
                             SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBUUD  : VNNI_common<0x50, "vpdpbuud", X86vpdpbuud,
                             SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPBUUDS : VNNI_common<0x51, "vpdpbuuds", X86vpdpbuuds,
                             SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
34
// VNNI INT16
// Six word dot-product instructions expanded from VNNI_common, mirroring the
// INT8 group: opcode 0xd2 for the plain mnemonics and 0xd3 for the ones ending
// in "s", with the operand-sign flavour selected by the trailing prefix class
// (XS / PD / PS).
// NOTE(review): the 0/1 argument is presumably VNNI_common's commutability
// flag -- confirm against its definition.
defm VPDPWSUD  : VNNI_common<0xd2, "vpdpwsud", X86vpdpwsud,
                             SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWSUDS : VNNI_common<0xd3, "vpdpwsuds", X86vpdpwsuds,
                             SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWUSD  : VNNI_common<0xd2, "vpdpwusd", X86vpdpwusd,
                             SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUSDS : VNNI_common<0xd3, "vpdpwusds", X86vpdpwusds,
                             SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUUD  : VNNI_common<0xd2, "vpdpwuud", X86vpdpwuud,
                             SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPWUUDS : VNNI_common<0xd3, "vpdpwuuds", X86vpdpwuuds,
                             SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
48
// VMPSADBW
// Expanded from avx512_common_3Op_rm_imm8 with opcode 0x42, the i16 type
// family passed first and the i8 family second, gated on HasAVX10_2.
defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw",
                                          SchedWritePSADBW,
                                          avx512vl_i16_info,
                                          avx512vl_i8_info,
                                          HasAVX10_2>,
                XS, EVEX_CD8<32, CD8VF>;
54
// YMM Rounding
// Packed binary FP operation with a rounding-mode node. Expands
// avx512_fp_round_packed three times (PHZ256 / PSZ256 / PDZ256), one per
// element type, picking the matching .YMM scheduling entry out of `sched`.
//   opc       - opcode byte shared by all three variants
//   OpcodeStr - mnemonic string forwarded unchanged
//   OpNodeRnd - selection DAG node forwarded unchanged
//   sched     - per-type scheduling info (PH / PS / PD members are read)
multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                       sched.PH.YMM, v16f16x_info>,
                T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                       sched.PS.YMM, v8f32x_info>,
                TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                       sched.PD.YMM, v4f64x_info>,
                TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
65
// Packed binary FP operation with an SAE node. Same layout as
// avx256_fp_binop_p_round, but each variant is expanded from
// avx512_fp_sae_packed instead of avx512_fp_round_packed.
//   opc       - opcode byte shared by all three variants
//   OpcodeStr - mnemonic string forwarded unchanged
//   OpNodeRnd - selection DAG node forwarded unchanged
//   sched     - per-type scheduling info (PH / PS / PD members are read)
multiclass avx256_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PHZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd,
                                     sched.PH.YMM, v16f16x_info>,
                T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd,
                                     sched.PS.YMM, v8f32x_info>,
                TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd,
                                     sched.PD.YMM, v4f64x_info>,
                TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
75
// Packed FP compare with SAE. Expands avx512_vcmp_sae for the f16, f32 and
// f64 vector infos, all using sched.YMM. Only the f16 variant adds TA on top
// of its immediate base class; only the f64 variant adds REX_W.
multiclass avx256_vcmp_p_sae<X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_vcmp_sae<sched.YMM, v16f16x_info>,
                AVX512PSIi8Base, EVEX_CD8<16, CD8VF>, TA;
  defm PSZ256 : avx512_vcmp_sae<sched.YMM, v8f32x_info>,
                AVX512PSIi8Base, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_vcmp_sae<sched.YMM, v4f64x_info>,
                AVX512PDIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
}
81
// Packed fixupimm with SAE. Expands avx512_fixupimm_packed_sae for f32 and
// f64; each expansion is also given the integer vector info of the same
// element width (v8i32x_info / v4i64x_info).
multiclass avx256_fixupimm_packed_all<bits<8> opc, string OpcodeStr,
                                      X86SchedWriteWidths sched> {
  defm PSZ256 : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM,
                                           v8f32x_info, v8i32x_info>,
                EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM,
                                           v4f64x_info, v4i64x_info>,
                EVEX_CD8<64, CD8VF>, REX_W;
}
89
// Packed getexp with SAE. Expands avx512_fp28_p_sae three times; the mnemonic
// passed down is OpcodeStr with "ph", "ps" or "pd" appended.
multiclass avx256_vgetexp<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
                          X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_fp28_p_sae<opc, !strconcat(OpcodeStr, "ph"),
                                  v16f16x_info, OpNodeSAE, sched.YMM>,
                T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp28_p_sae<opc, !strconcat(OpcodeStr, "ps"),
                                  v8f32x_info, OpNodeSAE, sched.YMM>,
                T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp28_p_sae<opc, !strconcat(OpcodeStr, "pd"),
                                  v4f64x_info, OpNodeSAE, sched.YMM>,
                T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
99
// Unary packed FP operation with immediate and SAE. Expands
// avx512_unary_fp_sae_packed_imm for f16, f32 and f64.
//   opcPs - opcode used by BOTH the PHZ256 and PSZ256 variants
//   opcPd - opcode used by the PDZ256 variant
// The f16 variant uses AVX512PSIi8Base plus TA, the other two use
// AVX512AIi8Base.
multiclass avx256_unary_fp_sae<string OpcodeStr, bits<8> opcPs, bits<8> opcPd,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE,
                                               sched.YMM, v16f16x_info>,
                AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE,
                                               sched.YMM, v8f32x_info>,
                AVX512AIi8Base, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_unary_fp_sae_packed_imm<opcPd, OpcodeStr, OpNodeSAE,
                                               sched.YMM, v4f64x_info>,
                AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
}
109
// Binary packed FP operation with immediate and SAE, f32 and f64 only.
// Expands avx512_fp_sae_packed_imm with OpcodeStr suffixed by "ps" / "pd".
multiclass avx256_common_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                           SDNode OpNodeSAE,
                                           X86SchedWriteWidths sched> {
  defm PSZ256 : avx512_fp_sae_packed_imm<opc, !strconcat(OpcodeStr, "ps"),
                                         OpNodeSAE, sched.YMM, v8f32x_info>,
                EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_sae_packed_imm<opc, !strconcat(OpcodeStr, "pd"),
                                         OpNodeSAE, sched.YMM, v4f64x_info>,
                EVEX_CD8<64, CD8VF>, REX_W;
}
117
// Packed scalef-style operation with rounding. Expands
// avx512_fp_round_packed for f16, f32 and f64. Unlike
// avx256_fp_binop_p_round this takes an X86SchedWriteWidths, so one sched.YMM
// entry is shared by all three variants.
multiclass avx256_fp_scalef_round<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd,
                                  X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                       sched.YMM, v16f16x_info>,
                T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                       sched.YMM, v8f32x_info>,
                T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                       sched.YMM, v4f64x_info>,
                T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
127
// Packed square root with rounding. Expands avx512_sqrt_packed_round for f16,
// f32 and f64; the mnemonic is OpcodeStr with "ph" / "ps" / "pd" appended and
// the scheduling entry is the matching sched.<type>.YMM member.
multiclass avx256_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PHZ256 : avx512_sqrt_packed_round<opc, OpcodeStr#"ph",
                                         sched.PH.YMM, v16f16x_info>,
                T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_sqrt_packed_round<opc, OpcodeStr#"ps",
                                         sched.PS.YMM, v8f32x_info>,
                TB, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_sqrt_packed_round<opc, OpcodeStr#"pd",
                                         sched.PD.YMM, v4f64x_info>,
                TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
137
// 16-bit integer to f16 conversion with rounding control. A single PHZ256
// expansion of avx512_vcvt_fp_rc with the opcode fixed at 0x7D, destination
// v16f16x_info and source v16i16x_info.
multiclass avx256_vcvtw_rc<string OpcodeStr, SDNode OpNodeRnd> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x7D, OpcodeStr,
                                  v16f16x_info, v16i16x_info,
                                  OpNodeRnd, SchedWriteCvtPD2DQ.YMM>,
                EVEX_CD8<16, CD8VF>;
}
142
// 32-bit integer to FP conversion with rounding control. Both variants read
// v8i32x_info and expand avx512_vcvt_fp_rc; the destination is v8f16x_info
// ("ph") or v8f32x_info ("ps"). Both use the PS prefix class.
multiclass avx256_cvtdq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                              X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_vcvt_fp_rc<opc, OpcodeStr#"ph",
                                  v8f16x_info, v8i32x_info,
                                  OpNodeRnd, sched.YMM>,
                T_MAP5, PS, EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_vcvt_fp_rc<opc, OpcodeStr#"ps",
                                  v8f32x_info, v8i32x_info,
                                  OpNodeRnd, sched.YMM>,
                TB, PS, EVEX_CD8<32, CD8VF>;
}
150
// Counterpart of avx256_cvtdq2fp_rc that uses the XD prefix class instead of
// PS. Both variants read v8i32x_info and expand avx512_vcvt_fp_rc; the
// destination is v8f16x_info ("ph") or v8f32x_info ("ps").
multiclass avx256_cvtudq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                               X86SchedWriteWidths sched> {
  defm PHZ256 : avx512_vcvt_fp_rc<opc, OpcodeStr#"ph",
                                  v8f16x_info, v8i32x_info,
                                  OpNodeRnd, sched.YMM>,
                T_MAP5, XD, EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_vcvt_fp_rc<opc, OpcodeStr#"ps",
                                  v8f32x_info, v8i32x_info,
                                  OpNodeRnd, sched.YMM>,
                TB, XD, EVEX_CD8<32, CD8VF>;
}
158
// Conversion from the caller-supplied source info `_Src` to f16 / f32 / f64
// with rounding control, always through the X86VSintToFpRnd node. Opcodes are
// fixed here: 0x5B for "ph" and "ps", 0xE6 for "pd". No EVEX_CD8 is attached
// in this multiclass.
multiclass avx256_cvtqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"ph", v8f16x_info, _Src,
                                  X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
                T_MAP5, PS;
  defm PSZ256 : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"ps", v4f32x_info, _Src,
                                  X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
                TB, PS;
  defm PDZ256 : avx512_vcvt_fp_rc<0xE6, OpcodeStr#"pd", v4f64x_info, _Src,
                                  X86VSintToFpRnd, SchedWriteCvtDQ2PD.YMM>,
                TB, XS;
}
167
// Conversion from the caller-supplied source info `_Src` to f16 / f32 / f64
// with rounding control, always through the X86VUintToFpRnd node. All three
// variants share opcode 0x7A and differ in opcode map / prefix class. No
// EVEX_CD8 is attached in this multiclass.
multiclass avx256_cvtuqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
  defm PHZ256 : avx512_vcvt_fp_rc<0x7A, OpcodeStr#"ph", v8f16x_info, _Src,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
                T_MAP5, XD;
  defm PSZ256 : avx512_vcvt_fp_rc<0x7A, OpcodeStr#"ps", v4f32x_info, _Src,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>,
                TB, XD;
  defm PDZ256 : avx512_vcvt_fp_rc<0x7A, OpcodeStr#"pd", v4f64x_info, _Src,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PD.YMM>,
                TB, XS;
}
176
// Conversions from the caller-supplied source info `_Src` with rounding
// control, all expanded from avx512_vcvt_fp_rc:
//   PHZ256 / PSZ256   - via X86vfproundRnd, to v8f16x_info / v4f32x_info
//   DQZ256 / QQZ256   - via X86cvtp2IntRnd, to v4i32x_info / v4i64x_info
//   UDQZ256 / UQQZ256 - via X86cvtp2UIntRnd, to v4i32x_info / v4i64x_info
// No EVEX_CD8 is attached in this multiclass.
multiclass avx256_vcvt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
  // FP destinations.
  defm PHZ256  : avx512_vcvt_fp_rc<0x5A, OpcodeStr#"ph", v8f16x_info, _Src,
                                   X86vfproundRnd, SchedWriteCvtPD2PS.YMM>,
                 T_MAP5, PD;
  defm PSZ256  : avx512_vcvt_fp_rc<0x5A, OpcodeStr#"ps", v4f32x_info, _Src,
                                   X86vfproundRnd, SchedWriteCvtPD2PS.YMM>,
                 TB, PD;
  // Destinations converted through X86cvtp2IntRnd.
  defm DQZ256  : avx512_vcvt_fp_rc<0xE6, OpcodeStr#"dq", v4i32x_info, _Src,
                                   X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>,
                 TB, XD;
  defm QQZ256  : avx512_vcvt_fp_rc<0x7B, OpcodeStr#"qq", v4i64x_info, _Src,
                                   X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>,
                 TB, PD;
  // Destinations converted through X86cvtp2UIntRnd.
  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, OpcodeStr#"udq", v4i32x_info, _Src,
                                   X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>,
                 TB, PS;
  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, OpcodeStr#"uqq", v4i64x_info, _Src,
                                   X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>,
                 TB, PD;
}
191
// Conversions whose source is an f32 vector info (v8f32x_info or
// v4f32x_info):
//   PHZ256  - avx512_cvtps2ph_sae (does not use OpcodeStr)
//   PHXZ256 - avx512_vcvt_fp_rc via X86vfproundRnd, to v8f16x_info
//   PDZ256  - avx512_vcvt_fp_sae via X86vfpextSAE, to v4f64x_info
//   DQ/QQ   - avx512_vcvt_fp_rc via X86cvtp2IntRnd
//   UDQ/UQQ - avx512_vcvt_fp_rc via X86cvtp2UIntRnd
multiclass avx256_vcvt_ps2<string OpcodeStr> {
  defm PHZ256  : avx512_cvtps2ph_sae<v8i16x_info, v8f32x_info, WriteCvtPS2PHZ>,
                 EVEX_CD8<32, CD8VH>;
  defm PHXZ256 : avx512_vcvt_fp_rc<0x1D, OpcodeStr#"phx",
                                   v8f16x_info, v8f32x_info,
                                   X86vfproundRnd, SchedWriteCvtPD2PS.YMM>,
                 T_MAP5, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256  : avx512_vcvt_fp_sae<0x5A, OpcodeStr#"pd",
                                    v4f64x_info, v4f32x_info,
                                    X86vfpextSAE, SchedWriteCvtPS2PD.YMM>,
                 TB, PS, EVEX_CD8<32, CD8VF>;
  defm DQZ256  : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"dq",
                                   v8i32x_info, v8f32x_info,
                                   X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
                 TB, PD, EVEX_CD8<32, CD8VF>;
  defm QQZ256  : avx512_vcvt_fp_rc<0x7B, OpcodeStr#"qq",
                                   v4i64x_info, v4f32x_info,
                                   X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
                 TB, PD, EVEX_CD8<32, CD8VF>;
  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, OpcodeStr#"udq",
                                   v8i32x_info, v8f32x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
                 TB, PS, EVEX_CD8<32, CD8VF>;
  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, OpcodeStr#"uqq",
                                   v4i64x_info, v4f32x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
                 TB, PD, EVEX_CD8<32, CD8VF>;
}
207
// Conversions whose source is a half-precision vector:
//   PSZ256   - avx512_cvtph2ps_sae with a v8i16x_info source (does not use
//              OpcodeStr)
//   PSXZ256  - avx512_vcvt_fp_sae via X86vfpextSAE, to v8f32x_info
//   PDZ256   - avx512_vcvt_fp_sae via X86vfpextSAE, to v4f64x_info
//   W/DQ/QQ  - avx512_vcvt_fp_rc via X86cvtp2IntRnd
//   UW/UDQ/UQQ - avx512_vcvt_fp_rc via X86cvtp2UIntRnd
// The EVEX_CD8 form is CD8VF for the 16-bit destinations, CD8VH for the
// 32-bit ones and CD8VQ for the 64-bit ones.
multiclass avx256_vcvt_ph2<string OpcodeStr> {
  defm PSZ256  : avx512_cvtph2ps_sae<v8f32x_info, v8i16x_info, WriteCvtPH2PSZ>,
                 EVEX_CD8<32, CD8VH>;
  defm PSXZ256 : avx512_vcvt_fp_sae<0x13, OpcodeStr#"psx",
                                    v8f32x_info, v8f16x_info,
                                    X86vfpextSAE, SchedWriteCvtPS2PD.YMM>,
                 T_MAP6, PD, EVEX_CD8<16, CD8VH>;
  defm PDZ256  : avx512_vcvt_fp_sae<0x5A, OpcodeStr#"pd",
                                    v4f64x_info, v8f16x_info,
                                    X86vfpextSAE, SchedWriteCvtPS2PD.YMM>,
                 T_MAP5, PS, EVEX_CD8<16, CD8VQ>;
  defm WZ256   : avx512_vcvt_fp_rc<0x7D, OpcodeStr#"w",
                                   v16i16x_info, v16f16x_info,
                                   X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  defm DQZ256  : avx512_vcvt_fp_rc<0x5B, OpcodeStr#"dq",
                                   v8i32x_info, v8f16x_info,
                                   X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VH>;
  defm QQZ256  : avx512_vcvt_fp_rc<0x7B, OpcodeStr#"qq",
                                   v4i64x_info, v8f16x_info,
                                   X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
  defm UWZ256  : avx512_vcvt_fp_rc<0x7D, OpcodeStr#"uw",
                                   v16i16x_info, v16f16x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>,
                 T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, OpcodeStr#"udq",
                                   v8i32x_info, v8f16x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PS, EVEX_CD8<16, CD8VH>;
  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, OpcodeStr#"uqq",
                                   v4i64x_info, v8f16x_info,
                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
}
227
// SAE conversions from the caller-supplied source info `_Src`, expanded from
// avx512_vcvt_fp_sae. DQ/QQ go through X86cvttp2siSAE and UDQ/UQQ through
// X86cvttp2uiSAE. Only a prefix class is attached here; no opcode map or
// EVEX_CD8 is set in this multiclass.
multiclass avx256_vcvtt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
  defm DQZ256  : avx512_vcvt_fp_sae<0xE6, OpcodeStr#"dq", v4i32x_info, _Src,
                                    X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>,
                 PD;
  defm QQZ256  : avx512_vcvt_fp_sae<0x7A, OpcodeStr#"qq", v4i64x_info, _Src,
                                    X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>,
                 PD;
  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, OpcodeStr#"udq", v4i32x_info, _Src,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>,
                 PS;
  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, OpcodeStr#"uqq", v4i64x_info, _Src,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>,
                 PD;
}
238
// SAE conversions from an f32 vector info, expanded from avx512_vcvt_fp_sae.
// The 32-bit destinations (DQ / UDQ) read v8f32x_info with CD8VF; the 64-bit
// destinations (QQ / UQQ) read v4f32x_info with CD8VH. No opcode map is set
// in this multiclass.
multiclass avx256_vcvtt_ps2<string OpcodeStr> {
  defm DQZ256  : avx512_vcvt_fp_sae<0x5B, OpcodeStr#"dq",
                                    v8i32x_info, v8f32x_info,
                                    X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
                 XS, EVEX_CD8<32, CD8VF>;
  defm QQZ256  : avx512_vcvt_fp_sae<0x7A, OpcodeStr#"qq",
                                    v4i64x_info, v4f32x_info,
                                    X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
                 PD, EVEX_CD8<32, CD8VH>;
  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, OpcodeStr#"udq",
                                    v8i32x_info, v8f32x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
                 PS, EVEX_CD8<32, CD8VF>;
  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, OpcodeStr#"uqq",
                                    v4i64x_info, v4f32x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
                 PD, EVEX_CD8<32, CD8VH>;
}
249
// SAE conversions from an f16 vector info, expanded from avx512_vcvt_fp_sae
// and all placed in T_MAP5. W/DQ/QQ go through X86cvttp2siSAE and UW/UDQ/UQQ
// through X86cvttp2uiSAE. The EVEX_CD8 form is CD8VF for the 16-bit
// destinations, CD8VH for the 32-bit ones and CD8VQ for the 64-bit ones.
multiclass avx256_vcvtt_ph2<string OpcodeStr> {
  defm WZ256   : avx512_vcvt_fp_sae<0x7C, OpcodeStr#"w",
                                    v16i16x_info, v16f16x_info,
                                    X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  defm DQZ256  : avx512_vcvt_fp_sae<0x5B, OpcodeStr#"dq",
                                    v8i32x_info, v8f16x_info,
                                    X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, XS, EVEX_CD8<16, CD8VH>;
  defm QQZ256  : avx512_vcvt_fp_sae<0x7A, OpcodeStr#"qq",
                                    v4i64x_info, v8f16x_info,
                                    X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
  defm UWZ256  : avx512_vcvt_fp_sae<0x7C, OpcodeStr#"uw",
                                    v16i16x_info, v16f16x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>,
                 T_MAP5, PS, EVEX_CD8<16, CD8VF>;
  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, OpcodeStr#"udq",
                                    v8i32x_info, v8f16x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PS, EVEX_CD8<16, CD8VH>;
  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, OpcodeStr#"uqq",
                                    v4i64x_info, v8f16x_info,
                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>,
                 T_MAP5, PD, EVEX_CD8<16, CD8VQ>;
}
264
// FMA "132" form with rounding. Expands avx512_fma3_132_round for f16, f32
// and f64 using SchedWriteFMA.YMM; the mnemonic is OpcodeStr with
// "ph" / "ps" / "pd" appended.
multiclass avx256_fma3_132_round<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd> {
  defm PHZ256 : avx512_fma3_132_round<opc, OpcodeStr#"ph", OpNodeRnd,
                                      SchedWriteFMA.YMM, v16f16x_info>,
                T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fma3_132_round<opc, OpcodeStr#"ps", OpNodeRnd,
                                      SchedWriteFMA.YMM, v8f32x_info>,
                T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fma3_132_round<opc, OpcodeStr#"pd", OpNodeRnd,
                                      SchedWriteFMA.YMM, v4f64x_info>,
                T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
273
// FMA "213" form with rounding. Expands avx512_fma3_213_round for f16, f32
// and f64 using SchedWriteFMA.YMM; the mnemonic is OpcodeStr with
// "ph" / "ps" / "pd" appended.
multiclass avx256_fma3_213_round<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd> {
  defm PHZ256 : avx512_fma3_213_round<opc, OpcodeStr#"ph", OpNodeRnd,
                                      SchedWriteFMA.YMM, v16f16x_info>,
                T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fma3_213_round<opc, OpcodeStr#"ps", OpNodeRnd,
                                      SchedWriteFMA.YMM, v8f32x_info>,
                T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fma3_213_round<opc, OpcodeStr#"pd", OpNodeRnd,
                                      SchedWriteFMA.YMM, v4f64x_info>,
                T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
282
// FMA "231" form with rounding. Expands avx512_fma3_231_round for f16, f32
// and f64 using SchedWriteFMA.YMM; the mnemonic is OpcodeStr with
// "ph" / "ps" / "pd" appended.
multiclass avx256_fma3_231_round<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd> {
  defm PHZ256 : avx512_fma3_231_round<opc, OpcodeStr#"ph", OpNodeRnd,
                                      SchedWriteFMA.YMM, v16f16x_info>,
                T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  defm PSZ256 : avx512_fma3_231_round<opc, OpcodeStr#"ps", OpNodeRnd,
                                      SchedWriteFMA.YMM, v8f32x_info>,
                T8, PD, EVEX_CD8<32, CD8VF>;
  defm PDZ256 : avx512_fma3_231_round<opc, OpcodeStr#"pd", OpNodeRnd,
                                      SchedWriteFMA.YMM, v4f64x_info>,
                T8, PD, EVEX_CD8<64, CD8VF>, REX_W;
}
291
// Bundles the three FMA operand orderings. Each sub-expansion is named
// NAME#132 / NAME#213 / NAME#231, receives its own opcode, and gets OpcodeStr
// with "132" / "213" / "231" appended; OpNodeRnd is shared by all three.
multiclass avx256_fma3_round3<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                              string OpcodeStr, SDNode OpNodeRnd> {
  defm NAME#132 : avx256_fma3_132_round<opc132, OpcodeStr#"132", OpNodeRnd>;
  defm NAME#213 : avx256_fma3_213_round<opc213, OpcodeStr#"213", OpNodeRnd>;
  defm NAME#231 : avx256_fma3_231_round<opc231, OpcodeStr#"231", OpNodeRnd>;
}
298
// Instantiations of the avx256_* multiclasses above. Every record created in
// this scope is gated on HasAVX10_2 and has hasEVEX_U = 1 and
// OpEnc = EncEVEX.
let Predicates = [HasAVX10_2],
    hasEVEX_U = 1,
    OpEnc = EncEVEX in {
  // Binary arithmetic (rounding forms) and min/max (SAE forms).
  defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd,
                                      SchedWriteFAddSizes>;
  defm VMUL : avx256_fp_binop_p_round<0x59, "vmul", X86fmulRnd,
                                      SchedWriteFMulSizes>;
  defm VSUB : avx256_fp_binop_p_round<0x5C, "vsub", X86fsubRnd,
                                      SchedWriteFAddSizes>;
  defm VDIV : avx256_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd,
                                      SchedWriteFDivSizes>;
  defm VMIN : avx256_fp_binop_p_sae<0x5D, "vmin", X86fminSAE,
                                    SchedWriteFCmpSizes>;
  defm VMAX : avx256_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE,
                                    SchedWriteFCmpSizes>;

  // Compare, fixupimm, getexp, reduce, rndscale, getmant, range, scalef, sqrt.
  defm VCMP : avx256_vcmp_p_sae<SchedWriteFCmp>, EVEX, VVVV;
  defm VFIXUPIMM : avx256_fixupimm_packed_all<0x54, "vfixupimm",
                                              SchedWriteFAdd>,
                   AVX512AIi8Base, EVEX, VVVV;
  defm VGETEXP : avx256_vgetexp<0x42, "vgetexp", X86fgetexpSAE,
                                SchedWriteFRnd>;
  defm VREDUCE : avx256_unary_fp_sae<"vreduce", 0x56, 0x56, X86VReduceSAE,
                                     SchedWriteFRnd>;
  defm VRNDSCALE : avx256_unary_fp_sae<"vrndscale", 0x08, 0x09,
                                       X86VRndScaleSAE, SchedWriteFRnd>;
  defm VGETMANT : avx256_unary_fp_sae<"vgetmant", 0x26, 0x26, X86VGetMantSAE,
                                      SchedWriteFRnd>;
  defm VRANGE : avx256_common_fp_sae_packed_imm<0x50, "vrange", X86VRangeSAE,
                                                SchedWriteFAdd>,
                AVX512AIi8Base, EVEX, VVVV;
  defm VSCALEF : avx256_fp_scalef_round<0x2C, "vscalef", X86scalefRnd,
                                        SchedWriteFAdd>;
  defm VSQRT : avx256_sqrt_packed_all_round<0x51, "vsqrt",
                                            SchedWriteFSqrtSizes>;

  // Conversions from integer sources.
  defm VCVTW2 : avx256_vcvtw_rc<"vcvtw2ph", X86VSintToFpRnd>, T_MAP5, XS;
  defm VCVTDQ2 : avx256_cvtdq2fp_rc<0x5B, "vcvtdq2", X86VSintToFpRnd,
                                    SchedWriteCvtDQ2PS>;
  defm VCVTQQ2 : avx256_cvtqq2fp_rc<"vcvtqq2", v4i64x_info>,
                 EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTUW2 : avx256_vcvtw_rc<"vcvtuw2ph", X86VUintToFpRnd>, T_MAP5, XD;
  defm VCVTUDQ2 : avx256_cvtudq2fp_rc<0x7A, "vcvtudq2", X86VUintToFpRnd,
                                      SchedWriteCvtDQ2PS>;
  defm VCVTUQQ2 : avx256_cvtuqq2fp_rc<"vcvtuqq2", v4i64x_info>,
                  EVEX_CD8<64, CD8VF>, REX_W;

  // Conversions from FP sources (rounding-control and SAE multiclasses).
  defm VCVTPD2 : avx256_vcvt_pd2<"vcvtpd2", v4f64x_info>,
                 EVEX_CD8<64, CD8VF>, REX_W;
  defm VCVTPS2 : avx256_vcvt_ps2<"vcvtps2">;
  defm VCVTPH2 : avx256_vcvt_ph2<"vcvtph2">;
  defm VCVTTPD2 : avx256_vcvtt_pd2<"vcvttpd2", v4f64x_info>,
                  EVEX_CD8<64, CD8VF>, TB, REX_W;
  defm VCVTTPS2 : avx256_vcvtt_ps2<"vcvttps2">, TB;
  defm VCVTTPH2 : avx256_vcvtt_ph2<"vcvttph2">;

  // Fused multiply-add families; opcodes are given in 132, 213, 231 order.
  defm VFMADD : avx256_fma3_round3<0x98, 0xA8, 0xB8, "vfmadd", X86FmaddRnd>;
  defm VFMSUB : avx256_fma3_round3<0x9A, 0xAA, 0xBA, "vfmsub", X86FmsubRnd>;
  defm VFMADDSUB : avx256_fma3_round3<0x96, 0xA6, 0xB6, "vfmaddsub",
                                      X86FmaddsubRnd>;
  defm VFMSUBADD : avx256_fma3_round3<0x97, 0xA7, 0xB7, "vfmsubadd",
                                      X86FmsubaddRnd>;
  defm VFNMADD : avx256_fma3_round3<0x9C, 0xAC, 0xBC, "vfnmadd", X86FnmaddRnd>;
  defm VFNMSUB : avx256_fma3_round3<0x9E, 0xAE, 0xBE, "vfnmsub", X86FnmsubRnd>;

  // The "*cph" multiply / multiply-add instructions are described with
  // v8f32x_info. The two multiplies additionally pass the constraint string
  // "@earlyclobber $dst".
  defm VFMULCPHZ256 : avx512_fp_round_packed<0xD6, "vfmulcph", x86vfmulcRnd,
                                             SchedWriteFMA.YMM, v8f32x_info,
                                             "", "@earlyclobber $dst">,
                      T_MAP6, XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPHZ256 : avx512_fp_round_packed<0xD6, "vfcmulcph", x86vfcmulcRnd,
                                              SchedWriteFMA.YMM, v8f32x_info,
                                              "", "@earlyclobber $dst">,
                       T_MAP6, XD, EVEX_CD8<32, CD8VF>;
  defm VFMADDCPHZ256 : avx512_cfmaop_round<0x56, "vfmaddcph", x86vfmaddcRnd,
                                           v8f32x_info>,
                       T_MAP6, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
  defm VFCMADDCPHZ256 : avx512_cfmaop_round<0x56, "vfcmaddcph", x86vfcmaddcRnd,
                                            v8f32x_info>,
                        T_MAP6, XD, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
}
342
343//-------------------------------------------------
344// AVX10 MINMAX instructions
345//-------------------------------------------------
346
// Packed MINMAX for a single vector type VTI, opcode 0x52. Produces three
// maskable forms that all take an i32u8imm third source:
//   rri  - register second source
//   rmi  - full-vector memory second source (VTI.MemOp / VTI.LdFrag)
//   rmbi - broadcast scalar memory second source (EVEX_B is set)
// All three use MXCSR, may raise FP exceptions and run in VTI.ExeDomain.
multiclass avx10_minmax_packed_base<string OpStr, X86VectorVTInfo VTI,
                                    SDNode OpNode> {
  let ExeDomain = VTI.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<0x52, MRMSrcReg, VTI,
                   (outs VTI.RC:$dst),
                   (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3),
                   OpStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                   (i32 timm:$src3)))>,
               EVEX, VVVV, Sched<[WriteFMAX]>;

    defm rmi : AVX512_maskable<0x52, MRMSrcMem, VTI,
                   (outs VTI.RC:$dst),
                   (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3),
                   OpStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (VTI.VT (OpNode VTI.RC:$src1, (VTI.LdFrag addr:$src2),
                                   (i32 timm:$src3)))>,
               EVEX, VVVV,
               Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;

    // The broadcast decoration is spliced into both assembly operand strings
    // right after ${src2}.
    defm rmbi : AVX512_maskable<0x52, MRMSrcMem, VTI,
                    (outs VTI.RC:$dst),
                    (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpStr,
                    !strconcat("$src3, ${src2}", VTI.BroadcastStr, ", $src1"),
                    !strconcat("$src1, ${src2}", VTI.BroadcastStr, ", $src3"),
                    (VTI.VT (OpNode VTI.RC:$src1,
                                    (VTI.BroadcastLdFrag addr:$src2),
                                    (i32 timm:$src3)))>,
                EVEX, VVVV, EVEX_B,
                Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
  }
}
372
// Register-only {sae} forms of packed MINMAX, opcode 0x52, for the 512-bit
// (Zrrib) and 256-bit (Z256rrib) members of the VTI family. Both set EVEX_B,
// clear the Uses list and are marked as not raising FP exceptions; the
// 256-bit form additionally sets hasEVEX_U = 1.
// NOTE(review): no Predicates are set inside this multiclass -- confirm they
// are supplied as intended at the instantiation site.
multiclass avx10_minmax_packed_sae<string OpStr, AVX512VLVectorVTInfo VTI,
                                   SDNode OpNode> {
  let Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Zrrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info512,
                     (outs VTI.info512.RC:$dst),
                     (ins VTI.info512.RC:$src1, VTI.info512.RC:$src2,
                          i32u8imm:$src3),
                     OpStr,
                     "$src3, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $src3",
                     (VTI.info512.VT
                        (OpNode (VTI.info512.VT VTI.info512.RC:$src1),
                                (VTI.info512.VT VTI.info512.RC:$src2),
                                (i32 timm:$src3)))>,
                 EVEX, VVVV, EVEX_B, EVEX_V512, Sched<[WriteFMAX]>;

    let hasEVEX_U = 1 in {
      defm Z256rrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info256,
                          (outs VTI.info256.RC:$dst),
                          (ins VTI.info256.RC:$src1, VTI.info256.RC:$src2,
                               i32u8imm:$src3),
                          OpStr,
                          "$src3, {sae}, $src2, $src1",
                          "$src1, $src2, {sae}, $src3",
                          (VTI.info256.VT
                             (OpNode (VTI.info256.VT VTI.info256.RC:$src1),
                                     (VTI.info256.VT VTI.info256.RC:$src2),
                                     (i32 timm:$src3)))>,
                      EVEX, VVVV, EVEX_B, EVEX_V256, Sched<[WriteFMAX]>;
    }
  }
}
392
// Expands avx10_minmax_packed_base for all three widths of the VTI family.
// The 512-bit form (Z) is gated on HasAVX10_2_512; the 256-bit (Z256) and
// 128-bit (Z128) forms are gated on HasAVX10_2.
multiclass avx10_minmax_packed<string OpStr, AVX512VLVectorVTInfo VTI,
                               SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx10_minmax_packed_base<OpStr, VTI.info512, OpNode>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_minmax_packed_base<OpStr, VTI.info256, OpNode>,
                EVEX_V256;
    defm Z128 : avx10_minmax_packed_base<OpStr, VTI.info128, OpNode>,
                EVEX_V128;
  }
}
401
// Scalar MINMAX, opcode 0x53, gated on HasAVX10_2 and run in _.ExeDomain.
// Produces:
//   rri / rmi (plain defs)    - isCodeGenOnly forms on the _.FRC register
//                               class, matched with OpNode
//   rri / rmi (maskable defms) - VR128X forms matched with OpNode; "_Int" is
//                               passed as the last AVX512_maskable argument
//   rrib                      - VR128X {sae} form matched with OpNodeSAE,
//                               EVEX_B set, Uses cleared and
//                               mayRaiseFPException = 0
multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
                               SDNode OpNodeSAE> {
  let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
    let mayRaiseFPException = 1 in {
      // Patterns on the scalar FP register class.
      let isCodeGenOnly = 1 in {
        def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
                      (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                      OpStr#"\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                      [(set _.FRC:$dst,
                            (OpNode _.FRC:$src1, _.FRC:$src2,
                                    (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX]>;

        def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
                      (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpStr#"\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                      [(set _.FRC:$dst,
                            (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
                                    (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
      }

      // Maskable forms on VR128X.
      defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                     (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                     OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                     (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   (i32 timm:$src3))),
                     0, 0, 0, vselect_mask, "", "_Int">,
                 Sched<[WriteFMAX]>;

      defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
                     (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                     OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                     (_.VT (OpNode (_.VT _.RC:$src1),
                                   (_.ScalarIntMemFrags addr:$src2),
                                   (i32 timm:$src3))),
                     0, 0, 0, vselect_mask, "", "_Int">,
                 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    }

    // {sae} register form.
    let Uses = []<Register>, mayRaiseFPException = 0 in {
      defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                      OpStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       (i32 timm:$src3))),
                      0, 0, 0, vselect_mask, "", "_Int">,
                  Sched<[WriteFMAX]>, EVEX_B;
    }
  }
}
447
448
// Packed BF16 form: avx10_minmax_packed only (no {sae} multiclass), with
// mayRaiseFPException overridden to 0 for this instantiation.
let mayRaiseFPException = 0 in {
  defm VMINMAXBF16 : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info,
                                         X86vminmax>,
                     AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;
}

// Packed f64 / f16 / f32 forms: each combines avx10_minmax_packed with the
// {sae} register forms from avx10_minmax_packed_sae.
defm VMINMAXPD : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info,
                                     X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info,
                                         X86vminmaxSae>,
                 AVX512PDIi8Base, REX_W, TA, EVEX_CD8<64, CD8VF>;

defm VMINMAXPH : avx10_minmax_packed<"vminmaxph", avx512vl_f16_info,
                                     X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxph", avx512vl_f16_info,
                                         X86vminmaxSae>,
                 AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;

defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info,
                                     X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info,
                                         X86vminmaxSae>,
                 AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;

// Scalar f64 / f16 / f32 forms, all VEX_LIG with a CD8VT1 tuple. Only the f16
// form adds TA; only the f64 form adds REX_W.
defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs,
                                     X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>,
                 REX_W;
defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs,
                                     X86vminmaxsSae>,
                 AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>,
                 TA;
defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs,
                                     X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
471
472//-------------------------------------------------
473// AVX10 SATCVT instructions
474//-------------------------------------------------
475
// Maskable register (rr), full-vector memory (rm) and broadcast memory (rmb)
// forms of a single-source conversion from SrcInfo's vector type to
// DestInfo's vector type. All three forms select MaskNode.
multiclass avx10_sat_cvt_rmb<bits<8> Opc, string OpStr,
                             X86FoldableSchedWrite sched,
                             X86VectorVTInfo DestInfo,
                             X86VectorVTInfo SrcInfo,
                             SDNode MaskNode> {
  // Register source.
  defm rr : AVX512_maskable<Opc, MRMSrcReg, DestInfo,
                            (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src),
                            OpStr, "$src", "$src",
                            (DestInfo.VT
                              (MaskNode (SrcInfo.VT SrcInfo.RC:$src)))>,
            Sched<[sched]>;

  // Full-width vector load folded into the conversion.
  defm rm : AVX512_maskable<Opc, MRMSrcMem, DestInfo,
                            (outs DestInfo.RC:$dst),
                            (ins SrcInfo.MemOp:$src),
                            OpStr, "$src", "$src",
                            (DestInfo.VT
                              (MaskNode
                                (SrcInfo.VT (SrcInfo.LdFrag addr:$src))))>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Scalar load broadcast to every element (EVEX.b set).
  defm rmb : AVX512_maskable<Opc, MRMSrcMem, DestInfo,
                             (outs DestInfo.RC:$dst),
                             (ins SrcInfo.ScalarMemOp:$src),
                             OpStr,
                             "${src}"#SrcInfo.BroadcastStr,
                             "${src}"#SrcInfo.BroadcastStr,
                             (DestInfo.VT
                               (MaskNode
                                 (SrcInfo.VT
                                   (SrcInfo.BroadcastLdFrag addr:$src))))>,
             EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
496
// Conversion with rounding control (RC).
// Defines the register-only embedded-rounding forms: Zrrb for 512-bit vectors
// (HasAVX10_2_512) and Z256rrb for 256-bit vectors (HasAVX10_2, with
// hasEVEX_U set). The rounding mode is the extra AVX512RC:$rc operand and is
// passed to MaskNode as an i32 target immediate.
// NOTE(review): only Zrrb declares Uses = [MXCSR]; Z256rrb does not. Confirm
// whether that difference is intentional.
multiclass avx10_sat_cvt_rc<bits<8> Opc, string OpStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo DestInfo,
                            AVX512VLVectorVTInfo SrcInfo,
                            SDNode MaskNode> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
    defm Zrrb
        : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                          (outs DestInfo.info512.RC:$dst),
                          (ins SrcInfo.info512.RC:$src, AVX512RC:$rc),
                          OpStr, "$rc, $src", "$src, $rc",
                          (DestInfo.info512.VT
                            (MaskNode
                              (SrcInfo.info512.VT SrcInfo.info512.RC:$src),
                              (i32 timm:$rc)))>,
          Sched<[sched.ZMM]>, EVEX, EVEX_RC, EVEX_B;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in
    defm Z256rrb
        : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info256,
                          (outs DestInfo.info256.RC:$dst),
                          (ins SrcInfo.info256.RC:$src, AVX512RC:$rc),
                          OpStr, "$rc, $src", "$src, $rc",
                          (DestInfo.info256.VT
                            (MaskNode
                              (SrcInfo.info256.VT SrcInfo.info256.RC:$src),
                              (i32 timm:$rc)))>,
          Sched<[sched.YMM]>, EVEX, EVEX_RC, EVEX_B;
}
521
// Conversion with SAE.
// Defines the register-only {sae} forms: Zrrb for 512-bit vectors
// (HasAVX10_2_512) and Z256rrb for 256-bit vectors (HasAVX10_2, with
// hasEVEX_U set). Both select Node on the plain register source.
// NOTE(review): only Zrrb declares Uses = [MXCSR]; Z256rrb does not. Confirm
// whether that difference is intentional.
multiclass avx10_sat_cvt_sae<bits<8> Opc, string OpStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo DestInfo,
                             AVX512VLVectorVTInfo SrcInfo,
                             SDNode Node> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
    defm Zrrb
        : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                          (outs DestInfo.info512.RC:$dst),
                          (ins SrcInfo.info512.RC:$src),
                          OpStr, "{sae}, $src", "$src, {sae}",
                          (DestInfo.info512.VT
                            (Node
                              (SrcInfo.info512.VT SrcInfo.info512.RC:$src)))>,
          Sched<[sched.ZMM]>, EVEX, EVEX_B;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in
    defm Z256rrb
        : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info256,
                          (outs DestInfo.info256.RC:$dst),
                          (ins SrcInfo.info256.RC:$src),
                          OpStr, "{sae}, $src", "$src, {sae}",
                          (DestInfo.info256.VT
                            (Node
                              (SrcInfo.info256.VT SrcInfo.info256.RC:$src)))>,
          Sched<[sched.YMM]>, EVEX, EVEX_B;
}
544
// Instantiates avx10_sat_cvt_rmb (rr/rm/rmb) at all three vector lengths:
// Z (512-bit) requires HasAVX10_2_512; Z256 and Z128 require HasAVX10_2.
multiclass avx10_sat_cvt_base<bits<8> Opc, string OpStr,
                              X86SchedWriteWidths sched,
                              SDNode MaskNode,
                              AVX512VLVectorVTInfo DestInfo,
                              AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx10_sat_cvt_rmb<Opc, OpStr, sched.ZMM, DestInfo.info512,
                               SrcInfo.info512, MaskNode>,
             EVEX, EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_sat_cvt_rmb<Opc, OpStr, sched.YMM, DestInfo.info256,
                                  SrcInfo.info256, MaskNode>,
                EVEX, EVEX_V256;
    defm Z128 : avx10_sat_cvt_rmb<Opc, OpStr, sched.XMM, DestInfo.info128,
                                  SrcInfo.info128, MaskNode>,
                EVEX, EVEX_V128;
  }
}
566
// BF16 sources: only the rr/rm/rmb forms from avx10_sat_cvt_base.
defm VCVTBF162IBS
    : avx10_sat_cvt_base<0x69, "vcvtbf162ibs", SchedWriteVecIMul,
                         X86vcvtp2ibs, avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTBF162IUBS
    : avx10_sat_cvt_base<0x6b, "vcvtbf162iubs", SchedWriteVecIMul,
                         X86vcvtp2iubs, avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

// FP16 sources: base forms plus embedded rounding-control forms.
defm VCVTPH2IBS
    : avx10_sat_cvt_base<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                         X86vcvtp2ibs, avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_rc<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                       avx512vl_i16_info, avx512vl_f16_info, X86vcvtp2ibsRnd>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2IUBS
    : avx10_sat_cvt_base<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                         X86vcvtp2iubs, avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_rc<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                       avx512vl_i16_info, avx512vl_f16_info, X86vcvtp2iubsRnd>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

// FP32 sources: base forms plus embedded rounding-control forms.
defm VCVTPS2IBS
    : avx10_sat_cvt_base<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                         X86vcvtp2ibs, avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_rc<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                       avx512vl_i32_info, avx512vl_f32_info, X86vcvtp2ibsRnd>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTPS2IUBS
    : avx10_sat_cvt_base<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                         X86vcvtp2iubs, avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_rc<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                       avx512vl_i32_info, avx512vl_f32_info, X86vcvtp2iubsRnd>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

// Truncating variants, BF16 sources: base forms only.
defm VCVTTBF162IBS
    : avx10_sat_cvt_base<0x68, "vcvttbf162ibs", SchedWriteVecIMul,
                         X86vcvttp2ibs, avx512vl_i16_info, avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTBF162IUBS
    : avx10_sat_cvt_base<0x6a, "vcvttbf162iubs", SchedWriteVecIMul,
                         X86vcvttp2iubs, avx512vl_i16_info,
                         avx512vl_bf16_info>,
      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

// Truncating variants, FP16 sources: base forms plus {sae} forms.
defm VCVTTPH2IBS
    : avx10_sat_cvt_base<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                         X86vcvttp2ibs, avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_sae<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                        avx512vl_i16_info, avx512vl_f16_info,
                        X86vcvttp2ibsSAE>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2IUBS
    : avx10_sat_cvt_base<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                         X86vcvttp2iubs, avx512vl_i16_info, avx512vl_f16_info>,
      avx10_sat_cvt_sae<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                        avx512vl_i16_info, avx512vl_f16_info,
                        X86vcvttp2iubsSAE>,
      AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

// Truncating variants, FP32 sources: base forms plus {sae} forms.
defm VCVTTPS2IBS
    : avx10_sat_cvt_base<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                         X86vcvttp2ibs, avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_sae<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                        avx512vl_i32_info, avx512vl_f32_info,
                        X86vcvttp2ibsSAE>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2IUBS
    : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                         X86vcvttp2iubs, avx512vl_i32_info, avx512vl_f32_info>,
      avx10_sat_cvt_sae<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                        avx512vl_i32_info, avx512vl_f32_info,
                        X86vcvttp2iubsSAE>,
      AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
644
645//-------------------------------------------------
646// AVX10 SATCVT-DS instructions
647//-------------------------------------------------
648
// Convert Double to Signed/Unsigned Doubleword with truncation.
// Z (512-bit) gets the regular and {sae} forms under HasAVX10_2_512. Z128 and
// Z256 get the regular forms under HasAVX10_2; Z128 is instantiated with
// null_frag so it carries no selection patterns of its own. Z256 additionally
// gets a {sae} form with hasEVEX_U set. The trailing InstAliases accept the
// explicit "x"/"y" mnemonic suffixes in the "att" variant.
multiclass avx10_cvttpd2dqs<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM,
                               "{1to2}", "{x}", f128mem, VK2WM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                               OpNode, MaskOpNode, sched.YMM,
                               "{1to4}", "{y}">,
                EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                                   OpNodeSAE, sched.YMM>,
                EVEX_V256;
  }

  // "x"-suffixed aliases for the 128-bit source forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr")
                     VR128X:$dst, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk")
                     VR128X:$dst, VK2WM:$mask, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz")
                     VR128X:$dst, VK2WM:$mask, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb")
                     VR128X:$dst, f64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk")
                     VR128X:$dst, VK2WM:$mask, f64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz")
                     VR128X:$dst, VK2WM:$mask, f64mem:$src),
                  0, "att">;

  // "y"-suffixed aliases for the 256-bit source forms, including {sae}.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr")
                     VR128X:$dst, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst|$dst, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrb")
                     VR128X:$dst, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk")
                     VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}}|$dst {${mask}}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbk")
                     VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz")
                     VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbkz")
                     VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb")
                     VR128X:$dst, f64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk")
                     VR128X:$dst, VK4WM:$mask, f64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz")
                     VR128X:$dst, VK4WM:$mask, f64mem:$src),
                  0, "att">;
}
724
// Convert Double to Signed/Unsigned Quadword with truncation, saturation
// enabled.
// Z (512-bit) gets the regular and {sae} forms under HasAVX10_2_512; Z128 and
// Z256 get the regular forms under HasAVX10_2; Z256 additionally gets a {sae}
// form with hasEVEX_U set.
multiclass avx10_cvttpd2qqs<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode, MaskOpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                                   OpNodeRnd, sched.YMM>,
                EVEX_V256;
  }
}
746
// Convert Float to Signed/Unsigned Quadword with truncation.
// Z (512-bit) gets the regular and {sae} forms under HasAVX10_2_512; Z128 and
// Z256 get the regular forms under HasAVX10_2; Z256 additionally gets a {sae}
// form with hasEVEX_U set. The Z128 memory form takes an f64mem operand and
// supplies explicit load patterns built from a 64-bit scalar load
// (loadf64 -> scalar_to_vector -> bc_v4f32).
multiclass avx10_cvttps2qqs<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128
        : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                         OpNode, MaskOpNode, sched.XMM,
                         "{1to2}", "", f64mem, VK2WM,
                         (v2i64 (OpNode
                           (bc_v4f32
                             (v2f64 (scalar_to_vector
                               (loadf64 addr:$src)))))),
                         (v2i64 (MaskOpNode
                           (bc_v4f32
                             (v2f64 (scalar_to_vector
                               (loadf64 addr:$src))))))>,
          EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                                   OpNodeRnd, sched.YMM>,
                EVEX_V256;
  }
}
774
// Convert Float to Signed/Unsigned Doubleword with truncation.
// Z (512-bit) gets the regular and {sae} forms under HasAVX10_2_512; Z128 and
// Z256 get the regular forms under HasAVX10_2; Z256 additionally gets a {sae}
// form with hasEVEX_U set.
multiclass avx10_cvttps2dqs<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode, MaskOpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                                   OpNodeSAE, sched.YMM>,
                EVEX_V256;
  }
}
798
// Packed truncating saturating conversions (SATCVT-DS), all in MAP5.
// Opcode 0x6D is the signed variant and 0x6C the unsigned one. The
// doubleword-destination forms use the PS prefix class and the
// quadword-destination forms use PD.
//
// VCVTTPD2DQS previously listed both PD and a later PS. The later class
// overrides the earlier one, so PD had no effect; it is dropped here so the
// definition reads like VCVTTPD2UDQS.
defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>,
                                    REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>,
                                     REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,PS,
                                    EVEX_CD8<32, CD8VF>;
defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>, T_MAP5,PS,
                                     EVEX_CD8<32, CD8VF>;
defm VCVTTPD2QQS : avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
                                    EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,PD,
                                    EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
                                     EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>, T_MAP5,PD,
                                     EVEX_CD8<32, CD8VH>;
831
let Predicates = [HasAVX10_2] in {
// Selection patterns mapping the saturating FP-to-integer nodes
// (fp_to_sint_sat, fp_to_uint_sat, X86fp2sisat, X86fp2uisat) onto the
// unmasked register forms of the SATCVT-DS instructions.
// NOTE(review): the VR512 patterns below sit under HasAVX10_2 rather than
// HasAVX10_2_512, although the Z instructions themselves are defined under
// HasAVX10_2_512 — confirm this is intended.

// VCVTTPD2DQS
def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2DQSZrr VR512:$src)>;

// VCVTTPD2QQS
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2QQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2QQSZrr VR512:$src)>;

// VCVTTPD2UDQS
def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2UDQSZrr VR512:$src)>;

// VCVTTPD2UQQS
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2UQQSZrr VR512:$src)>;

// VCVTTPS2DQS
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2DQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2DQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2DQSZrr VR512:$src)>;

// VCVTTPS2QQS
def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
          (VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2QQSZrr VR256X:$src)>;

// VCVTTPS2UDQS
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2UDQSZrr VR512:$src)>;

// VCVTTPS2UQQS
def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
          (VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2UQQSZrr VR256X:$src)>;

// Special patterns to allow use of X86mcvttp2sis/X86mcvttp2uis for masking.
// The patterns on the VCVTTPD2DQSZ128/VCVTTPD2UDQSZ128 instructions themselves
// have been disabled with null_frag, so the unmasked register, load and
// broadcast-load forms are matched here as well.

// Patterns VCVTTPD2DQSZ128
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
          (VCVTTPD2DQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2DQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
           (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>;

// Patterns VCVTTPD2UDQSZ128
// Mirrors the signed group above: register, full-vector load, broadcast load.
// (The unmasked load pattern replaces a duplicated broadcast pattern, which
// had left the plain loadv2f64 case without a folding pattern.)
def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))),
          (VCVTTPD2UDQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2UDQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
           (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
952
// Convert scalar float/double to signed/unsigned int 32/64 with truncation
// and saturation.
// All forms require HasAVX10_2 and take their ExeDomain from _SrcRC:
//   rr / rm   - isCodeGenOnly forms on the scalar FP register class (FRC) or a
//               scalar load; they select OpNode with _DstRC.EltVT as the
//               second operand.
//   rr_Int    - vector-register source, selects OpNodeInt.
//   rrb_Int   - {sae} variant (EVEX.b), selects OpNodeSAE; declares
//               Uses = [MXCSR] and does not mix in SIMD_EXC.
//   rm_Int    - memory source via ScalarIntMemFrags, selects OpNodeInt.
multiclass avx10_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                          X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                          SDNode OpNodeInt, SDNode OpNodeSAE,
                          X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg,
                      (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst,
                            (OpNode _SrcRC.FRC:$src, _DstRC.EltVT))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem,
                      (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst,
                            (OpNode (_SrcRC.ScalarLdFrag addr:$src),
                                    _DstRC.EltVT))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
               SIMD_EXC;
    }

    def rr_Int : AVX512<opc, MRMSrcReg,
                        (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in {
      def rrb_Int : AVX512<opc, MRMSrcReg,
                           (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                           asm # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                           [(set _DstRC.RC:$dst,
                                 (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                    EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
    }

    def rm_Int : AVX512<opc, MRMSrcMem,
                        (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt
                                (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  }
}
987
// Scalar truncating saturating conversions, all in MAP5.
// Opcode 0x6D is the signed variant and 0x6C the unsigned one; the *64S
// records use the same mnemonic as their 32-bit counterparts and add REX_W
// with an i64 destination.

// Signed, single-precision source.
defm VCVTTSS2SIS
    : avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
                     fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE,
                     WriteCvtSS2I>,
      T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64S
    : avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
                     fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE,
                     WriteCvtSS2I>,
      REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>;

// Signed, double-precision source.
defm VCVTTSD2SIS
    : avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
                     fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE,
                     WriteCvtSD2I>,
      T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64S
    : avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
                     fp_to_sint_sat, X86cvttss2Int, X86cvttss2IntSAE,
                     WriteCvtSD2I>,
      REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>;

// Unsigned, single-precision source.
defm VCVTTSS2USIS
    : avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
                     fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE,
                     WriteCvtSS2I>,
      T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64S
    : avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
                     fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE,
                     WriteCvtSS2I>,
      T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>;

// Unsigned, double-precision source.
defm VCVTTSD2USIS
    : avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
                     fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE,
                     WriteCvtSD2I>,
      T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64S
    : avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
                     fp_to_uint_sat, X86cvttss2UInt, X86cvttss2UIntSAE,
                     WriteCvtSD2I>,
      T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>;
1020
1021//-------------------------------------------------
1022// AVX10 CONVERT instructions
1023//-------------------------------------------------
1024
// Register-register form of a two-source convert that takes an explicit
// rounding-control operand.
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   sched      - scheduling class attached to the record.
//   _Src       - vector type info shared by both source operands.
//   _          - vector type info of the destination.
//   OpNodeRnd  - node matched against ($src1, $src2, (i32 timm:$rc)).
// Defines a single "rrb" record through AVX512_maskable; it is tagged
// EVEX_B + EVEX_RC with the PD prefix and lists MXCSR in Uses.
1025multiclass avx10_cvt2ps2ph_rc<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
1026                              X86VectorVTInfo _Src, X86VectorVTInfo _,
1027                              SDNode OpNodeRnd> {
1028  let Uses = [MXCSR] in
1029    defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1030                               (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
1031                               "$rc, $src2, $src1", "$src1, $src2, $rc",
1032                               (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
1033                                                (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
1034                              EVEX, VVVV, EVEX_B, EVEX_RC, PD, Sched<[sched]>;
1035}
1036
1037//TODO: Merge into avx512_binop_all, difference is rounding control added here.
// Two-source convert instantiated for all three vector lengths.
//   opc, OpcodeStr - opcode byte and assembly mnemonic.
//   sched          - per-width scheduling classes (.ZMM/.YMM/.XMM are used).
//   _SrcVTInfo     - type info for the sources (info512/info256/info128).
//   _DstVTInfo     - type info for the destination.
//   OpNode         - node for the avx512_binop_rm2 forms.
//   OpNodeRnd      - node for the rounding-control forms.
// Records produced:
//   Z    (HasAVX10_2_512) - avx512_binop_rm2 forms plus the rounding-control
//                           form from avx10_cvt2ps2ph_rc.
//   Z256, Z128 (HasAVX10_2) - avx512_binop_rm2 forms only.
//   Z256 (HasAVX10_2, hasEVEX_U = 1) - an extra rounding-control form.
1038multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
1039                           X86SchedWriteWidths sched,
1040                           AVX512VLVectorVTInfo _SrcVTInfo,
1041                           AVX512VLVectorVTInfo _DstVTInfo,
1042                           SDNode OpNode, SDNode OpNodeRnd> {
1043  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
1044    defm Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
1045                              _SrcVTInfo.info512, _DstVTInfo.info512,
1046                              _SrcVTInfo.info512>,
1047             avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.ZMM,
1048                                _SrcVTInfo.info512, _DstVTInfo.info512,
1049                                OpNodeRnd>,
1050             EVEX_V512, EVEX_CD8<32, CD8VF>;
1051  }
  // NOTE(review): unlike the Z group above, this group does not set
  // Uses = [MXCSR]; confirm whether avx512_binop_rm2 is expected to supply it.
1052  let Predicates = [HasAVX10_2] in {
1053    defm Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
1054                                 _SrcVTInfo.info256, _DstVTInfo.info256,
1055                                 _SrcVTInfo.info256>,
1056                                EVEX_V256, EVEX_CD8<32, CD8VF>;
1057    defm Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
1058                                 _SrcVTInfo.info128, _DstVTInfo.info128,
1059                                 _SrcVTInfo.info128>,
1060                EVEX_V128, EVEX_CD8<32, CD8VF>;
1061  }
1062
  // Second "Z256" defm: adds the rounding-control record (Z256rrb) next to the
  // Z256 records defined above. It carries no EVEX_V256 / EVEX_CD8 mixins.
  // NOTE(review): presumably intentional because hasEVEX_U = 1 -- confirm.
1063  let Predicates = [HasAVX10_2], hasEVEX_U = 1 in {
1064    defm Z256 : avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.YMM,
1065                                   _SrcVTInfo.info256, _DstVTInfo.info256,
1066                                   OpNodeRnd>;
1067  }
1068}
1069
// VCVT2PS2PHX: opcode 0x67 in the T8 map. Sources use avx512vl_f32_info, the
// destination uses avx512vl_f16_info. X86vfpround2 selects the plain forms and
// X86vfpround2Rnd the rounding-control forms.
1070defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",
1071                                   SchedWriteCvtPD2PS,
1072                                   avx512vl_f32_info, avx512vl_f16_info,
1073                                   X86vfpround2, X86vfpround2Rnd>, T8;
1074
// vcvt2ph2{bf8,hf8}[s]: two-source converts built from avx512_binop_all with
// avx512vl_f16_info and avx512vl_i8_info as the type-info arguments. All four
// use the XD prefix and EVEX_CD8<16, CD8VF>. Only vcvt2ph2bf8 is in the T8
// map; the other three are in T_MAP5.
// Opcodes: bf8/bf8s = 0x74, hf8 = 0x18, hf8s = 0x1b.
1075defm VCVT2PH2BF8 : avx512_binop_all<0x74, "vcvt2ph2bf8", SchedWriteCvtPD2PS,
1076                                     avx512vl_f16_info, avx512vl_i8_info,
1077                                     X86vcvt2ph2bf8, [HasAVX10_2_512], [HasAVX10_2]>,
1078                                    EVEX_CD8<16, CD8VF>, T8, XD;
1079defm VCVT2PH2BF8S : avx512_binop_all<0x74, "vcvt2ph2bf8s", SchedWriteCvtPD2PS,
1080                                      avx512vl_f16_info, avx512vl_i8_info,
1081                                      X86vcvt2ph2bf8s, [HasAVX10_2_512], [HasAVX10_2]>,
1082                                     EVEX_CD8<16, CD8VF>, T_MAP5, XD;
1083defm VCVT2PH2HF8 : avx512_binop_all<0x18, "vcvt2ph2hf8", SchedWriteCvtPD2PS,
1084                                     avx512vl_f16_info, avx512vl_i8_info,
1085                                     X86vcvt2ph2hf8, [HasAVX10_2_512], [HasAVX10_2]>,
1086                                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;
1087defm VCVT2PH2HF8S : avx512_binop_all<0x1b, "vcvt2ph2hf8s", SchedWriteCvtPD2PS,
1088                                      avx512vl_f16_info, avx512vl_i8_info,
1089                                      X86vcvt2ph2hf8s, [HasAVX10_2_512], [HasAVX10_2]>,
1090                                     EVEX_CD8<16, CD8VF>, T_MAP5, XD;
1091
1092//TODO: Merge into avx512_vcvt_fp, difference is one more source register here.
// Packed convert with two sources ($src1 register, $src2 register/memory) and
// a destination whose type info may differ from both sources.
//   OpCode, OpcodeStr - opcode byte and assembly mnemonic.
//   vt_dst            - destination type info.
//   vt_src1, vt_src2  - type info for the first and second source.
//   OpNode            - node used in the unmasked patterns.
//   MaskOpNode        - node used inside the vselect_mask patterns.
//   sched             - scheduling class for every record.
//   Broadcast, MemOp, MaskRC - default to vt_src2's BroadcastStr, MemOp and
//                       KRCWM.
//   LdDAG, MaskLdDAG  - default full-vector load patterns for the rm form,
//                       built from vt_src2.LdFrag.
// Defines rr, rm and rmb through AVX512_maskable_cvt. Each one supplies three
// patterns: unmasked, merge-masked (vselect_mask falling back to $src0) and
// zero-masked (vselect_mask falling back to ImmAllZerosV).
1093multiclass avx10_convert_3op_packed<bits<8> OpCode, string OpcodeStr,
1094                                    X86VectorVTInfo vt_dst, X86VectorVTInfo vt_src1,
1095                                    X86VectorVTInfo vt_src2, SDPatternOperator OpNode,
1096                                    SDPatternOperator MaskOpNode, X86FoldableSchedWrite sched,
1097                                    string Broadcast = vt_src2.BroadcastStr,
1098                                    X86MemOperand MemOp = vt_src2.MemOp,
1099                                    RegisterClass MaskRC = vt_src2.KRCWM,
1100                                    dag LdDAG = (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
1101                                                           (vt_src2.VT (vt_src2.LdFrag addr:$src2)))),
1102                                    dag MaskLdDAG = (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
1103                                                               (vt_src2.VT (vt_src2.LdFrag addr:$src2))))> {
  // Register-register form.
1104  defm rr : AVX512_maskable_cvt<OpCode, MRMSrcReg, vt_dst, (outs vt_dst.RC:$dst),
1105                      (ins vt_src1.RC:$src1, vt_src2.RC:$src2),
1106                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
1107                      (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
1108                      OpcodeStr, "$src2, $src1", "$src1, $src2",
1109                      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
1110                                         (vt_src2.VT vt_src2.RC:$src2))),
1111                      (vselect_mask MaskRC:$mask,
1112                        (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
1113                        (vt_src2.VT vt_src2.RC:$src2))),
1114                        vt_dst.RC:$src0),
1115                      (vselect_mask MaskRC:$mask,
1116                        (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
1117                        (vt_src2.VT vt_src2.RC:$src2))),
1118                        vt_dst.ImmAllZerosV)>,
1119                      EVEX, VVVV, Sched<[sched]>;
  // Register-memory form; $src2 is a MemOp matched via LdDAG / MaskLdDAG.
  // NOTE(review): uses Sched<[sched]> rather than a folded-load variant.
1120  let mayLoad = 1 in
1121  defm rm : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst, (outs vt_dst.RC:$dst),
1122                      (ins vt_src1.RC:$src1, MemOp:$src2),
1123                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
1124                      (ins MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
1125                      OpcodeStr, "$src2, $src1", "$src1, $src2",
1126                      LdDAG,
1127                      (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.RC:$src0),
1128                      (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.ImmAllZerosV)>,
1129                      EVEX, VVVV, Sched<[sched]>;
1130
  // Broadcast-memory form: $src2 is a scalar memory operand matched with
  // vt_src2.BroadcastLdFrag; the asm string appends the Broadcast suffix and
  // the record is tagged EVEX_B.
1131  let mayLoad = 1 in
1132  defm rmb : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst, (outs vt_dst.RC:$dst),
1133                      (ins vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
1134                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1,
1135                           vt_src2.ScalarMemOp:$src2),
1136                      (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
1137                      OpcodeStr,
1138                      "${src2}"#Broadcast#", $src1", "$src1, ${src2}"#Broadcast,
1139                      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT
1140                                  (vt_src2.BroadcastLdFrag addr:$src2)))),
1141                      (vselect_mask MaskRC:$mask,
1142                                       (vt_dst.VT
1143                                        (MaskOpNode
1144                                         (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT
1145                                          (vt_src2.BroadcastLdFrag addr:$src2)))),
1146                                       vt_dst.RC:$src0),
1147                      (vselect_mask MaskRC:$mask,
1148                                       (vt_dst.VT
1149                                        (MaskOpNode
1150                                         (vt_src1.VT vt_src1.RC:$src1),
1151                                         (vt_src2.VT
1152                                          (vt_src2.BroadcastLdFrag addr:$src2)))),
1153                                       vt_dst.ImmAllZerosV)>,
1154                      EVEX, VVVV, EVEX_B, Sched<[sched]>;
1155}
1156
1157//TODO: Merge into avx512_cvt_trunc
// Instantiates avx10_convert_3op_packed for 512/256/128-bit sources.
//   vt_dst  - type-info family for the result and for the first source.
//   vt_src  - type-info family for the second source.
//   OpNode  - node for unmasked selection.
//   MaskOpNode - node taking (src1, src2, passthru, mask); it is only used by
//                the explicit 128-bit patterns below.
//   bcast128, loadVT128, maskRC128 - default to vt_src.info128's
//                BroadcastLdFrag, LdFrag and KRCWM.
// The destination type info is one step narrower than the first source for
// Z (info256 vs info512) and Z256 (info128 vs info256). For Z128 both are
// info128.
1158multiclass avx10_convert_3op<bits<8> OpCode, string OpcodeStr,
1159           AVX512VLVectorVTInfo vt_dst, AVX512VLVectorVTInfo vt_src,
1160           X86SchedWriteWidths sched,
1161           SDPatternOperator OpNode,
1162           SDPatternOperator MaskOpNode,
1163           PatFrag bcast128 = vt_src.info128.BroadcastLdFrag,
1164           PatFrag loadVT128 = vt_src.info128.LdFrag,
1165           RegisterClass maskRC128 = vt_src.info128.KRCWM> {
  // Z and Z256 pass OpNode for both the OpNode and MaskOpNode slots.
1166  let Predicates = [HasAVX10_2_512] in
1167    defm Z : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info256,
1168               vt_dst.info512, vt_src.info512, OpNode, OpNode, sched.ZMM>,
1169               EVEX_V512, EVEX_CD8<16, CD8VF>;
1170  let Predicates = [HasAVX10_2] in {
1171    defm Z256 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
1172                  vt_dst.info256, vt_src.info256, OpNode, OpNode, sched.YMM>,
1173                  EVEX_V256, EVEX_CD8<16, CD8VF>;
1174    defm Z128 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
1175                  vt_dst.info128, vt_src.info128,
1176                  null_frag, null_frag, sched.XMM>,
1177                  EVEX_V128, EVEX_CD8<16, CD8VF>;
1178    // Special patterns to allow use of MaskOpNode for masking 128 version. Instruction
1179    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (k), zero-masked (kz).
1180    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
1181                                         (vt_src.info128.VT VR128X:$src2))),
1182              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src1, VR128X:$src2)>;
1183    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
1184                          (vt_src.info128.VT VR128X:$src2),
1185                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
1186              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask,
1187                          VR128X:$src1, VR128X:$src2)>;
1188    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
1189                          (vt_src.info128.VT VR128X:$src2),
1190                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
1191              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask,
1192                          VR128X:$src1, VR128X:$src2)>;
1193
    // Full-vector load source (loadVT128): unmasked, k, kz.
1194    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
1195                                         (loadVT128 addr:$src2))),
1196              (!cast<Instruction>(NAME # "Z128rm") VR128X:$src1, addr:$src2)>;
1197    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
1198                          (loadVT128 addr:$src2),
1199                          (vt_dst.info128.VT VR128X:$src0),
1200                          maskRC128:$mask),
1201              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask,
1202                          VR128X:$src1, addr:$src2)>;
1203    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
1204                          (loadVT128 addr:$src2),
1205                          vt_dst.info128.ImmAllZerosV,
1206                          maskRC128:$mask),
1207              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask,
1208                          VR128X:$src1, addr:$src2)>;
1209
    // Broadcast load source (bcast128): unmasked, k, kz.
1210    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
1211                                         (vt_src.info128.VT (bcast128 addr:$src2)))),
1212              (!cast<Instruction>(NAME # "Z128rmb") VR128X:$src1, addr:$src2)>;
1213    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
1214                          (vt_src.info128.VT (bcast128 addr:$src2)),
1215                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
1216              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask,
1217                           VR128X:$src1, addr:$src2)>;
1218    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
1219                          (vt_src.info128.VT (bcast128 addr:$src2)),
1220                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
1221              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask,
1222                           VR128X:$src1, addr:$src2)>;
1223  }
1224}
1225
// vcvtbiasph2{bf8,hf8}[s]: instantiations of avx10_convert_3op with
// avx512vl_i8_info as vt_dst and avx512vl_f16_info as vt_src, all with the PS
// prefix. Each passes an unmasked node (X86vcvtbiasph2*) and a masked node
// (X86vmcvtbiasph2*). Only vcvtbiasph2bf8 is in the T8 map; the rest are in
// T_MAP5. Opcodes: bf8/bf8s = 0x74, hf8 = 0x18, hf8s = 0x1b.
1226defm VCVTBIASPH2BF8 : avx10_convert_3op<0x74, "vcvtbiasph2bf8",
1227                                        avx512vl_i8_info, avx512vl_f16_info,
1228                                        SchedWriteCvtPD2PS,
1229                                        X86vcvtbiasph2bf8, X86vmcvtbiasph2bf8>,
1230                                        T8, PS;
1231defm VCVTBIASPH2BF8S : avx10_convert_3op<0x74, "vcvtbiasph2bf8s",
1232                                         avx512vl_i8_info, avx512vl_f16_info,
1233                                         SchedWriteCvtPD2PS,
1234                                         X86vcvtbiasph2bf8s, X86vmcvtbiasph2bf8s>,
1235                                         T_MAP5, PS;
1236defm VCVTBIASPH2HF8 : avx10_convert_3op<0x18, "vcvtbiasph2hf8",
1237                                        avx512vl_i8_info, avx512vl_f16_info,
1238                                        SchedWriteCvtPD2PS,
1239                                        X86vcvtbiasph2hf8, X86vmcvtbiasph2hf8>,
1240                                        T_MAP5, PS;
1241defm VCVTBIASPH2HF8S : avx10_convert_3op<0x1b, "vcvtbiasph2hf8s",
1242                                         avx512vl_i8_info, avx512vl_f16_info,
1243                                         SchedWriteCvtPD2PS,
1244                                         X86vcvtbiasph2hf8s, X86vmcvtbiasph2hf8s>,
1245                                         T_MAP5, PS;
1246
// vcvtph2{bf8,hf8}[s]: single-source converts built from avx512_cvt_trunc_ne
// with avx512vl_i8_info / avx512vl_f16_info, the XS prefix and
// EVEX_CD8<16, CD8VF>. They reuse the opcodes of the vcvtbiasph2* family
// (0x74, 0x74, 0x18, 0x1b). Only vcvtph2bf8 is in the T8 map; the rest are in
// T_MAP5.
1247defm VCVTPH2BF8 : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8", avx512vl_i8_info,
1248                                        avx512vl_f16_info, SchedWriteCvtPD2PS,
1249                                        X86vcvtph2bf8, X86vmcvtph2bf8,
1250                                        [HasAVX10_2], [HasAVX10_2_512]>,
1251                                        T8, XS, EVEX_CD8<16, CD8VF>;
1252
1253defm VCVTPH2BF8S : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8s", avx512vl_i8_info,
1254                                         avx512vl_f16_info, SchedWriteCvtPD2PS,
1255                                         X86vcvtph2bf8s, X86vmcvtph2bf8s,
1256                                         [HasAVX10_2], [HasAVX10_2_512]>,
1257                                         T_MAP5, XS, EVEX_CD8<16, CD8VF>;
1258
1259defm VCVTPH2HF8 : avx512_cvt_trunc_ne<0x18, "vcvtph2hf8", avx512vl_i8_info,
1260                                        avx512vl_f16_info, SchedWriteCvtPD2PS,
1261                                        X86vcvtph2hf8, X86vmcvtph2hf8,
1262                                        [HasAVX10_2], [HasAVX10_2_512]>,
1263                                        T_MAP5, XS, EVEX_CD8<16, CD8VF>;
1264
1265defm VCVTPH2HF8S : avx512_cvt_trunc_ne<0x1b, "vcvtph2hf8s", avx512vl_i8_info,
1266                                         avx512vl_f16_info, SchedWriteCvtPD2PS,
1267                                         X86vcvtph2hf8s, X86vmcvtph2hf8s,
1268                                         [HasAVX10_2], [HasAVX10_2_512]>,
1269                                         T_MAP5, XS, EVEX_CD8<16, CD8VF>;
1270
// Single-source packed convert with register and memory forms only (no
// broadcast form is defined here).
//   opc, OpcodeStr - opcode byte and assembly mnemonic.
//   _dest, _src    - destination and source type info.
//   OpNode         - node applied to the source value.
//   x86memop       - memory operand class for the rm form.
//   sched          - scheduling class; the rm form uses sched.Folded.
//   ld_dag         - load pattern for the rm form, default (load addr:$src).
// Both records come from AVX512_maskable_split, with the same OpNode pattern
// passed for both of its pattern arguments, and inherit _dest.ExeDomain.
1271multiclass avx10_convert_2op_nomb_packed<bits<8> opc, string OpcodeStr,
1272                                         X86VectorVTInfo _dest, X86VectorVTInfo _src,
1273                                         SDNode OpNode, X86MemOperand x86memop,
1274                                         X86FoldableSchedWrite sched,
1275                                         dag ld_dag = (load addr:$src)> {
1276  let ExeDomain = _dest.ExeDomain in {
1277  defm rr : AVX512_maskable_split<opc, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
1278                                  (ins _src.RC:$src), OpcodeStr, "$src", "$src",
1279                                  (OpNode (_src.VT _src.RC:$src)),
1280                                  (OpNode (_src.VT _src.RC:$src))>,
1281                                 Sched<[sched]>;
1282  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _dest, (outs _dest.RC:$dst),
1283                                  (ins x86memop:$src), OpcodeStr, "$src", "$src",
1284                                  (OpNode (_src.VT ld_dag)),
1285                                  (OpNode (_src.VT ld_dag))>,
1286                                 Sched<[sched.Folded]>;
1287  }
1288}
1289
// Instantiates avx10_convert_2op_nomb_packed for three vector lengths.
// The source type info and memory operand are narrower than the destination:
//   Z    : _dest.info512 <- _src.info256, f256mem
//   Z256 : _dest.info256 <- _src.info128, f128mem
//   Z128 : _dest.info128 <- _src.info128, f64mem
// NOTE(review): all three widths use WriteCvtPH2PSZ as the scheduling class;
// confirm whether per-width classes were intended for Z128/Z256.
1290multiclass avx10_convert_2op_nomb<string OpcodeStr, AVX512VLVectorVTInfo _dest,
1291                                  AVX512VLVectorVTInfo _src, bits<8> opc, SDNode OpNode> {
1292  let Predicates = [HasAVX10_2_512] in
1293  defm Z : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info512, _src.info256,
1294                                         OpNode, f256mem, WriteCvtPH2PSZ>, EVEX_V512;
1295  let Predicates = [HasAVX10_2] in {
1296  defm Z128 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info128, _src.info128,
1297                                            OpNode, f64mem, WriteCvtPH2PSZ>, EVEX_V128;
1298  defm Z256 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info256, _src.info128,
1299                                            OpNode, f128mem, WriteCvtPH2PSZ>, EVEX_V256;
1300  }
1301}
1302
// VCVTHF82PH: opcode 0x1e, avx512vl_f16_info destination and avx512vl_i8_info
// source, selected from X86vcvthf82ph, with EVEX_CD8<16, CD8VH>.
// NOTE(review): T_MAP5 follows AVX512XDIi8Base in the mixin list; presumably
// it overrides the opcode map set by that base class -- keep this order.
1303defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
1304                                         avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
1305                  AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;
1306
1307//-------------------------------------------------
1308// AVX10 BF16 instructions
1309//-------------------------------------------------
1310
1311// VADDBF16, VSUBBF16, VMULBF16, VDIVBF16, VMAXBF16, VMINBF16
// BF16 packed binary op selected from an intrinsic instead of a generic node.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic stem; also spliced into the intrinsic name.
//   sched        - scheduling classes (sched.PH.{ZMM,XMM,YMM} are used).
//   IsCommutable - forwarded to avx512_fp_packed (default 0).
// The intrinsic is looked up by name as
//   "int_x86_avx10_" # OpcodeStr # "bf16" # {512,128,256}
// and passed for both node arguments of avx512_fp_packed. All widths use
// T_MAP5, the PD prefix and EVEX_CD8<16, CD8VF>.
1312multiclass avx10_fp_binop_int_bf16<bits<8> opc, string OpcodeStr,
1313                                      X86SchedWriteSizes sched,
1314                                      bit IsCommutable = 0> {
1315  let Predicates = [HasAVX10_2_512] in
1316    defm Z : avx512_fp_packed<opc, OpcodeStr,
1317                              !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
1318                              !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
1319                              v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
1320                              T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1321  let Predicates = [HasAVX10_2] in {
1322    defm Z128 : avx512_fp_packed<opc, OpcodeStr,
1323                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
1324                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
1325                                 v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
1326                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1327    defm Z256 : avx512_fp_packed<opc, OpcodeStr,
1328                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
1329                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
1330                                 v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
1331                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1332  }
1333}
1334
// BF16 packed binary op selected from a generic node.
//   opc, OpcodeStr - opcode byte and mnemonic stem.
//   OpNode         - node for the first node argument of avx512_fp_packed.
//   sched          - scheduling classes (sched.PH.{ZMM,XMM,YMM} are used).
//   IsCommutable   - forwarded to avx512_fp_packed (default 0).
//   MaskOpNode     - second node argument; defaults to OpNode.
// Z needs HasAVX10_2_512; Z128/Z256 need HasAVX10_2. All widths use T_MAP5,
// the PD prefix and EVEX_CD8<16, CD8VF>.
1335multiclass avx10_fp_binop_bf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
1336                                X86SchedWriteSizes sched,
1337                                bit IsCommutable = 0,
1338                                SDPatternOperator MaskOpNode = OpNode> {
1339  let Predicates = [HasAVX10_2_512] in
1340    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
1341                              v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
1342                              T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1343  let Predicates = [HasAVX10_2] in {
1344    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
1345                                 v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
1346                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1347    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
1348                                 v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
1349                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1350  }
1351}
1352
// BF16 arithmetic. Every record in this group has Uses forced to an empty
// register list and mayRaiseFPException cleared.
// add/sub/mul/div select from the generic fadd/fsub/fmul/fdiv nodes; add and
// mul pass IsCommutable = 1. min/max go through the intrinsic-based
// multiclass and pass IsCommutable = 0.
1353let Uses = []<Register>, mayRaiseFPException = 0 in {
1354defm VADDBF16 : avx10_fp_binop_bf16<0x58, "vadd", fadd, SchedWriteFAddSizes, 1>;
1355defm VSUBBF16 : avx10_fp_binop_bf16<0x5C, "vsub", fsub, SchedWriteFAddSizes, 0>;
1356defm VMULBF16 : avx10_fp_binop_bf16<0x59, "vmul", fmul, SchedWriteFMulSizes, 1>;
1357defm VDIVBF16 : avx10_fp_binop_bf16<0x5E, "vdiv", fdiv, SchedWriteFDivSizes, 0>;
1358defm VMINBF16 : avx10_fp_binop_int_bf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
1359defm VMAXBF16 : avx10_fp_binop_int_bf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
1360}
1361
1362// VCOMISBF16
// Scalar BF16 compare, opcode 0x2F in T_MAP5 with the PD prefix. Records in
// this group declare Defs = [EFLAGS], have an empty Uses list, clear
// mayRaiseFPException and require HasAVX10_2.
// Two defms share the VCOMISBF16Z prefix:
//   * sse12_ord_cmp on FR16X with null_frag, i.e. no selection pattern yet
//     (see the TODO below).
//   * sse12_ord_cmp_int on VR128X / v8bf16 using X86comi, marked
//     isCodeGenOnly.
1363let Uses = []<Register>, mayRaiseFPException = 0,
1364  Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
1365  //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`
1366  //which may require extend supports on BFR16X, loadbf16, ...
1367  defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
1368                                   "comisbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
1369                                   VEX_LIG, EVEX_CD8<16, CD8VT1>;
1370
1371  let isCodeGenOnly = 1 in {
1372    defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
1373                                         sse_load_bf16, "comisbf16", SSEPackedSingle>,
1374                                         T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
1375  }
1376}
1377
1378// VCMPBF16
// Packed BF16 compare into a mask register, for one vector width.
//   sched - scheduling class (memory forms use sched.Folded and
//           sched.ReadAfterFold).
//   _     - vector type info; supplies the register classes, memory operands,
//           load fragments and the mnemonic suffix ("vcmp" # _.Suffix).
// Defines rri (register), rmi (full-vector load) and rmbi (broadcast load,
// EVEX_B), all with opcode 0xC2, a u8imm:$cc operand and a _.KRC result.
// Each passes an X86cmpm pattern followed by the matching X86cmpm_su pattern.
// mayRaiseFPException is cleared for all three.
1379multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1380  let mayRaiseFPException = 0 in {
  // NOTE(review): the trailing `1` is presumably the commutable flag of
  // AVX512_maskable_cmp; only the register form passes it -- confirm.
1381  defm rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1382                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1383                   "vcmp"#_.Suffix,
1384                   "$cc, $src2, $src1", "$src1, $src2, $cc",
1385                   (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1386                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1387                   1>, Sched<[sched]>;
1388
1389  defm rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1390                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
1391                "vcmp"#_.Suffix,
1392                "$cc, $src2, $src1", "$src1, $src2, $cc",
1393                (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
1394                         timm:$cc),
1395                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
1396                            timm:$cc)>,
1397                Sched<[sched.Folded, sched.ReadAfterFold]>;
1398
1399  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1400                (outs _.KRC:$dst),
1401                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1402                "vcmp"#_.Suffix,
1403                "$cc, ${src2}"#_.BroadcastStr#", $src1",
1404                "$src1, ${src2}"#_.BroadcastStr#", $cc",
1405                (X86cmpm (_.VT _.RC:$src1),
1406                         (_.VT (_.BroadcastLdFrag addr:$src2)),
1407                         timm:$cc),
1408                (X86cmpm_su (_.VT _.RC:$src1),
1409                            (_.VT (_.BroadcastLdFrag addr:$src2)),
1410                            timm:$cc)>,
1411                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
1412  }
1413}
1414
// Instantiates avx10_vcmp_common_bf16 per vector width:
// Z (HasAVX10_2_512, info512), Z128 and Z256 (HasAVX10_2, info128/info256),
// each with the matching sched.{ZMM,XMM,YMM} class and EVEX_V* mixin.
1415multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
1416  let Predicates = [HasAVX10_2_512] in
1417    defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
1418  let Predicates = [HasAVX10_2] in {
1419    defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
1420    defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
1421  }
1422}
1423
// VCMPBF16: packed BF16 compare using SchedWriteFCmp and avx512vl_bf16_info,
// with EVEX_CD8<16, CD8VF>.
// NOTE(review): TA comes last in the mixin list, after AVX512XDIi8Base;
// presumably it overrides the opcode map from that base -- keep this order.
1424defm VCMPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
1425                                 AVX512XDIi8Base, EVEX, VVVV,
1426                                 EVEX_CD8<16, CD8VF>, TA;
1427
1428
1429// VSQRTBF16
// Packed BF16 square root for all three widths, built from
// avx512_sqrt_packed. The mnemonic is OpcodeStr with "bf16" appended. All
// widths use the PD prefix, T_MAP5 and EVEX_CD8<16, CD8VF>, with the
// sched.PH.{ZMM,XMM,YMM} scheduling classes.
1430multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
1431                                  X86SchedWriteSizes sched> {
1432  let Predicates = [HasAVX10_2_512] in
1433  defm Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
1434                              sched.PH.ZMM, v32bf16_info>,
1435                              EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
1436  let Predicates = [HasAVX10_2] in {
1437    defm Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
1438                                   sched.PH.XMM, v8bf16x_info>,
1439                                   EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
1440    defm Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
1441                                   sched.PH.YMM, v16bf16x_info>,
1442                                   EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
1443  }
1444}
1445
// VSQRTBF16: opcode 0x51, mnemonic stem "vsqrt" (the multiclass appends
// "bf16"). Uses is emptied and mayRaiseFPException is cleared.
1446let Uses = []<Register>, mayRaiseFPException = 0 in
1447defm VSQRTBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrt", SchedWriteFSqrtSizes>;
1448
1449// VRSQRTBF16, VRCPBF16, VSQRTBF16, VGETEXPBF16
// Packed BF16 unary op built from avx512_fp14_p for all three widths.
//   opc, OpcodeStr - opcode byte and mnemonic stem ("bf16" is appended).
//   OpNode         - node to select.
//   sched          - per-width scheduling classes.
// The inner records are named BF16Z / BF16Z128 / BF16Z256, so the "BF16" part
// of the final record name comes from here rather than from the outer defm
// name. Opcode map, prefix and CD8 settings are left to the instantiation.
1450multiclass avx10_fp14_bf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
1451                            X86SchedWriteWidths sched> {
1452  let Predicates = [HasAVX10_2_512] in
1453  defm BF16Z : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
1454                             OpNode, sched.ZMM, v32bf16_info>,
1455                             EVEX_V512;
1456  let Predicates = [HasAVX10_2] in {
1457    defm BF16Z128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
1458                                  OpNode, sched.XMM, v8bf16x_info>,
1459                                  EVEX_V128;
1460    defm BF16Z256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
1461                                  OpNode, sched.YMM, v16bf16x_info>,
1462                                  EVEX_V256;
1463  }
1464}
1465
// vrsqrtbf16 (0x4E, X86rsqrt14), vrcpbf16 (0x4C, X86rcp14) and vgetexpbf16
// (0x42, X86fgetexp), all with EVEX_CD8<16, CD8VF>.
// VRSQRT and VRCP use T_MAP6 with the PS prefix. VGETEXP uses T_MAP5 and names
// no prefix mixin.
// NOTE(review): confirm the missing prefix on VGETEXP is intentional.
1466defm VRSQRT  : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
1467                                T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1468defm VRCP    : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
1469                                T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1470defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
1471                                T_MAP5, EVEX_CD8<16, CD8VF>;
1472
1473// VSCALEFBF16
// Packed BF16 scalef for all three widths, built from avx512_fp_scalef_p with
// the X86scalef node. All widths use T_MAP6, the PS prefix and
// EVEX_CD8<16, CD8VF>. OpcodeStr is forwarded unchanged.
1474multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
1475                                X86SchedWriteWidths sched> {
1476  let Predicates = [HasAVX10_2_512] in
1477    defm Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
1478                                EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1479  let Predicates = [HasAVX10_2] in {
1480    defm Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
1481                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
1482    defm Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
1483                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
1484  }
1485}
1486
// VSCALEFBF16: opcode 0x2C, mnemonic stem "vscalef", scheduled with
// SchedWriteFAdd. Uses is emptied and mayRaiseFPException is cleared.
1487let Uses = []<Register>, mayRaiseFPException = 0 in
1488defm VSCALEFBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;
1489
1490// VREDUCEBF16, VRNDSCALEBF16, VGETMANTBF16
// Packed BF16 unary op instantiated per width from
// avx512_unary_fp_packed_imm.
//   OpcodeStr  - mnemonic stem, forwarded unchanged.
//   _          - type-info family (info512/info128/info256 are used).
//   opc        - opcode byte.
//   OpNode, MaskOpNode - the two node arguments forwarded to the underlying
//                multiclass.
//   sched      - per-width scheduling classes.
// Opcode map, prefix and CD8 settings are supplied by the instantiation.
1491multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
1492            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
1493            SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
1494  let Predicates = [HasAVX10_2_512] in
1495    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
1496                                           sched.ZMM, _.info512>, EVEX_V512;
1497  let Predicates = [HasAVX10_2] in {
1498    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
1499                                           sched.XMM, _.info128>, EVEX_V128;
1500    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
1501                                           sched.YMM, _.info256>, EVEX_V256;
1502  }
1503}
1504
// vreduce (0x56), vrndscale (0x08) and vgetmant (0x26) on avx512vl_bf16_info,
// all scheduled with SchedWriteFRnd and carrying the mixins
// AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>. Uses is emptied and
// mayRaiseFPException is cleared for the whole group.
// VRNDSCALEBF16 is the only one whose two nodes differ: X86any_VRndScale
// versus X86VRndScale.
1505let Uses = []<Register>, mayRaiseFPException = 0 in {
1506defm VREDUCEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreduce", avx512vl_bf16_info, 0x56,
1507                            X86VReduce, X86VReduce, SchedWriteFRnd>,
1508                            AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1509defm VRNDSCALEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscale", avx512vl_bf16_info, 0x08,
1510                            X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
1511                            AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1512defm VGETMANTBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
1513                            X86VGetMant, X86VGetMant, SchedWriteFRnd>,
1514                            AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1515}
1516
1517// VFPCLASSBF16
// Packed BF16 fpclass for all three widths, built from avx512_vector_fpclass
// on avx512vl_bf16_info. Each width passes a one-letter string ("z", "x" or
// "y") and an empty Register list to the underlying multiclass.
1518multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
1519                                  X86SchedWriteWidths sched> {
1520  let Predicates = [HasAVX10_2_512] in
1521    defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
1522                                   avx512vl_bf16_info.info512, "z",
1523                                   []<Register>>, EVEX_V512;
1524  let Predicates = [HasAVX10_2] in {
1525    defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
1526                                      avx512vl_bf16_info.info128, "x",
1527                                      []<Register>>, EVEX_V128;
1528    defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
1529                                      avx512vl_bf16_info.info256, "y",
1530                                      []<Register>>, EVEX_V256;
1531  }
1532}
1533
// VFPCLASSBF16: opcode 0x66, mnemonic stem "vfpclass", scheduled with
// SchedWriteFCmp. It carries the same mixins as the unary-imm BF16 group
// (AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>).
1534defm VFPCLASSBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
1535                                      AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1536
// VF[,N]M[ADD,SUB][132,213,231]BF16
//
// 213 operand order: wraps avx512_fma3p_213_rm for the three BF16 vector
// types (v32bf16_info, v8bf16x_info, v16bf16x_info). The 512-bit variant is
// gated on HasAVX10_2_512, the 128/256-bit variants on HasAVX10_2.
//   opc        - opcode byte.
//   OpcodeStr  - full mnemonic.
//   OpNode     - node forwarded as the OpNode argument.
//   MaskOpNode - node forwarded as the MaskOpNode argument.
//   sched      - per-width scheduling classes.
multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, sched.ZMM,
                                 v32bf16_info>,
             EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>,
                EVEX_V128, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>,
                EVEX_V256, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  }
}
1554
// 213-form BF16 FMA family (opcodes 0xA8/0xAA/0xAC/0xAE). Each entry pairs the
// "any" node used for the unmasked pattern with the plain node used for the
// masked pattern. Defined with no implicit register uses and with
// mayRaiseFPException cleared.
let mayRaiseFPException = 0, Uses = []<Register> in {
  defm VFMADD213BF16
      : avx10_fma3p_213_bf16<0xA8, "vfmadd213bf16", any_fma, fma,
                             SchedWriteFMA>;
  defm VFMSUB213BF16
      : avx10_fma3p_213_bf16<0xAA, "vfmsub213bf16", X86any_Fmsub, X86Fmsub,
                             SchedWriteFMA>;
  defm VFNMADD213BF16
      : avx10_fma3p_213_bf16<0xAC, "vfnmadd213bf16", X86any_Fnmadd, X86Fnmadd,
                             SchedWriteFMA>;
  defm VFNMSUB213BF16
      : avx10_fma3p_213_bf16<0xAE, "vfnmsub213bf16", X86any_Fnmsub, X86Fnmsub,
                             SchedWriteFMA>;
}
1565
// 231 operand order: wraps avx512_fma3p_231_rm for the three BF16 vector
// types. Same layout as the 213 multiclass: the 512-bit variant requires
// HasAVX10_2_512, the 128/256-bit variants require HasAVX10_2.
//   opc        - opcode byte.
//   OpcodeStr  - full mnemonic.
//   OpNode     - node forwarded as the OpNode argument.
//   MaskOpNode - node forwarded as the MaskOpNode argument.
//   sched      - per-width scheduling classes.
multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, sched.ZMM,
                                 v32bf16_info>,
             EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>,
                EVEX_V128, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>,
                EVEX_V256, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  }
}
1582
// 231-form BF16 FMA family (opcodes 0xB8/0xBA/0xBC/0xBE), defined with no
// implicit register uses and with mayRaiseFPException cleared.
let mayRaiseFPException = 0, Uses = []<Register> in {
  defm VFMADD231BF16
      : avx10_fma3p_231_bf16<0xB8, "vfmadd231bf16", any_fma, fma,
                             SchedWriteFMA>;
  defm VFMSUB231BF16
      : avx10_fma3p_231_bf16<0xBA, "vfmsub231bf16", X86any_Fmsub, X86Fmsub,
                             SchedWriteFMA>;
  defm VFNMADD231BF16
      : avx10_fma3p_231_bf16<0xBC, "vfnmadd231bf16", X86any_Fnmadd, X86Fnmadd,
                             SchedWriteFMA>;
  defm VFNMSUB231BF16
      : avx10_fma3p_231_bf16<0xBE, "vfnmsub231bf16", X86any_Fnmsub, X86Fnmsub,
                             SchedWriteFMA>;
}
1593
// 132 operand order: wraps avx512_fma3p_132_rm for the three BF16 vector
// types. The 512-bit variant requires HasAVX10_2_512, the 128/256-bit
// variants require HasAVX10_2.
//   opc        - opcode byte.
//   OpcodeStr  - full mnemonic.
//   OpNode     - node forwarded as the OpNode argument.
//   MaskOpNode - node forwarded as the MaskOpNode argument.
//   sched      - per-width scheduling classes.
multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, sched.ZMM,
                                 v32bf16_info>,
             EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>,
                EVEX_V128, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>,
                EVEX_V256, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  }
}
1610
// 132-form BF16 FMA family (opcodes 0x98/0x9A/0x9C/0x9E), defined with no
// implicit register uses and with mayRaiseFPException cleared.
let mayRaiseFPException = 0, Uses = []<Register> in {
  defm VFMADD132BF16
      : avx10_fma3p_132_bf16<0x98, "vfmadd132bf16", any_fma, fma,
                             SchedWriteFMA>;
  defm VFMSUB132BF16
      : avx10_fma3p_132_bf16<0x9A, "vfmsub132bf16", X86any_Fmsub, X86Fmsub,
                             SchedWriteFMA>;
  defm VFNMADD132BF16
      : avx10_fma3p_132_bf16<0x9C, "vfnmadd132bf16", X86any_Fnmadd, X86Fnmadd,
                             SchedWriteFMA>;
  defm VFNMSUB132BF16
      : avx10_fma3p_132_bf16<0x9E, "vfnmsub132bf16", X86any_Fnmsub, X86Fnmsub,
                             SchedWriteFMA>;
}
1621
//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
// Scalar compare on a plain FP register class whose result is written to
// EFLAGS. Both forms are codegen-only and are flagged as possibly raising FP
// exceptions.
//   Opc       - opcode byte.
//   RC / VT   - register class and value type of the compared scalars.
//   OpNode    - compare node producing EFLAGS.
//   OpcodeStr - mnemonic.
//   x86memop  - memory operand kind of the "rm" form.
//   ld_frag   - load fragment folded by the "rm" form.
//   d         - execution domain.
//   sched     - scheduling class; defaults to WriteFComX.
multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
                        SDPatternOperator OpNode, string OpcodeStr,
                        X86MemOperand x86memop, PatFrag ld_frag,
                        Domain d, X86FoldableSchedWrite sched = WriteFComX> {
  let isCodeGenOnly = 1, mayRaiseFPException = 1, ExeDomain = d in {
    // Register-register compare.
    def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                    OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
             EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;

    // Register-memory compare; the second operand is loaded through ld_frag
    // and the instruction uses the folded-load scheduling classes.
    let mayLoad = 1 in
    def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS,
                          (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
             EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>,
             SIMD_EXC;
  }
}
1642
// Vector-register ("_Int") flavour of the COMEF compares: operands use the
// register class, value type, scalar memory operand and load fragment taken
// from the X86VectorVTInfo argument, and the result is written to EFLAGS.
//   Opc       - opcode byte.
//   _         - vector type info supplying RC, VT, ScalarMemOp and LdFrag.
//   OpNode    - compare node producing EFLAGS.
//   OpcodeStr - mnemonic.
//   d         - execution domain.
//   sched     - scheduling class; defaults to WriteFComX.
// Defines:
//   rr_Int  - register-register compare.
//   rm_Int  - register-memory compare.
//   rrb_Int - register-register compare with {sae} (EVEX_B), no pattern.
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
                            string OpcodeStr,
                            Domain d,
                            X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, mayRaiseFPException = 1 in {
    def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                        !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                        [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
                        EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      // The load-folding form uses the folded scheduling classes, matching
      // the "rm" form of avx10_com_ef; the plain register write class would
      // not account for the load.
      def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                          !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                          [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
                          EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
    // Assembler/disassembler-only {sae} variant.
    def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                        !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                        []>,
                        EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
  }
}
1664
// COMEF instantiations. Everything here defines EFLAGS, uses MXCSR and
// requires HasAVX10_2.
let Predicates = [HasAVX10_2], Defs = [EFLAGS], Uses = [MXCSR] in {
  // Codegen-only scalar forms on FR64X / FR16X / FR32X (opcode 0x2e).
  defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, "vucomxsd",
                                f64mem, loadf64, SSEPackedDouble>,
                   TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, "vucomxsh",
                                f16mem, loadf16, SSEPackedSingle>,
                   T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, "vucomxss",
                                f32mem, loadf32, SSEPackedSingle>,
                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Vector-register "_Int" forms of vcomx* (opcode 0x2f).
  defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd",
                                   SSEPackedDouble>,
                  TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512, "vcomxsh",
                                   SSEPackedSingle>,
                  T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512, "vcomxss",
                                   SSEPackedSingle>,
                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Vector-register "_Int" forms of vucomx* (opcode 0x2e). These reuse the
  // VUCOMX*Z prefixes above; the generated record names differ by suffix.
  defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512, "vucomxsd",
                                    SSEPackedDouble>,
                   TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512, "vucomxsh",
                                    SSEPackedSingle>,
                   T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, "vucomxss",
                                    SSEPackedSingle>,
                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
1694
//-------------------------------------------------
// AVX10 MOVZXC (COPY) instructions
//-------------------------------------------------
// vmovd / vmovw forms operating on VR128X, all requiring HasAVX10_2.
// Only the "rr" forms carry a selection pattern (X86vzmovl on v4i32 and
// v8i16 respectively). The "rm", "mr" and "rr2" forms have empty patterns,
// are marked isCodeGenOnly and are forced into the disassembler tables via
// ForceDisassemble. The "rr2" forms are additionally reachable through the
// "vmovd.s" / "vmovw.s" assembler aliases.
let Predicates = [HasAVX10_2] in {
  // ---- vmovd (32-bit element) ----
  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
                                    (v4i32 VR128X:$src))))]>, EVEX,
                                    Sched<[WriteVecMoveFromGpr]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
    def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                                      (ins i32mem:$src),
                                      "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                      EVEX_CD8<32, CD8VT1>,
                                      Sched<[WriteVecLoad]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
    def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
                                      (ins i32mem:$dst, VR128X:$src),
                                      "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                      EVEX_CD8<32, CD8VT1>,
                                      Sched<[WriteVecStore]>;

  // NOTE(review): this register form shares opcode 0xD6 with the MRMDestMem
  // store above but is declared MRMSrcReg - confirm the operand encoding
  // against the ISA reference.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
    def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
                                       (ins VR128X:$src),
                                       "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                       Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;

  // ---- vmovw (16-bit element) ----
  def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v8i16 (X86vzmovl
                                    (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
                                    Sched<[WriteVecMoveFromGpr]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
    def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                                      (ins i16mem:$src),
                                      "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                      EVEX_CD8<16, CD8VT1>, T_MAP5,
                                      Sched<[WriteVecLoad]>;

  // The store uses a 16-bit memory operand, matching the i16mem load form
  // and the EVEX_CD8<16, CD8VT1> element size declared on this record.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
    def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
                                      (ins i16mem:$dst, VR128X:$src),
                                      "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                      EVEX_CD8<16, CD8VT1>, T_MAP5,
                                      Sched<[WriteVecStore]>;

  // NOTE(review): this register form shares opcode 0x7E with the MRMDestMem
  // store above but is declared MRMSrcReg - confirm the operand encoding
  // against the ISA reference.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
    def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                       (ins VR128X:$src),
                                       "vmovw\t{$src, $dst|$dst, $src}",
                                       []>, EVEX, T_MAP5,
                                       Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
1758
// MOVRS
// One maskable load-form instruction ("m") for a single vector type. The
// selection pattern calls the intrinsic whose record name is built from
// "int_x86_avx10_", the mnemonic and the vector size of the type info.
//   opc   - opcode byte.
//   OpStr - mnemonic; also part of the intrinsic name.
//   _     - vector type info supplying RC, MemOp, VT, Size and ExeDomain.
multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpStr, "$src", "$src",
                           (_.VT (!cast<Intrinsic>(
                                      "int_x86_avx10_"#OpStr#_.Size)
                                  addr:$src))>,
           EVEX;
}
1768
// Expands vmovrs_p over the 512/128/256-bit members of an
// AVX512VLVectorVTInfo. Every width requires HasMOVRS and In64BitMode; the
// 512-bit form additionally requires HasAVX10_2_512, the narrower forms
// HasAVX10_2.
multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in {
    defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
  }

  let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
    defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
    defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
  }
}
1777
// MOVRS instantiations for 8/16/32/64-bit integer elements. All four share
// opcode 0x6f in T_MAP5 and the WriteVecLoad scheduling class; they differ in
// the XD/XS prefix, the presence of REX_W, and the EVEX_CD8 element size.
defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
               T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
               T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
               T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
               T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
1786
// SM4(EVEX)
// EVEX-encoded SM4 instructions at 128, 256 and 512 bits, built on SM4_Base
// (which lives in X86InstrSSE.td). All widths require HasSM4. The 128/256-bit
// forms require HasAVX10_2 and carry AddedComplexity = 1; the 512-bit form
// requires HasAVX10_2_512 and sets no AddedComplexity.
multiclass avx10_sm4_base<string OpStr> {
  let AddedComplexity = 1, Predicates = [HasSM4, HasAVX10_2] in {
    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>,
                EVEX_V128;
    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>,
                EVEX_V256;
  }

  let Predicates = [HasSM4, HasAVX10_2_512] in {
    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
  }
}
1797
// The two SM4 mnemonics share the T8 map and differ only in prefix
// (XS for vsm4key4, XD for vsm4rnds4).
defm VSM4KEY4  : avx10_sm4_base<"vsm4key4">,  T8, XS, EVEX, VVVV;
defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
1800