xref: /llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 9844badfca51e0eba72964552fd624224cbaacb0)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// This multiclass generates the masking variants from the non-masking
16// variant.  It only provides the assembly pieces for the masking variants.
17// It assumes custom ISel patterns for masking which can be provided as
18// template arguments.
19multiclass AVX512_maskable_custom<bits<8> O, Format F,
20                                  dag Outs,
21                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
22                                  string OpcodeStr,
23                                  string AttSrcAsm, string IntelSrcAsm,
24                                  list<dag> Pattern,
25                                  list<dag> MaskingPattern,
26                                  list<dag> ZeroMaskingPattern,
27                                  string MaskingConstraint = "",
28                                  bit IsCommutable = 0,
29                                  bit IsKCommutable = 0,
30                                  bit IsKZCommutable = IsCommutable,
31                                  string ClobberConstraint = "",
32                                  string Suffix = ""> {
33  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
34    def Suffix: AVX512<O, F, Outs, Ins,
35                            OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
36                                          "$dst, "#IntelSrcAsm#"}",
37                            Pattern>;
38
39  // Prefer over VMOV*rrk Pat<>
40  let isCommutable = IsKCommutable in
41    def k#Suffix: AVX512<O, F, Outs, MaskingIns,
42                              OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
43                                            "$dst {${mask}}, "#IntelSrcAsm#"}",
44                              MaskingPattern>,
45              EVEX_K {
46      // In case of the 3src subclass this is overridden with a let.
47      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
48                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
49                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
50    }
51
52  // Zero mask does not add any restrictions to commute operands transformation.
53  // So, it is Ok to use IsCommutable instead of IsKCommutable.
54  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
55      Constraints = ClobberConstraint in
56    def kz#Suffix: AVX512<O, F, Outs, ZeroMaskingIns,
57                               OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
58                                             "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
59                               ZeroMaskingPattern>,
60              EVEX_KZ;
61}
62
63
64// Common base class of AVX512_maskable and AVX512_maskable_3src.
65multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
66                                  dag Outs,
67                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
68                                  string OpcodeStr,
69                                  string AttSrcAsm, string IntelSrcAsm,
70                                  dag RHS, dag MaskingRHS,
71                                  SDPatternOperator Select = vselect_mask,
72                                  string MaskingConstraint = "",
73                                  bit IsCommutable = 0,
74                                  bit IsKCommutable = 0,
75                                  bit IsKZCommutable = IsCommutable,
76                                  string ClobberConstraint = "",
77                                  string Suffix = ""> :
78  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
79                         AttSrcAsm, IntelSrcAsm,
80                         [(set _.RC:$dst, RHS)],
81                         [(set _.RC:$dst, MaskingRHS)],
82                         [(set _.RC:$dst,
83                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
84                         MaskingConstraint, IsCommutable,
85                         IsKCommutable, IsKZCommutable, ClobberConstraint,
86                         Suffix>;
87
88// This multiclass generates the unconditional/non-masking, the masking and
89// the zero-masking variant of the vector instruction.  In the masking case, the
90// preserved vector elements come from a new dummy input operand tied to $dst.
91// This version uses a separate dag for non-masking and masking.
92multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
93                           dag Outs, dag Ins, string OpcodeStr,
94                           string AttSrcAsm, string IntelSrcAsm,
95                           dag RHS, dag MaskRHS,
96                           string ClobberConstraint = "",
97                           bit IsCommutable = 0, bit IsKCommutable = 0,
98                           bit IsKZCommutable = IsCommutable> :
99   AVX512_maskable_custom<O, F, Outs, Ins,
100                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
101                          !con((ins _.KRCWM:$mask), Ins),
102                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
103                          [(set _.RC:$dst, RHS)],
104                          [(set _.RC:$dst,
105                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
106                          [(set _.RC:$dst,
107                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
108                          "$src0 = $dst", IsCommutable, IsKCommutable,
109                          IsKZCommutable, ClobberConstraint>;
110
111// This multiclass generates the unconditional/non-masking, the masking and
112// the zero-masking variant of the vector instruction.  In the masking case, the
113// preserved vector elements come from a new dummy input operand tied to $dst.
114multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
115                           dag Outs, dag Ins, string OpcodeStr,
116                           string AttSrcAsm, string IntelSrcAsm,
117                           dag RHS,
118                           bit IsCommutable = 0, bit IsKCommutable = 0,
119                           bit IsKZCommutable = IsCommutable,
120                           SDPatternOperator Select = vselect_mask,
121                           string ClobberConstraint = "",
122                           string Suffix = ""> :
123   AVX512_maskable_common<O, F, _, Outs, Ins,
124                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
125                          !con((ins _.KRCWM:$mask), Ins),
126                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
127                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
128                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
129                          IsKZCommutable, ClobberConstraint, Suffix>;
130
131// This multiclass generates the unconditional/non-masking, the masking and
132// the zero-masking variant of the scalar instruction.
133multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
134                           dag Outs, dag Ins, string OpcodeStr,
135                           string AttSrcAsm, string IntelSrcAsm,
136                           dag RHS, string Suffix = ""> :
137   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
138                   RHS, 0, 0, 0, X86selects_mask, "", Suffix>;
139
140// Similar to AVX512_maskable but in this case one of the source operands
141// ($src1) is already tied to $dst so we just use that for the preserved
142// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
143// $src1.
144multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
145                                dag Outs, dag NonTiedIns, string OpcodeStr,
146                                string AttSrcAsm, string IntelSrcAsm,
147                                dag RHS,
148                                bit IsCommutable = 0,
149                                bit IsKCommutable = 0,
150                                SDPatternOperator Select = vselect_mask,
151                                bit MaskOnly = 0, string Suffix = ""> :
152   AVX512_maskable_common<O, F, _, Outs,
153                          !con((ins _.RC:$src1), NonTiedIns),
154                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
155                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
156                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
157                          !if(MaskOnly, (null_frag), RHS),
158                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
159                          Select, "", IsCommutable, IsKCommutable,
160                          IsCommutable, "", Suffix>;
161
162// Similar to AVX512_maskable_3src but in this case the input VT for the tied
163// operand differs from the output VT. This requires a bitconvert on
164// the preserved vector going into the vselect.
165// NOTE: The unmasked pattern is disabled.
166multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
167                                     X86VectorVTInfo InVT,
168                                     dag Outs, dag NonTiedIns, string OpcodeStr,
169                                     string AttSrcAsm, string IntelSrcAsm,
170                                     dag RHS, bit IsCommutable = 0> :
171   AVX512_maskable_common<O, F, OutVT, Outs,
172                          !con((ins InVT.RC:$src1), NonTiedIns),
173                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
174                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
175                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
176                          (vselect_mask InVT.KRCWM:$mask, RHS,
177                           (bitconvert InVT.RC:$src1)),
178                           vselect_mask, "", IsCommutable>;
179
180multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
181                                     dag Outs, dag NonTiedIns, string OpcodeStr,
182                                     string AttSrcAsm, string IntelSrcAsm,
183                                     dag RHS,
184                                     bit IsCommutable = 0,
185                                     bit IsKCommutable = 0,
186                                     bit MaskOnly = 0, string Suffix = ""> :
187   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
188                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
189                        X86selects_mask, MaskOnly, Suffix>;
190
191multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
192                                  dag Outs, dag Ins,
193                                  string OpcodeStr,
194                                  string AttSrcAsm, string IntelSrcAsm,
195                                  list<dag> Pattern> :
196   AVX512_maskable_custom<O, F, Outs, Ins,
197                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
198                          !con((ins _.KRCWM:$mask), Ins),
199                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
200                          "$src0 = $dst">;
201
202multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
203                                       dag Outs, dag NonTiedIns,
204                                       string OpcodeStr,
205                                       string AttSrcAsm, string IntelSrcAsm,
206                                       list<dag> Pattern> :
207   AVX512_maskable_custom<O, F, Outs,
208                          !con((ins _.RC:$src1), NonTiedIns),
209                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
210                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
211                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
212                          "">;
213
214// Instruction with mask that puts result in mask register,
215// like "compare" and "vptest"
216multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
217                                  dag Outs,
218                                  dag Ins, dag MaskingIns,
219                                  string OpcodeStr,
220                                  string AttSrcAsm, string IntelSrcAsm,
221                                  list<dag> Pattern,
222                                  list<dag> MaskingPattern,
223                                  bit IsCommutable = 0,
224                                  string Suffix = ""> {
225    let isCommutable = IsCommutable in {
226    def Suffix: AVX512<O, F, Outs, Ins,
227                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
228                                     "$dst, "#IntelSrcAsm#"}",
229                       Pattern>;
230
231    def k#Suffix: AVX512<O, F, Outs, MaskingIns,
232                         OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
233                                       "$dst {${mask}}, "#IntelSrcAsm#"}",
234                         MaskingPattern>, EVEX_K;
235    }
236}
237
238multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
239                                  dag Outs,
240                                  dag Ins, dag MaskingIns,
241                                  string OpcodeStr,
242                                  string AttSrcAsm, string IntelSrcAsm,
243                                  dag RHS, dag MaskingRHS,
244                                  bit IsCommutable = 0,
245                                  string Suffix = ""> :
246  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
247                         AttSrcAsm, IntelSrcAsm,
248                         [(set _.KRC:$dst, RHS)],
249                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable, Suffix>;
250
251multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
252                           dag Outs, dag Ins, string OpcodeStr,
253                           string AttSrcAsm, string IntelSrcAsm,
254                           dag RHS, dag RHS_su, bit IsCommutable = 0,
255                           string Suffix = ""> :
256   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
257                          !con((ins _.KRCWM:$mask), Ins),
258                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
259                          (and _.KRCWM:$mask, RHS_su), IsCommutable, Suffix>;
260
261// Used by conversion instructions.
262multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
263                                  dag Outs,
264                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
265                                  string OpcodeStr,
266                                  string AttSrcAsm, string IntelSrcAsm,
267                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
268  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
269                         AttSrcAsm, IntelSrcAsm,
270                         [(set _.RC:$dst, RHS)],
271                         [(set _.RC:$dst, MaskingRHS)],
272                         [(set _.RC:$dst, ZeroMaskingRHS)],
273                         "$src0 = $dst">;
274
275multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
276                               dag Outs, dag NonTiedIns, string OpcodeStr,
277                               string AttSrcAsm, string IntelSrcAsm,
278                               dag RHS, dag MaskingRHS, bit IsCommutable,
279                               bit IsKCommutable> :
280   AVX512_maskable_custom<O, F, Outs,
281                          !con((ins _.RC:$src1), NonTiedIns),
282                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
283                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
284                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
285                          [(set _.RC:$dst, RHS)],
286                          [(set _.RC:$dst,
287                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
288                          [(set _.RC:$dst,
289                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
290                          "", IsCommutable, IsKCommutable>;
291
292// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
293// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
294// swizzled by ExecutionDomainFix to pxor.
295// We set canFoldAsLoad because this can be converted to a constant-pool
296// load of an all-zeros value if folding it would be beneficial.
297let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
298    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
299def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
300               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
301def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
302               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
303}
304
305let Predicates = [HasAVX512] in {
306def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
307def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
308def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
309def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
310def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
311def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
312}
313
314// Alias instructions that allow VPTERNLOG to be used with a mask to create
315// a mix of all ones and all zeros elements. This is done this way to force
316// the same register to be used as input for all three sources.
317let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
318def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
319                                (ins VK16WM:$mask), "",
320                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
321                                                      (v16i32 immAllOnesV),
322                                                      (v16i32 immAllZerosV)))]>;
323def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
324                                (ins VK8WM:$mask), "",
325                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
326                                           (v8i64 immAllOnesV),
327                                           (v8i64 immAllZerosV)))]>;
328}
329
330let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
331    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
332def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
333               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
334def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
335               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
336}
337
338let Predicates = [HasAVX512] in {
339def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
340def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
341def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
342def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
343def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
344def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
345def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
346def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
347def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
348def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
349def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
350def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
351}
352
353// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
354// This is expanded by ExpandPostRAPseudos.
355let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
356    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
357  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
358                          [(set FR16X:$dst, fp16imm0)]>;
359  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
360                          [(set FR32X:$dst, fp32imm0)]>;
361  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
362                          [(set FR64X:$dst, fp64imm0)]>;
363  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
364                            [(set VR128X:$dst, fp128imm0)]>;
365}
366
367//===----------------------------------------------------------------------===//
368// AVX-512 - VECTOR INSERT
369//
370
371// Supports two different pattern operators for mask and unmasked ops. Allows
372// null_frag to be passed for one.
373multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
374                                  X86VectorVTInfo To,
375                                  SDPatternOperator vinsert_insert,
376                                  SDPatternOperator vinsert_for_mask,
377                                  X86FoldableSchedWrite sched> {
378  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
379    defm rri : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
380                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
381                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
382                   "$src3, $src2, $src1", "$src1, $src2, $src3",
383                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
384                                         (From.VT From.RC:$src2),
385                                         (iPTR imm)),
386                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
387                                           (From.VT From.RC:$src2),
388                                           (iPTR imm))>,
389                   AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
390    let mayLoad = 1 in
391    defm rmi : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
392                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
393                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
394                   "$src3, $src2, $src1", "$src1, $src2, $src3",
395                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
396                               (From.VT (From.LdFrag addr:$src2)),
397                               (iPTR imm)),
398                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
399                               (From.VT (From.LdFrag addr:$src2)),
400                               (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
401                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
402                   Sched<[sched.Folded, sched.ReadAfterFold]>;
403  }
404}
405
406// Passes the same pattern operator for masked and unmasked ops.
407multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
408                            X86VectorVTInfo To,
409                            SDPatternOperator vinsert_insert,
410                            X86FoldableSchedWrite sched> :
411  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
412
413multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
414                       X86VectorVTInfo To, PatFrag vinsert_insert,
415                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
416  let Predicates = p in {
417    def : Pat<(vinsert_insert:$ins
418                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
419              (To.VT (!cast<Instruction>(InstrStr#"rri")
420                     To.RC:$src1, From.RC:$src2,
421                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
422
423    def : Pat<(vinsert_insert:$ins
424                  (To.VT To.RC:$src1),
425                  (From.VT (From.LdFrag addr:$src2)),
426                  (iPTR imm)),
427              (To.VT (!cast<Instruction>(InstrStr#"rmi")
428                  To.RC:$src1, addr:$src2,
429                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
430  }
431}
432
433multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
434                            ValueType EltVT64, int Opcode256,
435                            X86FoldableSchedWrite sched> {
436
437  let Predicates = [HasVLX] in
438    defm NAME # "32X4Z256" : vinsert_for_size<Opcode128,
439                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
440                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
441                                 vinsert128_insert, sched>, EVEX_V256;
442
443  defm NAME # "32X4Z" : vinsert_for_size<Opcode128,
444                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
445                                 X86VectorVTInfo<16, EltVT32, VR512>,
446                                 vinsert128_insert, sched>, EVEX_V512;
447
448  defm NAME # "64X4Z" : vinsert_for_size<Opcode256,
449                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
450                                 X86VectorVTInfo< 8, EltVT64, VR512>,
451                                 vinsert256_insert, sched>, REX_W, EVEX_V512;
452
453  // Even with DQI we'd like to only use these instructions for masking.
454  let Predicates = [HasVLX, HasDQI] in
455    defm NAME # "64X2Z256" : vinsert_for_size_split<Opcode128,
456                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
457                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
458                                   null_frag, vinsert128_insert, sched>,
459                                   EVEX_V256, REX_W;
460
461  // Even with DQI we'd like to only use these instructions for masking.
462  let Predicates = [HasDQI] in {
463    defm NAME # "64X2Z" : vinsert_for_size_split<Opcode128,
464                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
465                                 X86VectorVTInfo< 8, EltVT64, VR512>,
466                                 null_frag, vinsert128_insert, sched>,
467                                 REX_W, EVEX_V512;
468
469    defm NAME # "32X8Z" : vinsert_for_size_split<Opcode256,
470                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
471                                   X86VectorVTInfo<16, EltVT32, VR512>,
472                                   null_frag, vinsert256_insert, sched>,
473                                   EVEX_V512;
474  }
475}
476
477// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
478defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
479defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
480
481// Codegen pattern with the alternative types,
482// Even with AVX512DQ we'll still use these for unmasked operations.
483defm : vinsert_for_size_lowering<"VINSERTF32X4Z256", v2f64x_info, v4f64x_info,
484              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
485defm : vinsert_for_size_lowering<"VINSERTI32X4Z256", v2i64x_info, v4i64x_info,
486              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
487
488defm : vinsert_for_size_lowering<"VINSERTF32X4Z", v2f64x_info, v8f64_info,
489              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
490defm : vinsert_for_size_lowering<"VINSERTI32X4Z", v2i64x_info, v8i64_info,
491              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
492
493defm : vinsert_for_size_lowering<"VINSERTF64X4Z", v8f32x_info, v16f32_info,
494              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
495defm : vinsert_for_size_lowering<"VINSERTI64X4Z", v8i32x_info, v16i32_info,
496              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
497
498// Codegen pattern with the alternative types insert VEC128 into VEC256
499defm : vinsert_for_size_lowering<"VINSERTI32X4Z256", v8i16x_info, v16i16x_info,
500              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
501defm : vinsert_for_size_lowering<"VINSERTI32X4Z256", v16i8x_info, v32i8x_info,
502              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
503defm : vinsert_for_size_lowering<"VINSERTF32X4Z256", v8f16x_info, v16f16x_info,
504              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
505defm : vinsert_for_size_lowering<"VINSERTF32X4Z256", v8bf16x_info, v16bf16x_info,
506              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
507// Codegen pattern with the alternative types insert VEC128 into VEC512
508defm : vinsert_for_size_lowering<"VINSERTI32X4Z", v8i16x_info, v32i16_info,
509              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
510defm : vinsert_for_size_lowering<"VINSERTI32X4Z", v16i8x_info, v64i8_info,
511               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
512defm : vinsert_for_size_lowering<"VINSERTF32X4Z", v8f16x_info, v32f16_info,
513              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
514defm : vinsert_for_size_lowering<"VINSERTF32X4Z", v8bf16x_info, v32bf16_info,
515              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
516// Codegen pattern with the alternative types insert VEC256 into VEC512
517defm : vinsert_for_size_lowering<"VINSERTI64X4Z", v16i16x_info, v32i16_info,
518              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
519defm : vinsert_for_size_lowering<"VINSERTI64X4Z", v32i8x_info, v64i8_info,
520              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
521defm : vinsert_for_size_lowering<"VINSERTF64X4Z", v16f16x_info, v32f16_info,
522              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
523defm : vinsert_for_size_lowering<"VINSERTF64X4Z", v16bf16x_info, v32bf16_info,
524              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
525
526
527multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
528                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
529                                 PatFrag vinsert_insert,
530                                 SDNodeXForm INSERT_get_vinsert_imm,
531                                 list<Predicate> p> {
532let Predicates = p in {
533  def : Pat<(Cast.VT
534             (vselect_mask Cast.KRCWM:$mask,
535                           (bitconvert
536                            (vinsert_insert:$ins (To.VT To.RC:$src1),
537                                                 (From.VT From.RC:$src2),
538                                                 (iPTR imm))),
539                           Cast.RC:$src0)),
540            (!cast<Instruction>(InstrStr#"rrik")
541             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
542             (INSERT_get_vinsert_imm To.RC:$ins))>;
543  def : Pat<(Cast.VT
544             (vselect_mask Cast.KRCWM:$mask,
545                           (bitconvert
546                            (vinsert_insert:$ins (To.VT To.RC:$src1),
547                                                 (From.VT
548                                                  (bitconvert
549                                                   (From.LdFrag addr:$src2))),
550                                                 (iPTR imm))),
551                           Cast.RC:$src0)),
552            (!cast<Instruction>(InstrStr#"rmik")
553             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
554             (INSERT_get_vinsert_imm To.RC:$ins))>;
555
556  def : Pat<(Cast.VT
557             (vselect_mask Cast.KRCWM:$mask,
558                           (bitconvert
559                            (vinsert_insert:$ins (To.VT To.RC:$src1),
560                                                 (From.VT From.RC:$src2),
561                                                 (iPTR imm))),
562                           Cast.ImmAllZerosV)),
563            (!cast<Instruction>(InstrStr#"rrikz")
564             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
565             (INSERT_get_vinsert_imm To.RC:$ins))>;
566  def : Pat<(Cast.VT
567             (vselect_mask Cast.KRCWM:$mask,
568                           (bitconvert
569                            (vinsert_insert:$ins (To.VT To.RC:$src1),
570                                                 (From.VT (From.LdFrag addr:$src2)),
571                                                 (iPTR imm))),
572                           Cast.ImmAllZerosV)),
573            (!cast<Instruction>(InstrStr#"rmikz")
574             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
575             (INSERT_get_vinsert_imm To.RC:$ins))>;
576}
577}
578
579defm : vinsert_for_mask_cast<"VINSERTF32X4Z256", v2f64x_info, v4f64x_info,
580                             v8f32x_info, vinsert128_insert,
581                             INSERT_get_vinsert128_imm, [HasVLX]>;
582defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v4f32x_info, v8f32x_info,
583                             v4f64x_info, vinsert128_insert,
584                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
585
586defm : vinsert_for_mask_cast<"VINSERTI32X4Z256", v2i64x_info, v4i64x_info,
587                             v8i32x_info, vinsert128_insert,
588                             INSERT_get_vinsert128_imm, [HasVLX]>;
589defm : vinsert_for_mask_cast<"VINSERTI32X4Z256", v8i16x_info, v16i16x_info,
590                             v8i32x_info, vinsert128_insert,
591                             INSERT_get_vinsert128_imm, [HasVLX]>;
592defm : vinsert_for_mask_cast<"VINSERTI32X4Z256", v16i8x_info, v32i8x_info,
593                             v8i32x_info, vinsert128_insert,
594                             INSERT_get_vinsert128_imm, [HasVLX]>;
595defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v4i32x_info, v8i32x_info,
596                             v4i64x_info, vinsert128_insert,
597                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
598defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v8i16x_info, v16i16x_info,
599                             v4i64x_info, vinsert128_insert,
600                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
601defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v16i8x_info, v32i8x_info,
602                             v4i64x_info, vinsert128_insert,
603                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
604
605defm : vinsert_for_mask_cast<"VINSERTF32X4Z", v2f64x_info, v8f64_info,
606                             v16f32_info, vinsert128_insert,
607                             INSERT_get_vinsert128_imm, [HasAVX512]>;
608defm : vinsert_for_mask_cast<"VINSERTF64X2Z", v4f32x_info, v16f32_info,
609                             v8f64_info, vinsert128_insert,
610                             INSERT_get_vinsert128_imm, [HasDQI]>;
611
612defm : vinsert_for_mask_cast<"VINSERTI32X4Z", v2i64x_info, v8i64_info,
613                             v16i32_info, vinsert128_insert,
614                             INSERT_get_vinsert128_imm, [HasAVX512]>;
615defm : vinsert_for_mask_cast<"VINSERTI32X4Z", v8i16x_info, v32i16_info,
616                             v16i32_info, vinsert128_insert,
617                             INSERT_get_vinsert128_imm, [HasAVX512]>;
618defm : vinsert_for_mask_cast<"VINSERTI32X4Z", v16i8x_info, v64i8_info,
619                             v16i32_info, vinsert128_insert,
620                             INSERT_get_vinsert128_imm, [HasAVX512]>;
621defm : vinsert_for_mask_cast<"VINSERTI64X2Z", v4i32x_info, v16i32_info,
622                             v8i64_info, vinsert128_insert,
623                             INSERT_get_vinsert128_imm, [HasDQI]>;
624defm : vinsert_for_mask_cast<"VINSERTI64X2Z", v8i16x_info, v32i16_info,
625                             v8i64_info, vinsert128_insert,
626                             INSERT_get_vinsert128_imm, [HasDQI]>;
627defm : vinsert_for_mask_cast<"VINSERTI64X2Z", v16i8x_info, v64i8_info,
628                             v8i64_info, vinsert128_insert,
629                             INSERT_get_vinsert128_imm, [HasDQI]>;
630
631defm : vinsert_for_mask_cast<"VINSERTF32X8Z", v4f64x_info, v8f64_info,
632                             v16f32_info, vinsert256_insert,
633                             INSERT_get_vinsert256_imm, [HasDQI]>;
634defm : vinsert_for_mask_cast<"VINSERTF64X4Z", v8f32x_info, v16f32_info,
635                             v8f64_info, vinsert256_insert,
636                             INSERT_get_vinsert256_imm, [HasAVX512]>;
637
638defm : vinsert_for_mask_cast<"VINSERTI32X8Z", v4i64x_info, v8i64_info,
639                             v16i32_info, vinsert256_insert,
640                             INSERT_get_vinsert256_imm, [HasDQI]>;
641defm : vinsert_for_mask_cast<"VINSERTI32X8Z", v16i16x_info, v32i16_info,
642                             v16i32_info, vinsert256_insert,
643                             INSERT_get_vinsert256_imm, [HasDQI]>;
644defm : vinsert_for_mask_cast<"VINSERTI32X8Z", v32i8x_info, v64i8_info,
645                             v16i32_info, vinsert256_insert,
646                             INSERT_get_vinsert256_imm, [HasDQI]>;
647defm : vinsert_for_mask_cast<"VINSERTI64X4Z", v8i32x_info, v16i32_info,
648                             v8i64_info, vinsert256_insert,
649                             INSERT_get_vinsert256_imm, [HasAVX512]>;
650defm : vinsert_for_mask_cast<"VINSERTI64X4Z", v16i16x_info, v32i16_info,
651                             v8i64_info, vinsert256_insert,
652                             INSERT_get_vinsert256_imm, [HasAVX512]>;
653defm : vinsert_for_mask_cast<"VINSERTI64X4Z", v32i8x_info, v64i8_info,
654                             v8i64_info, vinsert256_insert,
655                             INSERT_get_vinsert256_imm, [HasAVX512]>;
656
657// vinsertps - insert f32 to XMM
658let ExeDomain = SSEPackedSingle in {
659let isCommutable = 1 in
660def VINSERTPSZrri : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
661      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
662      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
663      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
664      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
665def VINSERTPSZrmi : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
666      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
667      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
668      [(set VR128X:$dst, (X86insertps VR128X:$src1,
669                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
670                          timm:$src3))]>,
671      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
672      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
673}
674
675//===----------------------------------------------------------------------===//
676// AVX-512 VECTOR EXTRACT
677//---
678
679// Supports two different pattern operators for mask and unmasked ops. Allows
680// null_frag to be passed for one.
681multiclass vextract_for_size_split<int Opcode,
682                                   X86VectorVTInfo From, X86VectorVTInfo To,
683                                   SDPatternOperator vextract_extract,
684                                   SDPatternOperator vextract_for_mask,
685                                   SchedWrite SchedRR, SchedWrite SchedMR> {
686
687  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
688    defm rri : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
689                (ins From.RC:$src1, u8imm:$idx),
690                "vextract" # To.EltTypeName # "x" # To.NumElts,
691                "$idx, $src1", "$src1, $idx",
692                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
693                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
694                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
695
696    def mri  : AVX512AIi8<Opcode, MRMDestMem, (outs),
697                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
698                    "vextract" # To.EltTypeName # "x" # To.NumElts #
699                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
700                    [(store (To.VT (vextract_extract:$idx
701                                    (From.VT From.RC:$src1), (iPTR imm))),
702                             addr:$dst)]>, EVEX,
703                    Sched<[SchedMR]>;
704
705    let mayStore = 1, hasSideEffects = 0 in
706    def mrik : AVX512AIi8<Opcode, MRMDestMem, (outs),
707                    (ins To.MemOp:$dst, To.KRCWM:$mask,
708                                        From.RC:$src1, u8imm:$idx),
709                     "vextract" # To.EltTypeName # "x" # To.NumElts #
710                          "\t{$idx, $src1, $dst {${mask}}|"
711                          "$dst {${mask}}, $src1, $idx}", []>,
712                    EVEX_K, EVEX, Sched<[SchedMR]>;
713  }
714}
715
716// Passes the same pattern operator for masked and unmasked ops.
717multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
718                             X86VectorVTInfo To,
719                             SDPatternOperator vextract_extract,
720                             SchedWrite SchedRR, SchedWrite SchedMR> :
721  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
722
723// Codegen pattern for the alternative types
724multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
725                X86VectorVTInfo To, PatFrag vextract_extract,
726                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
727  let Predicates = p in {
728     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
729               (To.VT (!cast<Instruction>(InstrStr#"rri")
730                          From.RC:$src1,
731                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
732     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
733                              (iPTR imm))), addr:$dst),
734               (!cast<Instruction>(InstrStr#"mri") addr:$dst, From.RC:$src1,
735                (EXTRACT_get_vextract_imm To.RC:$ext))>;
736  }
737}
738
739multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
740                             ValueType EltVT64, int Opcode256,
741                             SchedWrite SchedRR, SchedWrite SchedMR> {
742  let Predicates = [HasAVX512] in {
743    defm NAME # "32X4Z" : vextract_for_size<Opcode128,
744                                   X86VectorVTInfo<16, EltVT32, VR512>,
745                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
746                                   vextract128_extract, SchedRR, SchedMR>,
747                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
748    defm NAME # "64X4Z" : vextract_for_size<Opcode256,
749                                   X86VectorVTInfo< 8, EltVT64, VR512>,
750                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
751                                   vextract256_extract, SchedRR, SchedMR>,
752                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
753  }
754  let Predicates = [HasVLX] in
755    defm NAME # "32X4Z256" : vextract_for_size<Opcode128,
756                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
757                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
758                                 vextract128_extract, SchedRR, SchedMR>,
759                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;
760
761  // Even with DQI we'd like to only use these instructions for masking.
762  let Predicates = [HasVLX, HasDQI] in
763    defm NAME # "64X2Z256" : vextract_for_size_split<Opcode128,
764                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
765                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
766                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
767                                    EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
768
769  // Even with DQI we'd like to only use these instructions for masking.
770  let Predicates = [HasDQI] in {
771    defm NAME # "64X2Z" : vextract_for_size_split<Opcode128,
772                                 X86VectorVTInfo< 8, EltVT64, VR512>,
773                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
774                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
775                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
776    defm NAME # "32X8Z" : vextract_for_size_split<Opcode256,
777                                 X86VectorVTInfo<16, EltVT32, VR512>,
778                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
779                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
780                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
781  }
782}
783
784// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
785defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
786defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
787
788// extract_subvector codegen patterns with the alternative types.
789// Even with AVX512DQ we'll still use these for unmasked operations.
790defm : vextract_for_size_lowering<"VEXTRACTF32X4Z", v8f64_info, v2f64x_info,
791          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
792defm : vextract_for_size_lowering<"VEXTRACTI32X4Z", v8i64_info, v2i64x_info,
793          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
794
795defm : vextract_for_size_lowering<"VEXTRACTF64X4Z", v16f32_info, v8f32x_info,
796          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
797defm : vextract_for_size_lowering<"VEXTRACTI64X4Z", v16i32_info, v8i32x_info,
798          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
799
800defm : vextract_for_size_lowering<"VEXTRACTF32X4Z256", v4f64x_info, v2f64x_info,
801          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
802defm : vextract_for_size_lowering<"VEXTRACTI32X4Z256", v4i64x_info, v2i64x_info,
803          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
804
805// Codegen pattern with the alternative types extract VEC128 from VEC256
806defm : vextract_for_size_lowering<"VEXTRACTI32X4Z256", v16i16x_info, v8i16x_info,
807          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
808defm : vextract_for_size_lowering<"VEXTRACTI32X4Z256", v32i8x_info, v16i8x_info,
809          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
810defm : vextract_for_size_lowering<"VEXTRACTF32X4Z256", v16f16x_info, v8f16x_info,
811          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
812defm : vextract_for_size_lowering<"VEXTRACTF32X4Z256", v16bf16x_info, v8bf16x_info,
813          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
814
815// Codegen pattern with the alternative types extract VEC128 from VEC512
816defm : vextract_for_size_lowering<"VEXTRACTI32X4Z", v32i16_info, v8i16x_info,
817                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
818defm : vextract_for_size_lowering<"VEXTRACTI32X4Z", v64i8_info, v16i8x_info,
819                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
820defm : vextract_for_size_lowering<"VEXTRACTF32X4Z", v32f16_info, v8f16x_info,
821                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
822defm : vextract_for_size_lowering<"VEXTRACTF32X4Z", v32bf16_info, v8bf16x_info,
823                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
824// Codegen pattern with the alternative types extract VEC256 from VEC512
825defm : vextract_for_size_lowering<"VEXTRACTI64X4Z", v32i16_info, v16i16x_info,
826                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
827defm : vextract_for_size_lowering<"VEXTRACTI64X4Z", v64i8_info, v32i8x_info,
828                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
829defm : vextract_for_size_lowering<"VEXTRACTF64X4Z", v32f16_info, v16f16x_info,
830                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
831defm : vextract_for_size_lowering<"VEXTRACTF64X4Z", v32bf16_info, v16bf16x_info,
832                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
833
834
835// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
836// smaller extract to enable EVEX->VEX.
837let Predicates = [NoVLX, HasEVEX512] in {
838def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
839          (v2i64 (VEXTRACTI128rri
840                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
841                  (iPTR 1)))>;
842def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
843          (v2f64 (VEXTRACTF128rri
844                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
845                  (iPTR 1)))>;
846def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
847          (v4i32 (VEXTRACTI128rri
848                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
849                  (iPTR 1)))>;
850def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
851          (v4f32 (VEXTRACTF128rri
852                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
853                  (iPTR 1)))>;
854def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
855          (v8i16 (VEXTRACTI128rri
856                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
857                  (iPTR 1)))>;
858def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
859          (v8f16 (VEXTRACTF128rri
860                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
861                  (iPTR 1)))>;
862def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
863          (v16i8 (VEXTRACTI128rri
864                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
865                  (iPTR 1)))>;
866}
867
868// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
869// smaller extract to enable EVEX->VEX.
870let Predicates = [HasVLX] in {
871def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
872          (v2i64 (VEXTRACTI32X4Z256rri
873                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
874                  (iPTR 1)))>;
875def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
876          (v2f64 (VEXTRACTF32X4Z256rri
877                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
878                  (iPTR 1)))>;
879def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
880          (v4i32 (VEXTRACTI32X4Z256rri
881                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
882                  (iPTR 1)))>;
883def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
884          (v4f32 (VEXTRACTF32X4Z256rri
885                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
886                  (iPTR 1)))>;
887def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
888          (v8i16 (VEXTRACTI32X4Z256rri
889                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
890                  (iPTR 1)))>;
891def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
892          (v8f16 (VEXTRACTF32X4Z256rri
893                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
894                  (iPTR 1)))>;
895def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
896          (v16i8 (VEXTRACTI32X4Z256rri
897                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
898                  (iPTR 1)))>;
899}
900
901
902// Additional patterns for handling a bitcast between the vselect and the
903// extract_subvector.
904multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
905                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
906                                  PatFrag vextract_extract,
907                                  SDNodeXForm EXTRACT_get_vextract_imm,
908                                  list<Predicate> p> {
909let Predicates = p in {
910  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
911                                   (bitconvert
912                                    (To.VT (vextract_extract:$ext
913                                            (From.VT From.RC:$src), (iPTR imm)))),
914                                   To.RC:$src0)),
915            (Cast.VT (!cast<Instruction>(InstrStr#"rrik")
916                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
917                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
918
919  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
920                                   (bitconvert
921                                    (To.VT (vextract_extract:$ext
922                                            (From.VT From.RC:$src), (iPTR imm)))),
923                                   Cast.ImmAllZerosV)),
924            (Cast.VT (!cast<Instruction>(InstrStr#"rrikz")
925                      Cast.KRCWM:$mask, From.RC:$src,
926                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
927}
928}
929
930defm : vextract_for_mask_cast<"VEXTRACTF32X4Z256", v4f64x_info, v2f64x_info,
931                              v4f32x_info, vextract128_extract,
932                              EXTRACT_get_vextract128_imm, [HasVLX]>;
933defm : vextract_for_mask_cast<"VEXTRACTF64X2Z256", v8f32x_info, v4f32x_info,
934                              v2f64x_info, vextract128_extract,
935                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
936
937defm : vextract_for_mask_cast<"VEXTRACTI32X4Z256", v4i64x_info, v2i64x_info,
938                              v4i32x_info, vextract128_extract,
939                              EXTRACT_get_vextract128_imm, [HasVLX]>;
940defm : vextract_for_mask_cast<"VEXTRACTI32X4Z256", v16i16x_info, v8i16x_info,
941                              v4i32x_info, vextract128_extract,
942                              EXTRACT_get_vextract128_imm, [HasVLX]>;
943defm : vextract_for_mask_cast<"VEXTRACTI32X4Z256", v32i8x_info, v16i8x_info,
944                              v4i32x_info, vextract128_extract,
945                              EXTRACT_get_vextract128_imm, [HasVLX]>;
946defm : vextract_for_mask_cast<"VEXTRACTI64X2Z256", v8i32x_info, v4i32x_info,
947                              v2i64x_info, vextract128_extract,
948                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
949defm : vextract_for_mask_cast<"VEXTRACTI64X2Z256", v16i16x_info, v8i16x_info,
950                              v2i64x_info, vextract128_extract,
951                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
952defm : vextract_for_mask_cast<"VEXTRACTI64X2Z256", v32i8x_info, v16i8x_info,
953                              v2i64x_info, vextract128_extract,
954                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
955
956defm : vextract_for_mask_cast<"VEXTRACTF32X4Z", v8f64_info, v2f64x_info,
957                              v4f32x_info, vextract128_extract,
958                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
959defm : vextract_for_mask_cast<"VEXTRACTF64X2Z", v16f32_info, v4f32x_info,
960                              v2f64x_info, vextract128_extract,
961                              EXTRACT_get_vextract128_imm, [HasDQI]>;
962
963defm : vextract_for_mask_cast<"VEXTRACTI32X4Z", v8i64_info, v2i64x_info,
964                              v4i32x_info, vextract128_extract,
965                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
966defm : vextract_for_mask_cast<"VEXTRACTI32X4Z", v32i16_info, v8i16x_info,
967                              v4i32x_info, vextract128_extract,
968                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
969defm : vextract_for_mask_cast<"VEXTRACTI32X4Z", v64i8_info, v16i8x_info,
970                              v4i32x_info, vextract128_extract,
971                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
972defm : vextract_for_mask_cast<"VEXTRACTI64X2Z", v16i32_info, v4i32x_info,
973                              v2i64x_info, vextract128_extract,
974                              EXTRACT_get_vextract128_imm, [HasDQI]>;
975defm : vextract_for_mask_cast<"VEXTRACTI64X2Z", v32i16_info, v8i16x_info,
976                              v2i64x_info, vextract128_extract,
977                              EXTRACT_get_vextract128_imm, [HasDQI]>;
978defm : vextract_for_mask_cast<"VEXTRACTI64X2Z", v64i8_info, v16i8x_info,
979                              v2i64x_info, vextract128_extract,
980                              EXTRACT_get_vextract128_imm, [HasDQI]>;
981
982defm : vextract_for_mask_cast<"VEXTRACTF32X8Z", v8f64_info, v4f64x_info,
983                              v8f32x_info, vextract256_extract,
984                              EXTRACT_get_vextract256_imm, [HasDQI]>;
985defm : vextract_for_mask_cast<"VEXTRACTF64X4Z", v16f32_info, v8f32x_info,
986                              v4f64x_info, vextract256_extract,
987                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
988
989defm : vextract_for_mask_cast<"VEXTRACTI32X8Z", v8i64_info, v4i64x_info,
990                              v8i32x_info, vextract256_extract,
991                              EXTRACT_get_vextract256_imm, [HasDQI]>;
992defm : vextract_for_mask_cast<"VEXTRACTI32X8Z", v32i16_info, v16i16x_info,
993                              v8i32x_info, vextract256_extract,
994                              EXTRACT_get_vextract256_imm, [HasDQI]>;
995defm : vextract_for_mask_cast<"VEXTRACTI32X8Z", v64i8_info, v32i8x_info,
996                              v8i32x_info, vextract256_extract,
997                              EXTRACT_get_vextract256_imm, [HasDQI]>;
998defm : vextract_for_mask_cast<"VEXTRACTI64X4Z", v16i32_info, v8i32x_info,
999                              v4i64x_info, vextract256_extract,
1000                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1001defm : vextract_for_mask_cast<"VEXTRACTI64X4Z", v32i16_info, v16i16x_info,
1002                              v4i64x_info, vextract256_extract,
1003                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1004defm : vextract_for_mask_cast<"VEXTRACTI64X4Z", v64i8_info, v32i8x_info,
1005                              v4i64x_info, vextract256_extract,
1006                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
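// The instantiations above let a masked 128/256-bit extract whose result is
// only used through a bitcast (e.g. a v2f64 half of a v4f64 consumed as v4f32)
// select the masked VEXTRACT*rrik / VEXTRACT*rrikz forms directly rather than
// falling back to an unmasked extract plus a separate masked move.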
1007
1008// vextractps - extract a 32-bit element from an XMM register
1009def VEXTRACTPSZrri : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1010      (ins VR128X:$src1, u8imm:$src2),
1011      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1012      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1013      EVEX, WIG, Sched<[WriteVecExtract]>;
1014
1015def VEXTRACTPSZmri : AVX512AIi8<0x17, MRMDestMem, (outs),
1016      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1017      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1018      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1019                          addr:$dst)]>,
1020      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
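// For reference, the two defs above correspond to (AT&T syntax):
//   vextractps $2, %xmm1, %eax      (VEXTRACTPSZrri, register destination)
//   vextractps $2, %xmm1, (%rdi)    (VEXTRACTPSZmri, store to memory)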
1021
1022//===----------------------------------------------------------------------===//
1023// AVX-512 BROADCAST
1024//---
1025// Broadcast with a scalar FP register argument.
1026multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1027                                   X86VectorVTInfo SrcInfo> {
1028  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1029            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1030             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1031  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1032                                       (X86VBroadcast SrcInfo.FRC:$src),
1033                                       DestInfo.RC:$src0)),
1034            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1035             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1036             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1037  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1038                                       (X86VBroadcast SrcInfo.FRC:$src),
1039                                       DestInfo.ImmAllZerosV)),
1040            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1041             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1042}
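// In effect the patterns above let a splat of a scalar FP register (FRC) reuse
// the vector-register broadcast forms, e.g. a zero-masked v16f32 splat of an
// FR32X value becomes
//   vbroadcastss %xmm0, %zmm1 {%k1} {z}
// once the FRC source has been copied into the corresponding XMM register class.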
1043
1044// Split version to allow mask and broadcast node to be different types. This
1045// helps support the 32x2 broadcasts.
1046multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1047                                     SchedWrite SchedRR, SchedWrite SchedRM,
1048                                     X86VectorVTInfo MaskInfo,
1049                                     X86VectorVTInfo DestInfo,
1050                                     X86VectorVTInfo SrcInfo,
1051                                     bit IsConvertibleToThreeAddress,
1052                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1053                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1054  let hasSideEffects = 0 in
1055  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1056                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1057                    [(set MaskInfo.RC:$dst,
1058                      (MaskInfo.VT
1059                       (bitconvert
1060                        (DestInfo.VT
1061                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1062                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
1063  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1064                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1065                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1066                       "${dst} {${mask}} {z}, $src}"),
1067                       [(set MaskInfo.RC:$dst,
1068                         (vselect_mask MaskInfo.KRCWM:$mask,
1069                          (MaskInfo.VT
1070                           (bitconvert
1071                            (DestInfo.VT
1072                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1073                          MaskInfo.ImmAllZerosV))],
1074                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1075  let Constraints = "$src0 = $dst" in
1076  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1077                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1078                          SrcInfo.RC:$src),
1079                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1080                     "${dst} {${mask}}, $src}"),
1081                     [(set MaskInfo.RC:$dst,
1082                       (vselect_mask MaskInfo.KRCWM:$mask,
1083                        (MaskInfo.VT
1084                         (bitconvert
1085                          (DestInfo.VT
1086                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1087                        MaskInfo.RC:$src0))],
1088                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1089
1090  let hasSideEffects = 0, mayLoad = 1, isReMaterializable = 1, canFoldAsLoad = 1 in
1091  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1092                    (ins SrcInfo.ScalarMemOp:$src),
1093                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1094                    [(set MaskInfo.RC:$dst,
1095                      (MaskInfo.VT
1096                       (bitconvert
1097                        (DestInfo.VT
1098                         (UnmaskedBcastOp addr:$src)))))],
1099                    DestInfo.ExeDomain>, T8, PD, EVEX,
1100                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1101
1102  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1103                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1104                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1105                       "${dst} {${mask}} {z}, $src}"),
1106                       [(set MaskInfo.RC:$dst,
1107                         (vselect_mask MaskInfo.KRCWM:$mask,
1108                          (MaskInfo.VT
1109                           (bitconvert
1110                            (DestInfo.VT
1111                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1112                          MaskInfo.ImmAllZerosV))],
1113                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1114                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1115
1116  let Constraints = "$src0 = $dst",
1117      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1118  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1119                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1120                          SrcInfo.ScalarMemOp:$src),
1121                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1122                     "${dst} {${mask}}, $src}"),
1123                     [(set MaskInfo.RC:$dst,
1124                       (vselect_mask MaskInfo.KRCWM:$mask,
1125                        (MaskInfo.VT
1126                         (bitconvert
1127                          (DestInfo.VT
1128                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1129                        MaskInfo.RC:$src0))],
1130                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1131                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1132}
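// The MaskInfo/DestInfo split above is what the 32x2 broadcasts rely on:
// VBROADCAST*32X2 splats a 64-bit (two-dword) element, so DestInfo/SrcInfo use
// the 64-bit-element types, while the write-mask still operates per 32-bit
// element, so MaskInfo keeps the 32-bit-element type and the two views are
// joined by a bitconvert.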
1133
1134// Helper multiclass to force the mask and broadcast result to the same type.
1135multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1136                               SchedWrite SchedRR, SchedWrite SchedRM,
1137                               X86VectorVTInfo DestInfo,
1138                               X86VectorVTInfo SrcInfo,
1139                               bit IsConvertibleToThreeAddress> :
1140  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1141                            DestInfo, DestInfo, SrcInfo,
1142                            IsConvertibleToThreeAddress>;
1143
1144multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1145                                  AVX512VLVectorVTInfo _> {
1146  let Predicates = [HasAVX512] in {
1147    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1148                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1149              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1150              EVEX_V512;
1151  }
1152
1153  let Predicates = [HasVLX] in {
1154    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1155                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1156                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1157                 EVEX_V256;
1158  }
1159}
1160
1161multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1162                                  AVX512VLVectorVTInfo _> {
1163  let Predicates = [HasAVX512] in {
1164    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1165                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1166              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1167              EVEX_V512;
1168  }
1169
1170  let Predicates = [HasVLX] in {
1171    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1172                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1173                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1174                 EVEX_V256;
1175    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1176                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1177                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1178                 EVEX_V128;
1179  }
1180}
1181defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1182                                       avx512vl_f32_info>;
1183defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1184                                       avx512vl_f64_info>, REX_W;
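// Only the _ss multiclass above instantiates a 128-bit (Z128) form; there is no
// XMM-destination encoding of vbroadcastsd, and a 128-bit f64 splat is normally
// covered by movddup-style patterns instead.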
1185
1186multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1187                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1188                                    RegisterClass SrcRC> {
1189  // Fold with a mask even if it has multiple uses since it is cheap.
1190  let ExeDomain = _.ExeDomain in
1191  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1192                          (ins SrcRC:$src),
1193                          "vpbroadcast"#_.Suffix, "$src", "$src",
1194                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1195                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1196                          T8, PD, EVEX, Sched<[SchedRR]>;
1197}
1198
1199multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1200                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1201                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1202  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1203  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1204                         (outs _.RC:$dst), (ins GR32:$src),
1205                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1206                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1207                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1208                         "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
1209
1210  def : Pat <(_.VT (OpNode SrcRC:$src)),
1211             (!cast<Instruction>(Name#rr)
1212              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1213
1214  // Fold with a mask even if it has multiple uses since it is cheap.
1215  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1216             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1217              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1218
1219  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1220             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1221              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1222}
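// The instruction defined above always takes a GR32 operand (there is no
// GR8/GR16 encoding); the patterns wrap a GR8/GR16 source in INSERT_SUBREG over
// an IMPLICIT_DEF GR32 so it can feed that operand.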
1223
1224multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1225                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1226                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1227  let Predicates = [prd] in
1228    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1229              OpNode, SrcRC, Subreg>, EVEX_V512;
1230  let Predicates = [prd, HasVLX] in {
1231    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1232              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1233    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1234              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1235  }
1236}
1237
1238multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1239                                       SDPatternOperator OpNode,
1240                                       RegisterClass SrcRC, Predicate prd> {
1241  let Predicates = [prd] in
1242    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1243                                      SrcRC>, EVEX_V512;
1244  let Predicates = [prd, HasVLX] in {
1245    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1246                                         SrcRC>, EVEX_V256;
1247    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1248                                         SrcRC>, EVEX_V128;
1249  }
1250}
1251
1252defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1253                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1254defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1255                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1256                       HasBWI>;
1257defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1258                                                 X86VBroadcast, GR32, HasAVX512>;
1259defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1260                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;
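// Example of the resulting GPR-source forms (AT&T syntax):
//   vpbroadcastd %eax, %zmm0 {%k1}        splat a GR32 under a write-mask
//   vpbroadcastq %rax, %zmm1 {%k2} {z}    splat a GR64 with zeroing-masking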
1261
1262multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1263                                      AVX512VLVectorVTInfo _, Predicate prd,
1264                                      bit IsConvertibleToThreeAddress> {
1265  let Predicates = [prd] in {
1266    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1267                                   WriteShuffle256Ld, _.info512, _.info128,
1268                                   IsConvertibleToThreeAddress>,
1269                                  EVEX_V512;
1270  }
1271  let Predicates = [prd, HasVLX] in {
1272    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1273                                    WriteShuffle256Ld, _.info256, _.info128,
1274                                    IsConvertibleToThreeAddress>,
1275                                 EVEX_V256;
1276    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1277                                    WriteShuffleXLd, _.info128, _.info128,
1278                                    IsConvertibleToThreeAddress>,
1279                                 EVEX_V128;
1280  }
1281}
1282
1283defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1284                                           avx512vl_i8_info, HasBWI, 0>;
1285defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1286                                           avx512vl_i16_info, HasBWI, 0>;
1287defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1288                                           avx512vl_i32_info, HasAVX512, 1>;
1289defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1290                                           avx512vl_i64_info, HasAVX512, 1>, REX_W;
1291
1292multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1293                                      SDPatternOperator OpNode,
1294                                      X86VectorVTInfo _Dst,
1295                                      X86VectorVTInfo _Src> {
1296  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1297                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1298                           (_Dst.VT (OpNode addr:$src))>,
1299                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1300                           AVX5128IBase, EVEX;
1301}
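// An instantiation of this multiclass such as VBROADCASTI32X4Z yields, e.g.
// (AT&T syntax):
//   vbroadcasti32x4 (%rdi), %zmm0 {%k1}
// which loads 128 bits and repeats them into every 128-bit chunk of the
// destination, under the write-mask.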
1302
1303// This should be used for the AVX512DQ subvector broadcast instructions. It
1304// disables the unmasked patterns so that the DQ instructions are only used
1305// when masking is requested.
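// Concretely, an unmasked subvector broadcast of any element type can always be
// selected as one of the AVX512F 32x4/64x4 forms (see the patterns further
// down), so the DQ-only 64x2/32x8 variants are kept for the masked cases where
// their element width actually matters.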
1306multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1307                                         SDPatternOperator OpNode,
1308                                         X86VectorVTInfo _Dst,
1309                                         X86VectorVTInfo _Src> {
1310  let hasSideEffects = 0, mayLoad = 1 in
1311  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1312                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1313                           (null_frag),
1314                           (_Dst.VT (OpNode addr:$src))>,
1315                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1316                           AVX5128IBase, EVEX;
1317}
1318let Predicates = [HasBWI] in {
1319  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1320            (VPBROADCASTWZrm addr:$src)>;
1321
1322  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1323            (VPBROADCASTWZrr VR128X:$src)>;
1324  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1325            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1326}
1327let Predicates = [HasVLX, HasBWI] in {
1328  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1329            (VPBROADCASTWZ128rm addr:$src)>;
1330  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1331            (VPBROADCASTWZ256rm addr:$src)>;
1332
1333  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1334            (VPBROADCASTWZ128rr VR128X:$src)>;
1335  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1336            (VPBROADCASTWZ256rr VR128X:$src)>;
1337
1338  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1339            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1340  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1341            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1342}
1343
1344//===----------------------------------------------------------------------===//
1345// AVX-512 BROADCAST SUBVECTORS
1346//
1347
1348defm VBROADCASTI32X4Z : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1349                        X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1350                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1351defm VBROADCASTF32X4Z : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1352                        X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1353                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1354defm VBROADCASTI64X4Z : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1355                        X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1356                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1357defm VBROADCASTF64X4Z : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1358                        X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1359                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1360
1361let Predicates = [HasAVX512] in {
1362def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1363          (VBROADCASTF64X4Zrm addr:$src)>;
1364def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1365          (VBROADCASTF64X4Zrm addr:$src)>;
1366def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1367          (VBROADCASTF64X4Zrm addr:$src)>;
1368def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1369          (VBROADCASTI64X4Zrm addr:$src)>;
1370def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1371          (VBROADCASTI64X4Zrm addr:$src)>;
1372def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1373          (VBROADCASTI64X4Zrm addr:$src)>;
1374def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1375          (VBROADCASTI64X4Zrm addr:$src)>;
1376
1377def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1378          (VBROADCASTF32X4Zrm addr:$src)>;
1379def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1380          (VBROADCASTF32X4Zrm addr:$src)>;
1381def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1382          (VBROADCASTF32X4Zrm addr:$src)>;
1383def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1384          (VBROADCASTI32X4Zrm addr:$src)>;
1385def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1386          (VBROADCASTI32X4Zrm addr:$src)>;
1387def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1388          (VBROADCASTI32X4Zrm addr:$src)>;
1389def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1390          (VBROADCASTI32X4Zrm addr:$src)>;
1391
1392// Patterns for selects of bitcasted operations.
1393def : Pat<(vselect_mask VK16WM:$mask,
1394                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1395                        (v16f32 immAllZerosV)),
1396          (VBROADCASTF32X4Zrmkz VK16WM:$mask, addr:$src)>;
1397def : Pat<(vselect_mask VK16WM:$mask,
1398                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1399                        VR512:$src0),
1400          (VBROADCASTF32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1401def : Pat<(vselect_mask VK16WM:$mask,
1402                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1403                        (v16i32 immAllZerosV)),
1404          (VBROADCASTI32X4Zrmkz VK16WM:$mask, addr:$src)>;
1405def : Pat<(vselect_mask VK16WM:$mask,
1406                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1407                        VR512:$src0),
1408          (VBROADCASTI32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1409
1410def : Pat<(vselect_mask VK8WM:$mask,
1411                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1412                        (v8f64 immAllZerosV)),
1413          (VBROADCASTF64X4Zrmkz VK8WM:$mask, addr:$src)>;
1414def : Pat<(vselect_mask VK8WM:$mask,
1415                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1416                        VR512:$src0),
1417          (VBROADCASTF64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1418def : Pat<(vselect_mask VK8WM:$mask,
1419                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1420                        (v8i64 immAllZerosV)),
1421          (VBROADCASTI64X4Zrmkz VK8WM:$mask, addr:$src)>;
1422def : Pat<(vselect_mask VK8WM:$mask,
1423                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1424                        VR512:$src0),
1425          (VBROADCASTI64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1426}
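// The bitcast-select patterns in the block above handle the case where type
// legalization produced the subvector broadcast with one element type (e.g.
// v8f64) while the surrounding masked select uses another (e.g. v16f32 under a
// 16-bit mask); the combination still folds into a single masked
// VBROADCAST*Zrmk / VBROADCAST*Zrmkz.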
1427
1428let Predicates = [HasVLX] in {
1429defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1430                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1431                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1432defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1433                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1434                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1435
1436def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1437          (VBROADCASTF32X4Z256rm addr:$src)>;
1438def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1439          (VBROADCASTF32X4Z256rm addr:$src)>;
1440def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1441          (VBROADCASTF32X4Z256rm addr:$src)>;
1442def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1443          (VBROADCASTI32X4Z256rm addr:$src)>;
1444def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1445          (VBROADCASTI32X4Z256rm addr:$src)>;
1446def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1447          (VBROADCASTI32X4Z256rm addr:$src)>;
1448def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1449          (VBROADCASTI32X4Z256rm addr:$src)>;
1450
1451// Patterns for selects of bitcasted operations.
1452def : Pat<(vselect_mask VK8WM:$mask,
1453                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1454                        (v8f32 immAllZerosV)),
1455          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1456def : Pat<(vselect_mask VK8WM:$mask,
1457                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1458                        VR256X:$src0),
1459          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1460def : Pat<(vselect_mask VK8WM:$mask,
1461                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1462                        (v8i32 immAllZerosV)),
1463          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1464def : Pat<(vselect_mask VK8WM:$mask,
1465                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1466                        VR256X:$src0),
1467          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1468}
1469
1470let Predicates = [HasBF16] in {
1471  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1472            (VBROADCASTF64X4Zrm addr:$src)>;
1473  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1474            (VBROADCASTF32X4Zrm addr:$src)>;
1475}
1476
1477let Predicates = [HasBF16, HasVLX] in
1478  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1479            (VBROADCASTF32X4Z256rm addr:$src)>;
1480
1481let Predicates = [HasVLX, HasDQI] in {
1482defm VBROADCASTI64X2Z256 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1483                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1484                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1485defm VBROADCASTF64X2Z256 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1486                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1487                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1488
1489// Patterns for selects of bitcasted operations.
1490def : Pat<(vselect_mask VK4WM:$mask,
1491                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1492                        (v4f64 immAllZerosV)),
1493          (VBROADCASTF64X2Z256rmkz VK4WM:$mask, addr:$src)>;
1494def : Pat<(vselect_mask VK4WM:$mask,
1495                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1496                        VR256X:$src0),
1497          (VBROADCASTF64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1498def : Pat<(vselect_mask VK4WM:$mask,
1499                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1500                        (v4i64 immAllZerosV)),
1501          (VBROADCASTI64X2Z256rmkz VK4WM:$mask, addr:$src)>;
1502def : Pat<(vselect_mask VK4WM:$mask,
1503                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1504                        VR256X:$src0),
1505          (VBROADCASTI64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1506}
1507
1508let Predicates = [HasDQI] in {
1509defm VBROADCASTI64X2Z : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1510                        X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1511                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1512defm VBROADCASTI32X8Z : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1513                        X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1514                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1515defm VBROADCASTF64X2Z : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1516                        X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1517                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1518defm VBROADCASTF32X8Z : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1519                        X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1520                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1521
1522// Patterns for selects of bitcasted operations.
1523def : Pat<(vselect_mask VK16WM:$mask,
1524                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1525                        (v16f32 immAllZerosV)),
1526          (VBROADCASTF32X8Zrmkz VK16WM:$mask, addr:$src)>;
1527def : Pat<(vselect_mask VK16WM:$mask,
1528                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1529                        VR512:$src0),
1530          (VBROADCASTF32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1531def : Pat<(vselect_mask VK16WM:$mask,
1532                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1533                        (v16i32 immAllZerosV)),
1534          (VBROADCASTI32X8Zrmkz VK16WM:$mask, addr:$src)>;
1535def : Pat<(vselect_mask VK16WM:$mask,
1536                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1537                        VR512:$src0),
1538          (VBROADCASTI32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1539
1540def : Pat<(vselect_mask VK8WM:$mask,
1541                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1542                        (v8f64 immAllZerosV)),
1543          (VBROADCASTF64X2Zrmkz VK8WM:$mask, addr:$src)>;
1544def : Pat<(vselect_mask VK8WM:$mask,
1545                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1546                        VR512:$src0),
1547          (VBROADCASTF64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1548def : Pat<(vselect_mask VK8WM:$mask,
1549                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1550                        (v8i64 immAllZerosV)),
1551          (VBROADCASTI64X2Zrmkz VK8WM:$mask, addr:$src)>;
1552def : Pat<(vselect_mask VK8WM:$mask,
1553                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1554                        VR512:$src0),
1555          (VBROADCASTI64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1556}
1557
1558multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1559                                        AVX512VLVectorVTInfo _Dst,
1560                                        AVX512VLVectorVTInfo _Src> {
1561  let Predicates = [HasDQI] in
1562    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1563                                          WriteShuffle256Ld, _Dst.info512,
1564                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1565                                          EVEX_V512;
1566  let Predicates = [HasDQI, HasVLX] in
1567    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1568                                          WriteShuffle256Ld, _Dst.info256,
1569                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1570                                          EVEX_V256;
1571}
1572
1573multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1574                                         AVX512VLVectorVTInfo _Dst,
1575                                         AVX512VLVectorVTInfo _Src> :
1576  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1577
1578  let Predicates = [HasDQI, HasVLX] in
1579    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1580                                          WriteShuffleXLd, _Dst.info128,
1581                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1582                                          EVEX_V128;
1583}
1584
1585defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1586                                          avx512vl_i32_info, avx512vl_i64_info>;
1587defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1588                                          avx512vl_f32_info, avx512vl_f64_info>;
1589
1590//===----------------------------------------------------------------------===//
1591// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1592//---
1593multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1594                                  X86VectorVTInfo _, RegisterClass KRC> {
1595  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1596                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1597                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1598                  EVEX, Sched<[WriteShuffle]>;
1599}
1600
1601multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1602                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1603  let Predicates = [HasCDI] in
1604    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1605  let Predicates = [HasCDI, HasVLX] in {
1606    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1607    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1608  }
1609}
1610
1611defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1612                                               avx512vl_i32_info, VK16>;
1613defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1614                                               avx512vl_i64_info, VK8>, REX_W;
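// e.g. vpbroadcastmw2d %k1, %zmm0 writes the 16-bit value of %k1, zero-extended,
// into every doubleword element of %zmm0 (and vpbroadcastmb2q is the byte to
// quadword equivalent).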
1615
1616//===----------------------------------------------------------------------===//
1617// -- VPERMI2 - three-source-operand form --
1618multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1619                         X86FoldableSchedWrite sched,
1620                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1621let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1622    hasSideEffects = 0 in {
1623  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1624          (ins _.RC:$src2, _.RC:$src3),
1625          OpcodeStr, "$src3, $src2", "$src2, $src3",
1626          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1627          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1628
1629  let mayLoad = 1 in
1630  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1631            (ins _.RC:$src2, _.MemOp:$src3),
1632            OpcodeStr, "$src3, $src2", "$src2, $src3",
1633            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1634                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1635            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1636  }
1637}
1638
1639multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1640                            X86FoldableSchedWrite sched,
1641                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1642  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1643      hasSideEffects = 0, mayLoad = 1 in
1644  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1645              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1646              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1647              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1648              (_.VT (X86VPermt2 _.RC:$src2,
1649               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1650              AVX5128IBase, EVEX, VVVV, EVEX_B,
1651              Sched<[sched.Folded, sched.ReadAfterFold]>;
1652}
1653
1654multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1655                               X86FoldableSchedWrite sched,
1656                               AVX512VLVectorVTInfo VTInfo,
1657                               AVX512VLVectorVTInfo ShuffleMask> {
1658  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1659                             ShuffleMask.info512>,
1660               avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1661                                ShuffleMask.info512>, EVEX_V512;
1662  let Predicates = [HasVLX] in {
1663  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1664                                ShuffleMask.info128>,
1665                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1666                                   ShuffleMask.info128>, EVEX_V128;
1667  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1668                                ShuffleMask.info256>,
1669                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1670                                   ShuffleMask.info256>, EVEX_V256;
1671  }
1672}
1673
1674multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1675                                  X86FoldableSchedWrite sched,
1676                                  AVX512VLVectorVTInfo VTInfo,
1677                                  AVX512VLVectorVTInfo Idx,
1678                                  Predicate Prd> {
1679  let Predicates = [Prd] in
1680  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1681                             Idx.info512>, EVEX_V512;
1682  let Predicates = [Prd, HasVLX] in {
1683  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1684                                Idx.info128>, EVEX_V128;
1685  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1686                                Idx.info256>,  EVEX_V256;
1687  }
1688}
1689
1690defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1691                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1692defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1693                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1694defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1695                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1696                  REX_W, EVEX_CD8<16, CD8VF>;
1697defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1698                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1699                  EVEX_CD8<8, CD8VF>;
1700defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1701                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1702defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1703                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
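// In the VPERMI2 form the tied operand ($src1) supplies the indices and is
// overwritten with the result, e.g. (AT&T syntax):
//   vpermi2d %zmm3, %zmm2, %zmm1    indices in %zmm1, data in %zmm2/%zmm3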
1704
1705// Extra patterns to handle the additional bitcasts that show up when the
1706// passthru and index operands have different types in the FP versions.
1707multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1708                                  X86VectorVTInfo IdxVT,
1709                                  X86VectorVTInfo CastVT> {
1710  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1711                                (X86VPermt2 (_.VT _.RC:$src2),
1712                                            (IdxVT.VT (bitconvert
1713                                                       (CastVT.VT _.RC:$src1))),
1714                                            _.RC:$src3),
1715                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1716            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1717                                                _.RC:$src2, _.RC:$src3)>;
1718  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1719                                (X86VPermt2 _.RC:$src2,
1720                                            (IdxVT.VT (bitconvert
1721                                                       (CastVT.VT _.RC:$src1))),
1722                                            (_.LdFrag addr:$src3)),
1723                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1724            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1725                                                _.RC:$src2, addr:$src3)>;
1726  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1727                                 (X86VPermt2 _.RC:$src2,
1728                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1729                                             (_.BroadcastLdFrag addr:$src3)),
1730                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1731            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1732                                                 _.RC:$src2, addr:$src3)>;
1733}
1734
1735// TODO: Should we add more casts? The vXi64 case is common due to ABI.
1736defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1737defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1738defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
1739
1740// VPERMT2
1741multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1742                         X86FoldableSchedWrite sched,
1743                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1744let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1745  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1746          (ins IdxVT.RC:$src2, _.RC:$src3),
1747          OpcodeStr, "$src3, $src2", "$src2, $src3",
1748          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1749          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1750
1751  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1752            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1753            OpcodeStr, "$src3, $src2", "$src2, $src3",
1754            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1755                   (_.LdFrag addr:$src3))), 1>,
1756            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1757  }
1758}
1759multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1760                            X86FoldableSchedWrite sched,
1761                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1762  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1763  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1764              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1765              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1766              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1767              (_.VT (X86VPermt2 _.RC:$src1,
1768               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1769              AVX5128IBase, EVEX, VVVV, EVEX_B,
1770              Sched<[sched.Folded, sched.ReadAfterFold]>;
1771}
1772
1773multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1774                               X86FoldableSchedWrite sched,
1775                               AVX512VLVectorVTInfo VTInfo,
1776                               AVX512VLVectorVTInfo ShuffleMask> {
1777  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1778                             ShuffleMask.info512>,
1779               avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1780                                ShuffleMask.info512>, EVEX_V512;
1781  let Predicates = [HasVLX] in {
1782  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1783                                ShuffleMask.info128>,
1784                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1785                                   ShuffleMask.info128>, EVEX_V128;
1786  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1787                                ShuffleMask.info256>,
1788                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1789                                    ShuffleMask.info256>, EVEX_V256;
1790  }
1791}
1792
1793multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1794                                  X86FoldableSchedWrite sched,
1795                                  AVX512VLVectorVTInfo VTInfo,
1796                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1797  let Predicates = [Prd] in
1798  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1799                             Idx.info512>, EVEX_V512;
1800  let Predicates = [Prd, HasVLX] in {
1801  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1802                                Idx.info128>, EVEX_V128;
1803  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1804                                Idx.info256>, EVEX_V256;
1805  }
1806}
1807
1808defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1809                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1810defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1811                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1812defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1813                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1814                  REX_W, EVEX_CD8<16, CD8VF>;
1815defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1816                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1817                  EVEX_CD8<8, CD8VF>;
1818defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1819                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1820defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1821                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
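// VPERMT2 is the same shuffle with the operand roles swapped: the tied operand
// is a data source and the indices come from the second operand, e.g.
//   vpermt2d %zmm3, %zmm2, %zmm1    indices in %zmm2, data in %zmm1/%zmm3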
1822
1823//===----------------------------------------------------------------------===//
1824// AVX-512 - BLEND using mask
1825//
1826
1827multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1828                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1829  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1830  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1831             (ins _.RC:$src1, _.RC:$src2),
1832             !strconcat(OpcodeStr,
1833             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1834             EVEX, VVVV, Sched<[sched]>;
1835  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1836             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1837             !strconcat(OpcodeStr,
1838             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1839             []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1840  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1841             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1842             !strconcat(OpcodeStr,
1843             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1844             []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
1845  let mayLoad = 1 in {
1846  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1847             (ins _.RC:$src1, _.MemOp:$src2),
1848             !strconcat(OpcodeStr,
1849             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1850             []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1851             Sched<[sched.Folded, sched.ReadAfterFold]>;
1852  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1853             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1854             !strconcat(OpcodeStr,
1855             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1856             []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1857             Sched<[sched.Folded, sched.ReadAfterFold]>;
1858  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1859             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1860             !strconcat(OpcodeStr,
1861             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1862             []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1863             Sched<[sched.Folded, sched.ReadAfterFold]>;
1864  }
1865  }
1866}
1867multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1868                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1869  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1870  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1871      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1872       !strconcat(OpcodeStr,
1873            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1874            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1875      EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1876      Sched<[sched.Folded, sched.ReadAfterFold]>;
1877
1878  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1879      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1880       !strconcat(OpcodeStr,
1881            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1882            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1883      EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1884      Sched<[sched.Folded, sched.ReadAfterFold]>;
1885
1886  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1887      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1888       !strconcat(OpcodeStr,
1889            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1890            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1891      EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1892      Sched<[sched.Folded, sched.ReadAfterFold]>;
1893  }
1894}
1895
1896multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1897                        AVX512VLVectorVTInfo VTInfo> {
1898  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1899           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1900                                 EVEX_V512;
1901
1902  let Predicates = [HasVLX] in {
1903    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1904                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1905                                      EVEX_V256;
1906    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1907                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1908                                      EVEX_V128;
1909  }
1910}
1911
1912multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1913                        AVX512VLVectorVTInfo VTInfo> {
1914  let Predicates = [HasBWI] in
1915    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1916                               EVEX_V512;
1917
1918  let Predicates = [HasBWI, HasVLX] in {
1919    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1920                                  EVEX_V256;
1921    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1922                                  EVEX_V128;
1923  }
1924}
1925
1926defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1927                              avx512vl_f32_info>;
1928defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1929                              avx512vl_f64_info>, REX_W;
1930defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1931                              avx512vl_i32_info>;
1932defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1933                              avx512vl_i64_info>, REX_W;
1934defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1935                              avx512vl_i8_info>;
1936defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1937                              avx512vl_i16_info>, REX_W;
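// The D/Q/PS/PD blends get rr/rrk/rrkz, rm/rmk/rmkz and rmb/rmbk/rmbkz forms,
// while the B/W blends have no broadcast-memory form. As an illustration,
//   vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
// takes each dword from %zmm2 where the mask bit is set and from %zmm1 otherwise.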
1938
1939//===----------------------------------------------------------------------===//
1940// Compare Instructions
1941//===----------------------------------------------------------------------===//
1942
1943// avx512_cmp_scalar - AVX512 CMPSS, CMPSD and CMPSH
1944
1945multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1946                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1947                             X86FoldableSchedWrite sched> {
1948  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1949                                   (outs _.KRC:$dst),
1950                                   (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1951                                   "vcmp"#_.Suffix,
1952                                   "$cc, $src2, $src1", "$src1, $src2, $cc",
1953                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1954                                   (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 0, "_Int">,
1955                                   EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1956  let mayLoad = 1 in
1957  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1958                                   (outs _.KRC:$dst),
1959                                   (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1960                                   "vcmp"#_.Suffix,
1961                                   "$cc, $src2, $src1", "$src1, $src2, $cc",
1962                                   (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1963                                       timm:$cc),
1964                                   (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1965                                       timm:$cc), 0, "_Int">, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1966                                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1967
1968  let Uses = [MXCSR] in
1969  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1970                                    (outs _.KRC:$dst),
1971                                    (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1972                                    "vcmp"#_.Suffix,
1973                                    "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1974                                    (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1975                                               timm:$cc),
1976                                    (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1977                                                  timm:$cc), 0, "_Int">,
1978                                    EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1979
1980  let isCodeGenOnly = 1 in {
1981    let isCommutable = 1 in
1982    def rri : AVX512Ii8<0xC2, MRMSrcReg,
1983                        (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1984                        !strconcat("vcmp", _.Suffix,
1985                                   "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1986                        [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1987                                                  _.FRC:$src2,
1988                                                  timm:$cc))]>,
1989                        EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1990    def rmi : AVX512Ii8<0xC2, MRMSrcMem,
1991                        (outs _.KRC:$dst),
1992                        (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1993                        !strconcat("vcmp", _.Suffix,
1994                                   "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1995                        [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1996                                                  (_.ScalarLdFrag addr:$src2),
1997                                                  timm:$cc))]>,
1998                        EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1999                        Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2000  }
2001}
2002
2003let Predicates = [HasAVX512] in {
2004  let ExeDomain = SSEPackedSingle in
2005  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2006                                   X86cmpms_su, X86cmpmsSAE_su,
2007                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2008  let ExeDomain = SSEPackedDouble in
2009  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2010                                   X86cmpms_su, X86cmpmsSAE_su,
2011                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2012}
2013let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2014  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2015                                   X86cmpms_su, X86cmpmsSAE_su,
2016                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
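// Illustrative AT&T forms produced by the scalar compares above:
//   vcmpss $0, %xmm1, %xmm0, %k1          (predicate in the immediate)
//   vcmpss $0, {sae}, %xmm1, %xmm0, %k1   (SAE variant)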
2017
2018multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2019                              X86FoldableSchedWrite sched,
2020                              X86VectorVTInfo _, bit IsCommutable> {
2021  let isCommutable = IsCommutable, hasSideEffects = 0 in
2022  def rr : AVX512BI<opc, MRMSrcReg,
2023             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2024             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2025             []>, EVEX, VVVV, Sched<[sched]>;
2026  let mayLoad = 1, hasSideEffects = 0 in
2027  def rm : AVX512BI<opc, MRMSrcMem,
2028             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2029             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2030             []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2031  let isCommutable = IsCommutable, hasSideEffects = 0 in
2032  def rrk : AVX512BI<opc, MRMSrcReg,
2033              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2034              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2035                          "$dst {${mask}}, $src1, $src2}"),
2036              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2037  let mayLoad = 1, hasSideEffects = 0 in
2038  def rmk : AVX512BI<opc, MRMSrcMem,
2039              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2040              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2041                          "$dst {${mask}}, $src1, $src2}"),
2042              []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2043}
2044
2045multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2046                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2047                                  bit IsCommutable> :
2048           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2049  let mayLoad = 1, hasSideEffects = 0 in {
2050  def rmb : AVX512BI<opc, MRMSrcMem,
2051              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2052              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2053                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2054              []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2055  def rmbk : AVX512BI<opc, MRMSrcMem,
2056               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2057                                       _.ScalarMemOp:$src2),
2058               !strconcat(OpcodeStr,
2059                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2060                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2061               []>, EVEX, VVVV, EVEX_K, EVEX_B,
2062               Sched<[sched.Folded, sched.ReadAfterFold]>;
2063  }
2064}
2065
2066multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2067                                 X86SchedWriteWidths sched,
2068                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2069                                 bit IsCommutable = 0> {
2070  let Predicates = [prd] in
2071  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2072                              VTInfo.info512, IsCommutable>, EVEX_V512;
2073
2074  let Predicates = [prd, HasVLX] in {
2075    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2076                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2077    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2078                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2079  }
2080}
2081
2082multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2083                                     X86SchedWriteWidths sched,
2084                                     AVX512VLVectorVTInfo VTInfo,
2085                                     Predicate prd, bit IsCommutable = 0> {
2086  let Predicates = [prd] in
2087  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2088                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2089
2090  let Predicates = [prd, HasVLX] in {
2091    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2092                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2093    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2094                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2095  }
2096}
2097
2098// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2099// increase the pattern complexity the way an immediate would.
2100let AddedComplexity = 2 in {
2101// FIXME: Is there a better scheduler class for VPCMP?
2102defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2103                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2104                EVEX_CD8<8, CD8VF>, WIG;
2105
2106defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2107                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2108                EVEX_CD8<16, CD8VF>, WIG;
2109
2110defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2111                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2112                EVEX_CD8<32, CD8VF>;
2113
2114defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2115                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2116                T8, REX_W, EVEX_CD8<64, CD8VF>;
2117
2118defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2119                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2120                EVEX_CD8<8, CD8VF>, WIG;
2121
2122defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2123                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2124                EVEX_CD8<16, CD8VF>, WIG;
2125
2126defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2127                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2128                EVEX_CD8<32, CD8VF>;
2129
2130defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2131                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2132                T8, REX_W, EVEX_CD8<64, CD8VF>;
2133}
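// For example (illustrative): vpcmpeqd %zmm1, %zmm0, %k1 sets one bit per
// element, and vpcmpeqd %zmm1, %zmm0, %k1 {%k2} additionally ANDs the result
// with the %k2 write mask.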
2134
2135multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2136                          PatFrag Frag_su,
2137                          X86FoldableSchedWrite sched,
2138                          X86VectorVTInfo _, string Name> {
2139  let isCommutable = 1 in
2140  def rri : AVX512AIi8<opc, MRMSrcReg,
2141             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2142             !strconcat("vpcmp", Suffix,
2143                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2144             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2145                                                (_.VT _.RC:$src2),
2146                                                cond)))]>,
2147             EVEX, VVVV, Sched<[sched]>;
2148  def rmi : AVX512AIi8<opc, MRMSrcMem,
2149             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2150             !strconcat("vpcmp", Suffix,
2151                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2152             [(set _.KRC:$dst, (_.KVT
2153                                (Frag:$cc
2154                                 (_.VT _.RC:$src1),
2155                                 (_.VT (_.LdFrag addr:$src2)),
2156                                 cond)))]>,
2157             EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2158  let isCommutable = 1 in
2159  def rrik : AVX512AIi8<opc, MRMSrcReg,
2160              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2161                                      u8imm:$cc),
2162              !strconcat("vpcmp", Suffix,
2163                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2164                         "$dst {${mask}}, $src1, $src2, $cc}"),
2165              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2166                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2167                                                         (_.VT _.RC:$src2),
2168                                                         cond))))]>,
2169              EVEX, VVVV, EVEX_K, Sched<[sched]>;
2170  def rmik : AVX512AIi8<opc, MRMSrcMem,
2171              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2172                                    u8imm:$cc),
2173              !strconcat("vpcmp", Suffix,
2174                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2175                         "$dst {${mask}}, $src1, $src2, $cc}"),
2176              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2177                                     (_.KVT
2178                                      (Frag_su:$cc
2179                                       (_.VT _.RC:$src1),
2180                                       (_.VT (_.LdFrag addr:$src2)),
2181                                       cond))))]>,
2182              EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2183
2184  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2185                             (_.VT _.RC:$src1), cond)),
2186            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2187             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2188
2189  def : Pat<(and _.KRCWM:$mask,
2190                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2191                                     (_.VT _.RC:$src1), cond))),
2192            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2193             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2194             (X86pcmpm_imm_commute $cc))>;
2195}
2196
2197multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2198                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2199                              X86VectorVTInfo _, string Name> :
2200           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2201  def rmbi : AVX512AIi8<opc, MRMSrcMem,
2202             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2203                                     u8imm:$cc),
2204             !strconcat("vpcmp", Suffix,
2205                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2206                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2207             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2208                                       (_.VT _.RC:$src1),
2209                                       (_.BroadcastLdFrag addr:$src2),
2210                                       cond)))]>,
2211             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2212  def rmbik : AVX512AIi8<opc, MRMSrcMem,
2213              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2214                                       _.ScalarMemOp:$src2, u8imm:$cc),
2215              !strconcat("vpcmp", Suffix,
2216                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2217                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2218              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2219                                     (_.KVT (Frag_su:$cc
2220                                             (_.VT _.RC:$src1),
2221                                             (_.BroadcastLdFrag addr:$src2),
2222                                             cond))))]>,
2223              EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2224
2225  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2226                    (_.VT _.RC:$src1), cond)),
2227            (!cast<Instruction>(Name#_.ZSuffix#"rmbi")
2228             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2229
2230  def : Pat<(and _.KRCWM:$mask,
2231                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2232                                     (_.VT _.RC:$src1), cond))),
2233            (!cast<Instruction>(Name#_.ZSuffix#"rmbik")
2234             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2235             (X86pcmpm_imm_commute $cc))>;
2236}
2237
2238multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2239                             PatFrag Frag_su, X86SchedWriteWidths sched,
2240                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2241  let Predicates = [prd] in
2242  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2243                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2244
2245  let Predicates = [prd, HasVLX] in {
2246    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2247                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2248    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2249                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2250  }
2251}
2252
2253multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2254                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2255                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2256  let Predicates = [prd] in
2257  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2258                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2259
2260  let Predicates = [prd, HasVLX] in {
2261    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2262                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2263    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2264                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2265  }
2266}
2267
2268// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2269defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2270                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2271                                EVEX_CD8<8, CD8VF>;
2272defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2273                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2274                                 EVEX_CD8<8, CD8VF>;
2275
2276defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2277                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2278                                REX_W, EVEX_CD8<16, CD8VF>;
2279defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2280                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2281                                 REX_W, EVEX_CD8<16, CD8VF>;
2282
2283defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2284                                    SchedWriteVecALU, avx512vl_i32_info,
2285                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2286defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2287                                     SchedWriteVecALU, avx512vl_i32_info,
2288                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2289
2290defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2291                                    SchedWriteVecALU, avx512vl_i64_info,
2292                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2293defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2294                                     SchedWriteVecALU, avx512vl_i64_info,
2295                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
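// Illustrative forms for the immediate-predicate compares above:
//   vpcmpd  $1, %zmm1, %zmm0, %k1   (signed,   predicate 1 = LT)
//   vpcmpud $6, %zmm1, %zmm0, %k1   (unsigned, predicate 6 = NLE)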
2296
2297multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2298                              string Name> {
2299let Uses = [MXCSR], mayRaiseFPException = 1 in {
2300  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2301                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2302                   "vcmp"#_.Suffix,
2303                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2304                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2305                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2306                   1>, Sched<[sched]>;
2307
2308  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2309                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2310                "vcmp"#_.Suffix,
2311                "$cc, $src2, $src1", "$src1, $src2, $cc",
2312                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2313                             timm:$cc),
2314                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2315                            timm:$cc)>,
2316                Sched<[sched.Folded, sched.ReadAfterFold]>;
2317
2318  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2319                (outs _.KRC:$dst),
2320                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2321                "vcmp"#_.Suffix,
2322                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2323                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2324                (X86any_cmpm (_.VT _.RC:$src1),
2325                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2326                             timm:$cc),
2327                (X86cmpm_su (_.VT _.RC:$src1),
2328                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2329                            timm:$cc)>,
2330                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2331  }
2332
2333  // Patterns for selecting when the load is in the other operand.
2334  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2335                         timm:$cc),
2336            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2337                                                      (X86cmpm_imm_commute timm:$cc))>;
2338
2339  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2340                                            (_.VT _.RC:$src1),
2341                                            timm:$cc)),
2342            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2343                                                       _.RC:$src1, addr:$src2,
2344                                                       (X86cmpm_imm_commute timm:$cc))>;
2345
2346  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2347                         (_.VT _.RC:$src1), timm:$cc),
2348            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2349                                                       (X86cmpm_imm_commute timm:$cc))>;
2350
2351  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2352                                            (_.VT _.RC:$src1),
2353                                            timm:$cc)),
2354            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2355                                                        _.RC:$src1, addr:$src2,
2356                                                        (X86cmpm_imm_commute timm:$cc))>;
2357
2358  // Patterns for mask intrinsics.
2359  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2360                      (_.KVT immAllOnesV)),
2361            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2362
2363  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2364            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2365                                                       _.RC:$src2, timm:$cc)>;
2366
2367  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2368                      (_.KVT immAllOnesV)),
2369            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2370
2371  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2372                      _.KRCWM:$mask),
2373            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2374                                                       addr:$src2, timm:$cc)>;
2375
2376  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2377                      (_.KVT immAllOnesV)),
2378            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2379
2380  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2381                      _.KRCWM:$mask),
2382            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2383                                                        addr:$src2, timm:$cc)>;
2384
2385  // Patterns for mask intrinsics with the load in the other operand.
2386  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2387                      (_.KVT immAllOnesV)),
2388            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2389                                                      (X86cmpm_imm_commute timm:$cc))>;
2390
2391  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2392                      _.KRCWM:$mask),
2393            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2394                                                       _.RC:$src1, addr:$src2,
2395                                                       (X86cmpm_imm_commute timm:$cc))>;
2396
2397  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2398                      (_.KVT immAllOnesV)),
2399            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2400                                                       (X86cmpm_imm_commute timm:$cc))>;
2401
2402  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2403                      _.KRCWM:$mask),
2404            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2405                                                        _.RC:$src1, addr:$src2,
2406                                                        (X86cmpm_imm_commute  timm:$cc))>;
2407}
2408
2409multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2410  // Comparison code form (VCMP[EQ/LT/LE/...]) with {sae}.
2411  let Uses = [MXCSR] in
2412  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2413                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2414                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2415                     "vcmp"#_.Suffix,
2416                     "$cc, {sae}, $src2, $src1",
2417                     "$src1, $src2, {sae}, $cc",
2418                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2419                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2420                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2421                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2422                     EVEX_B, Sched<[sched]>;
2423}
2424
2425multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2426                       Predicate Pred = HasAVX512> {
2427  let Predicates = [Pred] in {
2428    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2429                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2430
2431  }
2432  let Predicates = [Pred,HasVLX] in {
2433   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2434   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2435  }
2436}
2437
2438defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2439                          AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2440defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2441                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2442defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2443                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
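// Illustrative packed forms: vcmpps $2, %zmm1, %zmm0, %k1 (predicate 2 = LE)
// and the embedded-broadcast form vcmpps $2, (%rdi){1to16}, %zmm0, %k1.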
2444
2445// Patterns to select fp compares with load as first operand.
2446let Predicates = [HasAVX512] in {
2447  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2448            (VCMPSDZrmi FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2449
2450  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2451            (VCMPSSZrmi FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2452}
2453
2454let Predicates = [HasFP16] in {
2455  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2456            (VCMPSHZrmi FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2457}
2458
2459// ----------------------------------------------------------------
2460// FPClass
2461
2462// Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm) or
2463//                                          mask = op(mem_scalar, imm).
2464multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2465                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2466                                 Predicate prd> {
2467  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2468      def ri : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2469                      (ins _.RC:$src1, i32u8imm:$src2),
2470                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2471                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2472                              (i32 timm:$src2)))]>,
2473                      Sched<[sched]>;
2474      def rik : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2475                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2476                      OpcodeStr#_.Suffix#
2477                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2478                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2479                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2480                                      (i32 timm:$src2))))]>,
2481                      EVEX_K, Sched<[sched]>;
2482    def mi : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2483                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2484                    OpcodeStr#_.Suffix#
2485                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2486                    [(set _.KRC:$dst,
2487                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2488                                        (i32 timm:$src2)))]>,
2489                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2490    def mik : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2491                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2492                    OpcodeStr#_.Suffix#
2493                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2494                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2495                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2496                            (i32 timm:$src2))))]>,
2497                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2498  }
2499}
2500
2501// Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm),
2502//                                          mask = fpclass(mem_vec, imm), or
2503//                                          mask = fpclass(broadcast(eltVt), imm).
2504multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2505                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2506                                 string mem, list<Register> _Uses = [MXCSR]>{
2507  let ExeDomain = _.ExeDomain, Uses = _Uses in {
2508  def ri : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2509                      (ins _.RC:$src1, i32u8imm:$src2),
2510                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2511                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2512                                       (i32 timm:$src2)))]>,
2513                      Sched<[sched]>;
2514  def rik : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2515                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2516                      OpcodeStr#_.Suffix#
2517                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2518                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2519                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2520                                       (i32 timm:$src2))))]>,
2521                      EVEX_K, Sched<[sched]>;
2522  def mi : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2523                    (ins _.MemOp:$src1, i32u8imm:$src2),
2524                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2525                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2526                    [(set _.KRC:$dst,(X86Vfpclass
2527                                     (_.VT (_.LdFrag addr:$src1)),
2528                                     (i32 timm:$src2)))]>,
2529                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2530  def mik : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2531                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2532                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2533                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2534                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2535                                  (_.VT (_.LdFrag addr:$src1)),
2536                                  (i32 timm:$src2))))]>,
2537                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2538  def mbi : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2539                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2540                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2541                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2542                                                  #_.BroadcastStr#", $src2}",
2543                    [(set _.KRC:$dst,(X86Vfpclass
2544                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2545                                     (i32 timm:$src2)))]>,
2546                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2547  def mbik : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2548                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2549                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2550                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2551                                                   _.BroadcastStr#", $src2}",
2552                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2553                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2554                                     (i32 timm:$src2))))]>,
2555                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2556  }
2557
2558  // Also accept the register and broadcast forms with the x, y, z suffix that
2559  // is otherwise only needed to disambiguate the memory form.
2560  def : InstAlias<OpcodeStr#_.Suffix#mem#
2561                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2562                  (!cast<Instruction>(NAME#"ri")
2563                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2564  def : InstAlias<OpcodeStr#_.Suffix#mem#
2565                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2566                  (!cast<Instruction>(NAME#"rik")
2567                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2568  def : InstAlias<OpcodeStr#_.Suffix#mem#
2569                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2570                  _.BroadcastStr#", $src2}",
2571                  (!cast<Instruction>(NAME#"mbi")
2572                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2573  def : InstAlias<OpcodeStr#_.Suffix#mem#
2574                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2575                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2576                  (!cast<Instruction>(NAME#"mbik")
2577                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2578}
2579
2580multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2581                                     bits<8> opc, X86SchedWriteWidths sched,
2582                                     Predicate prd>{
2583  let Predicates = [prd] in {
2584    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2585                                      _.info512, "z">, EVEX_V512;
2586  }
2587  let Predicates = [prd, HasVLX] in {
2588    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2589                                      _.info128, "x">, EVEX_V128;
2590    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2591                                      _.info256, "y">, EVEX_V256;
2592  }
2593}
2594
2595multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2596                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2597  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2598                                      sched, HasFP16>,
2599                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2600  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2601                                   sched.Scl, f16x_info, HasFP16>,
2602                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2603  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2604                                      sched, HasDQI>,
2605                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2606  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2607                                      sched, HasDQI>,
2608                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2609  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2610                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2611                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2612  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2613                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2614                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
2615}
2616
2617defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
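// Illustrative forms: vfpclassss $0x04, %xmm0, %k1 tests the scalar against
// the class set selected by the immediate, and vfpclassps $0x04, %zmm0, %k1
// does the same per element; memory forms carry an x/y/z width suffix.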
2618
2619//-----------------------------------------------------------------
2620// Mask register copy, including
2621// - copy between mask registers
2622// - load/store mask registers
2623// - copy from GPR to mask register and vice versa
2624//
2625multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2626                          string OpcodeStr, RegisterClass KRC, ValueType vvt,
2627                          X86MemOperand x86memop, string Suffix = ""> {
2628  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2629      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2630    def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2631                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2632                    Sched<[WriteMove]>;
2633  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2634                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2635                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
2636                  Sched<[WriteLoad]>, NoCD8;
2637  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2638                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2639                    [(store KRC:$src, addr:$dst)]>,
2640                  Sched<[WriteStore]>, NoCD8;
2641}
2642
2643multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2644                               string OpcodeStr, RegisterClass KRC,
2645                               RegisterClass GRC, string Suffix = ""> {
2646  let hasSideEffects = 0 in {
2647    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2648                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2649                    Sched<[WriteMove]>;
2650    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2651                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2652                    Sched<[WriteMove]>;
2653  }
2654}
2655
2656let Predicates = [HasDQI, NoEGPR] in
2657  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2658               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2659               VEX, TB, PD;
2660let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2661  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2662               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2663               EVEX, TB, PD;
2664
2665let Predicates = [HasAVX512, NoEGPR] in
2666  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2667               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2668               VEX, TB;
2669let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2670  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2671               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2672               EVEX, TB;
2673
2674let Predicates = [HasBWI, NoEGPR] in {
2675  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2676               VEX, TB, PD, REX_W;
2677  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2678               VEX, TB, XD;
2679  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2680               VEX, TB, REX_W;
2681  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2682               VEX, TB, XD, REX_W;
2683}
2684let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2685  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2686               EVEX, TB, PD, REX_W;
2687  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2688               EVEX, TB, XD;
2689  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2690               EVEX, TB, REX_W;
2691  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2692               EVEX, TB, XD, REX_W;
2693}
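// KMOV covers all three move directions defined above, e.g. (illustrative)
// kmovw %k1, %k2, kmovw (%rdi), %k1 / kmovw %k1, (%rdi), and
// kmovw %eax, %k1 / kmovw %k1, %eax.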
2694
2695// GR from/to mask register
2696def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2697          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2698def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2699          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2700def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2701          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2702
2703def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2704          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2705def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2706          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2707
2708def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2709          (KMOVWrk VK16:$src)>;
2710def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2711          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2712def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2713          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2714def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2715          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2716
2717def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2718          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2719def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2720          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2721def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2722          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2723def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2724          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2725
2726def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2727          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2728def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2729          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2730def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2731          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2732def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2733          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2734
2735// Load/store kreg
2736let Predicates = [HasDQI] in {
2737  def : Pat<(v1i1 (load addr:$src)),
2738            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2739  def : Pat<(v2i1 (load addr:$src)),
2740            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2741  def : Pat<(v4i1 (load addr:$src)),
2742            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2743}
2744
2745let Predicates = [HasAVX512] in {
2746  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2747            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2748  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2749            (KMOVWkm addr:$src)>;
2750}
2751
2752def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2753                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2754                                              SDTCVecEltisVT<1, i1>,
2755                                              SDTCisPtrTy<2>]>>;
2756
2757let Predicates = [HasAVX512] in {
2758  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2759    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2760              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2761
2762    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2763              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2764
2765    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2766              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2767
2768    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2769              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2770  }
2771
2772  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2773  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2774  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2775  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2776  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2777  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2778  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2779
2780  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2781                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2782            (KMOVWkr (AND32ri
2783                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2784                      (i32 1)))>;
2785}
2786
2787// Mask unary operation
2788// - KNOT
2789multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2790                            RegisterClass KRC, SDPatternOperator OpNode,
2791                            X86FoldableSchedWrite sched, Predicate prd> {
2792  let Predicates = [prd] in
2793    def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2794               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2795               [(set KRC:$dst, (OpNode KRC:$src))]>,
2796               Sched<[sched]>;
2797}
2798
2799multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2800                                SDPatternOperator OpNode,
2801                                X86FoldableSchedWrite sched> {
2802  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2803                            sched, HasDQI>, VEX, TB, PD;
2804  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2805                            sched, HasAVX512>, VEX, TB;
2806  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2807                            sched, HasBWI>, VEX, TB, PD, REX_W;
2808  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2809                            sched, HasBWI>, VEX, TB, REX_W;
2810}
2811
2812// TODO - do we need an X86SchedWriteWidths::KMASK type?
2813defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
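// e.g. knotw %k1, %k2 inverts all 16 bits of %k1 into %k2 (illustrative).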
2814
2815// KNL does not support KMOVB, so an 8-bit mask is promoted to a 16-bit mask.
2816let Predicates = [HasAVX512, NoDQI] in
2817def : Pat<(vnot VK8:$src),
2818          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2819
2820def : Pat<(vnot VK4:$src),
2821          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2822def : Pat<(vnot VK2:$src),
2823          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2824def : Pat<(vnot VK1:$src),
2825          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2826
2827// Mask binary operation
2828// - KAND, KANDN, KOR, KXNOR, KXOR
2829multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2830                           RegisterClass KRC, SDPatternOperator OpNode,
2831                           X86FoldableSchedWrite sched, Predicate prd,
2832                           bit IsCommutable> {
2833  let Predicates = [prd], isCommutable = IsCommutable in
2834    def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2835               !strconcat(OpcodeStr,
2836                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2837               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2838               Sched<[sched]>;
2839}
2840
2841multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2842                                 SDPatternOperator OpNode,
2843                                 X86FoldableSchedWrite sched, bit IsCommutable,
2844                                 Predicate prdW = HasAVX512> {
2845  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2846                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2847  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2848                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2849  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2850                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2851  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2852                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2853}
2854
2855// TODO - do we need an X86SchedWriteWidths::KMASK type?
2856defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2857defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2858defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2859defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2860defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2861defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
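// Illustrative three-operand forms: kandw %k2, %k1, %k0, korw %k2, %k1, %k0,
// kxorw %k2, %k1, %k0, etc.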
2862
2863multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2864                            Instruction Inst> {
2865  // With AVX512F an 8-bit mask is promoted to a 16-bit mask; with DQI the
2866  // v8i1 type is legal and the KxxxB instructions are used directly.
2867  let Predicates = [NoDQI] in
2868  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2869            (COPY_TO_REGCLASS
2870              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2871                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2872
2873  // All types smaller than 8 bits require conversion anyway
2874  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2875        (COPY_TO_REGCLASS (Inst
2876                           (COPY_TO_REGCLASS VK1:$src1, VK16),
2877                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2878  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2879        (COPY_TO_REGCLASS (Inst
2880                           (COPY_TO_REGCLASS VK2:$src1, VK16),
2881                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2882  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2883        (COPY_TO_REGCLASS (Inst
2884                           (COPY_TO_REGCLASS VK4:$src1, VK16),
2885                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2886}
2887
2888defm : avx512_binop_pat<and,   KANDWkk>;
2889defm : avx512_binop_pat<vandn, KANDNWkk>;
2890defm : avx512_binop_pat<or,    KORWkk>;
2891defm : avx512_binop_pat<vxnor, KXNORWkk>;
2892defm : avx512_binop_pat<xor,   KXORWkk>;
2893
2894// Mask unpacking
2895multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2896                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2897                             Predicate prd> {
2898  let Predicates = [prd] in {
2899    let hasSideEffects = 0 in
2900    def kk : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2901               (ins Src.KRC:$src1, Src.KRC:$src2),
2902               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2903               VEX, VVVV, VEX_L, Sched<[sched]>;
2904
2905    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2906              (!cast<Instruction>(NAME#kk) Src.KRC:$src2, Src.KRC:$src1)>;
2907  }
2908}
2909
2910defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
2911defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2912defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
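// e.g. kunpckbw %k2, %k1, %k0 (illustrative) puts the low 8 bits of %k2 in the
// low half of %k0 and the low 8 bits of %k1 in the high half, which is why the
// concat_vectors pattern above passes $src2 before $src1.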
2913
2914// Mask bit testing
2915multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2916                              SDNode OpNode, X86FoldableSchedWrite sched,
2917                              Predicate prd> {
2918  let Predicates = [prd], Defs = [EFLAGS] in
2919    def kk : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2920               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2921               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2922               Sched<[sched]>;
2923}
2924
2925multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2926                                X86FoldableSchedWrite sched,
2927                                Predicate prdW = HasAVX512> {
2928  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2929                                                                VEX, TB, PD;
2930  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2931                                                                VEX, TB;
2932  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2933                                                                VEX, TB, REX_W;
2934  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2935                                                                VEX, TB, PD, REX_W;
2936}
2937
2938// TODO - do we need an X86SchedWriteWidths::KMASK type?
2939defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2940defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
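// e.g. kortestw %k0, %k1 ORs the two masks, setting ZF if the result is all
// zeros and CF if it is all ones (illustrative).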
2941
2942// Mask shift
2943multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2944                               SDNode OpNode, X86FoldableSchedWrite sched> {
2945  let Predicates = [HasAVX512] in
2946    def ki : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2947                 !strconcat(OpcodeStr,
2948                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2949                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
2950                 Sched<[sched]>;
2951}
2952
2953multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2954                                 SDNode OpNode, X86FoldableSchedWrite sched> {
2955  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2956                               sched>, VEX, TA, PD, REX_W;
2957  let Predicates = [HasDQI] in
2958  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2959                               sched>, VEX, TA, PD;
2960  let Predicates = [HasBWI] in {
2961  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2962                               sched>, VEX, TA, PD, REX_W;
2963  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2964                               sched>, VEX, TA, PD;
2965  }
2966}
2967
2968defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2969defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
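// e.g. kshiftlw $2, %k1, %k2 shifts the 16-bit mask left by two bits, shifting
// in zeros (illustrative).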
2970
2971// Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
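// The narrow sources are inserted into undef 512-bit registers, the 512-bit
// compare is used, and the resulting mask is copied back to the narrow KRC.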
2972multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2973                                                 string InstStr,
2974                                                 X86VectorVTInfo Narrow,
2975                                                 X86VectorVTInfo Wide> {
2976def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2977                                (Narrow.VT Narrow.RC:$src2), cond)),
2978          (COPY_TO_REGCLASS
2979           (!cast<Instruction>(InstStr#"Zrri")
2980            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2981            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2982            (X86pcmpm_imm $cc)), Narrow.KRC)>;
2983
2984def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2985                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2986                                                    (Narrow.VT Narrow.RC:$src2),
2987                                                    cond)))),
2988          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2989           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2990           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2991           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2992           (X86pcmpm_imm $cc)), Narrow.KRC)>;
2993}
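// Illustrative sketch (made-up value names): a v8i32 signed compare on an
// AVX512F-only target is selected roughly as
//   %w1 = INSERT_SUBREG(IMPLICIT_DEF, %src1, sub_ymm)
//   %w2 = INSERT_SUBREG(IMPLICIT_DEF, %src2, sub_ymm)
//   %k  = VPCMPDZrri %w1, %w2, imm            // 512-bit compare
//   %m  = COPY_TO_REGCLASS %k, VK8            // keep only the low 8 mask bits
// The upper lanes of the widened sources are undef, but the extra mask bits
// they produce are discarded by the final register-class copy.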
2994
2995multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2996                                                     string InstStr,
2997                                                     X86VectorVTInfo Narrow,
2998                                                     X86VectorVTInfo Wide> {
2999// Broadcast load.
3000def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3001                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3002          (COPY_TO_REGCLASS
3003           (!cast<Instruction>(InstStr#"Zrmbi")
3004            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3005            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3006
3007def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3008                           (Narrow.KVT
3009                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3010                                         (Narrow.BroadcastLdFrag addr:$src2),
3011                                         cond)))),
3012          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3013           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3014           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3015           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3016
3017// Commuted with broadcast load.
3018def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3019                                (Narrow.VT Narrow.RC:$src1),
3020                                cond)),
3021          (COPY_TO_REGCLASS
3022           (!cast<Instruction>(InstStr#"Zrmbi")
3023            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3024            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3025
3026def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3027                           (Narrow.KVT
3028                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3029                                         (Narrow.VT Narrow.RC:$src1),
3030                                         cond)))),
3031          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3032           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3033           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3034           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3035}
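// Note that the commuted broadcast patterns also have to swap the compare
// predicate, which is why they use X86pcmpm_imm_commute rather than the plain
// X86pcmpm_imm of the non-commuted patterns.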
3036
3037// Same as above, but for floating-point types, which don't use PatFrags.
3038multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3039                                                X86VectorVTInfo Narrow,
3040                                                X86VectorVTInfo Wide> {
3041def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3042                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3043          (COPY_TO_REGCLASS
3044           (!cast<Instruction>(InstStr#"Zrri")
3045            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3046            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3047            timm:$cc), Narrow.KRC)>;
3048
3049def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3050                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3051                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3052          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3053           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3054           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3055           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3056           timm:$cc), Narrow.KRC)>;
3057
3058// Broadcast load.
3059def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3060                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3061          (COPY_TO_REGCLASS
3062           (!cast<Instruction>(InstStr#"Zrmbi")
3063            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3064            addr:$src2, timm:$cc), Narrow.KRC)>;
3065
3066def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3067                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3068                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3069          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3070           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3071           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3072           addr:$src2, timm:$cc), Narrow.KRC)>;
3073
3074// Commuted with broadcast load.
3075def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3076                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3077          (COPY_TO_REGCLASS
3078           (!cast<Instruction>(InstStr#"Zrmbi")
3079            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3080            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3081
3082def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3083                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3084                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3085          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3086           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3087           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3088           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3089}
3090
3091let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3092  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3093  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3094
3095  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3096  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3097
3098  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3099  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3100
3101  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3102  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3103
3104  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3105  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3106
3107  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3108  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3109
3110  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3111  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3112
3113  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3114  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3115
3116  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3117  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3118  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3119  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3120}
3121
3122let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3123  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3124  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3125
3126  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3127  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3128
3129  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3130  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3131
3132  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3133  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3134}
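// There are no *_rmb (embedded-broadcast) lowerings for the byte/word compares
// above: EVEX embedded broadcast only exists for 32- and 64-bit elements.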
3135
3136// Mask setting all 0s or 1s
3137multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3138  let Predicates = [HasAVX512] in
3139    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3140        SchedRW = [WriteZero] in
3141      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3142                     [(set KRC:$dst, (VT Val))]>;
3143}
3144
3145multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3146  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3147  defm D : avx512_mask_setop<VK32, v32i1, Val>;
3148  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3149}
3150
3151defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3152defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
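// These are pseudos and are expected to be expanded after register allocation
// (e.g. in X86InstrInfo's post-RA pseudo expansion) into a kxor/kxnor of a
// mask register with itself, which is why they can be rematerialized and are
// marked as cheap as a move.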
3153
3154// With AVX-512 only, an 8-bit (or narrower) mask is promoted to a 16-bit mask.
3155let Predicates = [HasAVX512] in {
3156  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3157  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3158  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3159  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3160  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3161  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3162  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3163  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3164}
3165
3166// Patterns for kmask insert_subvector/extract_subvector to/from index=0
3167multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3168                                             RegisterClass RC, ValueType VT> {
3169  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3170            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3171
3172  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3173            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3174}
3175defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3176defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3177defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3178defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3179defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3180defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3181
3182defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3183defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3184defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3185defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3186defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3187
3188defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3189defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3190defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3191defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3192
3193defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3194defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3195defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3196
3197defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3198defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3199
3200defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
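// All of the above are pure register-class copies: inserting into or
// extracting from mask-vector index 0 moves no bits, it only changes how many
// of the low k-register bits are considered significant.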
3201
3202//===----------------------------------------------------------------------===//
3203// AVX-512 - Aligned and unaligned load and store
3204//
3205
3206multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3207                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3208                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3209                       SDPatternOperator SelectOprr = vselect> {
3210  let hasSideEffects = 0 in {
3211  let isMoveReg = 1 in
3212  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3213                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3214                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
3215  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3216                      (ins _.KRCWM:$mask,  _.RC:$src),
3217                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3218                       "${dst} {${mask}} {z}, $src}"),
3219                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3220                                           (_.VT _.RC:$src),
3221                                           _.ImmAllZerosV)))], _.ExeDomain>,
3222                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3223
3224  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3225  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3226                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3227                    !if(NoRMPattern, [],
3228                        [(set _.RC:$dst,
3229                          (_.VT (ld_frag addr:$src)))]),
3230                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
3231
3232  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3233    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3234                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3235                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3236                      "${dst} {${mask}}, $src1}"),
3237                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3238                                          (_.VT _.RC:$src1),
3239                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3240                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3241    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3242                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3243                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3244                      "${dst} {${mask}}, $src1}"),
3245                     [(set _.RC:$dst, (_.VT
3246                         (vselect_mask _.KRCWM:$mask,
3247                          (_.VT (ld_frag addr:$src1)),
3248                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3249                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3250  }
3251  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3252                  (ins _.KRCWM:$mask, _.MemOp:$src),
3253                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3254                                "${dst} {${mask}} {z}, $src}",
3255                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3256                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3257                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3258  }
3259  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3260            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3261
3262  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3263            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3264
3265  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3266            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3267             _.KRCWM:$mask, addr:$ptr)>;
3268}
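// Informal summary of the suffixes generated by avx512_load (and avx512_store
// below), using the vmovaps instantiation defined further down as an example:
//   rr / rm       vmovaps zmm, zmm         / vmovaps zmm, mem
//   rrk / rmk     vmovaps zmm {k1}, ...      (merge-masked, $src0 = $dst)
//   rrkz / rmkz   vmovaps zmm {k1} {z}, ...  (zero-masked)
// The trailing Pat<>s map masked_load nodes onto the rmkz form for an undef or
// zero passthru operand and onto the rmk form for a merge passthru operand.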
3269
3270multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3271                                 AVX512VLVectorVTInfo _, Predicate prd,
3272                                 X86SchedWriteMoveLSWidths Sched,
3273                                 bit NoRMPattern = 0> {
3274  let Predicates = [prd] in
3275  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3276                       _.info512.AlignedLdFrag, masked_load_aligned,
3277                       Sched.ZMM, NoRMPattern>, EVEX_V512;
3278
3279  let Predicates = [prd, HasVLX] in {
3280  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3281                          _.info256.AlignedLdFrag, masked_load_aligned,
3282                          Sched.YMM, NoRMPattern>, EVEX_V256;
3283  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3284                          _.info128.AlignedLdFrag, masked_load_aligned,
3285                          Sched.XMM, NoRMPattern>, EVEX_V128;
3286  }
3287}
3288
3289multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3290                          AVX512VLVectorVTInfo _, Predicate prd,
3291                          X86SchedWriteMoveLSWidths Sched,
3292                          bit NoRMPattern = 0,
3293                          SDPatternOperator SelectOprr = vselect> {
3294  let Predicates = [prd] in
3295  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3296                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3297
3298  let Predicates = [prd, HasVLX] in {
3299  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3300                         masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3301  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3302                         masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
3303  }
3304}
3305
3306multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3307                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3308                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
3309  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3310  let isMoveReg = 1 in
3311  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3312                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3313                         [], _.ExeDomain>, EVEX,
3314                         Sched<[Sched.RR]>;
3315  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3316                         (ins _.KRCWM:$mask, _.RC:$src),
3317                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3318                         "${dst} {${mask}}, $src}",
3319                         [], _.ExeDomain>,  EVEX, EVEX_K,
3320                         Sched<[Sched.RR]>;
3321  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3322                          (ins _.KRCWM:$mask, _.RC:$src),
3323                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3324                          "${dst} {${mask}} {z}, $src}",
3325                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3326                          Sched<[Sched.RR]>;
3327  }
3328
3329  let hasSideEffects = 0, mayStore = 1 in
3330  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3331                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3332                    !if(NoMRPattern, [],
3333                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3334                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
3335  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3336                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3337              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3338               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3339
3340  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3341           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3342                                                        _.KRCWM:$mask, _.RC:$src)>;
3343
3344  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3345                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3346                   _.RC:$dst, _.RC:$src), 0>;
3347  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3348                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3349                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3350  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3351                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3352                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3353}
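// The *_REV defs are the MRMDestReg (store-form) encodings of the plain
// register-register moves; they are isCodeGenOnly and exist mainly so both
// encodings can be disassembled. The "<mnemonic>.s" InstAliases above let
// assembly writers request this alternate encoding explicitly.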
3354
3355multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3356                            AVX512VLVectorVTInfo _, Predicate prd,
3357                            X86SchedWriteMoveLSWidths Sched,
3358                            bit NoMRPattern = 0> {
3359  let Predicates = [prd] in
3360  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3361                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3362  let Predicates = [prd, HasVLX] in {
3363    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3364                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3365    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3366                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
3367  }
3368}
3369
3370multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3371                                  AVX512VLVectorVTInfo _, Predicate prd,
3372                                  X86SchedWriteMoveLSWidths Sched,
3373                                  bit NoMRPattern = 0> {
3374  let Predicates = [prd] in
3375  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3376                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3377
3378  let Predicates = [prd, HasVLX] in {
3379    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3380                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3381    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3382                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
3383  }
3384}
3385
3386defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3387                                     HasAVX512, SchedWriteFMoveLS>,
3388               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3389                                      HasAVX512, SchedWriteFMoveLS>,
3390               TB, EVEX_CD8<32, CD8VF>;
3391
3392defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3393                                     HasAVX512, SchedWriteFMoveLS>,
3394               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3395                                      HasAVX512, SchedWriteFMoveLS>,
3396               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3397
3398defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3399                              SchedWriteFMoveLS, 0, null_frag>,
3400               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3401                               SchedWriteFMoveLS>,
3402                               TB, EVEX_CD8<32, CD8VF>;
3403
3404defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3405                              SchedWriteFMoveLS, 0, null_frag>,
3406               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3407                               SchedWriteFMoveLS>,
3408               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3409
3410defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3411                                       HasAVX512, SchedWriteVecMoveLS, 1>,
3412                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3413                                        HasAVX512, SchedWriteVecMoveLS, 1>,
3414                 TB, PD, EVEX_CD8<32, CD8VF>;
3415
3416defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3417                                       HasAVX512, SchedWriteVecMoveLS>,
3418                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3419                                        HasAVX512, SchedWriteVecMoveLS>,
3420                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3421
3422defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3423                               SchedWriteVecMoveLS, 1>,
3424                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3425                                SchedWriteVecMoveLS, 1>,
3426                TB, XD, EVEX_CD8<8, CD8VF>;
3427
3428defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3429                                SchedWriteVecMoveLS, 1>,
3430                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3431                                 SchedWriteVecMoveLS, 1>,
3432                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
3433
3434defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3435                                SchedWriteVecMoveLS, 1, null_frag>,
3436                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3437                                 SchedWriteVecMoveLS, 1>,
3438                 TB, XS, EVEX_CD8<32, CD8VF>;
3439
3440defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3441                                SchedWriteVecMoveLS, 0, null_frag>,
3442                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3443                                 SchedWriteVecMoveLS>,
3444                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
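// Several of the defms above pass NoRMPattern/NoMRPattern = 1; plain
// load/store selection for those types is handled instead by the generic
// Pat<>s later in this file (which, for the integer and f16/bf16 vector types,
// mostly funnel through the VMOVDQA64/VMOVDQU64 and VMOVAPS/VMOVUPS forms),
// presumably to avoid overlapping isel patterns.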
3445
3446// Special instructions to help with spilling when we don't have VLX. We need
3447// to use a ZMM register for the load or store instead. These are converted in
3448// expandPostRAPseudos.
3449let isReMaterializable = 1, canFoldAsLoad = 1,
3450    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3451def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3452                            "", []>, Sched<[WriteFLoadX]>;
3453def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3454                            "", []>, Sched<[WriteFLoadY]>;
3455def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3456                            "", []>, Sched<[WriteFLoadX]>;
3457def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3458                            "", []>, Sched<[WriteFLoadY]>;
3459}
3460
3461let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3462def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3463                            "", []>, Sched<[WriteFStoreX]>;
3464def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3465                            "", []>, Sched<[WriteFStoreY]>;
3466def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3467                            "", []>, Sched<[WriteFStoreX]>;
3468def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3469                            "", []>, Sched<[WriteFStoreY]>;
3470}
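// Rough sketch of the expected post-RA expansion: VMOVAPSZ128rm_NOVLX becomes
// a full-width VMOVAPSZrm whose destination is the ZMM super-register
// containing the original XMM destination (and analogously for the YMM and
// store variants), so only 512-bit EVEX encodings are emitted when VLX is
// unavailable.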
3471
3472def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3473                          (v8i64 VR512:$src))),
3474   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$mask, VK16)),
3475                                              VK8), VR512:$src)>;
3476
3477def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3478                           (v16i32 VR512:$src))),
3479                  (VMOVDQA32Zrrkz (KNOTWkk VK16WM:$mask), VR512:$src)>;
3480
3481// These patterns exist to prevent the above patterns from introducing a second
3482// mask inversion when one already exists.
3483def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3484                          (v8i64 immAllZerosV),
3485                          (v8i64 VR512:$src))),
3486                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3487def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3488                           (v16i32 immAllZerosV),
3489                           (v16i32 VR512:$src))),
3490                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3491
3492multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3493                              X86VectorVTInfo Wide> {
3494 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3495                               Narrow.RC:$src1, Narrow.RC:$src0)),
3496           (EXTRACT_SUBREG
3497            (Wide.VT
3498             (!cast<Instruction>(InstrStr#"rrk")
3499              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3500              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3501              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3502            Narrow.SubRegIdx)>;
3503
3504 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3505                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3506           (EXTRACT_SUBREG
3507            (Wide.VT
3508             (!cast<Instruction>(InstrStr#"rrkz")
3509              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3510              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3511            Narrow.SubRegIdx)>;
3512}
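// Illustrative sketch (made-up value names) for a v8f32 select without VLX:
//   %w0  = INSERT_SUBREG(IMPLICIT_DEF, %src0, sub_ymm)
//   %w1  = INSERT_SUBREG(IMPLICIT_DEF, %src1, sub_ymm)
//   %k   = COPY_TO_REGCLASS %mask, VK16WM
//   %res = EXTRACT_SUBREG (VMOVAPSZrrk %w0, %k, %w1), sub_ymm
// i.e. the blend happens at 512 bits and the low 256 bits are extracted.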
3513
3514// Patterns for handling narrow-mask selects of 128/256-bit vectors when VLX
3515// isn't available. Use a 512-bit operation and extract the low subvector.
3516let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3517  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3518  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3519  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3520  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3521
3522  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3523  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3524  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3525  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3526}
3527
3528let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3529  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3530  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3531
3532  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3533  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3534
3535  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3536  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3537
3538  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3539  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3540}
3541
3542let Predicates = [HasAVX512] in {
3543  // 512-bit load.
3544  def : Pat<(alignedloadv16i32 addr:$src),
3545            (VMOVDQA64Zrm addr:$src)>;
3546  def : Pat<(alignedloadv32i16 addr:$src),
3547            (VMOVDQA64Zrm addr:$src)>;
3548  def : Pat<(alignedloadv32f16 addr:$src),
3549            (VMOVAPSZrm addr:$src)>;
3550  def : Pat<(alignedloadv32bf16 addr:$src),
3551            (VMOVAPSZrm addr:$src)>;
3552  def : Pat<(alignedloadv64i8 addr:$src),
3553            (VMOVDQA64Zrm addr:$src)>;
3554  def : Pat<(loadv16i32 addr:$src),
3555            (VMOVDQU64Zrm addr:$src)>;
3556  def : Pat<(loadv32i16 addr:$src),
3557            (VMOVDQU64Zrm addr:$src)>;
3558  def : Pat<(loadv32f16 addr:$src),
3559            (VMOVUPSZrm addr:$src)>;
3560  def : Pat<(loadv32bf16 addr:$src),
3561            (VMOVUPSZrm addr:$src)>;
3562  def : Pat<(loadv64i8 addr:$src),
3563            (VMOVDQU64Zrm addr:$src)>;
3564
3565  // 512-bit store.
3566  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3567            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3568  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3569            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3570  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3571            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3572  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3573            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3574  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3575            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3576  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3577            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3578  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3579            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3580  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3581            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3582  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3583            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3584  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3585            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3586}
3587
3588let Predicates = [HasVLX] in {
3589  // 128-bit load.
3590  def : Pat<(alignedloadv4i32 addr:$src),
3591            (VMOVDQA64Z128rm addr:$src)>;
3592  def : Pat<(alignedloadv8i16 addr:$src),
3593            (VMOVDQA64Z128rm addr:$src)>;
3594  def : Pat<(alignedloadv8f16 addr:$src),
3595            (VMOVAPSZ128rm addr:$src)>;
3596  def : Pat<(alignedloadv8bf16 addr:$src),
3597            (VMOVAPSZ128rm addr:$src)>;
3598  def : Pat<(alignedloadv16i8 addr:$src),
3599            (VMOVDQA64Z128rm addr:$src)>;
3600  def : Pat<(loadv4i32 addr:$src),
3601            (VMOVDQU64Z128rm addr:$src)>;
3602  def : Pat<(loadv8i16 addr:$src),
3603            (VMOVDQU64Z128rm addr:$src)>;
3604  def : Pat<(loadv8f16 addr:$src),
3605            (VMOVUPSZ128rm addr:$src)>;
3606  def : Pat<(loadv8bf16 addr:$src),
3607            (VMOVUPSZ128rm addr:$src)>;
3608  def : Pat<(loadv16i8 addr:$src),
3609            (VMOVDQU64Z128rm addr:$src)>;
3610
3611  // 128-bit store.
3612  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3613            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3614  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3615            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3616  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3617            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3618  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3619            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3620  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3621            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3622  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3623            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3624  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3625            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3626  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3627            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3628  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3629            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3630  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3631            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3632
3633  // 256-bit load.
3634  def : Pat<(alignedloadv8i32 addr:$src),
3635            (VMOVDQA64Z256rm addr:$src)>;
3636  def : Pat<(alignedloadv16i16 addr:$src),
3637            (VMOVDQA64Z256rm addr:$src)>;
3638  def : Pat<(alignedloadv16f16 addr:$src),
3639            (VMOVAPSZ256rm addr:$src)>;
3640  def : Pat<(alignedloadv16bf16 addr:$src),
3641            (VMOVAPSZ256rm addr:$src)>;
3642  def : Pat<(alignedloadv32i8 addr:$src),
3643            (VMOVDQA64Z256rm addr:$src)>;
3644  def : Pat<(loadv8i32 addr:$src),
3645            (VMOVDQU64Z256rm addr:$src)>;
3646  def : Pat<(loadv16i16 addr:$src),
3647            (VMOVDQU64Z256rm addr:$src)>;
3648  def : Pat<(loadv16f16 addr:$src),
3649            (VMOVUPSZ256rm addr:$src)>;
3650  def : Pat<(loadv16bf16 addr:$src),
3651            (VMOVUPSZ256rm addr:$src)>;
3652  def : Pat<(loadv32i8 addr:$src),
3653            (VMOVDQU64Z256rm addr:$src)>;
3654
3655  // 256-bit store.
3656  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3657            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3658  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3659            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3660  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3661            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3662  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3663            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3664  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3665            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3666  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3667            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3668  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3669            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3670  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3671            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3672  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3673            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3674  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3675            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3676}
3677
3678multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3679let Predicates = [HasBWI] in {
3680  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3681            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3682  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3683            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3684  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3685                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3686            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3687  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3688                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3689            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3690  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3691                     (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3692            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3693  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3694                     (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3695            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3696  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3697            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3698  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3699            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3700  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3701            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3702
3703  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3704            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3705}
3706let Predicates = [HasBWI, HasVLX] in {
3707  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3708            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3709  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3710            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3711  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3712                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3713            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3714  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3715                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3716            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3717  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3718                     (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3719            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3720  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3721                     (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3722            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3723  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3724            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3725  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3726            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3727  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3728            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3729
3730  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3731            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3732
3733  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3734            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3735  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3736            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3737  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3738                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3739            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3740  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3741                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3742            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3743  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3744                     (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3745            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3746  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3747                     (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3748            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3749  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3750            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3751  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3752            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3753  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3754            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3755
3756  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3757            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3758}
3759}
3760
3761defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3762defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3763
3764// Move Int Doubleword to Packed Double Int
3765//
3766let ExeDomain = SSEPackedInt in {
3767def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3768                      "vmovd\t{$src, $dst|$dst, $src}",
3769                      [(set VR128X:$dst,
3770                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3771                        EVEX, Sched<[WriteVecMoveFromGpr]>;
3772def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3773                      "vmovd\t{$src, $dst|$dst, $src}",
3774                      [(set VR128X:$dst,
3775                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3776                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3777def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3778                      "vmovq\t{$src, $dst|$dst, $src}",
3779                        [(set VR128X:$dst,
3780                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3781                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3782let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3783def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3784                      (ins i64mem:$src),
3785                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3786                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3787let isCodeGenOnly = 1 in {
3788def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3789                       "vmovq\t{$src, $dst|$dst, $src}",
3790                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3791                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3792def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3793                         "vmovq\t{$src, $dst|$dst, $src}",
3794                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3795                         EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3796}
3797} // ExeDomain = SSEPackedInt
3798
3799// Move Int Doubleword to Single Scalar
3800//
3801let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3802def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3803                      "vmovd\t{$src, $dst|$dst, $src}",
3804                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3805                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3806} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3807
3808// Move doubleword from xmm register to r/m32
3809//
3810let ExeDomain = SSEPackedInt in {
3811def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3812                       "vmovd\t{$src, $dst|$dst, $src}",
3813                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3814                                        (iPTR 0)))]>,
3815                       EVEX, Sched<[WriteVecMoveToGpr]>;
3816def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3817                       (ins i32mem:$dst, VR128X:$src),
3818                       "vmovd\t{$src, $dst|$dst, $src}",
3819                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3820                                     (iPTR 0))), addr:$dst)]>,
3821                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3822} // ExeDomain = SSEPackedInt
3823
3824// Move quadword from xmm1 register to r/m64
3825//
3826let ExeDomain = SSEPackedInt in {
3827def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3828                      "vmovq\t{$src, $dst|$dst, $src}",
3829                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3830                                                   (iPTR 0)))]>,
3831                      TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3832                      Requires<[HasAVX512]>;
3833
3834let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3835def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3836                      "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3837                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3838                      Requires<[HasAVX512, In64BitMode]>;
3839
3840def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3841                      (ins i64mem:$dst, VR128X:$src),
3842                      "vmovq\t{$src, $dst|$dst, $src}",
3843                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3844                              addr:$dst)]>,
3845                      EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3846                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3847
3848let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3849def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3850                             (ins VR128X:$src),
3851                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3852                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3853} // ExeDomain = SSEPackedInt
3854
3855def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3856                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3857
3858let Predicates = [HasAVX512] in {
3859  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3860            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3861}
3862
3863// Move Scalar Single to Double Int
3864//
3865let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3866def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3867                      (ins FR32X:$src),
3868                      "vmovd\t{$src, $dst|$dst, $src}",
3869                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3870                      EVEX, Sched<[WriteVecMoveToGpr]>;
3871} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3872
3873// Move Quadword Int to Packed Quadword Int
3874//
3875let ExeDomain = SSEPackedInt in {
3876def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3877                      (ins i64mem:$src),
3878                      "vmovq\t{$src, $dst|$dst, $src}",
3879                      [(set VR128X:$dst,
3880                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3881                      EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3882} // ExeDomain = SSEPackedInt
3883
3884// Allow "vmovd" but print "vmovq".
3885def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3886                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3887def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3888                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3889
3890// Conversions between masks and scalar fp.
3891def : Pat<(v32i1 (bitconvert FR32X:$src)),
3892          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3893def : Pat<(f32 (bitconvert VK32:$src)),
3894          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3895
3896def : Pat<(v64i1 (bitconvert FR64X:$src)),
3897          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3898def : Pat<(f64 (bitconvert VK64:$src)),
3899          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
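// There is no direct k-register <-> XMM move, so these bitcasts go through a
// GPR: a kmovd/kmovq to or from a 32/64-bit GPR combined with the vmovd/vmovq
// GPR <-> scalar-FP moves defined above.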
3900
3901//===----------------------------------------------------------------------===//
3902// AVX-512  MOVSH, MOVSS, MOVSD
3903//===----------------------------------------------------------------------===//
3904
3905multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3906                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
3907  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3908  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3909             (ins _.RC:$src1, _.RC:$src2),
3910             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3911             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3912             _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3913  let Predicates = [prd] in {
3914  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3915              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3916              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3917              "$dst {${mask}} {z}, $src1, $src2}"),
3918              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3919                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3920                                      _.ImmAllZerosV)))],
3921              _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3922  let Constraints = "$src0 = $dst"  in
3923  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3924             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3925             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3926             "$dst {${mask}}, $src1, $src2}"),
3927             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3928                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3929                                     (_.VT _.RC:$src0))))],
3930             _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3931  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3932  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3933             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3934             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3935             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3936  // The _alt version uses the scalar FR register class (_.FRC).
3937  let isCodeGenOnly = 1 in
3938  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3939                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3940                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3941                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3942  }
3943  let mayLoad = 1, hasSideEffects = 0 in {
3944    let Constraints = "$src0 = $dst" in
3945    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3946               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3947               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3948               "$dst {${mask}}, $src}"),
3949               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3950    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3951               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3952               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3953               "$dst {${mask}} {z}, $src}"),
3954               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3955  }
3956  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3957             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3958             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3959             EVEX, Sched<[WriteFStore]>;
3960  let mayStore = 1, hasSideEffects = 0 in
3961  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3962              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3963              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3964              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
3965  }
3966}
3967
3968defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3969                                  VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3970
3971defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3972                                  VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3973
3974defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3975                                  HasFP16>,
3976                                  VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
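// Note that, outside of HasFP16, the unmasked register-register form above is
// additionally gated on OptForSize (see the !if on Predicates); presumably the
// blend/insert based lowerings are preferred when optimizing for speed.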
3977
3978multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3979                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
3980
3981def : Pat<(_.VT (OpNode _.RC:$src0,
3982                        (_.VT (scalar_to_vector
3983                                  (_.EltVT (X86selects VK1WM:$mask,
3984                                                       (_.EltVT _.FRC:$src1),
3985                                                       (_.EltVT _.FRC:$src2))))))),
3986          (!cast<Instruction>(InstrStr#rrk)
3987                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3988                        VK1WM:$mask,
3989                        (_.VT _.RC:$src0),
3990                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3991
3992def : Pat<(_.VT (OpNode _.RC:$src0,
3993                        (_.VT (scalar_to_vector
3994                                  (_.EltVT (X86selects VK1WM:$mask,
3995                                                       (_.EltVT _.FRC:$src1),
3996                                                       (_.EltVT ZeroFP))))))),
3997          (!cast<Instruction>(InstrStr#rrkz)
3998                        VK1WM:$mask,
3999                        (_.VT _.RC:$src0),
4000                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4001}
4002
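// Fold a masked store of the low scalar element, widened to 512 bits by
// legalization, into the masked scalar store (mrk) instruction.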
4003multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4004                                        dag Mask, RegisterClass MaskRC> {
4005
4006def : Pat<(masked_store
4007             (_.info512.VT (insert_subvector undef,
4008                               (_.info128.VT _.info128.RC:$src),
4009                               (iPTR 0))), addr:$dst, Mask),
4010          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4011                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4012                      _.info128.RC:$src)>;
4013
4014}
4015
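// Same as above, but the mask is produced in a narrower GPR and is widened to
// i32 with INSERT_SUBREG before being copied into a VK1WM mask register.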
4016multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4017                                               AVX512VLVectorVTInfo _,
4018                                               dag Mask, RegisterClass MaskRC,
4019                                               SubRegIndex subreg> {
4020
4021def : Pat<(masked_store
4022             (_.info512.VT (insert_subvector undef,
4023                               (_.info128.VT _.info128.RC:$src),
4024                               (iPTR 0))), addr:$dst, Mask),
4025          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4026                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4027                      _.info128.RC:$src)>;
4028
4029}
4030
4031// This matches the more recent codegen from clang, which avoids emitting a
4032// 512-bit masked store directly. Codegen will widen a 128-bit masked store to
4033// 512 bits on AVX512F-only targets.
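//
// Illustrative example (an assumption about current clang lowering, not taken
// from this file): a masked scalar store intrinsic such as _mm_mask_store_ss
// becomes a 128-bit llvm.masked.store in IR; without VLX, type legalization
// widens it to 512 bits, and the AVX512F pattern below folds the widened
// store back into the masked scalar store instruction (e.g. VMOVSSZmrk).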
4034multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4035                                               AVX512VLVectorVTInfo _,
4036                                               dag Mask512, dag Mask128,
4037                                               RegisterClass MaskRC,
4038                                               SubRegIndex subreg> {
4039
4040// AVX512F pattern.
4041def : Pat<(masked_store
4042             (_.info512.VT (insert_subvector undef,
4043                               (_.info128.VT _.info128.RC:$src),
4044                               (iPTR 0))), addr:$dst, Mask512),
4045          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4046                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4047                      _.info128.RC:$src)>;
4048
4049// AVX512VL pattern.
4050def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4051          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4052                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4053                      _.info128.RC:$src)>;
4054}
4055
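// Masked scalar load counterparts of the store lowerings above: fold the
// widened masked load into the zero-masked (rmkz) or merge-masked (rmk)
// scalar load instruction.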
4056multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4057                                       dag Mask, RegisterClass MaskRC> {
4058
4059def : Pat<(_.info128.VT (extract_subvector
4060                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061                                        _.info512.ImmAllZerosV)),
4062                           (iPTR 0))),
4063          (!cast<Instruction>(InstrStr#rmkz)
4064                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4065                      addr:$srcAddr)>;
4066
4067def : Pat<(_.info128.VT (extract_subvector
4068                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4069                      (_.info512.VT (insert_subvector undef,
4070                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4071                            (iPTR 0))))),
4072                (iPTR 0))),
4073          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4074                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4075                      addr:$srcAddr)>;
4076
4077}
4078
4079multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4080                                              AVX512VLVectorVTInfo _,
4081                                              dag Mask, RegisterClass MaskRC,
4082                                              SubRegIndex subreg> {
4083
4084def : Pat<(_.info128.VT (extract_subvector
4085                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4086                                        _.info512.ImmAllZerosV)),
4087                           (iPTR 0))),
4088          (!cast<Instruction>(InstrStr#rmkz)
4089                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4090                      addr:$srcAddr)>;
4091
4092def : Pat<(_.info128.VT (extract_subvector
4093                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4094                      (_.info512.VT (insert_subvector undef,
4095                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4096                            (iPTR 0))))),
4097                (iPTR 0))),
4098          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4099                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4100                      addr:$srcAddr)>;
4101
4102}
4103
4104// This matches the more recent codegen from clang, which avoids emitting a
4105// 512-bit masked load directly. Codegen will widen a 128-bit masked load to
4106// 512 bits on AVX512F-only targets.
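//
// Illustrative example (an assumption about current clang lowering, not taken
// from this file): a masked scalar load intrinsic such as _mm_maskz_load_ss
// becomes a 128-bit llvm.masked.load in IR; without VLX, type legalization
// widens it to 512 bits, and the AVX512F patterns below fold the widened
// load back into the masked scalar load instruction (e.g. VMOVSSZrmkz).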
4107multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4108                                              AVX512VLVectorVTInfo _,
4109                                              dag Mask512, dag Mask128,
4110                                              RegisterClass MaskRC,
4111                                              SubRegIndex subreg> {
4112// AVX512F patterns.
4113def : Pat<(_.info128.VT (extract_subvector
4114                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4115                                        _.info512.ImmAllZerosV)),
4116                           (iPTR 0))),
4117          (!cast<Instruction>(InstrStr#rmkz)
4118                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4119                      addr:$srcAddr)>;
4120
4121def : Pat<(_.info128.VT (extract_subvector
4122                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4123                      (_.info512.VT (insert_subvector undef,
4124                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4125                            (iPTR 0))))),
4126                (iPTR 0))),
4127          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4128                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4129                      addr:$srcAddr)>;
4130
4131// AVX512VL patterns.
4132def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4133                         _.info128.ImmAllZerosV)),
4134          (!cast<Instruction>(InstrStr#rmkz)
4135                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4136                      addr:$srcAddr)>;
4137
4138def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4139                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4140          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4141                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4142                      addr:$srcAddr)>;
4143}
4144
4145defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4146defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4147
4148defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4149                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4150defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4151                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4152defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4153                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4154
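// FP16 (vmovsh) instantiations of the scalar move/store/load lowerings and
// the masked scalar select patterns.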
4155let Predicates = [HasFP16] in {
4156defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4157defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4158                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4159defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4160                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4161defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4162                   (v32i1 (insert_subvector
4163                           (v32i1 immAllZerosV),
4164                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4165                           (iPTR 0))),
4166                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4167                   GR8, sub_8bit>;
4168
4169defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4170                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4171defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4172                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4173defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4174                   (v32i1 (insert_subvector
4175                           (v32i1 immAllZerosV),
4176                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4177                           (iPTR 0))),
4178                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4179                   GR8, sub_8bit>;
4180
4181def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4182          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4183           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4184           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4185           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4186
4187def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4188          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4189           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4190}
4191
4192defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4193                   (v16i1 (insert_subvector
4194                           (v16i1 immAllZerosV),
4195                           (v4i1 (extract_subvector
4196                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4197                                  (iPTR 0))),
4198                           (iPTR 0))),
4199                   (v4i1 (extract_subvector
4200                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4201                          (iPTR 0))), GR8, sub_8bit>;
4202defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4203                   (v8i1
4204                    (extract_subvector
4205                     (v16i1
4206                      (insert_subvector
4207                       (v16i1 immAllZerosV),
4208                       (v2i1 (extract_subvector
4209                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4210                              (iPTR 0))),
4211                       (iPTR 0))),
4212                     (iPTR 0))),
4213                   (v2i1 (extract_subvector
4214                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4215                          (iPTR 0))), GR8, sub_8bit>;
4216
4217defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4218                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4219defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4220                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4221defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4222                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4223
4224defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4225                   (v16i1 (insert_subvector
4226                           (v16i1 immAllZerosV),
4227                           (v4i1 (extract_subvector
4228                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4229                                  (iPTR 0))),
4230                           (iPTR 0))),
4231                   (v4i1 (extract_subvector
4232                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4233                          (iPTR 0))), GR8, sub_8bit>;
4234defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4235                   (v8i1
4236                    (extract_subvector
4237                     (v16i1
4238                      (insert_subvector
4239                       (v16i1 immAllZerosV),
4240                       (v2i1 (extract_subvector
4241                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4242                              (iPTR 0))),
4243                       (iPTR 0))),
4244                     (iPTR 0))),
4245                   (v2i1 (extract_subvector
4246                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4247                          (iPTR 0))), GR8, sub_8bit>;
4248
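// Masked select of scalar FR32X/FR64X values: copy the operands to VR128X and
// use the masked VMOVSS/VMOVSD register forms.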
4249def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4250          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4251           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4252           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4253           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4254
4255def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4256          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4257           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4258
4259def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4260          (COPY_TO_REGCLASS
4261           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4262                                                       VK1WM:$mask, addr:$src)),
4263           FR32X)>;
4264def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4265          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4266
4267def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4268          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4269           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4270           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4271           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4272
4273def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4274          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4275           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4276
4277def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4278          (COPY_TO_REGCLASS
4279           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4280                                                       VK1WM:$mask, addr:$src)),
4281           FR64X)>;
4282def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4283          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4284
4285
4286def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4287          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4288def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4289          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4290
4291def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4292          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4293def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4294          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4295
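// Reversed-operand (MRMDestReg, opcode 0x11) register forms. These exist for
// the disassembler and for the ".s" assembler aliases below; they carry no
// patterns.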
4296let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4297  let Predicates = [HasFP16] in {
4298    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4299        (ins VR128X:$src1, VR128X:$src2),
4300        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4301        []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4302        Sched<[SchedWriteFShuffle.XMM]>;
4303
4304    let Constraints = "$src0 = $dst" in
4305    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4306        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4307         VR128X:$src1, VR128X:$src2),
4308        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4309          "$dst {${mask}}, $src1, $src2}",
4310        []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4311        Sched<[SchedWriteFShuffle.XMM]>;
4312
4313    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4314        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4315        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4316          "$dst {${mask}} {z}, $src1, $src2}",
4317        []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4318        Sched<[SchedWriteFShuffle.XMM]>;
4319  }
4320  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4321                           (ins VR128X:$src1, VR128X:$src2),
4322                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4323                           []>, TB, XS, EVEX, VVVV, VEX_LIG,
4324                           Sched<[SchedWriteFShuffle.XMM]>;
4325
4326  let Constraints = "$src0 = $dst" in
4327  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4328                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4329                                                   VR128X:$src1, VR128X:$src2),
4330                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4331                                        "$dst {${mask}}, $src1, $src2}",
4332                             []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4333                             Sched<[SchedWriteFShuffle.XMM]>;
4334
4335  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4336                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4337                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4338                                    "$dst {${mask}} {z}, $src1, $src2}",
4339                         []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4340                         Sched<[SchedWriteFShuffle.XMM]>;
4341
4342  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4343                           (ins VR128X:$src1, VR128X:$src2),
4344                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4345                           []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4346                           Sched<[SchedWriteFShuffle.XMM]>;
4347
4348  let Constraints = "$src0 = $dst" in
4349  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4350                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4351                                                   VR128X:$src1, VR128X:$src2),
4352                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4353                                        "$dst {${mask}}, $src1, $src2}",
4354                             []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4355                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4356
4357  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4358                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4359                                                          VR128X:$src2),
4360                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4361                                         "$dst {${mask}} {z}, $src1, $src2}",
4362                              []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4363                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4364}
4365
4366def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4367                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4368def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4369                             "$dst {${mask}}, $src1, $src2}",
4370                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4371                                VR128X:$src1, VR128X:$src2), 0>;
4372def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4373                             "$dst {${mask}} {z}, $src1, $src2}",
4374                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4375                                 VR128X:$src1, VR128X:$src2), 0>;
4376def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4377                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4378def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4379                             "$dst {${mask}}, $src1, $src2}",
4380                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4381                                VR128X:$src1, VR128X:$src2), 0>;
4382def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4383                             "$dst {${mask}} {z}, $src1, $src2}",
4384                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4385                                 VR128X:$src1, VR128X:$src2), 0>;
4386def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4387                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4388def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4389                             "$dst {${mask}}, $src1, $src2}",
4390                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4391                                VR128X:$src1, VR128X:$src2), 0>;
4392def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4393                             "$dst {${mask}} {z}, $src1, $src2}",
4394                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4395                                 VR128X:$src1, VR128X:$src2), 0>;
4396
4397let Predicates = [HasAVX512, OptForSize] in {
4398  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4399            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4400  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4401            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4402
4403  // Move low f32 and clear high bits.
4404  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4405            (SUBREG_TO_REG (i32 0),
4406             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4407              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4408  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4409            (SUBREG_TO_REG (i32 0),
4410             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4411              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4412
4413  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4414            (SUBREG_TO_REG (i32 0),
4415             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4416              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4417  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4418            (SUBREG_TO_REG (i32 0),
4419             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4420              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4421}
4422
4423// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4424// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
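// (VBLENDPS/VPBLENDW have no EVEX encoding, so only XMM0-15 are addressable.)
// The blend immediates select only the low 32 bits from $src; every other
// element comes from the zero vector.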
4425let Predicates = [HasAVX512, OptForSpeed] in {
4426  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4427            (SUBREG_TO_REG (i32 0),
4428             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4429                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4430                          (i8 1))), sub_xmm)>;
4431  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4432            (SUBREG_TO_REG (i32 0),
4433             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4434                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4435                          (i8 3))), sub_xmm)>;
4436}
4437
4438let Predicates = [HasAVX512] in {
4439  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4440            (VMOVSSZrm addr:$src)>;
4441  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4442            (VMOVSDZrm addr:$src)>;
4443
4444  // The same patterns as above, but in the form they appear for
4445  // 256-bit types.
4446  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4447            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4448  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4449            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4450
4451  // The same patterns as above, but in the form they appear for
4452  // 512-bit types.
4453  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4454            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4455  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4456            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4457}
4458let Predicates = [HasFP16] in {
4459  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4460            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4461  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4462            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4463
4464  // FIXME: We need better canonicalization in DAG combine.
4465  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4466            (SUBREG_TO_REG (i32 0),
4467             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4468              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4469  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4470            (SUBREG_TO_REG (i32 0),
4471             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4472              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4473
4474  // FIXME: We need better canonicalization in DAG combine.
4475  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4476            (SUBREG_TO_REG (i32 0),
4477             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4478              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4479  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4480            (SUBREG_TO_REG (i32 0),
4481             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4482              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4483
4484  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4485            (VMOVSHZrm addr:$src)>;
4486
4487  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4488            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4489
4490  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4491            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4492}
4493
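// Move the low 64 bits of the source and zero the upper bits of the
// destination.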
4494let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4495def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4496                                (ins VR128X:$src),
4497                                "vmovq\t{$src, $dst|$dst, $src}",
4498                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4499                                                   (v2i64 VR128X:$src))))]>,
4500                                EVEX, REX_W;
4501}
4502
4503let Predicates = [HasAVX512] in {
4504  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4505            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4506                                              GR8:$src, sub_8bit)))>;
4507  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4508            (VMOVDI2PDIZrr GR32:$src)>;
4509
4510  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4511            (VMOV64toPQIZrr GR64:$src)>;
4512
4513  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4514  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4515            (VMOVDI2PDIZrm addr:$src)>;
4516  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4517            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4518  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4519            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4520  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4521            (VMOVQI2PQIZrm addr:$src)>;
4522  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4523            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4524
4525  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4526  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4527            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4528  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4529            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4530
4531  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4532            (SUBREG_TO_REG (i32 0),
4533             (v2f64 (VMOVZPQILo2PQIZrr
4534                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4535             sub_xmm)>;
4536  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4537            (SUBREG_TO_REG (i32 0),
4538             (v2i64 (VMOVZPQILo2PQIZrr
4539                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4540             sub_xmm)>;
4541
4542  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4543            (SUBREG_TO_REG (i32 0),
4544             (v2f64 (VMOVZPQILo2PQIZrr
4545                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4546             sub_xmm)>;
4547  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4548            (SUBREG_TO_REG (i32 0),
4549             (v2i64 (VMOVZPQILo2PQIZrr
4550                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4551             sub_xmm)>;
4552}
4553
4554//===----------------------------------------------------------------------===//
4555// AVX-512 - Non-temporals
4556//===----------------------------------------------------------------------===//
4557
4558def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4559                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4560                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.ZMM.RM]>,
4561                      EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4562
4563let Predicates = [HasVLX] in {
4564  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4565                       (ins i256mem:$src),
4566                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4567                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.YMM.RM]>,
4568                       EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4569
4570  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4571                      (ins i128mem:$src),
4572                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4573                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.XMM.RM]>,
4574                      EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4575}
4576
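// Non-temporal store multiclasses. AddedComplexity = 400 gives these patterns
// priority over the ordinary aligned-store patterns.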
4577multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4578                        X86SchedWriteMoveLS Sched,
4579                        PatFrag st_frag = alignednontemporalstore> {
4580  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4581  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4582                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4583                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4584                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4585}
4586
4587multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4588                           AVX512VLVectorVTInfo VTInfo,
4589                           X86SchedWriteMoveLSWidths Sched> {
4590  let Predicates = [HasAVX512] in
4591    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4592
4593  let Predicates = [HasAVX512, HasVLX] in {
4594    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4595    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4596  }
4597}
4598
4599defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4600                                SchedWriteVecMoveLSNT>, TB, PD;
4601defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4602                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
4603defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4604                                SchedWriteFMoveLSNT>, TB;
4605
4606let Predicates = [HasAVX512], AddedComplexity = 400 in {
4607  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4608            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4609  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4610            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4611  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4612            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4613
4614  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4615            (VMOVNTDQAZrm addr:$src)>;
4616  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4617            (VMOVNTDQAZrm addr:$src)>;
4618  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4619            (VMOVNTDQAZrm addr:$src)>;
4620  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4621            (VMOVNTDQAZrm addr:$src)>;
4622  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4623            (VMOVNTDQAZrm addr:$src)>;
4624  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4625            (VMOVNTDQAZrm addr:$src)>;
4626}
4627
4628let Predicates = [HasVLX], AddedComplexity = 400 in {
4629  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4630            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4631  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4632            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4633  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4634            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4635
4636  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4637            (VMOVNTDQAZ256rm addr:$src)>;
4638  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4639            (VMOVNTDQAZ256rm addr:$src)>;
4640  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4641            (VMOVNTDQAZ256rm addr:$src)>;
4642  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4643            (VMOVNTDQAZ256rm addr:$src)>;
4644  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4645            (VMOVNTDQAZ256rm addr:$src)>;
4646  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4647            (VMOVNTDQAZ256rm addr:$src)>;
4648
4649  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4650            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4651  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4652            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4653  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4654            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4655
4656  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4657            (VMOVNTDQAZ128rm addr:$src)>;
4658  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4659            (VMOVNTDQAZ128rm addr:$src)>;
4660  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4661            (VMOVNTDQAZ128rm addr:$src)>;
4662  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4663            (VMOVNTDQAZ128rm addr:$src)>;
4664  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4665            (VMOVNTDQAZ128rm addr:$src)>;
4666  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4667            (VMOVNTDQAZ128rm addr:$src)>;
4668}
4669
4670//===----------------------------------------------------------------------===//
4671// AVX-512 - Integer arithmetic
4672//===----------------------------------------------------------------------===//
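
// avx512_binop_rm provides the register (rr) and memory (rm) forms of a
// binary integer op; avx512_binop_rmb adds the embedded-broadcast (rmb) form.
// The *_vl wrappers instantiate the 512-bit variant under the base predicate;
// the 128/256-bit variants additionally require HasVLX.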
4673multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4674                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4675                           bit IsCommutable = 0> {
4676  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4677                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4678                    "$src2, $src1", "$src1, $src2",
4679                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4680                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4681                    Sched<[sched]>;
4682
4683  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4684                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4685                  "$src2, $src1", "$src1, $src2",
4686                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4687                  AVX512BIBase, EVEX, VVVV,
4688                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4689}
4690
4691multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4692                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4693                            bit IsCommutable = 0> :
4694           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4695  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4696                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4697                  "${src2}"#_.BroadcastStr#", $src1",
4698                  "$src1, ${src2}"#_.BroadcastStr,
4699                  (_.VT (OpNode _.RC:$src1,
4700                                (_.BroadcastLdFrag addr:$src2)))>,
4701                  AVX512BIBase, EVEX, VVVV, EVEX_B,
4702                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4703}
4704
4705multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4706                              AVX512VLVectorVTInfo VTInfo,
4707                              X86SchedWriteWidths sched, Predicate prd,
4708                              bit IsCommutable = 0> {
4709  let Predicates = [prd] in
4710    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4711                             IsCommutable>, EVEX_V512;
4712
4713  let Predicates = [prd, HasVLX] in {
4714    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4715                                sched.YMM, IsCommutable>, EVEX_V256;
4716    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4717                                sched.XMM, IsCommutable>, EVEX_V128;
4718  }
4719}
4720
4721multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4722                               AVX512VLVectorVTInfo VTInfo,
4723                               X86SchedWriteWidths sched, Predicate prd,
4724                               bit IsCommutable = 0> {
4725  let Predicates = [prd] in
4726    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4727                             IsCommutable>, EVEX_V512;
4728
4729  let Predicates = [prd, HasVLX] in {
4730    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4731                                 sched.YMM, IsCommutable>, EVEX_V256;
4732    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4733                                 sched.XMM, IsCommutable>, EVEX_V128;
4734  }
4735}
4736
4737multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4738                                X86SchedWriteWidths sched, Predicate prd,
4739                                bit IsCommutable = 0> {
4740  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4741                                  sched, prd, IsCommutable>,
4742                                  REX_W, EVEX_CD8<64, CD8VF>;
4743}
4744
4745multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4746                                X86SchedWriteWidths sched, Predicate prd,
4747                                bit IsCommutable = 0> {
4748  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4749                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4750}
4751
4752multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4753                                X86SchedWriteWidths sched, Predicate prd,
4754                                bit IsCommutable = 0> {
4755  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4756                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4757                                 WIG;
4758}
4759
4760multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4761                                X86SchedWriteWidths sched, Predicate prd,
4762                                bit IsCommutable = 0> {
4763  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4764                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4765                                 WIG;
4766}
4767
4768multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4769                                 SDNode OpNode, X86SchedWriteWidths sched,
4770                                 Predicate prd, bit IsCommutable = 0> {
4771  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4772                                   IsCommutable>;
4773
4774  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4775                                   IsCommutable>;
4776}
4777
4778multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4779                                 SDNode OpNode, X86SchedWriteWidths sched,
4780                                 Predicate prd, bit IsCommutable = 0> {
4781  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4782                                   IsCommutable>;
4783
4784  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4785                                   IsCommutable>;
4786}
4787
4788multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4789                                  bits<8> opc_d, bits<8> opc_q,
4790                                  string OpcodeStr, SDNode OpNode,
4791                                  X86SchedWriteWidths sched,
4792                                  bit IsCommutable = 0> {
4793  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4794                                    sched, HasAVX512, IsCommutable>,
4795              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4796                                    sched, HasBWI, IsCommutable>;
4797}
4798
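// Binary ops whose source and destination vector types differ. The broadcast
// memory form (rmb) uses the _Brdct type for the splatted operand.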
4799multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4800                            X86FoldableSchedWrite sched,
4801                            SDNode OpNode,X86VectorVTInfo _Src,
4802                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4803                            bit IsCommutable = 0> {
4804  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4805                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4806                            "$src2, $src1","$src1, $src2",
4807                            (_Dst.VT (OpNode
4808                                         (_Src.VT _Src.RC:$src1),
4809                                         (_Src.VT _Src.RC:$src2))),
4810                            IsCommutable>,
4811                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
4812  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4813                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4814                        "$src2, $src1", "$src1, $src2",
4815                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4816                                      (_Src.LdFrag addr:$src2)))>,
4817                        AVX512BIBase, EVEX, VVVV,
4818                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4819
4820  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4821                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4822                    OpcodeStr,
4823                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4824                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4825                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4826                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4827                    AVX512BIBase, EVEX, VVVV, EVEX_B,
4828                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4829}
4830
4831defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4832                                    SchedWriteVecALU, 1>;
4833defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4834                                    SchedWriteVecALU, 0>;
4835defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4836                                    SchedWriteVecALU, HasBWI, 1>;
4837defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4838                                    SchedWriteVecALU, HasBWI, 0>;
4839defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4840                                     SchedWriteVecALU, HasBWI, 1>;
4841defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4842                                     SchedWriteVecALU, HasBWI, 0>;
4843defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4844                                    SchedWritePMULLD, HasAVX512, 1>, T8;
4845defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4846                                    SchedWriteVecIMul, HasBWI, 1>;
4847defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4848                                    SchedWriteVecIMul, HasDQI, 1>, T8;
4849defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4850                                    HasBWI, 1>;
4851defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4852                                     HasBWI, 1>;
4853defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4854                                      SchedWriteVecIMul, HasBWI, 1>, T8;
4855defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4856                                   SchedWriteVecALU, HasBWI, 1>;
4857defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4858                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
4859defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4860                                     SchedWriteVecIMul, HasAVX512, 1>;
4861
4862multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4863                            X86SchedWriteWidths sched,
4864                            AVX512VLVectorVTInfo _SrcVTInfo,
4865                            AVX512VLVectorVTInfo _DstVTInfo,
4866                            SDNode OpNode, list<Predicate> prds512,
4867                            list<Predicate> prds,
4868                            X86VectorVTInfo _VTInfo512 = _SrcVTInfo.info512,
4869                            X86VectorVTInfo _VTInfo256 = _SrcVTInfo.info256,
4870                            X86VectorVTInfo _VTInfo128 = _SrcVTInfo.info128> {
4871  let Predicates = prds512 in
4872    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4873                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4874                                 _VTInfo512>, EVEX_V512;
4875  let Predicates = prds in {
4876    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4877                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4878                                      _VTInfo256>, EVEX_V256;
4879    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4880                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4881                                      _VTInfo128>, EVEX_V128;
4882  }
4883}
4884
4885defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4886                                       avx512vl_i8_info, avx512vl_i8_info,
4887                                       X86multishift, [HasVBMI], [HasVLX, HasVBMI],
4888                                       v8i64_info, v4i64x_info, v2i64x_info>, T8,
4889                                       EVEX_CD8<64, CD8VF>, REX_W;
4890
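// PACK/PMADD-style ops where the source and destination element types differ;
// these take separate _Src and _Dst vector type infos.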
4891multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4892                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4893                            X86FoldableSchedWrite sched> {
4894  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4895                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4896                    OpcodeStr,
4897                    "${src2}"#_Src.BroadcastStr#", $src1",
4898                     "$src1, ${src2}"#_Src.BroadcastStr,
4899                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4900                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4901                    EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4902                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4903}
4904
4905multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4906                            SDNode OpNode,X86VectorVTInfo _Src,
4907                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4908                            bit IsCommutable = 0> {
4909  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4910                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4911                            "$src2, $src1","$src1, $src2",
4912                            (_Dst.VT (OpNode
4913                                         (_Src.VT _Src.RC:$src1),
4914                                         (_Src.VT _Src.RC:$src2))),
4915                            IsCommutable, IsCommutable>,
4916                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
4917  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4918                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4919                        "$src2, $src1", "$src1, $src2",
4920                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4921                                      (_Src.LdFrag addr:$src2)))>,
4922                         EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4923                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4924}
4925
4926multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4927                                    SDNode OpNode> {
4928  let Predicates = [HasBWI] in
4929  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4930                                 v32i16_info, SchedWriteShuffle.ZMM>,
4931                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4932                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4933  let Predicates = [HasBWI, HasVLX] in {
4934    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4935                                     v16i16x_info, SchedWriteShuffle.YMM>,
4936                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4937                                      v16i16x_info, SchedWriteShuffle.YMM>,
4938                                      EVEX_V256;
4939    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4940                                     v8i16x_info, SchedWriteShuffle.XMM>,
4941                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4942                                      v8i16x_info, SchedWriteShuffle.XMM>,
4943                                      EVEX_V128;
4944  }
4945}
4946multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4947                            SDNode OpNode> {
4948  let Predicates = [HasBWI] in
4949  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4950                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
4951  let Predicates = [HasBWI, HasVLX] in {
4952    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4953                                     v32i8x_info, SchedWriteShuffle.YMM>,
4954                                     EVEX_V256, WIG;
4955    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4956                                     v16i8x_info, SchedWriteShuffle.XMM>,
4957                                     EVEX_V128, WIG;
4958  }
4959}
4960
4961multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4962                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4963                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4964  let Predicates = [HasBWI] in
4965  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4966                                _Dst.info512, SchedWriteVecIMul.ZMM,
4967                                IsCommutable>, EVEX_V512;
4968  let Predicates = [HasBWI, HasVLX] in {
4969    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4970                                     _Dst.info256, SchedWriteVecIMul.YMM,
4971                                     IsCommutable>, EVEX_V256;
4972    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4973                                     _Dst.info128, SchedWriteVecIMul.XMM,
4974                                     IsCommutable>, EVEX_V128;
4975  }
4976}
4977
4978defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4979defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4980defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4981defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4982
4983defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4984                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4985defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4986                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
4987
4988defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4989                                    SchedWriteVecALU, HasBWI, 1>, T8;
4990defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4991                                    SchedWriteVecALU, HasBWI, 1>;
4992defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4993                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4994defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4995                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4996
4997defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4998                                    SchedWriteVecALU, HasBWI, 1>;
4999defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5000                                    SchedWriteVecALU, HasBWI, 1>, T8;
5001defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5002                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5003defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5004                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5005
5006defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5007                                    SchedWriteVecALU, HasBWI, 1>, T8;
5008defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5009                                    SchedWriteVecALU, HasBWI, 1>;
5010defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5011                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5012defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5013                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5014
5015defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5016                                    SchedWriteVecALU, HasBWI, 1>;
5017defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5018                                    SchedWriteVecALU, HasBWI, 1>, T8;
5019defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5020                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5021defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5022                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5023
5024// PMULLQ: Use the 512-bit form to implement 128/256-bit ops when NoVLX, HasEVEX512.
5025let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
5026  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5027            (EXTRACT_SUBREG
5028                (VPMULLQZrr
5029                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5030                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5031             sub_ymm)>;
5032  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5033            (EXTRACT_SUBREG
5034                (VPMULLQZrmb
5035                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5036                    addr:$src2),
5037             sub_ymm)>;
5038
5039  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5040            (EXTRACT_SUBREG
5041                (VPMULLQZrr
5042                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5043                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5044             sub_xmm)>;
5045  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5046            (EXTRACT_SUBREG
5047                (VPMULLQZrmb
5048                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5049                    addr:$src2),
5050             sub_xmm)>;
5051}
5052
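// The same widening approach handles 64-bit element min/max when VLX is not
// available: insert the 128/256-bit operands into an IMPLICIT_DEF zmm, run the
// 512-bit instruction, and extract the original subregister from the result.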
5053multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5054  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5055            (EXTRACT_SUBREG
5056                (!cast<Instruction>(Instr#"rr")
5057                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5058                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5059             sub_ymm)>;
5060  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5061            (EXTRACT_SUBREG
5062                (!cast<Instruction>(Instr#"rmb")
5063                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5064                    addr:$src2),
5065             sub_ymm)>;
5066
5067  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5068            (EXTRACT_SUBREG
5069                (!cast<Instruction>(Instr#"rr")
5070                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5071                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5072             sub_xmm)>;
5073  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5074            (EXTRACT_SUBREG
5075                (!cast<Instruction>(Instr#"rmb")
5076                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5077                    addr:$src2),
5078             sub_xmm)>;
5079}
5080
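// For illustration, with the predicates below a (v4i64 (smax ymm1, ymm2)) is
// selected to VPMAXSQZrr on the widened zmm operands followed by an
// EXTRACT_SUBREG of sub_ymm.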
5081let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
5082  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5083  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5084  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5085  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5086}
5087
5088//===----------------------------------------------------------------------===//
5089// AVX-512  Logical Instructions
5090//===----------------------------------------------------------------------===//
5091
5092defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5093                                   SchedWriteVecLogic, HasAVX512, 1>;
5094defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5095                                  SchedWriteVecLogic, HasAVX512, 1>;
5096defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5097                                   SchedWriteVecLogic, HasAVX512, 1>;
5098defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5099                                    SchedWriteVecLogic, HasAVX512>;
5100
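// Only dword/qword element forms of the EVEX logic instructions exist, so
// byte/word vectors (and their loads) are simply selected to the Q-element
// variants; the operations are bitwise, so the element size is irrelevant.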
5101let Predicates = [HasVLX] in {
5102  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5103            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5104  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5105            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5106
5107  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5108            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5109  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5110            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5111
5112  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5113            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5114  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5115            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5116
5117  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5118            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5119  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5120            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5121
5122  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5123            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5124  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5125            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5126
5127  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5128            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5129  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5130            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5131
5132  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5133            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5134  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5135            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5136
5137  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5138            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5139  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5140            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5141
5142  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5143            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5144  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5145            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5146
5147  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5148            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5149  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5150            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5151
5152  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5153            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5154  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5155            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5156
5157  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5158            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5159  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5160            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5161
5162  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5163            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5164  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5165            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5166
5167  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5168            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5169  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5170            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5171
5172  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5173            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5174  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5175            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5176
5177  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5178            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5179  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5180            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5181}
5182
5183let Predicates = [HasAVX512] in {
5184  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5185            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5186  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5187            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5188
5189  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5190            (VPORQZrr VR512:$src1, VR512:$src2)>;
5191  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5192            (VPORQZrr VR512:$src1, VR512:$src2)>;
5193
5194  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5195            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5196  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5197            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5198
5199  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5200            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5201  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5202            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5203
5204  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5205            (VPANDQZrm VR512:$src1, addr:$src2)>;
5206  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5207            (VPANDQZrm VR512:$src1, addr:$src2)>;
5208
5209  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5210            (VPORQZrm VR512:$src1, addr:$src2)>;
5211  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5212            (VPORQZrm VR512:$src1, addr:$src2)>;
5213
5214  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5215            (VPXORQZrm VR512:$src1, addr:$src2)>;
5216  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5217            (VPXORQZrm VR512:$src1, addr:$src2)>;
5218
5219  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5220            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5221  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5222            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5223}
5224
5225// Patterns to catch a vselect whose type differs from that of the logic op.
5226multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5227                                    X86VectorVTInfo _,
5228                                    X86VectorVTInfo IntInfo> {
5229  // Masked register-register logical operations.
5230  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5231                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5232                   _.RC:$src0)),
5233            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5234             _.RC:$src1, _.RC:$src2)>;
5235
5236  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5237                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5238                   _.ImmAllZerosV)),
5239            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5240             _.RC:$src2)>;
5241
5242  // Masked register-memory logical operations.
5243  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5244                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5245                                            (load addr:$src2)))),
5246                   _.RC:$src0)),
5247            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5248             _.RC:$src1, addr:$src2)>;
5249  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5250                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5251                                            (load addr:$src2)))),
5252                   _.ImmAllZerosV)),
5253            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5254             addr:$src2)>;
5255}
5256
5257multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5258                                         X86VectorVTInfo _,
5259                                         X86VectorVTInfo IntInfo> {
5260  // Register-broadcast logical operations.
5261  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5262                   (bitconvert
5263                    (IntInfo.VT (OpNode _.RC:$src1,
5264                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5265                   _.RC:$src0)),
5266            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5267             _.RC:$src1, addr:$src2)>;
5268  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5269                   (bitconvert
5270                    (IntInfo.VT (OpNode _.RC:$src1,
5271                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5272                   _.ImmAllZerosV)),
5273            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5274             _.RC:$src1, addr:$src2)>;
5275}
5276
5277multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5278                                         AVX512VLVectorVTInfo SelectInfo,
5279                                         AVX512VLVectorVTInfo IntInfo> {
5280let Predicates = [HasVLX] in {
5281  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5282                                 IntInfo.info128>;
5283  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5284                                 IntInfo.info256>;
5285}
5286let Predicates = [HasAVX512] in {
5287  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5288                                 IntInfo.info512>;
5289}
5290}
5291
5292multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5293                                               AVX512VLVectorVTInfo SelectInfo,
5294                                               AVX512VLVectorVTInfo IntInfo> {
5295let Predicates = [HasVLX] in {
5296  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5297                                       SelectInfo.info128, IntInfo.info128>;
5298  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5299                                       SelectInfo.info256, IntInfo.info256>;
5300}
5301let Predicates = [HasAVX512] in {
5302  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5303                                       SelectInfo.info512, IntInfo.info512>;
5304}
5305}
5306
5307multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5308  // i64 vselect with i32/i16/i8 logic op
5309  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5310                                       avx512vl_i32_info>;
5311  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5312                                       avx512vl_i16_info>;
5313  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5314                                       avx512vl_i8_info>;
5315
5316  // i32 vselect with i64/i16/i8 logic op
5317  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5318                                       avx512vl_i64_info>;
5319  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5320                                       avx512vl_i16_info>;
5321  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5322                                       avx512vl_i8_info>;
5323
5324  // f32 vselect with i64/i32/i16/i8 logic op
5325  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5326                                       avx512vl_i64_info>;
5327  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5328                                       avx512vl_i32_info>;
5329  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5330                                       avx512vl_i16_info>;
5331  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5332                                       avx512vl_i8_info>;
5333
5334  // f64 vselect with i64/i32/i16/i8 logic op
5335  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5336                                       avx512vl_i64_info>;
5337  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5338                                       avx512vl_i32_info>;
5339  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5340                                       avx512vl_i16_info>;
5341  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5342                                       avx512vl_i8_info>;
5343
5344  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5345                                             avx512vl_f32_info,
5346                                             avx512vl_i32_info>;
5347  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5348                                             avx512vl_f64_info,
5349                                             avx512vl_i64_info>;
5350}
5351
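// For example, a v8i64 vselect_mask over a bitcast v16i32 'and' is folded into
// VPANDQZrrk, so the masking is performed by the logic instruction itself
// rather than by a separate masked move.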
5352defm : avx512_logical_lowering_types<"VPAND", and>;
5353defm : avx512_logical_lowering_types<"VPOR",  or>;
5354defm : avx512_logical_lowering_types<"VPXOR", xor>;
5355defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5356
5357//===----------------------------------------------------------------------===//
5358// AVX-512  FP arithmetic
5359//===----------------------------------------------------------------------===//
5360
5361multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5362                            SDPatternOperator OpNode, SDNode VecNode,
5363                            X86FoldableSchedWrite sched, bit IsCommutable> {
5364  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5365  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5366                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5367                           "$src2, $src1", "$src1, $src2",
5368                           (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">,
5369                           Sched<[sched]>;
5370
5371  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5372                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5373                         "$src2, $src1", "$src1, $src2",
5374                         (_.VT (VecNode _.RC:$src1,
5375                                        (_.ScalarIntMemFrags addr:$src2))), "_Int">,
5376                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5377  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5378  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5379                         (ins _.FRC:$src1, _.FRC:$src2),
5380                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5381                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5382                          Sched<[sched]> {
5383    let isCommutable = IsCommutable;
5384  }
5385  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5386                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5387                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5388                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5389                         (_.ScalarLdFrag addr:$src2)))]>,
5390                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5391  }
5392  }
5393}
5394
5395multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5396                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5397  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5398  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5399                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5400                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5401                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5402                          (i32 timm:$rc)), "_Int">,
5403                          EVEX_B, EVEX_RC, Sched<[sched]>;
5404}
5405multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5406                                SDPatternOperator OpNode, SDNode VecNode, SDNode SaeNode,
5407                                X86FoldableSchedWrite sched, bit IsCommutable> {
5408  let ExeDomain = _.ExeDomain in {
5409  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5410                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5411                           "$src2, $src1", "$src1, $src2",
5412                           (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">,
5413                           Sched<[sched]>, SIMD_EXC;
5414
5415  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5416                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5417                         "$src2, $src1", "$src1, $src2",
5418                         (_.VT (VecNode _.RC:$src1,
5419                                        (_.ScalarIntMemFrags addr:$src2))), "_Int">,
5420                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5421
5422  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5423      Uses = [MXCSR], mayRaiseFPException = 1 in {
5424  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5425                         (ins _.FRC:$src1, _.FRC:$src2),
5426                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5427                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5428                          Sched<[sched]> {
5429    let isCommutable = IsCommutable;
5430  }
5431  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5432                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5433                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5434                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5435                         (_.ScalarLdFrag addr:$src2)))]>,
5436                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5437  }
5438
5439  let Uses = [MXCSR] in
5440  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5441                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5442                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5443                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), "_Int">,
5444                            EVEX_B, Sched<[sched]>;
5445  }
5446}
5447
5448multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5449                                SDNode VecNode, SDNode RndNode,
5450                                X86SchedWriteSizes sched, bit IsCommutable> {
5451  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5452                              sched.PS.Scl, IsCommutable>,
5453             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5454                              sched.PS.Scl>,
5455                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5456  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5457                              sched.PD.Scl, IsCommutable>,
5458             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5459                              sched.PD.Scl>,
5460                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5461  let Predicates = [HasFP16] in
5462    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5463                                VecNode, sched.PH.Scl, IsCommutable>,
5464               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5465                                sched.PH.Scl>,
5466                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5467}
5468
5469multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5470                              SDNode VecNode, SDNode SaeNode,
5471                              X86SchedWriteSizes sched, bit IsCommutable> {
5472  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5473                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5474                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5475  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5476                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5477                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5478  let Predicates = [HasFP16] in {
5479    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5480                                VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5481                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5482  }
5483}
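// Scalar FP arithmetic. ADD/MUL/SUB/DIV get an embedded rounding-control form
// (EVEX.RC), while MIN/MAX get a suppress-all-exceptions ({sae}) form instead,
// since they do not perform any rounding.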
5484defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5485                                 SchedWriteFAddSizes, 1>;
5486defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5487                                 SchedWriteFMulSizes, 1>;
5488defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5489                                 SchedWriteFAddSizes, 0>;
5490defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5491                                 SchedWriteFDivSizes, 0>;
5492defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86any_fmin, X86fmins, X86fminSAEs,
5493                               SchedWriteFCmpSizes, 0>;
5494defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86any_fmax, X86fmaxs, X86fmaxSAEs,
5495                               SchedWriteFCmpSizes, 0>;
5496
5497// MIN/MAX nodes are commutable when "unsafe-fp-math" is enabled; in that case
5498// we use X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5499multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5500                                    X86VectorVTInfo _, SDNode OpNode,
5501                                    X86FoldableSchedWrite sched> {
5502  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5503  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5504                         (ins _.FRC:$src1, _.FRC:$src2),
5505                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5506                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5507                          Sched<[sched]> {
5508    let isCommutable = 1;
5509  }
5510  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5511                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5512                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5513                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5514                         (_.ScalarLdFrag addr:$src2)))]>,
5515                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5516  }
5517}
5518defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5519                                         SchedWriteFCmp.Scl>, TB, XS,
5520                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5521
5522defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5523                                         SchedWriteFCmp.Scl>, TB, XD,
5524                                         REX_W, EVEX, VVVV, VEX_LIG,
5525                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5526
5527defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5528                                         SchedWriteFCmp.Scl>, TB, XS,
5529                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5530
5531defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5532                                         SchedWriteFCmp.Scl>, TB, XD,
5533                                         REX_W, EVEX, VVVV, VEX_LIG,
5534                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5535
5536defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5537                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5538                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5539
5540defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5541                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5542                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5543
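// Packed FP arithmetic. avx512_fp_packed provides register, memory and
// embedded-broadcast forms, each with merge- and zero-masking variants.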
5544multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5545                            SDPatternOperator MaskOpNode,
5546                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5547                            bit IsCommutable,
5548                            bit IsKCommutable = IsCommutable,
5549                            string suffix = _.Suffix,
5550                            string ClobberConstraint = "",
5551                            bit MayRaiseFPException = 1> {
5552  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5553      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5554  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5555                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5556                                 "$src2, $src1", "$src1, $src2",
5557                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5558                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5559                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5560  let mayLoad = 1 in {
5561    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5562                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5563                                   "$src2, $src1", "$src1, $src2",
5564                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5565                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5566                                   ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5567    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5568                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5569                                    "${src2}"#_.BroadcastStr#", $src1",
5570                                    "$src1, ${src2}"#_.BroadcastStr,
5571                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5572                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5573                                    ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5574    }
5575  }
5576}
5577
5578multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5579                                  SDPatternOperator OpNodeRnd,
5580                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5581                                  string suffix = _.Suffix,
5582                                  string ClobberConstraint = ""> {
5583  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5584  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5585                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5586                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5587                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5588                  0, 0, 0, vselect_mask, ClobberConstraint>,
5589                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
5590}
5591
5592multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5593                                SDPatternOperator OpNodeSAE,
5594                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5595  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5596  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5597                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5598                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5599                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5600                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
5601}
5602
5603multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5604                             SDPatternOperator MaskOpNode,
5605                             Predicate prd, X86SchedWriteSizes sched,
5606                             bit IsCommutable = 0,
5607                             bit IsPD128Commutable = IsCommutable> {
5608  let Predicates = [prd] in {
5609  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5610                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5611                              EVEX_CD8<32, CD8VF>;
5612  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5613                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5614                              EVEX_CD8<64, CD8VF>;
5615  }
5616
5617  // Define only if AVX512VL feature is present.
5618  let Predicates = [prd, HasVLX] in {
5619    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5620                                   sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5621                                   EVEX_CD8<32, CD8VF>;
5622    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5623                                   sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5624                                   EVEX_CD8<32, CD8VF>;
5625    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5626                                   sched.PD.XMM, IsPD128Commutable,
5627                                   IsCommutable>, EVEX_V128, TB, PD, REX_W,
5628                                   EVEX_CD8<64, CD8VF>;
5629    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5630                                   sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5631                                   EVEX_CD8<64, CD8VF>;
5632  }
5633}
5634
5635multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5636                              SDPatternOperator MaskOpNode,
5637                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5638  let Predicates = [HasFP16] in {
5639    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5640                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5641                                EVEX_CD8<16, CD8VF>;
5642  }
5643  let Predicates = [HasVLX, HasFP16] in {
5644    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5645                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5646                                   EVEX_CD8<16, CD8VF>;
5647    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5648                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5649                                   EVEX_CD8<16, CD8VF>;
5650  }
5651}
5652
5653let Uses = [MXCSR] in
5654multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5655                                   X86SchedWriteSizes sched> {
5656  let Predicates = [HasFP16] in {
5657    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5658                                      v32f16_info>,
5659                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5660  }
5661  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5662                                    v16f32_info>,
5663                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5664  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5665                                    v8f64_info>,
5666                                    EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
5667}
5668
5669let Uses = [MXCSR] in
5670multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5671                                 X86SchedWriteSizes sched> {
5672  let Predicates = [HasFP16] in {
5673    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5674                                    v32f16_info>,
5675                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5676  }
5677  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5678                                  v16f32_info>,
5679                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5680  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5681                                  v8f64_info>,
5682                                  EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
5683}
5684
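// Each packed FP operation below combines the PS/PD (and, with FP16, PH) base
// forms with a 512-bit embedded-rounding or SAE variant where applicable.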
5685defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5686                              SchedWriteFAddSizes, 1>,
5687            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5688            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5689defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5690                              SchedWriteFMulSizes, 1>,
5691            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5692            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5693defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5694                              SchedWriteFAddSizes>,
5695            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5696            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5697defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5698                              SchedWriteFDivSizes>,
5699            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5700            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5701defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86any_fmin, X86fmin, HasAVX512,
5702                              SchedWriteFCmpSizes, 0>,
5703            avx512_fp_binop_ph<0x5D, "vmin", X86any_fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5704            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5705defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86any_fmax, X86fmax, HasAVX512,
5706                              SchedWriteFCmpSizes, 0>,
5707            avx512_fp_binop_ph<0x5F, "vmax", X86any_fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5708            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5709let isCodeGenOnly = 1 in {
5710  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5711                                 SchedWriteFCmpSizes, 1>,
5712               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5713                                 SchedWriteFCmpSizes, 1>;
5714  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5715                                 SchedWriteFCmpSizes, 1>,
5716               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5717                                 SchedWriteFCmpSizes, 1>;
5718}
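// FP-domain logical ops (DQI). null_frag means no ISel patterns are attached
// here; any pattern-based selection to these instructions is provided
// elsewhere. The records still supply the encodings and masked forms.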
5719let Uses = []<Register>, mayRaiseFPException = 0 in {
5720defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5721                               SchedWriteFLogicSizes, 1>;
5722defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5723                               SchedWriteFLogicSizes, 0>;
5724defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5725                               SchedWriteFLogicSizes, 1>;
5726defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5727                               SchedWriteFLogicSizes, 1>;
5728}
5729
5730multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5731                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5732  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5733  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5734                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5735                  "$src2, $src1", "$src1, $src2",
5736                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5737                  EVEX, VVVV, Sched<[sched]>;
5738  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5739                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5740                  "$src2, $src1", "$src1, $src2",
5741                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5742                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5743  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5744                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5745                   "${src2}"#_.BroadcastStr#", $src1",
5746                   "$src1, ${src2}"#_.BroadcastStr,
5747                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5748                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5749  }
5750}
5751
5752multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5753                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5754  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5755  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5756                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5757                  "$src2, $src1", "$src1, $src2",
5758                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5759                  Sched<[sched]>;
5760  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5761                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5762                  "$src2, $src1", "$src1, $src2",
5763                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5764                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5765  }
5766}
5767
5768multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5769                                X86SchedWriteWidths sched> {
5770  let Predicates = [HasFP16] in {
5771    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5772               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5773                                EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
5774    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5775               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5776                             EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5777  }
5778  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5779             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5780                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5781  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5782             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5783                              EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5784  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5785             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5786                                    X86scalefsRnd, sched.Scl>,
5787                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5788  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5789             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5790                                    X86scalefsRnd, sched.Scl>,
5791                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5792
5793  // Define only if AVX512VL feature is present.
5794  let Predicates = [HasVLX] in {
5795    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5796                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5797    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5798                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5799    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5800                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5801    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5802                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5803  }
5804
5805  let Predicates = [HasFP16, HasVLX] in {
5806    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5807                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5808    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5809                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5810  }
5811}
5812defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5813
5814//===----------------------------------------------------------------------===//
5815// AVX-512  VPTESTM instructions
5816//===----------------------------------------------------------------------===//
5817
5818multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5819                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5820  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5821  // There are just too many permutations due to commutability and bitcasts.
5822  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5823  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5824                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5825                      "$src2, $src1", "$src1, $src2",
5826                   (null_frag), (null_frag), 1>,
5827                   EVEX, VVVV, Sched<[sched]>;
5828  let mayLoad = 1 in
5829  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5830                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5831                       "$src2, $src1", "$src1, $src2",
5832                   (null_frag), (null_frag)>,
5833                   EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5834                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5835  }
5836}
5837
5838multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5839                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5840  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5841  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5842                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5843                    "${src2}"#_.BroadcastStr#", $src1",
5844                    "$src1, ${src2}"#_.BroadcastStr,
5845                    (null_frag), (null_frag)>,
5846                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5847                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5848}
5849
5850multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5851                                  X86SchedWriteWidths sched,
5852                                  AVX512VLVectorVTInfo _> {
5853  let Predicates  = [HasAVX512] in
5854  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5855           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5856
5857  let Predicates = [HasAVX512, HasVLX] in {
5858  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5859              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5860  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5861              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5862  }
5863}
5864
5865multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5866                            X86SchedWriteWidths sched> {
5867  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5868                                 avx512vl_i32_info>;
5869  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5870                                 avx512vl_i64_info>, REX_W;
5871}
5872
5873multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5874                            X86SchedWriteWidths sched> {
5875  let Predicates = [HasBWI] in {
5876  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5877                            v32i16_info>, EVEX_V512, REX_W;
5878  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5879                            v64i8_info>, EVEX_V512;
5880  }
5881
5882  let Predicates = [HasVLX, HasBWI] in {
5883  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5884                            v16i16x_info>, EVEX_V256, REX_W;
5885  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5886                            v8i16x_info>, EVEX_V128, REX_W;
5887  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5888                            v32i8x_info>, EVEX_V256;
5889  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5890                            v16i8x_info>, EVEX_V128;
5891  }
5892}
5893
5894multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5895                                   X86SchedWriteWidths sched> :
5896  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5897  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5898
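// VPTESTM sets a mask bit when the AND of the corresponding elements is
// non-zero; VPTESTNM sets it when the AND is zero.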
5899defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5900                                         SchedWriteVecLogic>, T8, PD;
5901defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5902                                         SchedWriteVecLogic>, T8, XS;
5903
5904//===----------------------------------------------------------------------===//
5905// AVX-512  Shift instructions
5906//===----------------------------------------------------------------------===//
5907
5908multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5909                            string OpcodeStr, SDNode OpNode,
5910                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5911  let ExeDomain = _.ExeDomain in {
5912  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5913                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5914                      "$src2, $src1", "$src1, $src2",
5915                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5916                   Sched<[sched]>;
5917  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5918                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5919                       "$src2, $src1", "$src1, $src2",
5920                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5921                          (i8 timm:$src2)))>,
5922                   Sched<[sched.Folded]>;
5923  }
5924}
5925
5926multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5927                             string OpcodeStr, SDNode OpNode,
5928                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5929  let ExeDomain = _.ExeDomain in
5930  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5931                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5932      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5933     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5934     EVEX_B, Sched<[sched.Folded]>;
5935}
5936
5937multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5938                            X86FoldableSchedWrite sched, ValueType SrcVT,
5939                            X86VectorVTInfo _> {
5940  // src2 is always a 128-bit vector; only its low 64 bits supply the shift count.
5941  let ExeDomain = _.ExeDomain in {
5942  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5943                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5944                      "$src2, $src1", "$src1, $src2",
5945                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5946                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5947  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5948                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5949                       "$src2, $src1", "$src1, $src2",
5950                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5951                   AVX512BIBase,
5952                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5953  }
5954}
5955
5956multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5957                              X86SchedWriteWidths sched, ValueType SrcVT,
5958                              AVX512VLVectorVTInfo VTInfo,
5959                              Predicate prd> {
5960  let Predicates = [prd] in
5961  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5962                               VTInfo.info512>, EVEX_V512,
5963                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5964  let Predicates = [prd, HasVLX] in {
5965  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5966                               VTInfo.info256>, EVEX_V256,
5967                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5968  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5969                               VTInfo.info128>, EVEX_V128,
5970                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5971  }
5972}
5973
5974multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5975                              string OpcodeStr, SDNode OpNode,
5976                              X86SchedWriteWidths sched> {
5977  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5978                              avx512vl_i32_info, HasAVX512>;
5979  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5980                              avx512vl_i64_info, HasAVX512>, REX_W;
5981  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5982                              avx512vl_i16_info, HasBWI>;
5983}
5984
5985multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5986                                  string OpcodeStr, SDNode OpNode,
5987                                  X86SchedWriteWidths sched,
5988                                  AVX512VLVectorVTInfo VTInfo> {
5989  let Predicates = [HasAVX512] in
5990  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5991                              sched.ZMM, VTInfo.info512>,
5992             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5993                               VTInfo.info512>, EVEX_V512;
5994  let Predicates = [HasAVX512, HasVLX] in {
5995  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5996                              sched.YMM, VTInfo.info256>,
5997             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5998                               VTInfo.info256>, EVEX_V256;
5999  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6000                              sched.XMM, VTInfo.info128>,
6001             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6002                               VTInfo.info128>, EVEX_V128;
6003  }
6004}
6005
6006multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6007                              string OpcodeStr, SDNode OpNode,
6008                              X86SchedWriteWidths sched> {
6009  let Predicates = [HasBWI] in
6010  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6011                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6012  let Predicates = [HasVLX, HasBWI] in {
6013  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6014                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6015  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6016                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6017  }
6018}
6019
6020multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6021                               Format ImmFormR, Format ImmFormM,
6022                               string OpcodeStr, SDNode OpNode,
6023                               X86SchedWriteWidths sched> {
6024  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6025                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6026  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6027                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6028}
6029
6030defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6031                                 SchedWriteVecShiftImm>,
6032             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6033                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6034
6035defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6036                                 SchedWriteVecShiftImm>,
6037             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6038                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6039
6040defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6041                                 SchedWriteVecShiftImm>,
6042             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6043                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6044
6045defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6046                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6047defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6048                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6049
6050defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6051                                SchedWriteVecShift>;
6052defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6053                                SchedWriteVecShift>;
6054defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6055                                SchedWriteVecShift>;
6056
6057// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is not available.
6058let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6059  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6060            (EXTRACT_SUBREG (v8i64
6061              (VPSRAQZrr
6062                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6063                 VR128X:$src2)), sub_ymm)>;
6064
6065  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6066            (EXTRACT_SUBREG (v8i64
6067              (VPSRAQZrr
6068                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6069                 VR128X:$src2)), sub_xmm)>;
6070
6071  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6072            (EXTRACT_SUBREG (v8i64
6073              (VPSRAQZri
6074                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6075                 timm:$src2)), sub_ymm)>;
6076
6077  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6078            (EXTRACT_SUBREG (v8i64
6079              (VPSRAQZri
6080                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6081                 timm:$src2)), sub_xmm)>;
6082}
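// Rough sketch of the expansion produced by the patterns above (not literal
// MIR): the narrow value is widened to 512 bits, shifted, and narrowed again,
// e.g. for the v4i64 register form:
//   %wide = INSERT_SUBREG(IMPLICIT_DEF:v8i64, %src1:v4i64, sub_ymm)
//   %res  = VPSRAQZrr %wide, %src2:v2i64
//   %out  = EXTRACT_SUBREG(%res, sub_ymm)
// The extra upper lanes are undefined and simply discarded.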
6083
6084//===----------------------------------------------------------------------===//
6085// Variable Bit Shifts
6086//===----------------------------------------------------------------------===//
6087
6088multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6089                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6090  let ExeDomain = _.ExeDomain in {
6091  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6092                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6093                      "$src2, $src1", "$src1, $src2",
6094                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6095                   AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6096  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6097                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6098                       "$src2, $src1", "$src1, $src2",
6099                   (_.VT (OpNode _.RC:$src1,
6100                   (_.VT (_.LdFrag addr:$src2))))>,
6101                   AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6102                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6103  }
6104}
6105
6106multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6107                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6108  let ExeDomain = _.ExeDomain in
6109  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6110                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6111                    "${src2}"#_.BroadcastStr#", $src1",
6112                    "$src1, ${src2}"#_.BroadcastStr,
6113                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6114                    AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6115                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6116}
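// Illustrative only: for the dword variants the broadcast form above matches
// AT&T assembly such as
//   vpsllvd (%rax){1to16}, %zmm1, %zmm0   # every lane of zmm1 shifted by the
//                                         # same broadcast dword count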
6117
6118multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6119                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6120  let Predicates  = [HasAVX512] in
6121  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6122           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6123
6124  let Predicates = [HasAVX512, HasVLX] in {
6125  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6126              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6127  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6128              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6129  }
6130}
6131
6132multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6133                                  SDNode OpNode, X86SchedWriteWidths sched> {
6134  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6135                                 avx512vl_i32_info>;
6136  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6137                                 avx512vl_i64_info>, REX_W;
6138}
6139
6140// Use the 512-bit version to implement the 128/256-bit variants when VLX is not available.
6141multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6142                                     SDNode OpNode, list<Predicate> p> {
6143  let Predicates = p in {
6144  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6145                                  (_.info256.VT _.info256.RC:$src2))),
6146            (EXTRACT_SUBREG
6147                (!cast<Instruction>(OpcodeStr#"Zrr")
6148                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6149                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6150             sub_ymm)>;
6151
6152  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6153                                  (_.info128.VT _.info128.RC:$src2))),
6154            (EXTRACT_SUBREG
6155                (!cast<Instruction>(OpcodeStr#"Zrr")
6156                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6157                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6158             sub_xmm)>;
6159  }
6160}
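// Informal note on the lowering above: unlike the uniform-count shifts, the
// count here is per element, so both operands are widened, e.g. for VPSRAVQ
// on v4i64:
//   EXTRACT_SUBREG (VPSRAVQZrr (INSERT_SUBREG undef, %src1, sub_ymm),
//                              (INSERT_SUBREG undef, %src2, sub_ymm)), sub_ymm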
6161multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6162                              SDNode OpNode, X86SchedWriteWidths sched> {
6163  let Predicates = [HasBWI] in
6164  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6165              EVEX_V512, REX_W;
6166  let Predicates = [HasVLX, HasBWI] in {
6167
6168  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6169              EVEX_V256, REX_W;
6170  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6171              EVEX_V128, REX_W;
6172  }
6173}
6174
6175defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6176              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6177
6178defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6179              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6180
6181defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6182              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6183
6184defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6185defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6186
6187defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
6188defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
6189defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
6190defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
6191
6192
6193// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is not available.
6194let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6195  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6196            (EXTRACT_SUBREG (v8i64
6197              (VPROLVQZrr
6198                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6199                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6200                       sub_xmm)>;
6201  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6202            (EXTRACT_SUBREG (v8i64
6203              (VPROLVQZrr
6204                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6205                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6206                       sub_ymm)>;
6207
6208  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6209            (EXTRACT_SUBREG (v16i32
6210              (VPROLVDZrr
6211                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6212                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6213                        sub_xmm)>;
6214  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6215            (EXTRACT_SUBREG (v16i32
6216              (VPROLVDZrr
6217                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6218                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6219                        sub_ymm)>;
6220
6221  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6222            (EXTRACT_SUBREG (v8i64
6223              (VPROLQZri
6224                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6225                        timm:$src2)), sub_xmm)>;
6226  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6227            (EXTRACT_SUBREG (v8i64
6228              (VPROLQZri
6229                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6230                       timm:$src2)), sub_ymm)>;
6231
6232  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6233            (EXTRACT_SUBREG (v16i32
6234              (VPROLDZri
6235                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6236                        timm:$src2)), sub_xmm)>;
6237  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6238            (EXTRACT_SUBREG (v16i32
6239              (VPROLDZri
6240                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6241                        timm:$src2)), sub_ymm)>;
6242}
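// Sketch of the immediate-rotate case above: a v4i32 rotl-by-constant is
// selected roughly as
//   EXTRACT_SUBREG (VPROLDZri (INSERT_SUBREG undef, %src, sub_xmm), imm),
//                  sub_xmm
// The VPROR/VPRORI block below mirrors this for right rotates.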
6243
6244// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and v4i32/v8i32 when VLX is not available.
6245let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6246  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6247            (EXTRACT_SUBREG (v8i64
6248              (VPRORVQZrr
6249                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6250                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6251                       sub_xmm)>;
6252  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6253            (EXTRACT_SUBREG (v8i64
6254              (VPRORVQZrr
6255                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6256                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6257                       sub_ymm)>;
6258
6259  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6260            (EXTRACT_SUBREG (v16i32
6261              (VPRORVDZrr
6262                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6263                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6264                        sub_xmm)>;
6265  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6266            (EXTRACT_SUBREG (v16i32
6267              (VPRORVDZrr
6268                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6269                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6270                        sub_ymm)>;
6271
6272  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6273            (EXTRACT_SUBREG (v8i64
6274              (VPRORQZri
6275                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6276                        timm:$src2)), sub_xmm)>;
6277  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6278            (EXTRACT_SUBREG (v8i64
6279              (VPRORQZri
6280                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6281                       timm:$src2)), sub_ymm)>;
6282
6283  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6284            (EXTRACT_SUBREG (v16i32
6285              (VPRORDZri
6286                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6287                        timm:$src2)), sub_xmm)>;
6288  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6289            (EXTRACT_SUBREG (v16i32
6290              (VPRORDZri
6291                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6292                        timm:$src2)), sub_ymm)>;
6293}
6294
6295//===----------------------------------------------------------------------===//
6296// 1-src variable permutation VPERMW/D/Q
6297//===----------------------------------------------------------------------===//
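// Note: the permute definitions below reuse avx512_var_shift / avx512_shift_rmi
// (and their broadcast companions) only for their operand, masking and
// scheduling boilerplate; the attached nodes are X86VPermv / X86VPermi, not
// shifts. For example (AT&T), "vpermd %zmm2, %zmm1, %zmm0" permutes the dwords
// of %zmm2 using the indices in %zmm1.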
6298
6299multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6300                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6301  let Predicates  = [HasAVX512] in
6302  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6303           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6304
6305  let Predicates = [HasAVX512, HasVLX] in
6306  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6307              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6308}
6309
6310multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6311                                 string OpcodeStr, SDNode OpNode,
6312                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6313  let Predicates = [HasAVX512] in
6314  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6315                              sched, VTInfo.info512>,
6316             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6317                               sched, VTInfo.info512>, EVEX_V512;
6318  let Predicates = [HasAVX512, HasVLX] in
6319  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6320                              sched, VTInfo.info256>,
6321             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6322                               sched, VTInfo.info256>, EVEX_V256;
6323}
6324
6325multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6326                              Predicate prd, SDNode OpNode,
6327                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6328  let Predicates = [prd] in
6329  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6330              EVEX_V512 ;
6331  let Predicates = [HasVLX, prd] in {
6332  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6333              EVEX_V256 ;
6334  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6335              EVEX_V128 ;
6336  }
6337}
6338
6339defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6340                               WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6341defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6342                               WriteVarShuffle256, avx512vl_i8_info>;
6343
6344defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6345                                    WriteVarShuffle256, avx512vl_i32_info>;
6346defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6347                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6348defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6349                                     WriteFVarShuffle256, avx512vl_f32_info>;
6350defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6351                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6352
6353defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6354                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6355                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6356defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6357                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6358                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6359
6360//===----------------------------------------------------------------------===//
6361// AVX-512 - VPERMIL
6362//===----------------------------------------------------------------------===//
6363
6364multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6365                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6366                             X86VectorVTInfo Ctrl> {
6367  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6368                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6369                  "$src2, $src1", "$src1, $src2",
6370                  (_.VT (OpNode _.RC:$src1,
6371                               (Ctrl.VT Ctrl.RC:$src2)))>,
6372                  T8, PD, EVEX, VVVV, Sched<[sched]>;
6373  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6374                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6375                  "$src2, $src1", "$src1, $src2",
6376                  (_.VT (OpNode
6377                           _.RC:$src1,
6378                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6379                  T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6380                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6381  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6382                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6383                   "${src2}"#_.BroadcastStr#", $src1",
6384                   "$src1, ${src2}"#_.BroadcastStr,
6385                   (_.VT (OpNode
6386                            _.RC:$src1,
6387                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6388                   T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6389                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6390}
6391
6392multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6393                                    X86SchedWriteWidths sched,
6394                                    AVX512VLVectorVTInfo _,
6395                                    AVX512VLVectorVTInfo Ctrl> {
6396  let Predicates = [HasAVX512] in {
6397    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6398                                  _.info512, Ctrl.info512>, EVEX_V512;
6399  }
6400  let Predicates = [HasAVX512, HasVLX] in {
6401    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6402                                  _.info128, Ctrl.info128>, EVEX_V128;
6403    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6404                                  _.info256, Ctrl.info256>, EVEX_V256;
6405  }
6406}
6407
6408multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6409                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6410  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6411                                      _, Ctrl>;
6412  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6413                                    X86VPermilpi, SchedWriteFShuffle, _>,
6414                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6415}
6416
6417let ExeDomain = SSEPackedSingle in
6418defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6419                               avx512vl_i32_info>;
6420let ExeDomain = SSEPackedDouble in
6421defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6422                               avx512vl_i64_info>, REX_W;
6423
6424//===----------------------------------------------------------------------===//
6425// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6426//===----------------------------------------------------------------------===//
6427
6428defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6429                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6430                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6431defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6432                                  X86PShufhw, SchedWriteShuffle>,
6433                                  EVEX, AVX512XSIi8Base;
6434defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6435                                  X86PShuflw, SchedWriteShuffle>,
6436                                  EVEX, AVX512XDIi8Base;
6437
6438//===----------------------------------------------------------------------===//
6439// AVX-512 - VPSHUFB
6440//===----------------------------------------------------------------------===//
6441
6442multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6443                               X86SchedWriteWidths sched> {
6444  let Predicates = [HasBWI] in
6445  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6446                              EVEX_V512;
6447
6448  let Predicates = [HasVLX, HasBWI] in {
6449  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6450                              EVEX_V256;
6451  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6452                              EVEX_V128;
6453  }
6454}
6455
6456defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6457                                  SchedWriteVarShuffle>, WIG;
6458
6459//===----------------------------------------------------------------------===//
6460// Move Low to High and High to Low packed FP Instructions
6461//===----------------------------------------------------------------------===//
6462
6463def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6464          (ins VR128X:$src1, VR128X:$src2),
6465          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6466          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6467          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6468let isCommutable = 1 in
6469def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6470          (ins VR128X:$src1, VR128X:$src2),
6471          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6472          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6473          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6474
6475//===----------------------------------------------------------------------===//
6476// VMOVHPS/PD VMOVLPS Instructions
6477// All patterns were taken from the SSE implementation.
6478//===----------------------------------------------------------------------===//
6479
6480multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6481                                  SDPatternOperator OpNode,
6482                                  X86VectorVTInfo _> {
6483  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6484  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6485                  (ins _.RC:$src1, f64mem:$src2),
6486                  !strconcat(OpcodeStr,
6487                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6488                  [(set _.RC:$dst,
6489                     (OpNode _.RC:$src1,
6490                       (_.VT (bitconvert
6491                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6492                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
6493}
6494
6495// No patterns for MOVLPS/MOVHPS: the Movlhps node should only be created for
6496// SSE1, and the MOVLPS pattern is even more complex.
6497defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6498                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6499defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6500                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6501defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6502                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6503defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6504                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
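// Informal reminder of what the loads above encode (AT&T syntax):
//   vmovhps (%rax), %xmm1, %xmm0   # high 64 bits <- memory, low 64 bits <- %xmm1
//   vmovlpd (%rax), %xmm1, %xmm0   # low 64 bits  <- memory, high 64 bits <- %xmm1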
6505
6506let Predicates = [HasAVX512] in {
6507  // VMOVHPD patterns
6508  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6509            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6510
6511  // VMOVLPD patterns
6512  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6513            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6514}
6515
6516let SchedRW = [WriteFStore] in {
6517let mayStore = 1, hasSideEffects = 0 in
6518def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6519                       (ins f64mem:$dst, VR128X:$src),
6520                       "vmovhps\t{$src, $dst|$dst, $src}",
6521                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
6522def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6523                       (ins f64mem:$dst, VR128X:$src),
6524                       "vmovhpd\t{$src, $dst|$dst, $src}",
6525                       [(store (f64 (extractelt
6526                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6527                                     (iPTR 0))), addr:$dst)]>,
6528                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6529let mayStore = 1, hasSideEffects = 0 in
6530def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6531                       (ins f64mem:$dst, VR128X:$src),
6532                       "vmovlps\t{$src, $dst|$dst, $src}",
6533                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
6534def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6535                       (ins f64mem:$dst, VR128X:$src),
6536                       "vmovlpd\t{$src, $dst|$dst, $src}",
6537                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6538                                     (iPTR 0))), addr:$dst)]>,
6539                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6540} // SchedRW
6541
6542let Predicates = [HasAVX512] in {
6543  // VMOVHPD patterns
6544  def : Pat<(store (f64 (extractelt
6545                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6546                           (iPTR 0))), addr:$dst),
6547           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6548}
6549//===----------------------------------------------------------------------===//
6550// FMA - Fused Multiply Operations
6551//===----------------------------------------------------------------------===//
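// Informal quick reference for the 132/213/231 operand orders used below,
// shown for vfmadd (dst is also the first source because of the tied
// "$src1 = $dst" constraint):
//   132: dst = dst  * src3 + src2
//   213: dst = src2 * dst  + src3
//   231: dst = src2 * src3 + dst
// The fmsub/fnmadd/fnmsub and fmaddsub/fmsubadd variants differ only in the
// signs applied (per lane for the addsub forms).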
6552
6553multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6554                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6555                               X86VectorVTInfo _> {
6556  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6557      Uses = [MXCSR], mayRaiseFPException = 1 in {
6558  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6559          (ins _.RC:$src2, _.RC:$src3),
6560          OpcodeStr, "$src3, $src2", "$src2, $src3",
6561          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6562          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6563          EVEX, VVVV, Sched<[sched]>;
6564
6565  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6566          (ins _.RC:$src2, _.MemOp:$src3),
6567          OpcodeStr, "$src3, $src2", "$src2, $src3",
6568          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6569          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6570          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6571                          sched.ReadAfterFold]>;
6572
6573  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6574            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6575            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6576            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6577            (OpNode _.RC:$src2,
6578             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6579            (MaskOpNode _.RC:$src2,
6580             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6581            EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6582                                    sched.ReadAfterFold]>;
6583  }
6584}
6585
6586multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6587                                 X86FoldableSchedWrite sched,
6588                                 X86VectorVTInfo _> {
6589  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6590      Uses = [MXCSR] in
6591  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6592          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6593          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6594          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6595          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6596          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6597}
6598
6599multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6600                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6601                                   X86SchedWriteWidths sched,
6602                                   AVX512VLVectorVTInfo _,
6603                                   Predicate prd = HasAVX512> {
6604  let Predicates = [prd] in {
6605    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6606                                      sched.ZMM, _.info512>,
6607                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6608                                        _.info512>,
6609                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6610  }
6611  let Predicates = [HasVLX, prd] in {
6612    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6613                                    sched.YMM, _.info256>,
6614                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6615    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6616                                    sched.XMM, _.info128>,
6617                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6618  }
6619}
6620
6621multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6622                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6623    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6624                                      OpNodeRnd, SchedWriteFMA,
6625                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6626    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6627                                      OpNodeRnd, SchedWriteFMA,
6628                                      avx512vl_f32_info>, T8, PD;
6629    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6630                                      OpNodeRnd, SchedWriteFMA,
6631                                      avx512vl_f64_info>, T8, PD, REX_W;
6632}
6633
6634defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6635                                       fma, X86FmaddRnd>;
6636defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6637                                       X86Fmsub, X86FmsubRnd>;
6638defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6639                                       X86Fmaddsub, X86FmaddsubRnd>;
6640defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6641                                       X86Fmsubadd, X86FmsubaddRnd>;
6642defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6643                                       X86Fnmadd, X86FnmaddRnd>;
6644defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6645                                       X86Fnmsub, X86FnmsubRnd>;
6646
6647
6648multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6649                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6650                               X86VectorVTInfo _> {
6651  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6652      Uses = [MXCSR], mayRaiseFPException = 1 in {
6653  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6654          (ins _.RC:$src2, _.RC:$src3),
6655          OpcodeStr, "$src3, $src2", "$src2, $src3",
6656          (null_frag),
6657          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6658          EVEX, VVVV, Sched<[sched]>;
6659
6660  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6661          (ins _.RC:$src2, _.MemOp:$src3),
6662          OpcodeStr, "$src3, $src2", "$src2, $src3",
6663          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6664          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6665          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6666                          sched.ReadAfterFold]>;
6667
6668  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6669         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6670         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6671         "$src2, ${src3}"#_.BroadcastStr,
6672         (_.VT (OpNode _.RC:$src2,
6673                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6674                      _.RC:$src1)),
6675         (_.VT (MaskOpNode _.RC:$src2,
6676                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6677                           _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
6678         Sched<[sched.Folded, sched.ReadAfterFold,
6679                sched.ReadAfterFold]>;
6680  }
6681}
6682
6683multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6684                                 X86FoldableSchedWrite sched,
6685                                 X86VectorVTInfo _> {
6686  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6687      Uses = [MXCSR] in
6688  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6689          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6690          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6691          (null_frag),
6692          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6693          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6694}
6695
6696multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6697                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6698                                   X86SchedWriteWidths sched,
6699                                   AVX512VLVectorVTInfo _,
6700                                   Predicate prd = HasAVX512> {
6701  let Predicates = [prd] in {
6702    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6703                                      sched.ZMM, _.info512>,
6704                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6705                                        _.info512>,
6706                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6707  }
6708  let Predicates = [HasVLX, prd] in {
6709    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6710                                    sched.YMM, _.info256>,
6711                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6712    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6713                                    sched.XMM, _.info128>,
6714                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6715  }
6716}
6717
6718multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6719                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6720    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6721                                      OpNodeRnd, SchedWriteFMA,
6722                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6723    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6724                                      OpNodeRnd, SchedWriteFMA,
6725                                      avx512vl_f32_info>, T8, PD;
6726    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6727                                      OpNodeRnd, SchedWriteFMA,
6728                                      avx512vl_f64_info>, T8, PD, REX_W;
6729}
6730
6731defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6732                                       fma, X86FmaddRnd>;
6733defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6734                                       X86Fmsub, X86FmsubRnd>;
6735defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6736                                       X86Fmaddsub, X86FmaddsubRnd>;
6737defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6738                                       X86Fmsubadd, X86FmsubaddRnd>;
6739defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6740                                       X86Fnmadd, X86FnmaddRnd>;
6741defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6742                                       X86Fnmsub, X86FnmsubRnd>;
6743
6744multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6745                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6746                               X86VectorVTInfo _> {
6747  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6748      Uses = [MXCSR], mayRaiseFPException = 1 in {
6749  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6750          (ins _.RC:$src2, _.RC:$src3),
6751          OpcodeStr, "$src3, $src2", "$src2, $src3",
6752          (null_frag),
6753          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6754          EVEX, VVVV, Sched<[sched]>;
6755
6756  // The pattern is in 312 order so that the load is in a different place from
6757  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6758  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6759          (ins _.RC:$src2, _.MemOp:$src3),
6760          OpcodeStr, "$src3, $src2", "$src2, $src3",
6761          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6762          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6763          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6764                          sched.ReadAfterFold]>;
6765
6766  // The pattern is in 312 order so that the load is in a different place from
6767  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6768  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6769         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6770         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6771         "$src2, ${src3}"#_.BroadcastStr,
6772         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6773                       _.RC:$src1, _.RC:$src2)),
6774         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6775                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6776         EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6777                                 sched.ReadAfterFold]>;
6778  }
6779}
6780
6781multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6782                                 X86FoldableSchedWrite sched,
6783                                 X86VectorVTInfo _> {
6784  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6785      Uses = [MXCSR] in
6786  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6787          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6788          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6789          (null_frag),
6790          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6791          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6792}
6793
6794multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6795                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6796                                   X86SchedWriteWidths sched,
6797                                   AVX512VLVectorVTInfo _,
6798                                   Predicate prd = HasAVX512> {
6799  let Predicates = [prd] in {
6800    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6801                                      sched.ZMM, _.info512>,
6802                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6803                                        _.info512>,
6804                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6805  }
6806  let Predicates = [HasVLX, prd] in {
6807    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6808                                    sched.YMM, _.info256>,
6809                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6810    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6811                                    sched.XMM, _.info128>,
6812                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6813  }
6814}
6815
6816multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6817                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6818    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6819                                      OpNodeRnd, SchedWriteFMA,
6820                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6821    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6822                                      OpNodeRnd, SchedWriteFMA,
6823                                      avx512vl_f32_info>, T8, PD;
6824    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6825                                      OpNodeRnd, SchedWriteFMA,
6826                                      avx512vl_f64_info>, T8, PD, REX_W;
6827}
6828
6829defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6830                                       fma, X86FmaddRnd>;
6831defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6832                                       X86Fmsub, X86FmsubRnd>;
6833defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6834                                       X86Fmaddsub, X86FmaddsubRnd>;
6835defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6836                                       X86Fmsubadd, X86FmsubaddRnd>;
6837defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6838                                       X86Fnmadd, X86FnmaddRnd>;
6839defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6840                                       X86Fnmsub, X86FnmsubRnd>;
6841
6842// Scalar FMA
6843multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6844                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6845let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6846  defm r: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6847          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6848          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">,
6849          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6850
6851  let mayLoad = 1 in
6852  defm m: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6853          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6854          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">,
6855          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6856                          SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6857
6858  let Uses = [MXCSR] in
6859  defm rb: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6860         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6861         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1, 0, "_Int">,
6862         EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6863
6864  let isCodeGenOnly = 1, isCommutable = 1 in {
6865    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6866                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6867                     !strconcat(OpcodeStr,
6868                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6869                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6870    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6871                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6872                    !strconcat(OpcodeStr,
6873                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6874                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6875                                     SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6876
6877    let Uses = [MXCSR] in
6878    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6879                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6880                     !strconcat(OpcodeStr,
6881                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6882                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6883                     Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6884  }// isCodeGenOnly = 1
6885}// Constraints = "$src1 = $dst"
6886}
6887
6888multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6889                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6890                            X86VectorVTInfo _, string SUFF> {
6891  let ExeDomain = _.ExeDomain in {
6892  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6893                // Operands for the intrinsic are in 123 order to preserve
6894                // passthru semantics (see the note after this multiclass).
6895                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6896                         _.FRC:$src3))),
6897                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6898                         (_.ScalarLdFrag addr:$src3)))),
6899                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6900                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
6901
6902  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6903                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6904                                          _.FRC:$src1))),
6905                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6906                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6907                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6908                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
6909
6910  // One pattern is in 312 order so that the load is in a different place from
6911  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6912  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6913                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6914                         _.FRC:$src2))),
6915                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6916                                 _.FRC:$src1, _.FRC:$src2))),
6917                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6918                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6919  }
6920}
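// Note (informal): in the "_Int" forms defined above, $src1 is both the tied
// accumulator and the pass-through value: the upper elements of the result
// always come from $src1, and under merge-masking a clear mask bit keeps
// element 0 of $src1 as well.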
6921
6922multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6923                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6924  let Predicates = [HasAVX512] in {
6925    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6926                                 OpNodeRnd, f32x_info, "SS">,
6927                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6928    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6929                                 OpNodeRnd, f64x_info, "SD">,
6930                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6931  }
6932  let Predicates = [HasFP16] in {
6933    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6934                                 OpNodeRnd, f16x_info, "SH">,
6935                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6936  }
6937}
6938
6939defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6940defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6941defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6942defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6943
6944multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6945                                      SDNode RndOp, string Prefix,
6946                                      string Suffix, SDNode Move,
6947                                      X86VectorVTInfo _, PatLeaf ZeroFP,
6948                                      Predicate prd = HasAVX512> {
6949  let Predicates = [prd] in {
6950    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6951                (Op _.FRC:$src2,
6952                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6953                    _.FRC:$src3))))),
6954              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6955               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6956               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6957
6958    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6959                (Op _.FRC:$src2, _.FRC:$src3,
6960                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6961              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6962               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6963               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6964
6965    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6966                (Op _.FRC:$src2,
6967                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6968                    (_.ScalarLdFrag addr:$src3)))))),
6969              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6970               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6971               addr:$src3)>;
6972
6973    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6974                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6975                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6976              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6977               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6978               addr:$src3)>;
6979
6980    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6981                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6982                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6983              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6984               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6985               addr:$src3)>;
6986
6987    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6988               (X86selects_mask VK1WM:$mask,
6989                (MaskedOp _.FRC:$src2,
6990                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6991                    _.FRC:$src3),
6992                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6993              (!cast<I>(Prefix#"213"#Suffix#"Zrk_Int")
6994               VR128X:$src1, VK1WM:$mask,
6995               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6996               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6997
6998    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6999               (X86selects_mask VK1WM:$mask,
7000                (MaskedOp _.FRC:$src2,
7001                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7002                    (_.ScalarLdFrag addr:$src3)),
7003                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7004              (!cast<I>(Prefix#"213"#Suffix#"Zmk_Int")
7005               VR128X:$src1, VK1WM:$mask,
7006               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7007
7008    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7009               (X86selects_mask VK1WM:$mask,
7010                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7011                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7012                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7013              (!cast<I>(Prefix#"132"#Suffix#"Zmk_Int")
7014               VR128X:$src1, VK1WM:$mask,
7015               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7016
7017    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7018               (X86selects_mask VK1WM:$mask,
7019                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7020                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7021                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7022              (!cast<I>(Prefix#"231"#Suffix#"Zrk_Int")
7023               VR128X:$src1, VK1WM:$mask,
7024               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7025               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7026
7027    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7028               (X86selects_mask VK1WM:$mask,
7029                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7030                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7031                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7032              (!cast<I>(Prefix#"231"#Suffix#"Zmk_Int")
7033               VR128X:$src1, VK1WM:$mask,
7034               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7035
7036    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7037               (X86selects_mask VK1WM:$mask,
7038                (MaskedOp _.FRC:$src2,
7039                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7040                          _.FRC:$src3),
7041                (_.EltVT ZeroFP)))))),
7042              (!cast<I>(Prefix#"213"#Suffix#"Zrkz_Int")
7043               VR128X:$src1, VK1WM:$mask,
7044               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7045               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7046
7047    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7048               (X86selects_mask VK1WM:$mask,
7049                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7050                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7051                (_.EltVT ZeroFP)))))),
7052              (!cast<I>(Prefix#"231"#Suffix#"Zrkz_Int")
7053               VR128X:$src1, VK1WM:$mask,
7054               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7055               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7056
7057    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7058               (X86selects_mask VK1WM:$mask,
7059                (MaskedOp _.FRC:$src2,
7060                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7061                          (_.ScalarLdFrag addr:$src3)),
7062                (_.EltVT ZeroFP)))))),
7063              (!cast<I>(Prefix#"213"#Suffix#"Zmkz_Int")
7064               VR128X:$src1, VK1WM:$mask,
7065               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7066
7067    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7068               (X86selects_mask VK1WM:$mask,
7069                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7070                          _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7071                (_.EltVT ZeroFP)))))),
7072              (!cast<I>(Prefix#"132"#Suffix#"Zmkz_Int")
7073               VR128X:$src1, VK1WM:$mask,
7074               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7075
7076    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7077               (X86selects_mask VK1WM:$mask,
7078                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7079                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7080                (_.EltVT ZeroFP)))))),
7081              (!cast<I>(Prefix#"231"#Suffix#"Zmkz_Int")
7082               VR128X:$src1, VK1WM:$mask,
7083               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7084
7085    // Patterns with rounding mode.
7086    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7087                (RndOp _.FRC:$src2,
7088                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7089                       _.FRC:$src3, (i32 timm:$rc)))))),
7090              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7091               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7092               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7093
7094    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7095                (RndOp _.FRC:$src2, _.FRC:$src3,
7096                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7097                       (i32 timm:$rc)))))),
7098              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7099               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7100               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7101
7102    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7103               (X86selects_mask VK1WM:$mask,
7104                (RndOp _.FRC:$src2,
7105                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7106                       _.FRC:$src3, (i32 timm:$rc)),
7107                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7108              (!cast<I>(Prefix#"213"#Suffix#"Zrbk_Int")
7109               VR128X:$src1, VK1WM:$mask,
7110               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7111               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7112
7113    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7114               (X86selects_mask VK1WM:$mask,
7115                (RndOp _.FRC:$src2, _.FRC:$src3,
7116                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7117                       (i32 timm:$rc)),
7118                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7119              (!cast<I>(Prefix#"231"#Suffix#"Zrbk_Int")
7120               VR128X:$src1, VK1WM:$mask,
7121               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7122               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7123
7124    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7125               (X86selects_mask VK1WM:$mask,
7126                (RndOp _.FRC:$src2,
7127                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7128                       _.FRC:$src3, (i32 timm:$rc)),
7129                (_.EltVT ZeroFP)))))),
7130              (!cast<I>(Prefix#"213"#Suffix#"Zrbkz_Int")
7131               VR128X:$src1, VK1WM:$mask,
7132               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7133               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7134
7135    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7136               (X86selects_mask VK1WM:$mask,
7137                (RndOp _.FRC:$src2, _.FRC:$src3,
7138                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7139                       (i32 timm:$rc)),
7140                (_.EltVT ZeroFP)))))),
7141              (!cast<I>(Prefix#"231"#Suffix#"Zrbkz_Int")
7142               VR128X:$src1, VK1WM:$mask,
7143               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7144               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7145  }
7146}
7147defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7148                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7149defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7150                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7151defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7152                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7153defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7154                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7155
7156defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7157                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7158defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7159                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7160defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7161                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7162defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7163                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7164
7165defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7166                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7167defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7168                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7169defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7170                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7171defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7172                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7173
7174//===----------------------------------------------------------------------===//
7175// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
7176//===----------------------------------------------------------------------===//
7177let Constraints = "$src1 = $dst" in {
7178multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7179                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7180  // NOTE: The SDNode has the multiply operands first with the add last.
7181  // This enables commuted load patterns to be autogenerated by tablegen.
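  // For example (an illustrative sketch, not generated output): with the
  // multiply operands first and the register form marked commutable, a DAG of
  // the shape
  //   (OpNode (_.LdFrag addr:$src3), _.RC:$src2, _.RC:$src1)
  // can be commuted into the form matched by the "m" variant below,
  //   (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1),
  // so the load still folds into the $src3 operand.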
7182  let ExeDomain = _.ExeDomain in {
7183  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7184          (ins _.RC:$src2, _.RC:$src3),
7185          OpcodeStr, "$src3, $src2", "$src2, $src3",
7186          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7187          T8, PD, EVEX, VVVV, Sched<[sched]>;
7188
7189  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7190          (ins _.RC:$src2, _.MemOp:$src3),
7191          OpcodeStr, "$src3, $src2", "$src2, $src3",
7192          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7193          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7194                                sched.ReadAfterFold]>;
7195
7196  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7197            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7198            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
7199            !strconcat("$src2, ${src3}", _.BroadcastStr),
7200            (OpNode _.RC:$src2,
7201                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7202                    _.RC:$src1)>,
7203            T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7204                                          sched.ReadAfterFold]>;
7205  }
7206}
7207} // Constraints = "$src1 = $dst"
7208
7209multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7210                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7211  let Predicates = [HasIFMA] in {
7212    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7213                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7214  }
7215  let Predicates = [HasVLX, HasIFMA] in {
7216    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7217                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7218    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7219                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7220  }
7221}
7222
7223defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7224                                         SchedWriteVecIMul, avx512vl_i64_info>,
7225                                         REX_W;
7226defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7227                                         SchedWriteVecIMul, avx512vl_i64_info>,
7228                                         REX_W;
7229
7230//===----------------------------------------------------------------------===//
7231// AVX-512  Scalar convert from signed integer to float/double
7232//===----------------------------------------------------------------------===//
7233
7234multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7235                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7236                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7237                    string mem, list<Register> _Uses = [MXCSR],
7238                    bit _mayRaiseFPException = 1> {
7239let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7240    mayRaiseFPException = _mayRaiseFPException in {
7241  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7242    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7243              (ins DstVT.FRC:$src1, SrcRC:$src),
7244              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7245              EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7246    let mayLoad = 1 in
7247      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7248              (ins DstVT.FRC:$src1, x86memop:$src),
7249              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7250              EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7251  } // hasSideEffects = 0, isCodeGenOnly = 1
7252  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7253                (ins DstVT.RC:$src1, SrcRC:$src2),
7254                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7255                [(set DstVT.RC:$dst,
7256                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7257               EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7258
7259  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7260                (ins DstVT.RC:$src1, x86memop:$src2),
7261                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7262                [(set DstVT.RC:$dst,
7263                      (OpNode (DstVT.VT DstVT.RC:$src1),
7264                               (ld_frag addr:$src2)))]>,
7265                EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7266}
7267  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7268                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7269                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7270}
7271
7272multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7273                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7274                               X86VectorVTInfo DstVT, string asm,
7275                               string mem> {
7276  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7277  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7278              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7279              !strconcat(asm,
7280                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7281              [(set DstVT.RC:$dst,
7282                    (OpNode (DstVT.VT DstVT.RC:$src1),
7283                             SrcRC:$src2,
7284                             (i32 timm:$rc)))]>,
7285              EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7286  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7287                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7288                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7289}
7290
7291multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7292                                X86FoldableSchedWrite sched,
7293                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7294                                X86MemOperand x86memop, PatFrag ld_frag,
7295                                string asm, string mem> {
7296  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7297              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7298                            ld_frag, asm, mem>, VEX_LIG;
7299}
7300
7301let Predicates = [HasAVX512] in {
7302defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7303                                 WriteCvtI2SS, GR32,
7304                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7305                                 TB, XS, EVEX_CD8<32, CD8VT1>;
7306defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7307                                 WriteCvtI2SS, GR64,
7308                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7309                                 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7310defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7311                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7312                                 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7313defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7314                                 WriteCvtI2SD, GR64,
7315                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7316                                 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7317
7318def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7319              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7320def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7321              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7322
7323def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7324          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7325def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7326          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7327def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7328          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7329def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7330          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7331
7332def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7333          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7334def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7335          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7336def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7337          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7338def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7339          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7340
7341defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7342                                  WriteCvtI2SS, GR32,
7343                                  v4f32x_info, i32mem, loadi32,
7344                                  "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7345defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7346                                  WriteCvtI2SS, GR64,
7347                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7348                                  TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7349defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7350                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7351                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7352defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7353                                  WriteCvtI2SD, GR64,
7354                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7355                                  TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7356
7357def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7358              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7359def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7360              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7361
7362def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7363          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7364def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7365          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7366def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7367          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7368def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7369          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7370
7371def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7372          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7373def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7374          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7375def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7376          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7377def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7378          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7379}
7380
7381//===----------------------------------------------------------------------===//
7382// AVX-512  Scalar convert from float/double to integer
7383//===----------------------------------------------------------------------===//
7384
7385multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7386                                  X86VectorVTInfo DstVT, SDNode OpNode,
7387                                  SDNode OpNodeRnd,
7388                                  X86FoldableSchedWrite sched, string asm,
7389                                  string aliasStr, Predicate prd = HasAVX512> {
7390  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7391    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7392                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7393                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7394                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7395    let Uses = [MXCSR] in
7396    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7397                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7398                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7399                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7400                 Sched<[sched]>;
7401    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7402                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7403                [(set DstVT.RC:$dst, (OpNode
7404                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7405                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7406  } // Predicates = [prd]
7407
7408  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7409          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7410  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7411          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7412  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7413          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7414                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7415}
7416
7417// Convert float/double to signed/unsigned int 32/64
7418defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
7419                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7420                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7421defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7422                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7423                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7424defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7425                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7426                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7427defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7428                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7429                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7430defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7431                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7432                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7433defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7434                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7435                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7436defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7437                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7438                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7439defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7440                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7441                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7442
7443multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7444                        X86VectorVTInfo DstVT, SDNode OpNode,
7445                        X86FoldableSchedWrite sched> {
7446  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7447    let isCodeGenOnly = 1 in {
7448    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7449                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7450                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7451                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7452    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7453                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7454                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7455                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7456    }
7457  } // Predicates = [HasAVX512]
7458}
7459
7460defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7461                       lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7462defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7463                       llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7464defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7465                       lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7466defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7467                       llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7468
7469let Predicates = [HasAVX512] in {
7470  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7471  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7472
7473  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7474  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7475}
7476
7477// Patterns used to match the vcvtsi2s{s,d} intrinsic sequences from clang,
7478// which would otherwise produce unnecessary vmovs{s,d} instructions.
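// A rough sketch of the shape being matched: clang's _mm_cvtsi32_ss-style
// intrinsic sequences lower to a blend of the converted scalar into the
// destination vector,
//   (v4f32 (X86Movss VR128X:$dst,
//            (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
// which the patterns below select directly to VCVTSI2SSZrr_Int, so no
// separate vmovss of the conversion result is emitted.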
7479let Predicates = [HasAVX512] in {
7480def : Pat<(v4f32 (X86Movss
7481                   (v4f32 VR128X:$dst),
7482                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7483          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7484
7485def : Pat<(v4f32 (X86Movss
7486                   (v4f32 VR128X:$dst),
7487                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7488          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7489
7490def : Pat<(v4f32 (X86Movss
7491                   (v4f32 VR128X:$dst),
7492                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7493          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7494
7495def : Pat<(v4f32 (X86Movss
7496                   (v4f32 VR128X:$dst),
7497                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7498          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7499
7500def : Pat<(v2f64 (X86Movsd
7501                   (v2f64 VR128X:$dst),
7502                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7503          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7504
7505def : Pat<(v2f64 (X86Movsd
7506                   (v2f64 VR128X:$dst),
7507                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7508          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7509
7510def : Pat<(v2f64 (X86Movsd
7511                   (v2f64 VR128X:$dst),
7512                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7513          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7514
7515def : Pat<(v2f64 (X86Movsd
7516                   (v2f64 VR128X:$dst),
7517                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7518          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7519
7520def : Pat<(v4f32 (X86Movss
7521                   (v4f32 VR128X:$dst),
7522                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7523          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7524
7525def : Pat<(v4f32 (X86Movss
7526                   (v4f32 VR128X:$dst),
7527                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7528          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7529
7530def : Pat<(v4f32 (X86Movss
7531                   (v4f32 VR128X:$dst),
7532                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7533          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7534
7535def : Pat<(v4f32 (X86Movss
7536                   (v4f32 VR128X:$dst),
7537                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7538          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7539
7540def : Pat<(v2f64 (X86Movsd
7541                   (v2f64 VR128X:$dst),
7542                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7543          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7544
7545def : Pat<(v2f64 (X86Movsd
7546                   (v2f64 VR128X:$dst),
7547                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7548          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7549
7550def : Pat<(v2f64 (X86Movsd
7551                   (v2f64 VR128X:$dst),
7552                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7553          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7554
7555def : Pat<(v2f64 (X86Movsd
7556                   (v2f64 VR128X:$dst),
7557                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7558          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7559} // Predicates = [HasAVX512]
7560
7561// Convert float/double to signed/unsigned int 32/64 with truncation
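// Truncating conversions imply round-toward-zero, so the rrb_Int forms below
// take {sae} (suppress all exceptions) rather than a rounding-control operand.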
7562multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7563                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7564                            SDNode OpNodeInt, SDNode OpNodeSAE,
7565                            X86FoldableSchedWrite sched, string aliasStr,
7566                            Predicate prd = HasAVX512> {
7567let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7568  let isCodeGenOnly = 1 in {
7569  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7570              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7571              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7572              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7573  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7574              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7575              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7576              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7577  }
7578
7579  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7580            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7581           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7582           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7583  let Uses = [MXCSR] in
7584  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7585            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7586            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7587                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7588  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7589              (ins _SrcRC.IntScalarMemOp:$src),
7590              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7591              [(set _DstRC.RC:$dst,
7592                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7593              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7594} // Predicates = [prd]
7595
7596  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7597          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7598  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7599          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7600  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7601          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7602                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7603}
7604
7605defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7606                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7607                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7608defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7609                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7610                        "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7611defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7612                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7613                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7614defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7615                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7616                        "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7617
7618defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7619                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7620                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7621defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7622                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7623                        "{q}">, TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7624defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7625                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7626                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7627defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7628                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7629                        "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7630
7631//===----------------------------------------------------------------------===//
7632// AVX-512  Convert from float to double and back
7633//===----------------------------------------------------------------------===//
7634
7635let Uses = [MXCSR], mayRaiseFPException = 1 in
7636multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7637                                X86VectorVTInfo _Src, SDNode OpNode,
7638                                X86FoldableSchedWrite sched> {
7639  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7640                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7641                         "$src2, $src1", "$src1, $src2",
7642                         (_.VT (OpNode (_.VT _.RC:$src1),
7643                                       (_Src.VT _Src.RC:$src2))), "_Int">,
7644                         EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7645  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7646                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7647                         "$src2, $src1", "$src1, $src2",
7648                         (_.VT (OpNode (_.VT _.RC:$src1),
7649                                  (_Src.ScalarIntMemFrags addr:$src2))), "_Int">,
7650                         EVEX, VVVV, VEX_LIG,
7651                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7652
7653  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7654    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7655               (ins _.FRC:$src1, _Src.FRC:$src2),
7656               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7657               EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7658    let mayLoad = 1 in
7659    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7660               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7661               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7662               EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7663  }
7664}
7665
7666// Scalar Conversion with SAE - suppress all exceptions
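// The source operand gains an explicit {sae} marker in the assembly, e.g.
// (roughly, AT&T syntax): vcvtsh2sd {sae}, %xmm2, %xmm1, %xmm0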
7667multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7668                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7669                                    X86FoldableSchedWrite sched> {
7670  let Uses = [MXCSR] in
7671  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7672                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7673                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7674                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7675                                         (_Src.VT _Src.RC:$src2))), "_Int">,
7676                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7677}
7678
7679// Scalar Conversion with rounding control (RC)
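// The AVX512RC operand becomes a static-rounding specifier in the assembly,
// e.g. (roughly, AT&T syntax): vcvtsd2ss {rn-sae}, %xmm2, %xmm1, %xmm0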
7680multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7681                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7682                                   X86FoldableSchedWrite sched> {
7683  let Uses = [MXCSR] in
7684  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7685                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7686                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7687                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7688                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc))), "_Int">,
7689                        EVEX, VVVV, VEX_LIG, Sched<[sched]>,
7690                        EVEX_B, EVEX_RC;
7691}
7692multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7693                                      SDNode OpNode, SDNode OpNodeRnd,
7694                                      X86FoldableSchedWrite sched,
7695                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7696                                      Predicate prd = HasAVX512> {
7697  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7698    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7699             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7700                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7701  }
7702}
7703
7704multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7705                                       SDNode OpNode, SDNode OpNodeSAE,
7706                                       X86FoldableSchedWrite sched,
7707                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7708                                       Predicate prd = HasAVX512> {
7709  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7710    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7711             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7712             EVEX_CD8<_src.EltSize, CD8VT1>;
7713  }
7714}
7715defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7716                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7717                                         f32x_info>, TB, XD, REX_W;
7718defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7719                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7720                                          f64x_info>, TB, XS;
7721defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7722                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7723                                          f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7724defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7725                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7726                                          f64x_info, HasFP16>, T_MAP5, XS;
7727defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7728                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7729                                          f16x_info, HasFP16>, T_MAP5;
7730defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7731                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7732                                          f32x_info, HasFP16>, T_MAP6;
7733
7734def : Pat<(f64 (any_fpextend FR32X:$src)),
7735          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7736          Requires<[HasAVX512]>;
7737def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7738          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7739          Requires<[HasAVX512, OptForSize]>;
7740
7741def : Pat<(f32 (any_fpround FR64X:$src)),
7742          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7743           Requires<[HasAVX512]>;
7744
7745def : Pat<(f32 (any_fpextend FR16X:$src)),
7746          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7747          Requires<[HasFP16]>;
7748def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7749          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7750          Requires<[HasFP16, OptForSize]>;
7751
7752def : Pat<(f64 (any_fpextend FR16X:$src)),
7753          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7754          Requires<[HasFP16]>;
7755def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7756          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7757          Requires<[HasFP16, OptForSize]>;
7758
7759def : Pat<(f16 (any_fpround FR32X:$src)),
7760          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7761           Requires<[HasFP16]>;
7762def : Pat<(f16 (any_fpround FR64X:$src)),
7763          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7764           Requires<[HasFP16]>;
7765
7766def : Pat<(v4f32 (X86Movss
7767                   (v4f32 VR128X:$dst),
7768                   (v4f32 (scalar_to_vector
7769                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7770          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7771          Requires<[HasAVX512]>;
7772
7773def : Pat<(v2f64 (X86Movsd
7774                   (v2f64 VR128X:$dst),
7775                   (v2f64 (scalar_to_vector
7776                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7777          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7778          Requires<[HasAVX512]>;
7779
7780//===----------------------------------------------------------------------===//
7781// AVX-512  Vector convert from signed/unsigned integer to float/double
7782//          and from float/double to signed/unsigned integer
7783//===----------------------------------------------------------------------===//
7784
7785multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7786                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7787                          X86FoldableSchedWrite sched,
7788                          string Broadcast = _.BroadcastStr,
7789                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7790                          RegisterClass MaskRC = _.KRCWM,
7791                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7792                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7793let Uses = [MXCSR], mayRaiseFPException = 1 in {
7794  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7795                         (ins _Src.RC:$src),
7796                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7797                         (ins MaskRC:$mask, _Src.RC:$src),
7798                          OpcodeStr, "$src", "$src",
7799                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7800                         (vselect_mask MaskRC:$mask,
7801                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7802                                       _.RC:$src0),
7803                         (vselect_mask MaskRC:$mask,
7804                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7805                                       _.ImmAllZerosV)>,
7806                         EVEX, Sched<[sched]>;
7807
7808  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7809                         (ins MemOp:$src),
7810                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7811                         (ins MaskRC:$mask, MemOp:$src),
7812                         OpcodeStr#Alias, "$src", "$src",
7813                         LdDAG,
7814                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7815                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7816                         EVEX, Sched<[sched.Folded]>;
7817
7818  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7819                         (ins _Src.ScalarMemOp:$src),
7820                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7821                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7822                         OpcodeStr,
7823                         "${src}"#Broadcast, "${src}"#Broadcast,
7824                         (_.VT (OpNode (_Src.VT
7825                                  (_Src.BroadcastLdFrag addr:$src))
7826                            )),
7827                         (vselect_mask MaskRC:$mask,
7828                                       (_.VT
7829                                        (MaskOpNode
7830                                         (_Src.VT
7831                                          (_Src.BroadcastLdFrag addr:$src)))),
7832                                       _.RC:$src0),
7833                         (vselect_mask MaskRC:$mask,
7834                                       (_.VT
7835                                        (MaskOpNode
7836                                         (_Src.VT
7837                                          (_Src.BroadcastLdFrag addr:$src)))),
7838                                       _.ImmAllZerosV)>,
7839                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7840  }
7841}
7842// Conversion with SAE - suppress all exceptions
7843multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7844                              X86VectorVTInfo _Src, SDPatternOperator OpNodeSAE,
7845                              X86FoldableSchedWrite sched> {
7846  let Uses = [MXCSR] in
7847  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7848                        (ins _Src.RC:$src), OpcodeStr,
7849                        "{sae}, $src", "$src, {sae}",
7850                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7851                        EVEX, EVEX_B, Sched<[sched]>;
7852}
7853
7854// Conversion with rounding control (RC)
7855multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7856                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7857                         X86FoldableSchedWrite sched> {
7858  let Uses = [MXCSR] in
7859  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7860                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7861                        "$rc, $src", "$src, $rc",
7862                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7863                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7864}
7865
7866// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
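// e.g. for a v8f16 source this substitutes (extloadv8f16 addr:$src) for the
// usual (OpNode (_Src.LdFrag addr:$src)) as both LdDAG and MaskLdDAG.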
7867multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7868                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
7869                                SDNode MaskOpNode,
7870                                X86FoldableSchedWrite sched,
7871                                string Broadcast = _.BroadcastStr,
7872                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7873                                RegisterClass MaskRC = _.KRCWM>
7874  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7875                   Alias, MemOp, MaskRC,
7876                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7877                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7878
7879// Extend [Float to Double, Half to Float]
7880multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7881                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7882                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
7883  let Predicates = [prd] in {
7884    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
7885                            any_fpextend, fpextend, sched.ZMM>,
7886             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7887                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7888  }
7889  let Predicates = [prd, HasVLX] in {
7890    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7891                               X86any_vfpext, X86vfpext, sched.XMM,
7892                               _dst.info128.BroadcastStr,
7893                               "", f64mem>, EVEX_V128;
7894    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7895                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7896  }
7897}
7898
7899// Truncate [Double to Float, Float to Half]
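// e.g. VCVTPD2PS below. The Z128/Z256 forms get explicit "{x}"/"{y}" mnemonic
// suffixes because, with a memory source and an XMM destination, the operands
// alone do not identify the source width (vcvtpd2psx vs. vcvtpd2psy).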
7900multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7901                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7902                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7903                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7904                            PatFrag loadVT128 = _src.info128.LdFrag,
7905                            RegisterClass maskRC128 = _src.info128.KRCWM> {
7906  let Predicates = [prd] in {
7907    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7908                            X86any_vfpround, X86vfpround, sched.ZMM>,
7909             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7910                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7911  }
7912  let Predicates = [prd, HasVLX] in {
7913    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7914                               null_frag, null_frag, sched.XMM,
7915                               _src.info128.BroadcastStr, "{x}",
7916                               f128mem, maskRC128>, EVEX_V128;
7917    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7918                               X86any_vfpround, X86vfpround,
7919                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7920
7921    // Special patterns to allow use of X86vmfpround for masking. Instruction
7922    // patterns have been disabled with null_frag.
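    // (Presumably this is because the 128-bit form converts only two source
    // elements, so the generic vselect_mask-based masking cannot describe the
    // merge; X86vmfpround carries the passthrough and VK2WM mask explicitly.)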
7923    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7924              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7925    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7926                            maskRC128:$mask),
7927              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7928    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7929                            maskRC128:$mask),
7930              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7931
7932    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7933              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7934    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7935                            maskRC128:$mask),
7936              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7937    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7938                            maskRC128:$mask),
7939              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7940
7941    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7942              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7943    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7944                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7945              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7946    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7947                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
7948              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
7949  }
7950
7951  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7952                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7953  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7954                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7955                  VK2WM:$mask, VR128X:$src), 0, "att">;
7956  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7957                  "$dst {${mask}} {z}, $src}",
7958                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7959                  VK2WM:$mask, VR128X:$src), 0, "att">;
7960  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7961                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7962  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7963                  "$dst {${mask}}, ${src}{1to2}}",
7964                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7965                  VK2WM:$mask, f64mem:$src), 0, "att">;
7966  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7967                  "$dst {${mask}} {z}, ${src}{1to2}}",
7968                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7969                  VK2WM:$mask, f64mem:$src), 0, "att">;
7970
7971  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7972                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7973  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7974                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7975                  VK4WM:$mask, VR256X:$src), 0, "att">;
7976  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7977                  "$dst {${mask}} {z}, $src}",
7978                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7979                  VK4WM:$mask, VR256X:$src), 0, "att">;
7980  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7981                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7982  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7983                  "$dst {${mask}}, ${src}{1to4}}",
7984                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7985                  VK4WM:$mask, f64mem:$src), 0, "att">;
7986  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7987                  "$dst {${mask}} {z}, ${src}{1to4}}",
7988                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7989                  VK4WM:$mask, f64mem:$src), 0, "att">;
7990}
7991
7992defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7993                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7994                                  REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7995defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7996                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7997                                   TB, EVEX_CD8<32, CD8VH>;
7998
7999// Extend Half to Double
8000multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8001                            X86SchedWriteWidths sched> {
8002  let Predicates = [HasFP16] in {
8003    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8004                                  any_fpextend, fpextend, sched.ZMM>,
8005             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8006                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8007    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8008                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8009  }
8010  let Predicates = [HasFP16, HasVLX] in {
8011    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8012                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8013                                     f32mem>, EVEX_V128;
8014    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8015                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8016                                     f64mem>, EVEX_V256;
8017  }
8018}
8019
8020// Truncate Double to Half
8021multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8022  let Predicates = [HasFP16] in {
8023    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8024                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8025             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8026                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8027  }
8028  let Predicates = [HasFP16, HasVLX] in {
8029    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8030                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8031                               VK2WM>, EVEX_V128;
8032    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8033                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8034                               VK4WM>, EVEX_V256;
8035  }
8036  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8037                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8038                  VR128X:$src), 0, "att">;
8039  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8040                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8041                  VK2WM:$mask, VR128X:$src), 0, "att">;
8042  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8043                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8044                  VK2WM:$mask, VR128X:$src), 0, "att">;
8045  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8046                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8047                  i64mem:$src), 0, "att">;
8048  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8049                  "$dst {${mask}}, ${src}{1to2}}",
8050                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8051                  VK2WM:$mask, i64mem:$src), 0, "att">;
8052  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8053                  "$dst {${mask}} {z}, ${src}{1to2}}",
8054                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8055                  VK2WM:$mask, i64mem:$src), 0, "att">;
8056
8057  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8058                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8059                  VR256X:$src), 0, "att">;
8060  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8061                  "$dst {${mask}}, $src}",
8062                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8063                  VK4WM:$mask, VR256X:$src), 0, "att">;
8064  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8065                  "$dst {${mask}} {z}, $src}",
8066                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8067                  VK4WM:$mask, VR256X:$src), 0, "att">;
8068  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8069                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8070                  i64mem:$src), 0, "att">;
8071  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8072                  "$dst {${mask}}, ${src}{1to4}}",
8073                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8074                  VK4WM:$mask, i64mem:$src), 0, "att">;
8075  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8076                  "$dst {${mask}} {z}, ${src}{1to4}}",
8077                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8078                  VK4WM:$mask, i64mem:$src), 0, "att">;
8079
8080  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8081                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8082                  VR512:$src), 0, "att">;
8083  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8084                  "$dst {${mask}}, $src}",
8085                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8086                  VK8WM:$mask, VR512:$src), 0, "att">;
8087  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8088                  "$dst {${mask}} {z}, $src}",
8089                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8090                  VK8WM:$mask, VR512:$src), 0, "att">;
8091  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8092                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8093                  i64mem:$src), 0, "att">;
8094  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8095                  "$dst {${mask}}, ${src}{1to8}}",
8096                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8097                  VK8WM:$mask, i64mem:$src), 0, "att">;
8098  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8099                  "$dst {${mask}} {z}, ${src}{1to8}}",
8100                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8101                  VK8WM:$mask, i64mem:$src), 0, "att">;
8102}
8103
8104defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8105                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8106                                   HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8107defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8108                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8109                                    HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8110defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8111                                 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8112defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8113                                 T_MAP5, EVEX_CD8<16, CD8VQ>;
8114
8115let Predicates = [HasFP16, HasVLX] in {
8116  // Special patterns to allow use of X86vmfpround for masking. Instruction
8117  // patterns have been disabled with null_frag.
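      // These cover the 128-bit and 256-bit forms of VCVTPD2PH, which are
      // instantiated with null_frag in avx512_cvtpd2ph above.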
8118  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8119            (VCVTPD2PHZ256rr VR256X:$src)>;
8120  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8121                          VK4WM:$mask)),
8122            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8123  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8124                          VK4WM:$mask),
8125            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8126
8127  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8128            (VCVTPD2PHZ256rm addr:$src)>;
8129  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8130                          VK4WM:$mask),
8131            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8132  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8133                          VK4WM:$mask),
8134            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8135
8136  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8137            (VCVTPD2PHZ256rmb addr:$src)>;
8138  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8139                          (v8f16 VR128X:$src0), VK4WM:$mask),
8140            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8141  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8142                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8143            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8144
8145  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8146            (VCVTPD2PHZ128rr VR128X:$src)>;
8147  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8148                          VK2WM:$mask),
8149            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8150  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8151                          VK2WM:$mask),
8152            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8153
8154  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8155            (VCVTPD2PHZ128rm addr:$src)>;
8156  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8157                          VK2WM:$mask),
8158            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8159  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8160                          VK2WM:$mask),
8161            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8162
8163  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8164            (VCVTPD2PHZ128rmb addr:$src)>;
8165  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8166                          (v8f16 VR128X:$src0), VK2WM:$mask),
8167            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8168  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8169                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8170            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8171}
8172
8173// Convert Signed/Unsigned Doubleword to Double
8174let Uses = []<Register>, mayRaiseFPException = 0 in
8175multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8176                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8177                           SDNode MaskOpNode128,
8178                           X86SchedWriteWidths sched> {
8179  // No rounding in this op
8180  let Predicates = [HasAVX512] in
8181    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8182                            MaskOpNode, sched.ZMM>, EVEX_V512;
8183
8184  let Predicates = [HasVLX] in {
8185    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8186                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8187                               "", i64mem, VK2WM,
8188                               (v2f64 (OpNode128 (bc_v4i32
8189                                (v2i64
8190                                 (scalar_to_vector (loadi64 addr:$src)))))),
8191                               (v2f64 (MaskOpNode128 (bc_v4i32
8192                                (v2i64
8193                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8194                               EVEX_V128;
8195    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8196                               MaskOpNode, sched.YMM>, EVEX_V256;
8197  }
8198}
8199
8200// Convert Signed/Unsigned Doubleword to Float
8201multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8202                           SDNode MaskOpNode, SDNode OpNodeRnd,
8203                           X86SchedWriteWidths sched> {
8204  let Predicates = [HasAVX512] in
8205    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8206                            MaskOpNode, sched.ZMM>,
8207             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8208                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8209
8210  let Predicates = [HasVLX] in {
8211    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8212                               MaskOpNode, sched.XMM>, EVEX_V128;
8213    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8214                               MaskOpNode, sched.YMM>, EVEX_V256;
8215  }
8216}
8217
8218// Convert Float to Signed/Unsigned Doubleword with truncation
8219multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8220                            SDNode MaskOpNode,
8221                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8222  let Predicates = [HasAVX512] in {
8223    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8224                            MaskOpNode, sched.ZMM>,
8225             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8226                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8227  }
8228  let Predicates = [HasVLX] in {
8229    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8230                               MaskOpNode, sched.XMM>, EVEX_V128;
8231    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8232                               MaskOpNode, sched.YMM>, EVEX_V256;
8233  }
8234}
8235
8236// Convert Float to Signed/Unsigned Doubleword
8237multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8238                           SDNode MaskOpNode, SDNode OpNodeRnd,
8239                           X86SchedWriteWidths sched> {
8240  let Predicates = [HasAVX512] in {
8241    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8242                            MaskOpNode, sched.ZMM>,
8243             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8244                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8245  }
8246  let Predicates = [HasVLX] in {
8247    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8248                               MaskOpNode, sched.XMM>, EVEX_V128;
8249    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8250                               MaskOpNode, sched.YMM>, EVEX_V256;
8251  }
8252}
8253
8254// Convert Double to Signed/Unsigned Doubleword with truncation
8255multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8256                            SDNode MaskOpNode, SDNode OpNodeSAE,
8257                            X86SchedWriteWidths sched> {
8258  let Predicates = [HasAVX512] in {
8259    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8260                            MaskOpNode, sched.ZMM>,
8261             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8262                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8263  }
8264  let Predicates = [HasVLX] in {
8265    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8266    // memory forms of these instructions in Asm Parser. They have the same
8267    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8268    // due to the same reason.
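        // For example, in AT&T syntax "vcvttpd2dqx (%rax), %xmm0" selects the
        // 128-bit memory form and "vcvttpd2dqy (%rax), %xmm0" the 256-bit one.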
8269    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8270                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8271                               VK2WM>, EVEX_V128;
8272    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8273                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8274  }
8275
8276  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8277                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8278                  VR128X:$src), 0, "att">;
8279  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8280                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8281                  VK2WM:$mask, VR128X:$src), 0, "att">;
8282  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8283                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8284                  VK2WM:$mask, VR128X:$src), 0, "att">;
8285  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8286                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8287                  f64mem:$src), 0, "att">;
8288  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8289                  "$dst {${mask}}, ${src}{1to2}}",
8290                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8291                  VK2WM:$mask, f64mem:$src), 0, "att">;
8292  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8293                  "$dst {${mask}} {z}, ${src}{1to2}}",
8294                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8295                  VK2WM:$mask, f64mem:$src), 0, "att">;
8296
8297  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8298                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8299                  VR256X:$src), 0, "att">;
8300  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8301                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8302                  VK4WM:$mask, VR256X:$src), 0, "att">;
8303  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8304                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8305                  VK4WM:$mask, VR256X:$src), 0, "att">;
8306  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8307                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8308                  f64mem:$src), 0, "att">;
8309  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8310                  "$dst {${mask}}, ${src}{1to4}}",
8311                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8312                  VK4WM:$mask, f64mem:$src), 0, "att">;
8313  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8314                  "$dst {${mask}} {z}, ${src}{1to4}}",
8315                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8316                  VK4WM:$mask, f64mem:$src), 0, "att">;
8317}
8318
8319// Convert Double to Signed/Unsigned Doubleword
8320multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8321                           SDNode MaskOpNode, SDNode OpNodeRnd,
8322                           X86SchedWriteWidths sched> {
8323  let Predicates = [HasAVX512] in {
8324    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8325                            MaskOpNode, sched.ZMM>,
8326             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8327                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8328  }
8329  let Predicates = [HasVLX] in {
8330    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8331    // memory forms of these instructions in Asm Parser. They have the same
8332    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8333    // due to the same reason.
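        // For example, in AT&T syntax "vcvtpd2dqx (%rax), %xmm0" selects the
        // 128-bit memory form and "vcvtpd2dqy (%rax), %xmm0" the 256-bit one.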
8334    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8335                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8336                               VK2WM>, EVEX_V128;
8337    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8338                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8339  }
8340
8341  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8342                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8343  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8344                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8345                  VK2WM:$mask, VR128X:$src), 0, "att">;
8346  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8347                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8348                  VK2WM:$mask, VR128X:$src), 0, "att">;
8349  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8350                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8351                  f64mem:$src), 0, "att">;
8352  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8353                  "$dst {${mask}}, ${src}{1to2}}",
8354                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8355                  VK2WM:$mask, f64mem:$src), 0, "att">;
8356  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8357                  "$dst {${mask}} {z}, ${src}{1to2}}",
8358                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8359                  VK2WM:$mask, f64mem:$src), 0, "att">;
8360
8361  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8362                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8363  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8364                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8365                  VK4WM:$mask, VR256X:$src), 0, "att">;
8366  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8367                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8368                  VK4WM:$mask, VR256X:$src), 0, "att">;
8369  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8370                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8371                  f64mem:$src), 0, "att">;
8372  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8373                  "$dst {${mask}}, ${src}{1to4}}",
8374                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8375                  VK4WM:$mask, f64mem:$src), 0, "att">;
8376  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8377                  "$dst {${mask}} {z}, ${src}{1to4}}",
8378                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8379                  VK4WM:$mask, f64mem:$src), 0, "att">;
8380}
8381
8382// Convert Double to Signed/Unsigned Quadword
8383multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8384                           SDNode MaskOpNode, SDNode OpNodeRnd,
8385                           X86SchedWriteWidths sched> {
8386  let Predicates = [HasDQI] in {
8387    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8388                            MaskOpNode, sched.ZMM>,
8389             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8390                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8391  }
8392  let Predicates = [HasDQI, HasVLX] in {
8393    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8394                               MaskOpNode, sched.XMM>, EVEX_V128;
8395    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8396                               MaskOpNode, sched.YMM>, EVEX_V256;
8397  }
8398}
8399
8400// Convert Double to Signed/Unsigned Quadword with truncation
8401multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8402                            SDNode MaskOpNode, SDNode OpNodeRnd,
8403                            X86SchedWriteWidths sched> {
8404  let Predicates = [HasDQI] in {
8405    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8406                            MaskOpNode, sched.ZMM>,
8407             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8408                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8409  }
8410  let Predicates = [HasDQI, HasVLX] in {
8411    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8412                               MaskOpNode, sched.XMM>, EVEX_V128;
8413    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8414                               MaskOpNode, sched.YMM>, EVEX_V256;
8415  }
8416}
8417
8418// Convert Signed/Unsigned Quadword to Double
8419multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8420                           SDNode MaskOpNode, SDNode OpNodeRnd,
8421                           X86SchedWriteWidths sched> {
8422  let Predicates = [HasDQI] in {
8423    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8424                            MaskOpNode, sched.ZMM>,
8425             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8426                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8427  }
8428  let Predicates = [HasDQI, HasVLX] in {
8429    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8430                               MaskOpNode, sched.XMM>, EVEX_V128;
8431    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8432                               MaskOpNode, sched.YMM>, EVEX_V256;
8433  }
8434}
8435
8436// Convert Float to Signed/Unsigned Quadword
8437multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8438                           SDNode MaskOpNode, SDNode OpNodeRnd,
8439                           X86SchedWriteWidths sched> {
8440  let Predicates = [HasDQI] in {
8441    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8442                            MaskOpNode, sched.ZMM>,
8443             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8444                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8445  }
8446  let Predicates = [HasDQI, HasVLX] in {
8447    // The broadcast string is specified explicitly, since we take only 2 elements
8448    // from the v4f32x_info source.
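        // In assembly this appears as the "{1to2}" broadcast decorator, e.g.
        // something like "vcvtps2qq (%rax){1to2}, %xmm0" in AT&T syntax.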
8449    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8450                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8451                               (v2i64 (OpNode (bc_v4f32
8452                                (v2f64
8453                                 (scalar_to_vector (loadf64 addr:$src)))))),
8454                               (v2i64 (MaskOpNode (bc_v4f32
8455                                (v2f64
8456                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8457                               EVEX_V128;
8458    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8459                               MaskOpNode, sched.YMM>, EVEX_V256;
8460  }
8461}
8462
8463// Convert Float to Signed/Unsigned Quadword with truncation
8464multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8465                            SDNode MaskOpNode, SDNode OpNodeRnd,
8466                            X86SchedWriteWidths sched> {
8467  let Predicates = [HasDQI] in {
8468    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8469                            MaskOpNode, sched.ZMM>,
8470             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8471                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8472  }
8473  let Predicates = [HasDQI, HasVLX] in {
8474    // The broadcast string is specified explicitly, since we take only 2 elements
8475    // from the v4f32x_info source.
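        // As with VCVTPS2QQ, this yields the "{1to2}" broadcast form in assembly,
        // e.g. something like "vcvttps2qq (%rax){1to2}, %xmm0" in AT&T syntax.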
8476    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8477                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8478                               (v2i64 (OpNode (bc_v4f32
8479                                (v2f64
8480                                 (scalar_to_vector (loadf64 addr:$src)))))),
8481                               (v2i64 (MaskOpNode (bc_v4f32
8482                                (v2f64
8483                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8484                               EVEX_V128;
8485    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8486                               MaskOpNode, sched.YMM>, EVEX_V256;
8487  }
8488}
8489
8490// Convert Signed/Unsigned Quadword to Float
8491// Also Convert Signed/Unsigned Doubleword to Half
8492multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8493                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8494                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8495                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8496                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8497  let Predicates = [prd] in {
8498    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8499                            MaskOpNode, sched.ZMM>,
8500             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8501                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8502  }
8503  let Predicates = [prd, HasVLX] in {
8504    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8505    // memory forms of these instructions in Asm Parser. They have the same
8506    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8507    // due to the same reason.
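        // For example, in AT&T syntax "vcvtqq2psx (%rax), %xmm0" selects the
        // 128-bit memory form and "vcvtqq2psy (%rax), %xmm0" the 256-bit one.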
8508    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8509                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8510                               "{x}", i128mem, _src.info128.KRCWM>,
8511                               EVEX_V128;
8512    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8513                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8514                               "{y}">, EVEX_V256;
8515
8516    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8517    // patterns have been disabled with null_frag.
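        // For example, in the VCVTQQ2PS instantiation below this maps
        //   (X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), VK2WM:$mask)
        // to VCVTQQ2PSZ128rrk.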
8518    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8519              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8520    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8521                             _src.info128.KRCWM:$mask),
8522              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8523    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8524                             _src.info128.KRCWM:$mask),
8525              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8526
8527    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8528              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8529    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8530                             _src.info128.KRCWM:$mask),
8531              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8532    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8533                             _src.info128.KRCWM:$mask),
8534              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8535
8536    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8537              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8538    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8539                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8540              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8541    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8542                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8543              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8544  }
8545
8546  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8547                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8548                  VR128X:$src), 0, "att">;
8549  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8550                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8551                  VK2WM:$mask, VR128X:$src), 0, "att">;
8552  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8553                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8554                  VK2WM:$mask, VR128X:$src), 0, "att">;
8555  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8556                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8557                  i64mem:$src), 0, "att">;
8558  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8559                  "$dst {${mask}}, ${src}{1to2}}",
8560                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8561                  VK2WM:$mask, i64mem:$src), 0, "att">;
8562  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8563                  "$dst {${mask}} {z}, ${src}{1to2}}",
8564                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8565                  VK2WM:$mask, i64mem:$src), 0, "att">;
8566
8567  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8568                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8569                  VR256X:$src), 0, "att">;
8570  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8571                  "$dst {${mask}}, $src}",
8572                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8573                  VK4WM:$mask, VR256X:$src), 0, "att">;
8574  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8575                  "$dst {${mask}} {z}, $src}",
8576                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8577                  VK4WM:$mask, VR256X:$src), 0, "att">;
8578  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8579                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8580                  i64mem:$src), 0, "att">;
8581  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8582                  "$dst {${mask}}, ${src}{1to4}}",
8583                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8584                  VK4WM:$mask, i64mem:$src), 0, "att">;
8585  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8586                  "$dst {${mask}} {z}, ${src}{1to4}}",
8587                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8588                  VK4WM:$mask, i64mem:$src), 0, "att">;
8589}
8590
8591defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8592                                 X86any_VSintToFP, X86VSintToFP,
8593                                 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8594
8595defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8596                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8597                                TB, EVEX_CD8<32, CD8VF>;
8598
8599defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8600                                 X86cvttp2si, X86cvttp2siSAE,
8601                                 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8602
8603defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8604                                 X86cvttp2si, X86cvttp2siSAE,
8605                                 SchedWriteCvtPD2DQ>,
8606                                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8607
8608defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8609                                 X86cvttp2ui, X86cvttp2uiSAE,
8610                                 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8611
8612defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8613                                 X86cvttp2ui, X86cvttp2uiSAE,
8614                                 SchedWriteCvtPD2DQ>,
8615                                 TB, REX_W, EVEX_CD8<64, CD8VF>;
8616
8617defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8618                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8619                                  SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8620
8621defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8622                                 uint_to_fp, X86VUintToFpRnd,
8623                                 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8624
8625defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8626                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8627                                 EVEX_CD8<32, CD8VF>;
8628
8629defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8630                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8631                                 REX_W, EVEX_CD8<64, CD8VF>;
8632
8633defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8634                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8635                                 TB, EVEX_CD8<32, CD8VF>;
8636
8637defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8638                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8639                                 TB, EVEX_CD8<64, CD8VF>;
8640
8641defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8642                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8643                                 TB, PD, EVEX_CD8<64, CD8VF>;
8644
8645defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8646                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8647                                 EVEX_CD8<32, CD8VH>;
8648
8649defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8650                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8651                                 TB, PD, EVEX_CD8<64, CD8VF>;
8652
8653defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8654                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8655                                 EVEX_CD8<32, CD8VH>;
8656
8657defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8658                                 X86cvttp2si, X86cvttp2siSAE,
8659                                 SchedWriteCvtPD2DQ>, REX_W,
8660                                 TB, PD, EVEX_CD8<64, CD8VF>;
8661
8662defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8663                                 X86cvttp2si, X86cvttp2siSAE,
8664                                 SchedWriteCvtPS2DQ>, TB, PD,
8665                                 EVEX_CD8<32, CD8VH>;
8666
8667defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8668                                 X86cvttp2ui, X86cvttp2uiSAE,
8669                                 SchedWriteCvtPD2DQ>, REX_W,
8670                                 TB, PD, EVEX_CD8<64, CD8VF>;
8671
8672defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8673                                 X86cvttp2ui, X86cvttp2uiSAE,
8674                                 SchedWriteCvtPS2DQ>, TB, PD,
8675                                 EVEX_CD8<32, CD8VH>;
8676
8677defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8678                            sint_to_fp, X86VSintToFpRnd,
8679                            SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8680
8681defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8682                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8683                            REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8684
8685defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8686                            X86any_VSintToFP, X86VMSintToFP,
8687                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8688                            SchedWriteCvtDQ2PS, HasFP16>,
8689                            T_MAP5, EVEX_CD8<32, CD8VF>;
8690
8691defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8692                            X86any_VUintToFP, X86VMUintToFP,
8693                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8694                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8695                            EVEX_CD8<32, CD8VF>;
8696
8697defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8698                            X86any_VSintToFP, X86VMSintToFP,
8699                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8700                            SchedWriteCvtDQ2PS>, REX_W, TB,
8701                            EVEX_CD8<64, CD8VF>;
8702
8703defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8704                            X86any_VUintToFP, X86VMUintToFP,
8705                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8706                            SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8707                            EVEX_CD8<64, CD8VF>;
8708
8709let Predicates = [HasVLX] in {
8710  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8711  // patterns have been disabled with null_frag.
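      // These provide selection for the VCVTPD2DQZ128 forms whose multiclass
      // patterns were disabled with null_frag in avx512_cvtpd2dq above.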
8712  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8713            (VCVTPD2DQZ128rr VR128X:$src)>;
8714  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8715                          VK2WM:$mask),
8716            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8717  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8718                          VK2WM:$mask),
8719            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8720
8721  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8722            (VCVTPD2DQZ128rm addr:$src)>;
8723  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8724                          VK2WM:$mask),
8725            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8726  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8727                          VK2WM:$mask),
8728            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8729
8730  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8731            (VCVTPD2DQZ128rmb addr:$src)>;
8732  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8733                          (v4i32 VR128X:$src0), VK2WM:$mask),
8734            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8735  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8736                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8737            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8738
8739  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8740  // patterns have been disabled with null_frag.
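      // These provide selection for the VCVTTPD2DQZ128 forms whose multiclass
      // patterns were disabled with null_frag in avx512_cvttpd2dq above.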
8741  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8742            (VCVTTPD2DQZ128rr VR128X:$src)>;
8743  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8744                          VK2WM:$mask),
8745            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8746  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8747                          VK2WM:$mask),
8748            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8749
8750  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8751            (VCVTTPD2DQZ128rm addr:$src)>;
8752  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8753                          VK2WM:$mask),
8754            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8755  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8756                          VK2WM:$mask),
8757            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8758
8759  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8760            (VCVTTPD2DQZ128rmb addr:$src)>;
8761  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8762                          (v4i32 VR128X:$src0), VK2WM:$mask),
8763            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8764  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8765                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8766            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8767
8768  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8769  // patterns have been disabled with null_frag.
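      // These provide selection for the VCVTPD2UDQZ128 forms whose multiclass
      // patterns were disabled with null_frag in avx512_cvtpd2dq above.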
8770  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8771            (VCVTPD2UDQZ128rr VR128X:$src)>;
8772  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8773                           VK2WM:$mask),
8774            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8775  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8776                           VK2WM:$mask),
8777            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8778
8779  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8780            (VCVTPD2UDQZ128rm addr:$src)>;
8781  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8782                           VK2WM:$mask),
8783            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8784  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8785                           VK2WM:$mask),
8786            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8787
8788  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8789            (VCVTPD2UDQZ128rmb addr:$src)>;
8790  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8791                           (v4i32 VR128X:$src0), VK2WM:$mask),
8792            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8793  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8794                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8795            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8796
8797  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8798  // patterns have been disabled with null_frag.
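      // These provide selection for the VCVTTPD2UDQZ128 forms whose multiclass
      // patterns were disabled with null_frag in avx512_cvttpd2dq above.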
8799  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8800            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8801  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8802                          VK2WM:$mask),
8803            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8804  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8805                          VK2WM:$mask),
8806            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8807
8808  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8809            (VCVTTPD2UDQZ128rm addr:$src)>;
8810  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8811                          VK2WM:$mask),
8812            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8813  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8814                          VK2WM:$mask),
8815            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8816
8817  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8818            (VCVTTPD2UDQZ128rmb addr:$src)>;
8819  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8820                          (v4i32 VR128X:$src0), VK2WM:$mask),
8821            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8822  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8823                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8824            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8825
8826  def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>;
8827  def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>;
8828  def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>;
8829  def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>;
8830  def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>;
8831  def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>;
8832}
8833def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>;
8834def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>;
8835def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>;
8836def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>;
8837
8838let Predicates = [HasDQI, HasVLX] in {
8839  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8840            (VCVTPS2QQZ128rm addr:$src)>;
8841  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8842                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8843                                 VR128X:$src0)),
8844            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8845  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8846                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8847                                 v2i64x_info.ImmAllZerosV)),
8848            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8849
8850  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8851            (VCVTPS2UQQZ128rm addr:$src)>;
8852  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8853                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8854                                 VR128X:$src0)),
8855            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8856  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8857                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8858                                 v2i64x_info.ImmAllZerosV)),
8859            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8860
8861  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8862            (VCVTTPS2QQZ128rm addr:$src)>;
8863  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8864                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8865                                 VR128X:$src0)),
8866            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8867  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8868                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8869                                 v2i64x_info.ImmAllZerosV)),
8870            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8871
8872  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8873            (VCVTTPS2UQQZ128rm addr:$src)>;
8874  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8875                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8876                                 VR128X:$src0)),
8877            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8878  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8879                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8880                                 v2i64x_info.ImmAllZerosV)),
8881            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8882
8883  def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8884  def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8885  def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8886  def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8887  def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8888  def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8889  def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8890  def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8891  def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8892  def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8893  def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8894  def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8895}
8896
8897let Predicates = [HasDQI] in {
8898  def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8899  def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8900  def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8901  def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8902  def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8903  def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8904  def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8905  def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8906}
8907
8908let Predicates = [HasVLX] in {
8909  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8910            (VCVTDQ2PDZ128rm addr:$src)>;
8911  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8912                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8913                                 VR128X:$src0)),
8914            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8915  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8916                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8917                                 v2f64x_info.ImmAllZerosV)),
8918            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8919
8920  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8921            (VCVTUDQ2PDZ128rm addr:$src)>;
8922  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8923                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8924                                 VR128X:$src0)),
8925            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8926  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8927                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8928                                 v2f64x_info.ImmAllZerosV)),
8929            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8930}
8931
8932//===----------------------------------------------------------------------===//
8933// Half precision conversion instructions
8934//===----------------------------------------------------------------------===//
8935
8936let Uses = [MXCSR], mayRaiseFPException = 1 in
8937multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8938                           X86MemOperand x86memop, dag ld_dag,
8939                           X86FoldableSchedWrite sched> {
8940  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8941                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8942                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8943                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8944                            T8, PD, Sched<[sched]>;
8945  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8946                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8947                            (X86any_cvtph2ps (_src.VT ld_dag)),
8948                            (X86cvtph2ps (_src.VT ld_dag))>,
8949                            T8, PD, Sched<[sched.Folded]>;
8950}
8951
8952multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8953                               X86FoldableSchedWrite sched> {
8954  let Uses = [MXCSR] in
8955  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8956                             (ins _src.RC:$src), "vcvtph2ps",
8957                             "{sae}, $src", "$src, {sae}",
8958                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8959                             T8, PD, EVEX_B, Sched<[sched]>;
8960}
8961
8962let Predicates = [HasAVX512] in
8963  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8964                                    (load addr:$src), WriteCvtPH2PSZ>,
8965                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8966                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8967
8968let Predicates = [HasVLX] in {
8969  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8970                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8971                       EVEX_CD8<32, CD8VH>;
8972  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8973                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8974                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8975                       EVEX_CD8<32, CD8VH>;
8976
8977  // Pattern match vcvtph2ps of a scalar i64 load.
8978  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8979              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8980            (VCVTPH2PSZ128rm addr:$src)>;
8981}
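
// A hedged C-level illustration of the scalar-load fold above (the intrinsics
// are the standard F16C/SSE2 ones, not taken from this file):
//   __m128 load4_halfs(const void *p) {
//     return _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)p));
//   }
// The 64-bit load can be folded so that VCVTPH2PSZ128rm is selected directly
// instead of a vmovq followed by the register form.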
8982
8983multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8984                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8985let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8986  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8987             (ins _src.RC:$src1, i32u8imm:$src2),
8988             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8989             [(set _dest.RC:$dst,
8990                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8991             Sched<[RR]>;
8992  let Constraints = "$src0 = $dst" in
8993  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8994             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8995             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8996             [(set _dest.RC:$dst,
8997                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8998                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8999             Sched<[RR]>, EVEX_K;
9000  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9001             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9002             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9003             [(set _dest.RC:$dst,
9004                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9005                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9006             Sched<[RR]>, EVEX_KZ;
9007  let hasSideEffects = 0, mayStore = 1 in {
9008    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9009               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9010               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9011               Sched<[MR]>;
9012    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9013               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9014               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9015                EVEX_K, Sched<[MR]>;
9016  }
9017}
9018}
9019
9020multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9021                               SchedWrite Sched> {
9022  let hasSideEffects = 0, Uses = [MXCSR] in {
9023    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9024              (ins _src.RC:$src1, i32u8imm:$src2),
9025              "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9026              [(set _dest.RC:$dst,
9027                    (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9028              EVEX_B, Sched<[Sched]>;
9029    let Constraints = "$src0 = $dst" in
9030    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9031              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9032              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9033              [(set _dest.RC:$dst,
9034                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9035                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
9036              EVEX_B, Sched<[Sched]>, EVEX_K;
9037    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9038              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9039              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9040              [(set _dest.RC:$dst,
9041                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9042                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9043              EVEX_B, Sched<[Sched]>, EVEX_KZ;
9044}
9045}
9046
9047let Predicates = [HasAVX512] in {
9048  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9049                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9050                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9051                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9052
9053  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9054            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9055}
9056
9057let Predicates = [HasVLX] in {
9058  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9059                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9060                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9061  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9062                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9063                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9064
9065  def : Pat<(store (f64 (extractelt
9066                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9067                         (iPTR 0))), addr:$dst),
9068            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9069  def : Pat<(store (i64 (extractelt
9070                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9071                         (iPTR 0))), addr:$dst),
9072            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9073  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9074            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9075}
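
// A hedged C-level illustration of the 64-bit store patterns above (standard
// F16C/SSE2 intrinsics, not taken from this file):
//   void store4_halfs(void *p, __m128 x) {
//     __m128i h = _mm_cvtps_ph(x, _MM_FROUND_TO_NEAREST_INT);
//     _mm_storel_epi64((__m128i *)p, h);
//   }
// Only the low 64 bits of the v8i16 result are meaningful, so storing element
// 0 of the bitcast vector can be matched to VCVTPS2PHZ128mr.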
9076
9077//  Unordered/Ordered scalar FP compares with SAE; these set EFLAGS.
9078multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9079                              string OpcodeStr, Domain d,
9080                              X86FoldableSchedWrite sched = WriteFComX> {
9081  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9082  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9083                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9084                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9085}
9086
9087let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9088  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9089                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9090  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9091                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9092  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9093                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9094  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9095                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9096}
9097
9098let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9099  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9100                                 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9101                                 EVEX_CD8<32, CD8VT1>;
9102  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9103                                  "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9104                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9105  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9106                                 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9107                                 EVEX_CD8<32, CD8VT1>;
9108  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9109                                 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9110                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9111  let isCodeGenOnly = 1 in {
9112    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9113                          sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9114                          EVEX_CD8<32, CD8VT1>;
9115    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9116                          sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9117                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9118
9119    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9120                          sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9121                          EVEX_CD8<32, CD8VT1>;
9122    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9123                          sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9124                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9125  }
9126}
9127
9128let Defs = [EFLAGS], Predicates = [HasFP16] in {
9129  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9130                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9131                                EVEX_CD8<16, CD8VT1>;
9132  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9133                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9134                                EVEX_CD8<16, CD8VT1>;
9135  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9136                                "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9137                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9138  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9139                                "comish", SSEPackedSingle>, T_MAP5, EVEX,
9140                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9141  let isCodeGenOnly = 1 in {
9142    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9143                                sse_load_f16, "ucomish", SSEPackedSingle>,
9144                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9145
9146    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9147                                sse_load_f16, "comish", SSEPackedSingle>,
9148                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9149  }
9150}
9151
9152/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9153multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9154                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9155                         Predicate prd = HasAVX512> {
9156  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9157  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9158                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9159                           "$src2, $src1", "$src1, $src2",
9160                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9161                           EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9162  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9163                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9164                         "$src2, $src1", "$src1, $src2",
9165                         (OpNode (_.VT _.RC:$src1),
9166                          (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9167                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9168}
9169}
9170
9171defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9172                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9173                               T_MAP6, PD;
9174defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9175                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9176                                 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9177let Uses = [MXCSR] in {
9178defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9179                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9180                               T8, PD;
9181defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9182                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9183                               T8, PD;
9184defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9185                                 SchedWriteFRsqrt.Scl, f32x_info>,
9186                                 EVEX_CD8<32, CD8VT1>, T8, PD;
9187defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9188                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9189                                 EVEX_CD8<64, CD8VT1>, T8, PD;
9190}
9191
9192/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9193multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9194                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9195  let ExeDomain = _.ExeDomain in {
9196  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9197                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9198                         (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9199                         Sched<[sched]>;
9200  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9201                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9202                         (OpNode (_.VT
9203                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9204                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9205  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9206                          (ins _.ScalarMemOp:$src), OpcodeStr,
9207                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9208                          (OpNode (_.VT
9209                            (_.BroadcastLdFrag addr:$src)))>,
9210                          EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9211  }
9212}
9213
9214multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9215                                X86SchedWriteWidths sched> {
9216  let Uses = [MXCSR] in {
9217  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9218                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9219  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9220                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9221  }
9222  let Predicates = [HasFP16] in
9223  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9224                           v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9225
9226  // Define only if the AVX512VL feature is present.
9227  let Predicates = [HasVLX], Uses = [MXCSR] in {
9228    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9229                                  OpNode, sched.XMM, v4f32x_info>,
9230                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9231    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9232                                  OpNode, sched.YMM, v8f32x_info>,
9233                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9234    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9235                                  OpNode, sched.XMM, v2f64x_info>,
9236                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9237    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9238                                  OpNode, sched.YMM, v4f64x_info>,
9239                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9240  }
9241  let Predicates = [HasFP16, HasVLX] in {
9242    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9243                                OpNode, sched.XMM, v8f16x_info>,
9244                                EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9245    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9246                                OpNode, sched.YMM, v16f16x_info>,
9247                                EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9248  }
9249}
9250
9251defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9252defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
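
// The rcp14/rsqrt14 families above return approximations with a maximum
// relative error below 2^-14; callers that need full precision normally add a
// Newton-Raphson refinement step. A hedged C sketch (standard AVX-512
// intrinsics, not taken from this file):
//   __m512 rcp_refined(__m512 b) {
//     __m512 r = _mm512_rcp14_ps(b);                 // |error| < 2^-14
//     // one Newton-Raphson iteration: r' = r * (2 - b*r)
//     return _mm512_mul_ps(r, _mm512_fnmadd_ps(b, r, _mm512_set1_ps(2.0f)));
//   }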
9253
9254/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9255multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9256                         SDNode OpNode, SDNode OpNodeSAE,
9257                         X86FoldableSchedWrite sched> {
9258  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9259  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9260                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9261                           "$src2, $src1", "$src1, $src2",
9262                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9263                           Sched<[sched]>, SIMD_EXC;
9264
9265  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9266                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9267                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9268                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9269                            EVEX_B, Sched<[sched]>;
9270
9271  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9272                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9273                         "$src2, $src1", "$src1, $src2",
9274                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9275                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9276  }
9277}
9278
9279multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9280                             X86FoldableSchedWrite sched> {
9281  let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in {
9282  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9283                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9284                           "$src2, $src1", "$src1, $src2",
9285                           (null_frag)>, Sched<[sched]>, SIMD_EXC;
9286  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9287                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9288                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9289                            (null_frag)>, EVEX_B, Sched<[sched]>;
9290  let mayLoad = 1 in
9291  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9292                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9293                         "$src2, $src1", "$src1, $src2",
9294                         (null_frag)>,
9295                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9296  }
9297}
9298
9299multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr,
9300                            X86FoldableSchedWrite sched> {
9301  defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>,
9302             EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9303  defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>,
9304             EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9305}
9306
9307defm VRCP28   : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>;
9308defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>;
9309
9310multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9311                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9312  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9313                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9314  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9315                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9316}
9317
9318multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9319                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9320  let Predicates = [HasFP16] in
9321  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9322               EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
9323}
9324
9325defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9326                              SchedWriteFRnd.Scl>,
9327                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9328                                  SchedWriteFRnd.Scl>;
9329/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9330
9331multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9332                         SDNode OpNode, X86FoldableSchedWrite sched> {
9333  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9334  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9335                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9336                         (OpNode (_.VT _.RC:$src))>,
9337                         Sched<[sched]>;
9338
9339  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9340                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9341                         (OpNode (_.VT
9342                             (bitconvert (_.LdFrag addr:$src))))>,
9343                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9344
9345  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9346                         (ins _.ScalarMemOp:$src), OpcodeStr,
9347                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9348                         (OpNode (_.VT
9349                                  (_.BroadcastLdFrag addr:$src)))>,
9350                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9351  }
9352}
9353multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9354                         SDNode OpNode, X86FoldableSchedWrite sched> {
9355  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9356  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9357                        (ins _.RC:$src), OpcodeStr,
9358                        "{sae}, $src", "$src, {sae}",
9359                        (OpNode (_.VT _.RC:$src))>,
9360                        EVEX_B, Sched<[sched]>;
9361}
9362
9363multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9364                             X86FoldableSchedWrite sched> {
9365  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1,
9366    hasSideEffects = 0 in {
9367  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9368                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9369                         (null_frag)>, Sched<[sched]>;
9370  let mayLoad = 1 in
9371  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9372                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9373                         (null_frag)>,
9374                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9375  let mayLoad = 1 in
9376  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9377                         (ins _.ScalarMemOp:$src), OpcodeStr,
9378                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9379                         (null_frag)>,
9380                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9381  }
9382}
9383multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9384                                X86FoldableSchedWrite sched> {
9385  let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in
9386  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9387                        (ins _.RC:$src), OpcodeStr,
9388                        "{sae}, $src", "$src, {sae}",
9389                        (null_frag)>, Sched<[sched]>, EVEX_B;
9390}
9391
9392multiclass  avx512_eri_ass<bits<8> opc, string OpcodeStr,
9393                           X86SchedWriteWidths sched> {
9394   defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9395              avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
9396              T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9397   defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9398              avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
9399              T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9400}
9401
9402defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
9403defm VRCP28   : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
9404defm VEXP2    : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;
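
// The *_ass variants above use null_frag patterns: they provide assembly
// parsing, disassembly and encodings for vrcp28/vrsqrt28/vexp2 but attach no
// instruction-selection patterns.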
9405
9406multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9407                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9408   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9409              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9410              T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9411   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9412              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9413              T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9414}
9415
9416multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9417                                  SDNode OpNode, X86SchedWriteWidths sched> {
9418  // Define only if the AVX512VL feature is present.
9419  let Predicates = [HasVLX] in {
9420    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9421                                sched.XMM>,
9422                                EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9423    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9424                                sched.YMM>,
9425                                EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9426    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9427                                sched.XMM>,
9428                                EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9429    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9430                                sched.YMM>,
9431                                EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9432  }
9433}
9434
9435multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9436                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9437  let Predicates = [HasFP16] in
9438  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9439              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9440              T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9441  let Predicates = [HasFP16, HasVLX] in {
9442    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9443                                     EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9444    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9445                                     EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9446  }
9447}
9448defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9449                            SchedWriteFRnd>,
9450                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9451                                     SchedWriteFRnd>,
9452                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9453                                          SchedWriteFRnd>, EVEX;
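
// vgetexp extracts the unbiased exponent of each element as a floating-point
// value, i.e. floor(log2(|x|)) for normal inputs.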
9454
9455multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9456                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9457  let ExeDomain = _.ExeDomain in
9458  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9459                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9460                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9461                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9462}
9463
9464multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9465                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9466  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9467  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9468                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9469                         (_.VT (any_fsqrt _.RC:$src)),
9470                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9471                         Sched<[sched]>;
9472  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9473                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9474                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9475                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9476                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9477  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9478                          (ins _.ScalarMemOp:$src), OpcodeStr,
9479                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9480                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9481                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9482                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9483  }
9484}
9485
9486let Uses = [MXCSR], mayRaiseFPException = 1 in
9487multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9488                                  X86SchedWriteSizes sched> {
9489  let Predicates = [HasFP16] in
9490  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9491                                sched.PH.ZMM, v32f16_info>,
9492                                EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9493  let Predicates = [HasFP16, HasVLX] in {
9494    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9495                                     sched.PH.XMM, v8f16x_info>,
9496                                     EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9497    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9498                                     sched.PH.YMM, v16f16x_info>,
9499                                     EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9500  }
9501  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9502                                sched.PS.ZMM, v16f32_info>,
9503                                EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9504  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9505                                sched.PD.ZMM, v8f64_info>,
9506                                EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9507  // Define only if the AVX512VL feature is present.
9508  let Predicates = [HasVLX] in {
9509    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9510                                     sched.PS.XMM, v4f32x_info>,
9511                                     EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9512    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9513                                     sched.PS.YMM, v8f32x_info>,
9514                                     EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9515    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9516                                     sched.PD.XMM, v2f64x_info>,
9517                                     EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9518    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9519                                     sched.PD.YMM, v4f64x_info>,
9520                                     EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9521  }
9522}
9523
9524let Uses = [MXCSR] in
9525multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9526                                        X86SchedWriteSizes sched> {
9527  let Predicates = [HasFP16] in
9528  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9529                                      sched.PH.ZMM, v32f16_info>,
9530                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9531  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9532                                      sched.PS.ZMM, v16f32_info>,
9533                                      EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9534  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9535                                      sched.PD.ZMM, v8f64_info>,
9536                                      EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9537}
9538
9539multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9540                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9541  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9542    defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9543                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9544                         "$src2, $src1", "$src1, $src2",
9545                         (X86fsqrts (_.VT _.RC:$src1),
9546                                    (_.VT _.RC:$src2)), "_Int">,
9547                         Sched<[sched]>, SIMD_EXC;
9548    defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9549                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9550                         "$src2, $src1", "$src1, $src2",
9551                         (X86fsqrts (_.VT _.RC:$src1),
9552                                    (_.ScalarIntMemFrags addr:$src2)), "_Int">,
9553                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9554    let Uses = [MXCSR] in
9555    defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9556                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9557                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9558                         (X86fsqrtRnds (_.VT _.RC:$src1),
9559                                     (_.VT _.RC:$src2),
9560                                     (i32 timm:$rc)), "_Int">,
9561                         EVEX_B, EVEX_RC, Sched<[sched]>;
9562
9563    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9564      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9565                (ins _.FRC:$src1, _.FRC:$src2),
9566                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9567                Sched<[sched]>, SIMD_EXC;
9568      let mayLoad = 1 in
9569        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9570                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9571                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9572                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9573    }
9574  }
9575
9576  let Predicates = [prd] in {
9577    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9578              (!cast<Instruction>(Name#Zr)
9579                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9580  }
9581
9582  let Predicates = [prd, OptForSize] in {
9583    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9584              (!cast<Instruction>(Name#Zm)
9585                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9586  }
9587}
9588
9589multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9590                                  X86SchedWriteSizes sched> {
9591  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9592                        EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9593  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9594                        EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9595  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9596                        EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
9597}
9598
9599defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9600             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
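
// A hedged C-level illustration of the embedded-rounding (EVEX_RC) packed
// forms (standard AVX-512 intrinsics, not taken from this file):
//   __m512d sqrt_toward_zero(__m512d x) {
//     return _mm512_sqrt_round_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
//   }
// This can select the "rb" variant, with the rounding mode taken from EVEX.RC
// rather than from MXCSR.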
9601
9602defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9603
9604multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9605                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9606  let ExeDomain = _.ExeDomain in {
9607  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9608                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9609                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9610                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9611                           (i32 timm:$src3))), "_Int">,
9612                           Sched<[sched]>, SIMD_EXC;
9613
9614  let Uses = [MXCSR] in
9615  defm rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9616                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9617                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9618                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9619                         (i32 timm:$src3))), "_Int">, EVEX_B,
9620                         Sched<[sched]>;
9621
9622  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9623                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9624                         OpcodeStr,
9625                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9626                         (_.VT (X86RndScales _.RC:$src1,
9627                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3))), "_Int">,
9628                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9629
9630  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9631    def rri : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9632               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9633               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9634               []>, Sched<[sched]>, SIMD_EXC;
9635
9636    let mayLoad = 1 in
9637      def rmi : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9638                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9639                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9640                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9641  }
9642  }
9643
9644  let Predicates = [HasAVX512] in {
9645    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9646              (_.EltVT (!cast<Instruction>(NAME#rri) (_.EltVT (IMPLICIT_DEF)),
9647               _.FRC:$src1, timm:$src2))>;
9648  }
9649
9650  let Predicates = [HasAVX512, OptForSize] in {
9651    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9652              (_.EltVT (!cast<Instruction>(NAME#rmi) (_.EltVT (IMPLICIT_DEF)),
9653               addr:$src1, timm:$src2))>;
9654  }
9655}
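
// Scalar floor/ceil/trunc/rint/nearbyint are lowered by the X86 backend to
// VRndScale nodes with the corresponding immediate, so the FRC patterns above
// are what ultimately select vrndscaless/vrndscalesd for those operations.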
9656
9657let Predicates = [HasFP16] in
9658defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9659                                           SchedWriteFRnd.Scl, f16x_info>,
9660                                           AVX512PSIi8Base, TA, EVEX, VVVV,
9661                                           EVEX_CD8<16, CD8VT1>;
9662
9663defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9664                                           SchedWriteFRnd.Scl, f32x_info>,
9665                                           AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9666                                           EVEX_CD8<32, CD8VT1>;
9667
9668defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9669                                           SchedWriteFRnd.Scl, f64x_info>,
9670                                           REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9671                                           EVEX_CD8<64, CD8VT1>;
9672
9673multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9674                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9675                                dag OutMask, Predicate BasePredicate> {
9676  let Predicates = [BasePredicate] in {
9677    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9678               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9679               (extractelt _.VT:$dst, (iPTR 0))))),
9680              (!cast<Instruction>("V"#OpcPrefix#rk_Int)
9681               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9682
9683    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9684               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9685               ZeroFP))),
9686              (!cast<Instruction>("V"#OpcPrefix#rkz_Int)
9687               OutMask, _.VT:$src2, _.VT:$src1)>;
9688  }
9689}
9690
9691defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9692                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9693                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9694defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9695                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9696                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9697defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9698                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9699                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
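
// A hedged C-level illustration of the masked scalar patterns above (standard
// AVX-512 intrinsics, not taken from this file):
//   __m128 masked_sqrt_ss(__m128 src, __mmask8 k, __m128 a, __m128 b) {
//     return _mm_mask_sqrt_ss(src, k, a, b);
//   }
// The resulting select-over-fsqrt dag can be matched to the masked "_Int"
// sqrt forms instead of an unmasked sqrt followed by a blend.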
9700
9701
9702//===----------------------------------------------------------------------===//
9703// Integer truncate and extend operations
9704//===----------------------------------------------------------------------===//
9705
9706multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9707                              SDPatternOperator MaskNode,
9708                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9709                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9710  let ExeDomain = DestInfo.ExeDomain in {
9711  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9712             (ins SrcInfo.RC:$src),
9713             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9714             [(set DestInfo.RC:$dst,
9715                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9716             EVEX, Sched<[sched]>;
9717  let Constraints = "$src0 = $dst" in
9718  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9719             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9720             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9721             [(set DestInfo.RC:$dst,
9722                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9723                             (DestInfo.VT DestInfo.RC:$src0),
9724                             SrcInfo.KRCWM:$mask))]>,
9725             EVEX, EVEX_K, Sched<[sched]>;
9726  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9727             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9728             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9729             [(set DestInfo.RC:$dst,
9730                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9731                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9732             EVEX, EVEX_KZ, Sched<[sched]>;
9733  }
9734
9735  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9736    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9737               (ins x86memop:$dst, SrcInfo.RC:$src),
9738               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9739               EVEX, Sched<[sched.Folded]>;
9740
9741    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9742               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9743               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9744               EVEX, EVEX_K, Sched<[sched.Folded]>;
9745  } // mayStore = 1, hasSideEffects = 0
9746}
9747
9748multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9749                                    PatFrag truncFrag, PatFrag mtruncFrag,
9750                                    string Name> {
9751
9752  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9753            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9754                                    addr:$dst, SrcInfo.RC:$src)>;
9755
9756  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9757                        SrcInfo.KRCWM:$mask),
9758            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9759                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9760}
9761
9762multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9763                        SDNode OpNode256, SDNode OpNode512,
9764                        SDPatternOperator MaskNode128,
9765                        SDPatternOperator MaskNode256,
9766                        SDPatternOperator MaskNode512,
9767                        X86SchedWriteWidths sched,
9768                        AVX512VLVectorVTInfo VTSrcInfo,
9769                        X86VectorVTInfo DestInfoZ128,
9770                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9771                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9772                        X86MemOperand x86memopZ, PatFrag truncFrag,
9773                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9774
9775  let Predicates = [HasVLX, prd] in {
9776    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9777                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9778                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9779                                         mtruncFrag, NAME>, EVEX_V128;
9780
9781    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9782                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9783                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9784                                         mtruncFrag, NAME>, EVEX_V256;
9785  }
9786  let Predicates = [prd] in
9787    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9788                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9789                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9790                                         mtruncFrag, NAME>, EVEX_V512;
9791}
9792
9793multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9794                           X86SchedWriteWidths sched, PatFrag StoreNode,
9795                           PatFrag MaskedStoreNode, SDNode InVecNode,
9796                           SDPatternOperator InVecMaskNode> {
9797  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9798                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9799                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9800                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9801                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9802}
9803
9804multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9805                           SDPatternOperator MaskNode,
9806                           X86SchedWriteWidths sched, PatFrag StoreNode,
9807                           PatFrag MaskedStoreNode, SDNode InVecNode,
9808                           SDPatternOperator InVecMaskNode> {
9809  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9810                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9811                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9812                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9813                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9814}
9815
9816multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9817                           SDPatternOperator MaskNode,
9818                           X86SchedWriteWidths sched, PatFrag StoreNode,
9819                           PatFrag MaskedStoreNode, SDNode InVecNode,
9820                           SDPatternOperator InVecMaskNode> {
9821  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9822                          InVecMaskNode, MaskNode, MaskNode, sched,
9823                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9824                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9825                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9826}
9827
9828multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9829                           SDPatternOperator MaskNode,
9830                           X86SchedWriteWidths sched, PatFrag StoreNode,
9831                           PatFrag MaskedStoreNode, SDNode InVecNode,
9832                           SDPatternOperator InVecMaskNode> {
9833  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9834                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9835                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9836                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9837                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9838}
9839
9840multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9841                           SDPatternOperator MaskNode,
9842                           X86SchedWriteWidths sched, PatFrag StoreNode,
9843                           PatFrag MaskedStoreNode, SDNode InVecNode,
9844                           SDPatternOperator InVecMaskNode> {
9845  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9846                          InVecMaskNode, MaskNode, MaskNode, sched,
9847                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9848                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9849                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9850}
9851
9852multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9853                           SDPatternOperator MaskNode,
9854                           X86SchedWriteWidths sched, PatFrag StoreNode,
9855                           PatFrag MaskedStoreNode, SDNode InVecNode,
9856                           SDPatternOperator InVecMaskNode> {
9857  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9858                          InVecMaskNode, MaskNode, MaskNode, sched,
9859                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9860                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9861                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9862}
9863
9864defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9865                                  SchedWriteVecTruncate, truncstorevi8,
9866                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9867defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9868                                  SchedWriteVecTruncate, truncstore_s_vi8,
9869                                  masked_truncstore_s_vi8, X86vtruncs,
9870                                  X86vmtruncs>;
9871defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9872                                  SchedWriteVecTruncate, truncstore_us_vi8,
9873                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9874
9875defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9876                                  SchedWriteVecTruncate, truncstorevi16,
9877                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9878defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9879                                  SchedWriteVecTruncate, truncstore_s_vi16,
9880                                  masked_truncstore_s_vi16, X86vtruncs,
9881                                  X86vmtruncs>;
9882defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9883                                  select_truncus, SchedWriteVecTruncate,
9884                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9885                                  X86vtruncus, X86vmtruncus>;
9886
9887defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9888                                  SchedWriteVecTruncate, truncstorevi32,
9889                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9890defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9891                                  SchedWriteVecTruncate, truncstore_s_vi32,
9892                                  masked_truncstore_s_vi32, X86vtruncs,
9893                                  X86vmtruncs>;
9894defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9895                                  select_truncus, SchedWriteVecTruncate,
9896                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9897                                  X86vtruncus, X86vmtruncus>;
9898
9899defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9900                                  SchedWriteVecTruncate, truncstorevi8,
9901                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9902defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9903                                  SchedWriteVecTruncate, truncstore_s_vi8,
9904                                  masked_truncstore_s_vi8, X86vtruncs,
9905                                  X86vmtruncs>;
9906defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9907                                  select_truncus, SchedWriteVecTruncate,
9908                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9909                                  X86vtruncus, X86vmtruncus>;
9910
9911defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9912                                  SchedWriteVecTruncate, truncstorevi16,
9913                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9914defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9915                                  SchedWriteVecTruncate, truncstore_s_vi16,
9916                                  masked_truncstore_s_vi16, X86vtruncs,
9917                                  X86vmtruncs>;
9918defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9919                                  select_truncus, SchedWriteVecTruncate,
9920                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9921                                  X86vtruncus, X86vmtruncus>;
9922
9923defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9924                                  SchedWriteVecTruncate, truncstorevi8,
9925                                  masked_truncstorevi8, X86vtrunc,
9926                                  X86vmtrunc>;
9927defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9928                                  SchedWriteVecTruncate, truncstore_s_vi8,
9929                                  masked_truncstore_s_vi8, X86vtruncs,
9930                                  X86vmtruncs>;
9931defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9932                                  select_truncus, SchedWriteVecTruncate,
9933                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9934                                  X86vtruncus, X86vmtruncus>;
9935
9936let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
9937def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9938         (v8i16 (EXTRACT_SUBREG
9939                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9940                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
9941def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9942         (v4i32 (EXTRACT_SUBREG
9943                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9944                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9945}
9946
9947let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
9948def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9949         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9950                                            VR256X:$src, sub_ymm))), sub_xmm))>;
9951}
9952
9953// Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
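// For example (illustrative sketch): with AVX512F/VLX but no BWI, IR such as
//   %t = trunc <8 x i32> %x to <8 x i16>
//   %r = select <8 x i1> %m, <8 x i16> %t, <8 x i16> %passthru
// cannot be selected through a vXi16 vselect, so lowering produces an
// X86vmtrunc node that the patterns below map onto VPMOVDWZ256rrk.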
9954multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9955                           X86VectorVTInfo DestInfo,
9956                           X86VectorVTInfo SrcInfo> {
9957  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9958                                 DestInfo.RC:$src0,
9959                                 SrcInfo.KRCWM:$mask)),
9960            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9961                                                 SrcInfo.KRCWM:$mask,
9962                                                 SrcInfo.RC:$src)>;
9963
9964  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9965                                 DestInfo.ImmAllZerosV,
9966                                 SrcInfo.KRCWM:$mask)),
9967            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9968                                                  SrcInfo.RC:$src)>;
9969}
9970
9971let Predicates = [HasVLX] in {
9972defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9973defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9974defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9975}
9976
9977let Predicates = [HasAVX512] in {
9978defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9979defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9980defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9981
9982defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9983defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9984defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9985
9986defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9987defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9988defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9989}
9990
9991multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9992              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9993              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9994  let ExeDomain = DestInfo.ExeDomain in {
9995  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9996                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9997                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9998                  EVEX, Sched<[sched]>;
9999
10000  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10001                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10002                  (DestInfo.VT (LdFrag addr:$src))>,
10003                EVEX, Sched<[sched.Folded]>;
10004  }
10005}
10006
10007multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
10008          SDNode OpNode, SDNode InVecNode, string ExtTy,
10009          X86SchedWriteWidths sched,
10010          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10011  let Predicates = [HasVLX, HasBWI] in {
10012    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10013                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10014                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
10015
10016    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10017                    v16i8x_info, i128mem, LdFrag, OpNode>,
10018                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
10019  }
10020  let Predicates = [HasBWI] in {
10021    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10022                    v32i8x_info, i256mem, LdFrag, OpNode>,
10023                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
10024  }
10025}
10026
10027multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10028          SDNode OpNode, SDNode InVecNode, string ExtTy,
10029          X86SchedWriteWidths sched,
10030          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10031  let Predicates = [HasVLX, HasAVX512] in {
10032    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10033                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10034                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
10035
10036    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10037                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10038                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
10039  }
10040  let Predicates = [HasAVX512] in {
10041    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10042                   v16i8x_info, i128mem, LdFrag, OpNode>,
10043                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
10044  }
10045}
10046
10047multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10048                              SDNode InVecNode, string ExtTy,
10049                              X86SchedWriteWidths sched,
10050                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10051  let Predicates = [HasVLX, HasAVX512] in {
10052    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10053                   v16i8x_info, i16mem, LdFrag, InVecNode>,
10054                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
10055
10056    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10057                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10058                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
10059  }
10060  let Predicates = [HasAVX512] in {
10061    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10062                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10063                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
10064  }
10065}
10066
10067multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10068         SDNode OpNode, SDNode InVecNode, string ExtTy,
10069         X86SchedWriteWidths sched,
10070         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10071  let Predicates = [HasVLX, HasAVX512] in {
10072    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10073                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10074                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
10075
10076    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10077                   v8i16x_info, i128mem, LdFrag, OpNode>,
10078                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
10079  }
10080  let Predicates = [HasAVX512] in {
10081    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10082                   v16i16x_info, i256mem, LdFrag, OpNode>,
10083                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
10084  }
10085}
10086
10087multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10088         SDNode OpNode, SDNode InVecNode, string ExtTy,
10089         X86SchedWriteWidths sched,
10090         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10091  let Predicates = [HasVLX, HasAVX512] in {
10092    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10093                   v8i16x_info, i32mem, LdFrag, InVecNode>,
10094                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
10095
10096    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10097                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10098                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
10099  }
10100  let Predicates = [HasAVX512] in {
10101    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10102                   v8i16x_info, i128mem, LdFrag, OpNode>,
10103                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
10104  }
10105}
10106
10107multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10108         SDNode OpNode, SDNode InVecNode, string ExtTy,
10109         X86SchedWriteWidths sched,
10110         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10111
10112  let Predicates = [HasVLX, HasAVX512] in {
10113    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10114                   v4i32x_info, i64mem, LdFrag, InVecNode>,
10115                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
10116
10117    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10118                   v4i32x_info, i128mem, LdFrag, OpNode>,
10119                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
10120  }
10121  let Predicates = [HasAVX512] in {
10122    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10123                   v8i32x_info, i256mem, LdFrag, OpNode>,
10124                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
10125  }
10126}
10127
10128defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10129defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10130defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10131defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10132defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10133defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10134
10135defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10136defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10137defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10138defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10139defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10140defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10141
10142
10143// Patterns that we also need any-extend versions of. aext_vector_inreg
10144// is currently legalized to zext_vector_inreg.
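// For example (sketch), with VLX the patterns below map IR such as
//   %w = load <8 x i16>, ptr %p
//   %e = zext <8 x i16> %w to <8 x i32>
// onto the folded-load form VPMOVZXWDZ256rm.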
10145multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10146  // 256-bit patterns
10147  let Predicates = [HasVLX, HasBWI] in {
10148    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10149              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10150  }
10151
10152  let Predicates = [HasVLX] in {
10153    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10154              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10155
10156    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10157              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10158  }
10159
10160  // 512-bit patterns
10161  let Predicates = [HasBWI] in {
10162    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10163              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10164  }
10165  let Predicates = [HasAVX512] in {
10166    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10167              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10168    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10169              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10170
10171    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10172              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10173
10174    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10175              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10176  }
10177}
10178
10179multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10180                                 SDNode InVecOp> :
10181    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10182  // 128-bit patterns
10183  let Predicates = [HasVLX, HasBWI] in {
10184  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10185            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10186  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10187            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10188  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10189            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10190  }
10191  let Predicates = [HasVLX] in {
10192  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10193            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10194  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10195            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10196
10197  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10198            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10199
10200  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10201            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10202  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10203            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10204  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10205            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10206
10207  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10208            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10209  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10210            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10211
10212  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10213            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10214  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10215            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10216  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10217            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10218  }
10219  let Predicates = [HasVLX] in {
10220  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10221            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10222  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10223            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10224  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10225            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10226
10227  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10228            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10229  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10230            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10231
10232  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10233            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10234  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10235            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10236  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10237            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10238  }
10239  // 512-bit patterns
10240  let Predicates = [HasAVX512] in {
10241  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10242            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10243  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10244            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10245  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10246            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10247  }
10248}
10249
10250defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10251defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10252
10253// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10254// ext+trunc aggressively, making it impossible to legalize the DAG to this
10255// pattern directly.
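// Roughly (sketch), v16i8 (trunc v16i16) is emitted as a widen-then-truncate
// pair:
//   vpmovzxwd %ymm0, %zmm0    # i16 -> i32
//   vpmovdb   %zmm0, %xmm0    # i32 -> i8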
10256let Predicates = [HasAVX512, NoBWI] in {
10257def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10258         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10259def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10260         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10261}
10262
10263//===----------------------------------------------------------------------===//
10264// GATHER - SCATTER Operations
10265
10266// FIXME: Improve scheduling of gather/scatter instructions.
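// The gather forms below both read and write the mask: elements are cleared
// from $mask_wb as the corresponding loads complete, which is why the mask is
// tied via "$mask = $mask_wb". A representative form (sketch, AT&T syntax):
//   vpgatherdd (%rax,%zmm1,4), %zmm0 {%k1}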
10267multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10268                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10269  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10270      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10271  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10272            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10273            !strconcat(OpcodeStr#_.Suffix,
10274            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10275            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10276            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10277}
10278
10279multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10280                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10281  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10282                                      vy64xmem>, EVEX_V512, REX_W;
10283  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10284                                      vz64mem>, EVEX_V512, REX_W;
10285let Predicates = [HasVLX] in {
10286  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10287                              vx64xmem>, EVEX_V256, REX_W;
10288  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10289                              vy64xmem>, EVEX_V256, REX_W;
10290  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10291                              vx64xmem>, EVEX_V128, REX_W;
10292  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10293                              vx64xmem>, EVEX_V128, REX_W;
10294}
10295}
10296
10297multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10298                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10299  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz32mem>,
10300                                       EVEX_V512;
10301  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz32mem>,
10302                                       EVEX_V512;
10303let Predicates = [HasVLX] in {
10304  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10305                                          vy32xmem>, EVEX_V256;
10306  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10307                                          vy32xmem>, EVEX_V256;
10308  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10309                                          vx32xmem>, EVEX_V128;
10310  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10311                                          vx32xmem, VK2WM>, EVEX_V128;
10312}
10313}
10314
10315
10316defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10317               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10318
10319defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10320                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10321
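// Scatters mirror the gathers above: the mask is tied and written back as
// elements are stored. A representative form (sketch, AT&T syntax):
//   vpscatterdd %zmm0, (%rax,%zmm1,4) {%k1}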
10322multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10323                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10324
10325let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10326    hasSideEffects = 0 in
10327
10328  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10329            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10330            !strconcat(OpcodeStr#_.Suffix,
10331            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10332            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10333            Sched<[WriteStore]>;
10334}
10335
10336multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10337                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10338  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10339                                      vy64xmem>, EVEX_V512, REX_W;
10340  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10341                                      vz64mem>, EVEX_V512, REX_W;
10342let Predicates = [HasVLX] in {
10343  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10344                              vx64xmem>, EVEX_V256, REX_W;
10345  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10346                              vy64xmem>, EVEX_V256, REX_W;
10347  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10348                              vx64xmem>, EVEX_V128, REX_W;
10349  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10350                              vx64xmem>, EVEX_V128, REX_W;
10351}
10352}
10353
10354multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10355                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10356  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz32mem>,
10357                                       EVEX_V512;
10358  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz32mem>,
10359                                       EVEX_V512;
10360let Predicates = [HasVLX] in {
10361  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10362                                          vy32xmem>, EVEX_V256;
10363  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10364                                          vy32xmem>, EVEX_V256;
10365  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10366                                          vx32xmem>, EVEX_V128;
10367  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10368                                          vx32xmem, VK2WM>, EVEX_V128;
10369}
10370}
10371
10372defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10373               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10374
10375defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10376                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10377
10378// Gather/scatter prefetch instructions.
10379multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10380                       RegisterClass KRC, X86MemOperand memop> {
10381  let mayLoad = 1, mayStore = 1 in
10382  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10383            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10384            EVEX, EVEX_K, Sched<[WriteLoad]>;
10385}
10386
10387defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10388                     VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10389
10390defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10391                     VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10392
10393defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10394                     VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10395
10396defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10397                     VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10398
10399defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10400                     VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10401
10402defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10403                     VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10404
10405defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10406                     VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10407
10408defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10409                     VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10410
10411defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10412                     VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10413
10414defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10415                     VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10416
10417defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10418                     VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10419
10420defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10421                     VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10422
10423defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10424                     VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10425
10426defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10427                     VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10428
10429defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10430                     VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10431
10432defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10433                     VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10434
10435multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10436def rk : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10437                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10438                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10439                  EVEX, Sched<[Sched]>;
10440}
10441
10442multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10443                                 string OpcodeStr, Predicate prd> {
10444let Predicates = [prd] in
10445  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10446
10447  let Predicates = [prd, HasVLX] in {
10448    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10449    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10450  }
10451}
10452
10453defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10454defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10455defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10456defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
10457
10458multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10459    def kr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10460                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10461                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10462                        EVEX, Sched<[WriteMove]>;
10463}
10464
10465// Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
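// For example (sketch), a v16i8 vpmovb2m without VLX is handled by inserting
// the 128-bit source into an undef 512-bit register, running the Z-form
// instruction, and copying the resulting mask to the narrower mask class.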
10466multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10467                                           X86VectorVTInfo _,
10468                                           string Name> {
10469
10470  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10471            (_.KVT (COPY_TO_REGCLASS
10472                     (!cast<Instruction>(Name#"Zkr")
10473                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10474                                      _.RC:$src, _.SubRegIdx)),
10475                   _.KRC))>;
10476}
10477
10478multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10479                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10480  let Predicates = [prd] in
10481    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10482                                            EVEX_V512;
10483
10484  let Predicates = [prd, HasVLX] in {
10485    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10486                                              EVEX_V256;
10487    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10488                                               EVEX_V128;
10489  }
10490  let Predicates = [prd, NoVLX, HasEVEX512] in {
10491    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10492    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10493  }
10494}
10495
10496defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10497                                              avx512vl_i8_info, HasBWI>;
10498defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10499                                              avx512vl_i16_info, HasBWI>, REX_W;
10500defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10501                                              avx512vl_i32_info, HasDQI>;
10502defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10503                                              avx512vl_i64_info, HasDQI>, REX_W;
10504
10505// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10506// is available, but BWI is not. We can't handle this in lowering because
10507// a target-independent DAG combine likes to combine sext and trunc.
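// For example (sketch), v16i8 (sext v16i1) becomes:
//   vpmovm2d %k1, %zmm0    # mask -> v16i32 of all-ones/all-zeros
//   vpmovdb  %zmm0, %xmm0  # truncate v16i32 -> v16i8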
10508let Predicates = [HasDQI, NoBWI] in {
10509  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10510            (VPMOVDBZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
10511  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10512            (VPMOVDWZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
10513}
10514
10515let Predicates = [HasDQI, NoBWI, HasVLX] in {
10516  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10517            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rk VK8:$src)))>;
10518}
10519
10520//===----------------------------------------------------------------------===//
10521// AVX-512 - COMPRESS and EXPAND
10522//
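// A compressing store packs only the elements selected by the mask and writes
// them contiguously starting at the base address, e.g. (sketch, AT&T syntax):
//   vcompressps %zmm0, (%rax) {%k1}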
10523
10524multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10525                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10526  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10527              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10528              (null_frag)>, AVX5128IBase,
10529              Sched<[sched]>;
10530
10531  let mayStore = 1, hasSideEffects = 0 in
10532  def mr : AVX5128I<opc, MRMDestMem, (outs),
10533              (ins _.MemOp:$dst, _.RC:$src),
10534              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10535              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10536              Sched<[sched.Folded]>;
10537
10538  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10539              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10540              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10541              []>,
10542              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10543              Sched<[sched.Folded]>;
10544}
10545
10546multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10547  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10548            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10549                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10550
10551  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10552            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10553                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10554  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10555            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10556                            _.KRCWM:$mask, _.RC:$src)>;
10557  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, undef)),
10558            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10559                            _.KRCWM:$mask, _.RC:$src)>;
10560  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10561            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10562                            _.KRCWM:$mask, _.RC:$src)>;
10563  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.RC:$passthru)),
10564              (!cast<Instruction>(Name#_.ZSuffix#rrk)
10565                            _.RC:$passthru, _.KRCWM:$mask, _.RC:$src)>;
10566}
10567
10568multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10569                                 X86FoldableSchedWrite sched,
10570                                 AVX512VLVectorVTInfo VTInfo,
10571                                 Predicate Pred = HasAVX512> {
10572  let Predicates = [Pred] in
10573  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10574           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10575
10576  let Predicates = [Pred, HasVLX] in {
10577    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10578                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10579    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10580                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10581  }
10582}
10583
10584// FIXME: Is there a better scheduler class for VPCOMPRESS?
10585defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10586                                          avx512vl_i32_info>, EVEX;
10587defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10588                                          avx512vl_i64_info>, EVEX, REX_W;
10589defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10590                                          avx512vl_f32_info>, EVEX;
10591defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10592                                          avx512vl_f64_info>, EVEX, REX_W;
10593
10594// expand
10595multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10596                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10597  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10598              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10599              (null_frag)>, AVX5128IBase,
10600              Sched<[sched]>;
10601
10602  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10603              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10604              (null_frag)>,
10605            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10606            Sched<[sched.Folded, sched.ReadAfterFold]>;
10607}
10608
10609multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10610
10611  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10612            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10613                                        _.KRCWM:$mask, addr:$src)>;
10614
10615  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10616            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10617                                        _.KRCWM:$mask, addr:$src)>;
10618
10619  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10620                                               (_.VT _.RC:$src0))),
10621            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10622                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10623
10624  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10625            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10626                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10627  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10628            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10629                            _.KRCWM:$mask, _.RC:$src)>;
10630}
10631
10632multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10633                               X86FoldableSchedWrite sched,
10634                               AVX512VLVectorVTInfo VTInfo,
10635                               Predicate Pred = HasAVX512> {
10636  let Predicates = [Pred] in
10637  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10638           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10639
10640  let Predicates = [Pred, HasVLX] in {
10641    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10642                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10643    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10644                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10645  }
10646}
10647
10648// FIXME: Is there a better scheduler class for VPEXPAND?
10649defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10650                                      avx512vl_i32_info>, EVEX;
10651defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10652                                      avx512vl_i64_info>, EVEX, REX_W;
10653defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10654                                      avx512vl_f32_info>, EVEX;
10655defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10656                                      avx512vl_f64_info>, EVEX, REX_W;
10657
10658// Handle instruction  reg_vec1 = op(reg_vec, imm)
10659//                                op(mem_vec, imm)
10660//                                op(broadcast(eltVt), imm)
10661// All instructions are created with FROUND_CURRENT.
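// For example (sketch, AT&T syntax), vrndscaleps is built from this multiclass
// and gets the three forms:
//   vrndscaleps $4, %zmm1, %zmm0
//   vrndscaleps $4, (%rax), %zmm0
//   vrndscaleps $4, (%rax){1to16}, %zmm0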
10662multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10663                                      SDPatternOperator OpNode,
10664                                      SDPatternOperator MaskOpNode,
10665                                      X86FoldableSchedWrite sched,
10666                                      X86VectorVTInfo _> {
10667  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10668  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10669                      (ins _.RC:$src1, i32u8imm:$src2),
10670                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10671                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10672                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10673                      Sched<[sched]>;
10674  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10675                    (ins _.MemOp:$src1, i32u8imm:$src2),
10676                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10677                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10678                            (i32 timm:$src2)),
10679                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10680                                (i32 timm:$src2))>,
10681                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10682  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10683                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10684                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10685                    "${src1}"#_.BroadcastStr#", $src2",
10686                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10687                            (i32 timm:$src2)),
10688                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10689                                (i32 timm:$src2))>, EVEX_B,
10690                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10691  }
10692}
10693
10694// Handle instruction  reg_vec1 = op(reg_vec2, imm), {sae}
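// For example (sketch, AT&T syntax): vrndscaleps $4, {sae}, %zmm1, %zmm0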
10695multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10696                                          SDNode OpNode, X86FoldableSchedWrite sched,
10697                                          X86VectorVTInfo _> {
10698  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10699  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10700                      (ins _.RC:$src1, i32u8imm:$src2),
10701                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10702                      "$src1, {sae}, $src2",
10703                      (OpNode (_.VT _.RC:$src1),
10704                              (i32 timm:$src2))>,
10705                      EVEX_B, Sched<[sched]>;
10706}
10707
10708multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10709            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10710            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10711            Predicate prd>{
10712  let Predicates = [prd] in {
10713    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10714                                           sched.ZMM, _.info512>,
10715                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10716                                               sched.ZMM, _.info512>, EVEX_V512;
10717  }
10718  let Predicates = [prd, HasVLX] in {
10719    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10720                                           sched.XMM, _.info128>, EVEX_V128;
10721    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10722                                           sched.YMM, _.info256>, EVEX_V256;
10723  }
10724}
10725
10726// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10727//                                op(reg_vec2, mem_vec, imm)
10728//                                op(reg_vec2, broadcast(eltVt), imm)
10729// All instructions are created with FROUND_CURRENT.
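// For example (sketch, AT&T syntax), vrangeps (DQI) uses these forms:
//   vrangeps $8, %zmm2, %zmm1, %zmm0
//   vrangeps $8, (%rax), %zmm1, %zmm0
//   vrangeps $8, (%rax){1to16}, %zmm1, %zmm0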
10730multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10731                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10732  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10733  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10734                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10735                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10736                      (OpNode (_.VT _.RC:$src1),
10737                              (_.VT _.RC:$src2),
10738                              (i32 timm:$src3))>,
10739                      Sched<[sched]>;
10740  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10741                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10742                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10743                    (OpNode (_.VT _.RC:$src1),
10744                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10745                            (i32 timm:$src3))>,
10746                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10747  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10748                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10749                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10750                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10751                    (OpNode (_.VT _.RC:$src1),
10752                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10753                            (i32 timm:$src3))>, EVEX_B,
10754                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10755  }
10756}
10757
10758// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10759//                                op(reg_vec2, mem_vec, imm)
10760multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10761                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10762                              X86VectorVTInfo SrcInfo>{
10763  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10764  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10765                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10766                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10767                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10768                               (SrcInfo.VT SrcInfo.RC:$src2),
10769                               (i8 timm:$src3)))>,
10770                  Sched<[sched]>;
10771  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10772                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10773                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10774                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10775                             (SrcInfo.VT (bitconvert
10776                                                (SrcInfo.LdFrag addr:$src2))),
10777                             (i8 timm:$src3)))>,
10778                Sched<[sched.Folded, sched.ReadAfterFold]>;
10779  }
10780}
10781
10782// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10783//                                op(reg_vec2, mem_vec, imm)
10784//                                op(reg_vec2, broadcast(eltVt), imm)
10785multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10786                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10787  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10788
10789  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10790  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10791                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10792                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10793                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10794                    (OpNode (_.VT _.RC:$src1),
10795                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10796                            (i8 timm:$src3))>, EVEX_B,
10797                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10798}
10799
10800// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10801//                                       op(reg_vec2, mem_scalar, imm)
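// For example (sketch, AT&T syntax), vrangesd uses these forms:
//   vrangesd $4, %xmm2, %xmm1, %xmm0
//   vrangesd $4, (%rax), %xmm1, %xmm0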
10802multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10803                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10804  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10805  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10806                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10807                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10808                      (OpNode (_.VT _.RC:$src1),
10809                              (_.VT _.RC:$src2),
10810                              (i32 timm:$src3))>,
10811                      Sched<[sched]>;
10812  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10813                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10814                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10815                    (OpNode (_.VT _.RC:$src1),
10816                            (_.ScalarIntMemFrags addr:$src2),
10817                            (i32 timm:$src3))>,
10818                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10819  }
10820}
10821
10822// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
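// For example (sketch, AT&T syntax): vrangeps $8, {sae}, %zmm2, %zmm1, %zmm0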
10823multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10824                                    SDNode OpNode, X86FoldableSchedWrite sched,
10825                                    X86VectorVTInfo _> {
10826  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10827  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10828                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10829                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10830                      "$src1, $src2, {sae}, $src3",
10831                      (OpNode (_.VT _.RC:$src1),
10832                              (_.VT _.RC:$src2),
10833                              (i32 timm:$src3))>,
10834                      EVEX_B, Sched<[sched]>;
10835}
10836
10837// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10838multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10839                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10840  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10841  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10842                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10843                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10844                      "$src1, $src2, {sae}, $src3",
10845                      (OpNode (_.VT _.RC:$src1),
10846                              (_.VT _.RC:$src2),
10847                              (i32 timm:$src3))>,
10848                      EVEX_B, Sched<[sched]>;
10849}
10850
10851multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10852            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10853            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10854  let Predicates = [prd] in {
10855    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10856                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10857                                  EVEX_V512;
10858
10859  }
10860  let Predicates = [prd, HasVLX] in {
10861    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10862                                  EVEX_V128;
10863    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10864                                  EVEX_V256;
10865  }
10866}
10867
10868multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10869                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10870                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10871  let Predicates = [Pred] in {
10872    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10873                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10874  }
10875  let Predicates = [Pred, HasVLX] in {
10876    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10877                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10878    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10879                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
10880  }
10881}
10882
10883multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10884                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10885                                  Predicate Pred = HasAVX512> {
10886  let Predicates = [Pred] in {
10887    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10888                                EVEX_V512;
10889  }
10890  let Predicates = [Pred, HasVLX] in {
10891    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10892                                EVEX_V128;
10893    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10894                                EVEX_V256;
10895  }
10896}
10897
10898multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10899                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10900                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10901  let Predicates = [prd] in {
10902     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10903              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10904  }
10905}
10906
10907multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10908                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10909                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10910                    X86SchedWriteWidths sched, Predicate prd>{
10911  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10912                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10913                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10914  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10915                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10916                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10917  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10918                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10919                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
10920}
10921
10922defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10923                              X86VReduce, X86VReduce, X86VReduceSAE,
10924                              SchedWriteFRnd, HasDQI>;
10925defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10926                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10927                              SchedWriteFRnd, HasAVX512>;
10928defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10929                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10930                              SchedWriteFRnd, HasAVX512>;
10931
10932defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10933                                                0x50, X86VRange, X86VRangeSAE,
10934                                                SchedWriteFAdd, HasDQI>,
10935      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10936defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10937                                                0x50, X86VRange, X86VRangeSAE,
10938                                                SchedWriteFAdd, HasDQI>,
10939      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10940
10941defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10942      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10943      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10944defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10945      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10946      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10947
10948defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10949      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10950      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10951defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10952      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10953      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10954defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10955      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10956      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10957
10958defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10959      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10960      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10961defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10962      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10963      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10964defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10965      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10966      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10967
10968multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10969                                          X86FoldableSchedWrite sched,
10970                                          X86VectorVTInfo _,
10971                                          X86VectorVTInfo CastInfo> {
10972  let ExeDomain = _.ExeDomain in {
10973  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10974                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10975                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10976                  (_.VT (bitconvert
10977                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10978                                                  (i8 timm:$src3)))))>,
10979                  Sched<[sched]>;
10980  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10981                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10982                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10983                (_.VT
10984                 (bitconvert
10985                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10986                                           (CastInfo.LdFrag addr:$src2),
10987                                           (i8 timm:$src3)))))>,
10988                Sched<[sched.Folded, sched.ReadAfterFold]>;
10989  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10990                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10991                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10992                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10993                    (_.VT
10994                     (bitconvert
10995                      (CastInfo.VT
10996                       (X86Shuf128 _.RC:$src1,
10997                                   (_.BroadcastLdFrag addr:$src2),
10998                                   (i8 timm:$src3)))))>, EVEX_B,
10999                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11000  }
11001}
11002
11003multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11004                                   AVX512VLVectorVTInfo _,
11005                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
11006  let Predicates = [HasAVX512] in
11007  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11008                                          _.info512, CastInfo.info512>, EVEX_V512;
11009
11010  let Predicates = [HasAVX512, HasVLX] in
11011  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11012                                             _.info256, CastInfo.info256>, EVEX_V256;
11013}
11014
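// Note that avx512_shuff_packed_128 provides only 256-bit and 512-bit forms:
// these instructions shuffle whole 128-bit lanes, so there is no meaningful
// 128-bit variant.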
11015defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11016      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
11017defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11018      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
11019defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11020      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
11021defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11022      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
11023
11024multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11025                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11026  let ExeDomain = _.ExeDomain in {
11027  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11028                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11029                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11030                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11031                  Sched<[sched]>;
11032  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11033                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11034                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11035                (_.VT (X86VAlign _.RC:$src1,
11036                                 (bitconvert (_.LdFrag addr:$src2)),
11037                                 (i8 timm:$src3)))>,
11038                Sched<[sched.Folded, sched.ReadAfterFold]>;
11039
11040  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11041                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11042                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11043                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
11044                   (X86VAlign _.RC:$src1,
11045                              (_.VT (_.BroadcastLdFrag addr:$src2)),
11046                              (i8 timm:$src3))>, EVEX_B,
11047                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11048  }
11049}
11050
11051multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11052                                AVX512VLVectorVTInfo _> {
11053  let Predicates = [HasAVX512] in {
11054    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11055                                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
11056  }
11057  let Predicates = [HasAVX512, HasVLX] in {
11058    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11059                                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
11060    // We can't really override the 256-bit version, so change it back to unset.
11061    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11062                                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
11063  }
11064}
11065
11066defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11067                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11068defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11069                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11070                                   REX_W;
11071
11072defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11073                                         SchedWriteShuffle, avx512vl_i8_info,
11074                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11075
11076// Fragments to help convert valignq into masked valignd, or valignq/valignd
11077// into vpalignr.
11078def ValignqImm32XForm : SDNodeXForm<timm, [{
11079  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11080}]>;
11081def ValignqImm8XForm : SDNodeXForm<timm, [{
11082  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11083}]>;
11084def ValigndImm8XForm : SDNodeXForm<timm, [{
11085  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11086}]>;
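// For illustration, the transforms above just rescale the element count of
// the rotate: a masked "valignq $1" (one 64-bit element) corresponds to
// "valignd $2" (two 32-bit elements, ValignqImm32XForm) or "vpalignr $8"
// (eight bytes, ValignqImm8XForm), and "valignd $1" corresponds to
// "vpalignr $4" (four bytes, ValigndImm8XForm).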
11087
11088multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11089                                        X86VectorVTInfo From, X86VectorVTInfo To,
11090                                        SDNodeXForm ImmXForm> {
11091  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11092                                 (bitconvert
11093                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11094                                                   timm:$src3))),
11095                                 To.RC:$src0)),
11096            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11097                                                  To.RC:$src1, To.RC:$src2,
11098                                                  (ImmXForm timm:$src3))>;
11099
11100  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11101                                 (bitconvert
11102                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11103                                                   timm:$src3))),
11104                                 To.ImmAllZerosV)),
11105            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11106                                                   To.RC:$src1, To.RC:$src2,
11107                                                   (ImmXForm timm:$src3))>;
11108
11109  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11110                                 (bitconvert
11111                                  (From.VT (OpNode From.RC:$src1,
11112                                                   (From.LdFrag addr:$src2),
11113                                           timm:$src3))),
11114                                 To.RC:$src0)),
11115            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11116                                                  To.RC:$src1, addr:$src2,
11117                                                  (ImmXForm timm:$src3))>;
11118
11119  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11120                                 (bitconvert
11121                                  (From.VT (OpNode From.RC:$src1,
11122                                                   (From.LdFrag addr:$src2),
11123                                           timm:$src3))),
11124                                 To.ImmAllZerosV)),
11125            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11126                                                   To.RC:$src1, addr:$src2,
11127                                                   (ImmXForm timm:$src3))>;
11128}
11129
11130multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11131                                           X86VectorVTInfo From,
11132                                           X86VectorVTInfo To,
11133                                           SDNodeXForm ImmXForm> :
11134      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11135  def : Pat<(From.VT (OpNode From.RC:$src1,
11136                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11137                             timm:$src3)),
11138            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11139                                                  (ImmXForm timm:$src3))>;
11140
11141  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11142                                 (bitconvert
11143                                  (From.VT (OpNode From.RC:$src1,
11144                                           (bitconvert
11145                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11146                                           timm:$src3))),
11147                                 To.RC:$src0)),
11148            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11149                                                   To.RC:$src1, addr:$src2,
11150                                                   (ImmXForm timm:$src3))>;
11151
11152  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11153                                 (bitconvert
11154                                  (From.VT (OpNode From.RC:$src1,
11155                                           (bitconvert
11156                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11157                                           timm:$src3))),
11158                                 To.ImmAllZerosV)),
11159            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11160                                                    To.RC:$src1, addr:$src2,
11161                                                    (ImmXForm timm:$src3))>;
11162}
11163
11164let Predicates = [HasAVX512] in {
11165  // For 512-bit we lower to the widest element type we can. So we only need
11166  // to handle converting valignq to valignd.
11167  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11168                                         v16i32_info, ValignqImm32XForm>;
11169}
11170
11171let Predicates = [HasVLX] in {
11172  // For 128-bit we lower to the widest element type we can. So we only need
11173  // to handle converting valignq to valignd.
11174  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11175                                         v4i32x_info, ValignqImm32XForm>;
11176  // For 256-bit we lower to the widest element type we can. So we only need
11177  // to handle converting valignq to valignd.
11178  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11179                                         v8i32x_info, ValignqImm32XForm>;
11180}
11181
11182let Predicates = [HasVLX, HasBWI] in {
11183  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR.
11184  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11185                                      v16i8x_info, ValignqImm8XForm>;
11186  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11187                                      v16i8x_info, ValigndImm8XForm>;
11188}
11189
11190defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11191                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11192                EVEX_CD8<8, CD8VF>;
11193
11194multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11195                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11196  let ExeDomain = _.ExeDomain in {
11197  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11198                    (ins _.RC:$src1), OpcodeStr,
11199                    "$src1", "$src1",
11200                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11201                    Sched<[sched]>;
11202
11203  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11204                  (ins _.MemOp:$src1), OpcodeStr,
11205                  "$src1", "$src1",
11206                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11207            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11208            Sched<[sched.Folded]>;
11209  }
11210}
11211
11212multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11213                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11214           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11215  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11216                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11217                  "${src1}"#_.BroadcastStr,
11218                  "${src1}"#_.BroadcastStr,
11219                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11220             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11221             Sched<[sched.Folded]>;
11222}
11223
11224multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11225                              X86SchedWriteWidths sched,
11226                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11227  let Predicates = [prd] in
11228    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11229                             EVEX_V512;
11230
11231  let Predicates = [prd, HasVLX] in {
11232    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11233                              EVEX_V256;
11234    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11235                              EVEX_V128;
11236  }
11237}
11238
11239multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11240                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11241                               Predicate prd> {
11242  let Predicates = [prd] in
11243    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11244                              EVEX_V512;
11245
11246  let Predicates = [prd, HasVLX] in {
11247    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11248                                 EVEX_V256;
11249    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11250                                 EVEX_V128;
11251  }
11252}
11253
11254multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11255                                 SDNode OpNode, X86SchedWriteWidths sched,
11256                                 Predicate prd> {
11257  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11258                               avx512vl_i64_info, prd>, REX_W;
11259  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11260                               avx512vl_i32_info, prd>;
11261}
11262
11263multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11264                                 SDNode OpNode, X86SchedWriteWidths sched,
11265                                 Predicate prd> {
11266  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11267                              avx512vl_i16_info, prd>, WIG;
11268  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11269                              avx512vl_i8_info, prd>, WIG;
11270}
11271
11272multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11273                                  bits<8> opc_d, bits<8> opc_q,
11274                                  string OpcodeStr, SDNode OpNode,
11275                                  X86SchedWriteWidths sched> {
11276  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11277                                    HasAVX512>,
11278              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11279                                    HasBWI>;
11280}
11281
11282defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11283                                    SchedWriteVecALU>;
11284
11285// VPABS: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
11286let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
11287  def : Pat<(v4i64 (abs VR256X:$src)),
11288            (EXTRACT_SUBREG
11289                (VPABSQZrr
11290                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11291             sub_ymm)>;
11292  def : Pat<(v2i64 (abs VR128X:$src)),
11293            (EXTRACT_SUBREG
11294                (VPABSQZrr
11295                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11296             sub_xmm)>;
11297}
11298
11299// Use the 512-bit version to implement 128/256-bit operations when VLX is not available (generalizing the VPABS patterns above).
11300multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11301                                 AVX512VLVectorVTInfo _, Predicate prd> {
11302  let Predicates = [prd, NoVLX, HasEVEX512] in {
11303    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11304              (EXTRACT_SUBREG
11305                (!cast<Instruction>(InstrStr # "Zrr")
11306                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11307                                 _.info256.RC:$src1,
11308                                 _.info256.SubRegIdx)),
11309              _.info256.SubRegIdx)>;
11310
11311    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11312              (EXTRACT_SUBREG
11313                (!cast<Instruction>(InstrStr # "Zrr")
11314                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11315                                 _.info128.RC:$src1,
11316                                 _.info128.SubRegIdx)),
11317              _.info128.SubRegIdx)>;
11318  }
11319}
11320
11321defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11322                                        SchedWriteVecIMul, HasCDI>;
11323
11324// FIXME: Is there a better scheduler class for VPCONFLICT?
11325defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11326                                        SchedWriteVecALU, HasCDI>;
11327
11328// VPLZCNT: Use the 512-bit version to implement the 128/256-bit forms when VLX is not available.
11329defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11330defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11331
11332//===---------------------------------------------------------------------===//
11333// Counts number of ones - VPOPCNTD and VPOPCNTQ
11334//===---------------------------------------------------------------------===//
11335
11336// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11337defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11338                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11339
11340defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11341defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11342
11343//===---------------------------------------------------------------------===//
11344// Replicate Single FP - MOVSHDUP and MOVSLDUP
11345//===---------------------------------------------------------------------===//
11346
11347multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11348                            X86SchedWriteWidths sched> {
11349  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11350                                      avx512vl_f32_info, HasAVX512>, TB, XS;
11351}
11352
11353defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11354                                  SchedWriteFShuffle>;
11355defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11356                                  SchedWriteFShuffle>;
11357
11358//===----------------------------------------------------------------------===//
11359// AVX-512 - MOVDDUP
11360//===----------------------------------------------------------------------===//
11361
11362multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11363                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11364  let ExeDomain = _.ExeDomain in {
11365  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11366                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11367                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11368                   Sched<[sched]>;
11369  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11370                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11371                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11372                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11373                 Sched<[sched.Folded]>;
11374  }
11375}
11376
11377multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11378                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11379  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11380                           VTInfo.info512>, EVEX_V512;
11381
11382  let Predicates = [HasAVX512, HasVLX] in {
11383    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11384                                VTInfo.info256>, EVEX_V256;
11385    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11386                                   VTInfo.info128>, EVEX_V128;
11387  }
11388}
11389
11390multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11391                          X86SchedWriteWidths sched> {
11392  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11393                                        avx512vl_f64_info>, TB, XD, REX_W;
11394}
11395
11396defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11397
11398let Predicates = [HasVLX] in {
11399def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11400          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11401
11402def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11403                        (v2f64 VR128X:$src0)),
11404          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11405                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11406def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11407                        immAllZerosV),
11408          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11409}
11410
11411//===----------------------------------------------------------------------===//
11412// AVX-512 - Unpack Instructions
11413//===----------------------------------------------------------------------===//
11414
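// Unpacks only move data and never read MXCSR or raise FP exceptions, so
// clear the implicit MXCSR use and mayRaiseFPException that the FP binop
// multiclasses would otherwise attach.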
11415let Uses = []<Register>, mayRaiseFPException = 0 in {
11416defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11417                                 SchedWriteFShuffleSizes, 0, 1>;
11418defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11419                                 SchedWriteFShuffleSizes>;
11420}
11421
11422defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11423                                       SchedWriteShuffle, HasBWI>;
11424defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11425                                       SchedWriteShuffle, HasBWI>;
11426defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11427                                       SchedWriteShuffle, HasBWI>;
11428defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11429                                       SchedWriteShuffle, HasBWI>;
11430
11431defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11432                                       SchedWriteShuffle, HasAVX512>;
11433defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11434                                       SchedWriteShuffle, HasAVX512>;
11435defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11436                                        SchedWriteShuffle, HasAVX512>;
11437defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11438                                        SchedWriteShuffle, HasAVX512>;
11439
11440//===----------------------------------------------------------------------===//
11441// AVX-512 - Extract & Insert Integer Instructions
11442//===----------------------------------------------------------------------===//
11443
11444multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11445                                                            X86VectorVTInfo _> {
11446  def mri : AVX512Ii8<opc, MRMDestMem, (outs),
11447              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11448              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11449              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11450                       addr:$dst)]>,
11451              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11452}
11453
11454multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11455  let Predicates = [HasBWI] in {
11456    def rri : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11457                  (ins _.RC:$src1, u8imm:$src2),
11458                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11459                  [(set GR32orGR64:$dst,
11460                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11461                  EVEX, TA, PD, Sched<[WriteVecExtract]>;
11462
11463    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
11464  }
11465}
11466
11467multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11468  let Predicates = [HasBWI] in {
11469    def rri : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11470                  (ins _.RC:$src1, u8imm:$src2),
11471                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11472                  [(set GR32orGR64:$dst,
11473                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11474                  EVEX, TB, PD, Sched<[WriteVecExtract]>;
11475
11476    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11477    def rri_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11478                   (ins _.RC:$src1, u8imm:$src2),
11479                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11480                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11481
11482    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
11483  }
11484}
11485
11486multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11487                                                            RegisterClass GRC> {
11488  let Predicates = [HasDQI] in {
11489    def rri : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11490                  (ins _.RC:$src1, u8imm:$src2),
11491                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11492                  [(set GRC:$dst,
11493                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11494                  EVEX, TA, PD, Sched<[WriteVecExtract]>;
11495
11496    def mri : AVX512Ii8<0x16, MRMDestMem, (outs),
11497                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11498                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11499                [(store (extractelt (_.VT _.RC:$src1),
11500                                    imm:$src2),addr:$dst)]>,
11501                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
11502                Sched<[WriteVecExtractSt]>;
11503  }
11504}
11505
11506defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11507defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11508defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11509defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
11510
11511multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11512                                            X86VectorVTInfo _, PatFrag LdFrag,
11513                                            SDPatternOperator immoperator> {
11514  def rmi : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11515      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11516      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11517      [(set _.RC:$dst,
11518          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11519      EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11520}
11521
11522multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11523                                            X86VectorVTInfo _, PatFrag LdFrag> {
11524  let Predicates = [HasBWI] in {
11525    def rri : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11526        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11527        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11528        [(set _.RC:$dst,
11529            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
11530        Sched<[WriteVecInsert]>;
11531
11532    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11533  }
11534}
11535
11536multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11537                                         X86VectorVTInfo _, RegisterClass GRC> {
11538  let Predicates = [HasDQI] in {
11539    def rri : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11540        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11541        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11542        [(set _.RC:$dst,
11543            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11544        EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
11545
11546    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11547                                    _.ScalarLdFrag, imm>, TA, PD;
11548  }
11549}
11550
11551defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11552                                     extloadi8>, TA, PD, WIG;
11553defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11554                                     extloadi16>, TB, PD, WIG;
11555defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11556defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
11557
11558let Predicates = [HasAVX512, NoBWI] in {
11559  def : Pat<(X86pinsrb VR128:$src1,
11560                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11561                       timm:$src3),
11562            (VPINSRBrri VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11563                        timm:$src3)>;
11564}
11565
11566let Predicates = [HasBWI] in {
11567  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11568            (VPINSRBZrri VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11569                         GR8:$src2, sub_8bit), timm:$src3)>;
11570  def : Pat<(X86pinsrb VR128:$src1,
11571                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11572                       timm:$src3),
11573            (VPINSRBZrri VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11574                         timm:$src3)>;
11575}
11576
11577// Prefer native FP16 instructions when available: the negative AddedComplexity below keeps these patterns as low-priority fallbacks.
11578let Predicates = [HasBWI], AddedComplexity = -10 in {
11579  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrmi (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11580  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmri addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11581  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrri (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11582  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrri (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11583}
11584
11585//===----------------------------------------------------------------------===//
11586// VSHUFPS - VSHUFPD Operations
11587//===----------------------------------------------------------------------===//
11588
11589multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11590  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11591                                    SchedWriteFShuffle>,
11592                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11593                                    TA, EVEX, VVVV;
11594}
11595
11596defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
11597defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
11598
11599//===----------------------------------------------------------------------===//
11600// AVX-512 - Byte shift Left/Right
11601//===----------------------------------------------------------------------===//
11602
11603multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11604                               Format MRMm, string OpcodeStr,
11605                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11606  def ri : AVX512<opc, MRMr,
11607             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11608             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11609             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11610             Sched<[sched]>;
11611  def mi : AVX512<opc, MRMm,
11612           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11613           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11614           [(set _.RC:$dst,(_.VT (OpNode
11615                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11616                                 (i8 timm:$src2))))]>,
11617           Sched<[sched.Folded, sched.ReadAfterFold]>;
11618}
11619
11620multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11621                                   Format MRMm, string OpcodeStr,
11622                                   X86SchedWriteWidths sched, Predicate prd>{
11623  let Predicates = [prd] in
11624    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11625                                 sched.ZMM, v64i8_info>, EVEX_V512;
11626  let Predicates = [prd, HasVLX] in {
11627    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11628                                    sched.YMM, v32i8x_info>, EVEX_V256;
11629    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11630                                    sched.XMM, v16i8x_info>, EVEX_V128;
11631  }
11632}
11633defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11634                                       SchedWriteShuffle, HasBWI>,
11635                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
11636defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11637                                       SchedWriteShuffle, HasBWI>,
11638                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
11639
11640multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11641                                string OpcodeStr, X86FoldableSchedWrite sched,
11642                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11643  let isCommutable = 1 in
11644  def rr : AVX512BI<opc, MRMSrcReg,
11645             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11646             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11647             [(set _dst.RC:$dst,(_dst.VT
11648                                (OpNode (_src.VT _src.RC:$src1),
11649                                        (_src.VT _src.RC:$src2))))]>,
11650             Sched<[sched]>;
11651  def rm : AVX512BI<opc, MRMSrcMem,
11652           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11653           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11654           [(set _dst.RC:$dst,(_dst.VT
11655                              (OpNode (_src.VT _src.RC:$src1),
11656                              (_src.VT (bitconvert
11657                                        (_src.LdFrag addr:$src2))))))]>,
11658           Sched<[sched.Folded, sched.ReadAfterFold]>;
11659}
11660
11661multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11662                                    string OpcodeStr, X86SchedWriteWidths sched,
11663                                    Predicate prd> {
11664  let Predicates = [prd] in
11665    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11666                                  v8i64_info, v64i8_info>, EVEX_V512;
11667  let Predicates = [prd, HasVLX] in {
11668    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11669                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11670    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11671                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11672  }
11673}
11674
11675defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11676                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
11677
11678// Transforms to swizzle a VPTERNLOG immediate to enable better matching when
11679// the memory operand isn't in the right place.
11680def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11681  // Convert a VPTERNLOG immediate by swapping src1 and src3.
11682  uint8_t Imm = N->getZExtValue();
11683  // Swap bits 1/4 and 3/6.
11684  uint8_t NewImm = Imm & 0xa5;
11685  if (Imm & 0x02) NewImm |= 0x10;
11686  if (Imm & 0x10) NewImm |= 0x02;
11687  if (Imm & 0x08) NewImm |= 0x40;
11688  if (Imm & 0x40) NewImm |= 0x08;
11689  return getI8Imm(NewImm, SDLoc(N));
11690}]>;
11691def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11692  // Convert a VPTERNLOG immediate by swapping src1 and src2.
11693  uint8_t Imm = N->getZExtValue();
11694  // Swap bits 2/4 and 3/5.
11695  uint8_t NewImm = Imm & 0xc3;
11696  if (Imm & 0x04) NewImm |= 0x10;
11697  if (Imm & 0x10) NewImm |= 0x04;
11698  if (Imm & 0x08) NewImm |= 0x20;
11699  if (Imm & 0x20) NewImm |= 0x08;
11700  return getI8Imm(NewImm, SDLoc(N));
11701}]>;
11702def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11703  // Convert a VPTERNLOG immediate by swapping src2 and src3.
11704  uint8_t Imm = N->getZExtValue();
11705  // Swap bits 1/2 and 5/6.
11706  uint8_t NewImm = Imm & 0x99;
11707  if (Imm & 0x02) NewImm |= 0x04;
11708  if (Imm & 0x04) NewImm |= 0x02;
11709  if (Imm & 0x20) NewImm |= 0x40;
11710  if (Imm & 0x40) NewImm |= 0x20;
11711  return getI8Imm(NewImm, SDLoc(N));
11712}]>;
11713def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11714  // Convert a VPTERNLOG immediate by moving src1 to the end.
11715  uint8_t Imm = N->getZExtValue();
11716  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11717  uint8_t NewImm = Imm & 0x81;
11718  if (Imm & 0x02) NewImm |= 0x04;
11719  if (Imm & 0x04) NewImm |= 0x10;
11720  if (Imm & 0x08) NewImm |= 0x40;
11721  if (Imm & 0x10) NewImm |= 0x02;
11722  if (Imm & 0x20) NewImm |= 0x08;
11723  if (Imm & 0x40) NewImm |= 0x20;
11724  return getI8Imm(NewImm, SDLoc(N));
11725}]>;
11726def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11727  // Convert a VPTERNLOG immediate by moving src3 to the beginning.
11728  uint8_t Imm = N->getZExtValue();
11729  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11730  uint8_t NewImm = Imm & 0x81;
11731  if (Imm & 0x02) NewImm |= 0x10;
11732  if (Imm & 0x04) NewImm |= 0x02;
11733  if (Imm & 0x08) NewImm |= 0x20;
11734  if (Imm & 0x10) NewImm |= 0x04;
11735  if (Imm & 0x20) NewImm |= 0x40;
11736  if (Imm & 0x40) NewImm |= 0x08;
11737  return getI8Imm(NewImm, SDLoc(N));
11738}]>;
11739
11740multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11741                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11742                          string Name>{
11743  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11744  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11745                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11746                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11747                      (OpNode (_.VT _.RC:$src1),
11748                              (_.VT _.RC:$src2),
11749                              (_.VT _.RC:$src3),
11750                              (i8 timm:$src4)), 1, 1>,
11751                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11752  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11753                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11754                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11755                    (OpNode (_.VT _.RC:$src1),
11756                            (_.VT _.RC:$src2),
11757                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11758                            (i8 timm:$src4)), 1, 0>,
11759                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11760                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11761  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11762                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11763                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11764                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11765                    (OpNode (_.VT _.RC:$src1),
11766                            (_.VT _.RC:$src2),
11767                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11768                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11769                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11770                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11771  }// Constraints = "$src1 = $dst"
11772
11773  // Additional patterns for matching passthru operand in other positions.
11774  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11775                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11776                   _.RC:$src1)),
11777            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11778             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11779  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11780                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11781                   _.RC:$src1)),
11782            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11783             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11784
11785  // Additional patterns for matching zero masking with loads in other
11786  // positions.
11787  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11788                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11789                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11790                   _.ImmAllZerosV)),
11791            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11792             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11793  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11794                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11795                    _.RC:$src2, (i8 timm:$src4)),
11796                   _.ImmAllZerosV)),
11797            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11798             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11799
11800  // Additional patterns for matching masked loads with different
11801  // operand orders.
11802  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11803                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11804                    _.RC:$src2, (i8 timm:$src4)),
11805                   _.RC:$src1)),
11806            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11807             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11808  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11809                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11810                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11811                   _.RC:$src1)),
11812            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11813             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11814  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11815                   (OpNode _.RC:$src2, _.RC:$src1,
11816                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11817                   _.RC:$src1)),
11818            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11819             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11820  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11821                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11822                    _.RC:$src1, (i8 timm:$src4)),
11823                   _.RC:$src1)),
11824            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11825             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11826  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11827                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11828                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11829                   _.RC:$src1)),
11830            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11831             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11832
11833  // Additional patterns for matching zero masking with broadcasts in other
11834  // positions.
11835  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11836                   (OpNode (_.BroadcastLdFrag addr:$src3),
11837                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11838                   _.ImmAllZerosV)),
11839            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11840             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11841             (VPTERNLOG321_imm8 timm:$src4))>;
11842  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11843                   (OpNode _.RC:$src1,
11844                    (_.BroadcastLdFrag addr:$src3),
11845                    _.RC:$src2, (i8 timm:$src4)),
11846                   _.ImmAllZerosV)),
11847            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11848             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11849             (VPTERNLOG132_imm8 timm:$src4))>;
11850
11851  // Additional patterns for matching masked broadcasts with different
11852  // operand orders.
11853  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11854                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11855                    _.RC:$src2, (i8 timm:$src4)),
11856                   _.RC:$src1)),
11857            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11858             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11859  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11860                   (OpNode (_.BroadcastLdFrag addr:$src3),
11861                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11862                   _.RC:$src1)),
11863            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11864             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11865  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11866                   (OpNode _.RC:$src2, _.RC:$src1,
11867                    (_.BroadcastLdFrag addr:$src3),
11868                    (i8 timm:$src4)), _.RC:$src1)),
11869            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11870             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11871  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11872                   (OpNode _.RC:$src2,
11873                    (_.BroadcastLdFrag addr:$src3),
11874                    _.RC:$src1, (i8 timm:$src4)),
11875                   _.RC:$src1)),
11876            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11877             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11878  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11879                   (OpNode (_.BroadcastLdFrag addr:$src3),
11880                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11881                   _.RC:$src1)),
11882            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11883             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11884}
11885
11886multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11887                                 AVX512VLVectorVTInfo _> {
11888  let Predicates = [HasAVX512] in
11889    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11890                               _.info512, NAME>, EVEX_V512;
11891  let Predicates = [HasAVX512, HasVLX] in {
11892    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11893                               _.info128, NAME>, EVEX_V128;
11894    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11895                               _.info256, NAME>, EVEX_V256;
11896  }
11897}
11898
11899defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11900                                        avx512vl_i32_info>;
11901defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11902                                        avx512vl_i64_info>, REX_W;
11903
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The immediate value 15
// is chosen so that the result depends only on src0, but we still use the same
// register for all three operands to prevent a false dependency.
11908// TODO: We should maybe have a more generalized algorithm for folding to
11909// vpternlog.
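//
// For reference: vpternlog computes each result bit as imm8[(a<<2)|(b<<1)|c],
// where a, b and c are the corresponding bits of the first, second and third
// source operand. With imm8 = 15 (0b00001111) the result bit is set exactly
// when a is clear, i.e. the instruction computes ~src0 regardless of the
// other two operands.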
11910let Predicates = [HasAVX512] in {
11911  def : Pat<(v64i8 (vnot VR512:$src)),
11912            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11913  def : Pat<(v32i16 (vnot VR512:$src)),
11914            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11915  def : Pat<(v16i32 (vnot VR512:$src)),
11916            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11917  def : Pat<(v8i64 (vnot VR512:$src)),
11918            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11919}
11920
11921let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
11922  def : Pat<(v16i8 (vnot VR128X:$src)),
11923            (EXTRACT_SUBREG
11924             (VPTERNLOGQZrri
11925              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11926              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11927              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11928              (i8 15)), sub_xmm)>;
11929  def : Pat<(v8i16 (vnot VR128X:$src)),
11930            (EXTRACT_SUBREG
11931             (VPTERNLOGQZrri
11932              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11933              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11934              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11935              (i8 15)), sub_xmm)>;
11936  def : Pat<(v4i32 (vnot VR128X:$src)),
11937            (EXTRACT_SUBREG
11938             (VPTERNLOGQZrri
11939              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11940              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11941              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11942              (i8 15)), sub_xmm)>;
11943  def : Pat<(v2i64 (vnot VR128X:$src)),
11944            (EXTRACT_SUBREG
11945             (VPTERNLOGQZrri
11946              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11947              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11948              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11949              (i8 15)), sub_xmm)>;
11950
11951  def : Pat<(v32i8 (vnot VR256X:$src)),
11952            (EXTRACT_SUBREG
11953             (VPTERNLOGQZrri
11954              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11955              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11956              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11957              (i8 15)), sub_ymm)>;
11958  def : Pat<(v16i16 (vnot VR256X:$src)),
11959            (EXTRACT_SUBREG
11960             (VPTERNLOGQZrri
11961              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11962              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11963              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11964              (i8 15)), sub_ymm)>;
11965  def : Pat<(v8i32 (vnot VR256X:$src)),
11966            (EXTRACT_SUBREG
11967             (VPTERNLOGQZrri
11968              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11969              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11970              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11971              (i8 15)), sub_ymm)>;
11972  def : Pat<(v4i64 (vnot VR256X:$src)),
11973            (EXTRACT_SUBREG
11974             (VPTERNLOGQZrri
11975              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11976              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11977              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11978              (i8 15)), sub_ymm)>;
11979}
11980
11981let Predicates = [HasVLX] in {
11982  def : Pat<(v16i8 (vnot VR128X:$src)),
11983            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11984  def : Pat<(v8i16 (vnot VR128X:$src)),
11985            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11986  def : Pat<(v4i32 (vnot VR128X:$src)),
11987            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11988  def : Pat<(v2i64 (vnot VR128X:$src)),
11989            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11990
11991  def : Pat<(v32i8 (vnot VR256X:$src)),
11992            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11993  def : Pat<(v16i16 (vnot VR256X:$src)),
11994            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11995  def : Pat<(v8i32 (vnot VR256X:$src)),
11996            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11997  def : Pat<(v4i64 (vnot VR256X:$src)),
11998            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11999}
12000
12001//===----------------------------------------------------------------------===//
12002// AVX-512 - FixupImm
12003//===----------------------------------------------------------------------===//
12004
12005multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12006                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12007                                  X86VectorVTInfo TblVT>{
12008  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12009      Uses = [MXCSR], mayRaiseFPException = 1 in {
12010    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12011                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12012                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12013                        (X86VFixupimm (_.VT _.RC:$src1),
12014                                      (_.VT _.RC:$src2),
12015                                      (TblVT.VT _.RC:$src3),
12016                                      (i32 timm:$src4))>, Sched<[sched]>;
12017    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12018                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12019                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12020                      (X86VFixupimm (_.VT _.RC:$src1),
12021                                    (_.VT _.RC:$src2),
12022                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12023                                    (i32 timm:$src4))>,
12024                      Sched<[sched.Folded, sched.ReadAfterFold]>;
12025    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12026                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12027                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12028                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
12029                      (X86VFixupimm (_.VT _.RC:$src1),
12030                                    (_.VT _.RC:$src2),
12031                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12032                                    (i32 timm:$src4))>,
12033                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12034  } // Constraints = "$src1 = $dst"
12035}
12036
12037multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12038                                      X86FoldableSchedWrite sched,
12039                                      X86VectorVTInfo _, X86VectorVTInfo TblVT> {
12040let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12041  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12042                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12043                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12044                      "$src2, $src3, {sae}, $src4",
12045                      (X86VFixupimmSAE (_.VT _.RC:$src1),
12046                                       (_.VT _.RC:$src2),
12047                                       (TblVT.VT _.RC:$src3),
12048                                       (i32 timm:$src4))>,
12049                      EVEX_B, Sched<[sched]>;
12050  }
12051}
12052
12053multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12054                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12055                                  X86VectorVTInfo _src3VT> {
12056  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12057      ExeDomain = _.ExeDomain in {
12058    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12059                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12060                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12061                      (X86VFixupimms (_.VT _.RC:$src1),
12062                                     (_.VT _.RC:$src2),
12063                                     (_src3VT.VT _src3VT.RC:$src3),
12064                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
12065    let Uses = [MXCSR] in
12066    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12067                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12068                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12069                      "$src2, $src3, {sae}, $src4",
12070                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
12071                                        (_.VT _.RC:$src2),
12072                                        (_src3VT.VT _src3VT.RC:$src3),
12073                                        (i32 timm:$src4))>,
12074                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12075    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12076                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12077                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12078                     (X86VFixupimms (_.VT _.RC:$src1),
12079                                    (_.VT _.RC:$src2),
12080                                    (_src3VT.VT (scalar_to_vector
12081                                              (_src3VT.ScalarLdFrag addr:$src3))),
12082                                    (i32 timm:$src4))>,
12083                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12084  }
12085}
12086
12087multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12088                                      AVX512VLVectorVTInfo _Vec,
12089                                      AVX512VLVectorVTInfo _Tbl> {
12090  let Predicates = [HasAVX512] in
12091    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", sched.ZMM, _Vec.info512, _Tbl.info512>,
12092                avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12093                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12094                                EVEX, VVVV, EVEX_V512;
12095  let Predicates = [HasAVX512, HasVLX] in {
12096    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12097                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12098                            EVEX, VVVV, EVEX_V128;
12099    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12100                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12101                            EVEX, VVVV, EVEX_V256;
12102  }
12103}
12104
12105defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12106                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12107                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
12108defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12109                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12110                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
12111defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12112                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12113defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12114                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12115
// Patterns used to select AVX-512 scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after scalar fp instructions
// like addss or mulss.
12124//
12125// For example, given the following code:
12126//   __m128 foo(__m128 A, __m128 B) {
12127//     A[0] += B[0];
12128//     return A;
12129//   }
12130//
12131// Previously we generated:
12132//   addss %xmm0, %xmm1
12133//   movss %xmm1, %xmm0
12134//
12135// We now generate:
12136//   addss %xmm1, %xmm0
12137//
12138// (2) a vector packed single/double fp operation followed by a vector insert
12139//
12140// The effect is that the backend converts the packed fp instruction
12141// followed by a vector insert into a single SSE scalar fp instruction.
12142//
12143// For example, given the following code:
12144//   __m128 foo(__m128 A, __m128 B) {
12145//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
12147//   }
12148//
12149// Previously we generated:
12150//   addps %xmm0, %xmm1
12151//   movss %xmm1, %xmm0
12152//
12153// We now generate:
12154//   addss %xmm1, %xmm0
12155
12156// TODO: Some canonicalization in lowering would simplify the number of
12157// patterns we have to try to match.
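//
// As an illustration, the first "extracted scalar math op" pattern below maps
//   (X86Movss $dst, (scalar_to_vector (fadd (extractelt $dst, 0), $src)))
// onto a single VADDSSZrr_Int (for the ADDSS/v4f32 instantiation), so no
// separate blend of the result is emitted.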
12158multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12159                                          string OpcPrefix, SDNode MoveNode,
12160                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12161  let Predicates = [HasAVX512] in {
12162    // extracted scalar math op with insert via movss
12163    def : Pat<(MoveNode
12164               (_.VT VR128X:$dst),
12165               (_.VT (scalar_to_vector
12166                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12167                          _.FRC:$src)))),
12168              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12169               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12170    def : Pat<(MoveNode
12171               (_.VT VR128X:$dst),
12172               (_.VT (scalar_to_vector
12173                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12174                          (_.ScalarLdFrag addr:$src))))),
12175              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12176
12177    // extracted masked scalar math op with insert via movss
12178    def : Pat<(MoveNode (_.VT VR128X:$src1),
12179               (scalar_to_vector
12180                (X86selects_mask VK1WM:$mask,
12181                            (MaskedOp (_.EltVT
12182                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12183                                      _.FRC:$src2),
12184                            _.FRC:$src0))),
12185              (!cast<Instruction>("V"#OpcPrefix#"Zrrk_Int")
12186               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12187               VK1WM:$mask, _.VT:$src1,
12188               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12189    def : Pat<(MoveNode (_.VT VR128X:$src1),
12190               (scalar_to_vector
12191                (X86selects_mask VK1WM:$mask,
12192                            (MaskedOp (_.EltVT
12193                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12194                                      (_.ScalarLdFrag addr:$src2)),
12195                            _.FRC:$src0))),
12196              (!cast<Instruction>("V"#OpcPrefix#"Zrmk_Int")
12197               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12198               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12199
    // extracted masked scalar math op with zero masking and insert via movss
12201    def : Pat<(MoveNode (_.VT VR128X:$src1),
12202               (scalar_to_vector
12203                (X86selects_mask VK1WM:$mask,
12204                            (MaskedOp (_.EltVT
12205                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12206                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12207      (!cast<I>("V"#OpcPrefix#"Zrrkz_Int")
12208          VK1WM:$mask, _.VT:$src1,
12209          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12210    def : Pat<(MoveNode (_.VT VR128X:$src1),
12211               (scalar_to_vector
12212                (X86selects_mask VK1WM:$mask,
12213                            (MaskedOp (_.EltVT
12214                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12215                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12216      (!cast<I>("V"#OpcPrefix#"Zrmkz_Int") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12217  }
12218}
12219
12220defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12221defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12222defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12223defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12224
12225defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12226defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12227defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12228defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12229
12230defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12231defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12232defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12233defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12234
12235multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12236                                             SDNode Move, X86VectorVTInfo _> {
12237  let Predicates = [HasAVX512] in {
12238    def : Pat<(_.VT (Move _.VT:$dst,
12239                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12240              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12241  }
12242}
12243
12244defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12245defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12246defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12247
12248//===----------------------------------------------------------------------===//
12249// AES instructions
12250//===----------------------------------------------------------------------===//
12251
12252multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12253  let Predicates = [HasVLX, HasVAES] in {
12254    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12255                                  !cast<Intrinsic>(IntPrefix),
12256                                  loadv2i64, 0, VR128X, i128mem>,
12257                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12258    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12259                                  !cast<Intrinsic>(IntPrefix#"_256"),
12260                                  loadv4i64, 0, VR256X, i256mem>,
12261                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12262    }
12263    let Predicates = [HasAVX512, HasVAES] in
12264    defm Z    : AESI_binop_rm_int<Op, OpStr,
12265                                  !cast<Intrinsic>(IntPrefix#"_512"),
12266                                  loadv8i64, 0, VR512, i512mem>,
12267                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12268}
12269
12270defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12271defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12272defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12273defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12274
12275//===----------------------------------------------------------------------===//
12276// PCLMUL instructions - Carry less multiplication
12277//===----------------------------------------------------------------------===//
12278
12279let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12280defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12281                              EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12282
12283let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12284defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12285                              EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12286
12287defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12288                                int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12289                                EVEX_CD8<64, CD8VF>, WIG;
12290}
12291
12292// Aliases
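// Reuse the generic vpclmulqdq alias multiclass to provide the
// fixed-immediate assembler mnemonics for the EVEX-encoded forms.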
12293defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12294defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12295defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12296
12297//===----------------------------------------------------------------------===//
12298// VBMI2
12299//===----------------------------------------------------------------------===//
12300
12301multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12302                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12303  let Constraints = "$src1 = $dst",
12304      ExeDomain   = VTI.ExeDomain in {
12305    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12306                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12307                "$src3, $src2", "$src2, $src3",
12308                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12309                T8, PD, EVEX, VVVV, Sched<[sched]>;
12310    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12311                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12312                "$src3, $src2", "$src2, $src3",
12313                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12314                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12315                T8, PD, EVEX, VVVV,
12316                Sched<[sched.Folded, sched.ReadAfterFold]>;
12317  }
12318}
12319
12320multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12321                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12322         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12323  let Constraints = "$src1 = $dst",
12324      ExeDomain   = VTI.ExeDomain in
12325  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12326              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12327              "${src3}"#VTI.BroadcastStr#", $src2",
12328              "$src2, ${src3}"#VTI.BroadcastStr,
12329              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12330               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12331              T8, PD, EVEX, VVVV, EVEX_B,
12332              Sched<[sched.Folded, sched.ReadAfterFold]>;
12333}
12334
12335multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12336                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12337  let Predicates = [HasVBMI2] in
12338  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12339                                   EVEX_V512;
12340  let Predicates = [HasVBMI2, HasVLX] in {
12341    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12342                                   EVEX_V256;
12343    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12344                                   EVEX_V128;
12345  }
12346}
12347
12348multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12349                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12350  let Predicates = [HasVBMI2] in
12351  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12352                                    EVEX_V512;
12353  let Predicates = [HasVBMI2, HasVLX] in {
12354    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12355                                    EVEX_V256;
12356    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12357                                    EVEX_V128;
12358  }
12359}
12360multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12361                           SDNode OpNode, X86SchedWriteWidths sched> {
12362  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12363             avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12364  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12365             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12366  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12367             avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12368}
12369
12370multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12371                           SDNode OpNode, X86SchedWriteWidths sched> {
12372  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12373             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12374             REX_W, EVEX_CD8<16, CD8VF>;
12375  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12376             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12377  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12378             sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
12379}
12380
12381// Concat & Shift
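// Each result element is taken from a double-width value formed by
// concatenating two source elements and shifting it; the "v" (variable) forms
// take the per-element shift amount from a third vector operand, the others
// from an immediate.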
12382defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12383defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12384defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12385defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12386
12387// Compress
12388defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12389                                         avx512vl_i8_info, HasVBMI2>, EVEX;
12390defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12391                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12392// Expand
12393defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12394                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12395defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12396                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12397
12398//===----------------------------------------------------------------------===//
12399// VNNI
12400//===----------------------------------------------------------------------===//
12401
12402let Constraints = "$src1 = $dst" in
12403multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12404                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12405                    bit IsCommutable> {
12406  let ExeDomain = VTI.ExeDomain in {
12407  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12408                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12409                                   "$src3, $src2", "$src2, $src3",
12410                                   (VTI.VT (OpNode VTI.RC:$src1,
12411                                            VTI.RC:$src2, VTI.RC:$src3)),
12412                                   IsCommutable, IsCommutable>,
12413                                   EVEX, VVVV, T8, Sched<[sched]>;
12414  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12415                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12416                                   "$src3, $src2", "$src2, $src3",
12417                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12418                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12419                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
12420                                   Sched<[sched.Folded, sched.ReadAfterFold,
12421                                          sched.ReadAfterFold]>;
12422  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12423                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12424                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12425                                   "$src2, ${src3}"#VTI.BroadcastStr,
12426                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12427                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12428                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12429                                   T8, Sched<[sched.Folded, sched.ReadAfterFold,
12430                                                sched.ReadAfterFold]>;
12431  }
12432}
12433
12434multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12435                       X86SchedWriteWidths sched, bit IsCommutable,
12436                       list<Predicate> prds, list<Predicate> prds512> {
12437  let Predicates = prds512 in
12438  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12439                           IsCommutable>, EVEX_V512;
12440  let Predicates = prds in {
12441    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12442                           IsCommutable>, EVEX_V256;
12443    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12444                           IsCommutable>, EVEX_V128;
12445  }
12446}
12447
12448// FIXME: Is there a better scheduler class for VPDP?
12449defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0,
12450                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
12451defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0,
12452                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
12453defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1,
12454                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
12455defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1,
12456                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
12457
12458// Patterns to match VPDPWSSD from existing instructions/intrinsics.
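// i.e. fold (add acc, (vpmaddwd a, b)) into (vpdpwssd acc, a, b). The
// X86vpmaddwd_su fragment only matches vpmaddwd nodes with a single use, so
// the intermediate multiply result is not needed separately.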
12459let Predicates = [HasVNNI] in {
12460  def : Pat<(v16i32 (add VR512:$src1,
12461                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12462            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12463  def : Pat<(v16i32 (add VR512:$src1,
12464                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12465            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12466}
12467let Predicates = [HasVNNI,HasVLX] in {
12468  def : Pat<(v8i32 (add VR256X:$src1,
12469                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12470            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12471  def : Pat<(v8i32 (add VR256X:$src1,
12472                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12473            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12474  def : Pat<(v4i32 (add VR128X:$src1,
12475                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12476            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12477  def : Pat<(v4i32 (add VR128X:$src1,
12478                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12479            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12480}
12481
12482//===----------------------------------------------------------------------===//
12483// Bit Algorithms
12484//===----------------------------------------------------------------------===//
12485
12486// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12487defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12488                                   avx512vl_i8_info, HasBITALG>;
12489defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12490                                   avx512vl_i16_info, HasBITALG>, REX_W;
12491
12492defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12493defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12494
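// vpshufbitqmb: each destination mask bit selects one bit out of the
// corresponding 64-bit lane of src1, using the low six bits of the matching
// byte of src2 as the bit index.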
12495multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12496  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12497                                (ins VTI.RC:$src1, VTI.RC:$src2),
12498                                "vpshufbitqmb",
12499                                "$src2, $src1", "$src1, $src2",
12500                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12501                                (VTI.VT VTI.RC:$src2)),
12502                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12503                                (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12504                                Sched<[sched]>;
12505  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12506                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12507                                "vpshufbitqmb",
12508                                "$src2, $src1", "$src1, $src2",
12509                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12510                                (VTI.VT (VTI.LdFrag addr:$src2))),
12511                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12512                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12513                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12514                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12515}
12516
12517multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12518  let Predicates = [HasBITALG] in
12519  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12520  let Predicates = [HasBITALG, HasVLX] in {
12521    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12522    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12523  }
12524}
12525
12526// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12527defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12528
12529//===----------------------------------------------------------------------===//
12530// GFNI
12531//===----------------------------------------------------------------------===//
12532
12533multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12534                                   X86SchedWriteWidths sched> {
12535  let Predicates = [HasGFNI, HasAVX512] in
12536  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12537                                EVEX_V512;
12538  let Predicates = [HasGFNI, HasVLX] in {
12539    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12540                                EVEX_V256;
12541    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12542                                EVEX_V128;
12543  }
12544}
12545
12546defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12547                                          SchedWriteVecALU>,
12548                                          EVEX_CD8<8, CD8VF>, T8;
12549
12550multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12551                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12552                                      X86VectorVTInfo BcstVTI>
12553           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12554  let ExeDomain = VTI.ExeDomain in
12555  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12556                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12557                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12558                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12559                (OpNode (VTI.VT VTI.RC:$src1),
12560                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12561                 (i8 timm:$src3))>, EVEX_B,
12562                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12563}
12564
12565multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12566                                     X86SchedWriteWidths sched> {
12567  let Predicates = [HasGFNI, HasAVX512] in
12568  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12569                                           v64i8_info, v8i64_info>, EVEX_V512;
12570  let Predicates = [HasGFNI, HasVLX] in {
12571    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12572                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12573    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12574                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12575  }
12576}
12577
12578defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12579                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12580                         EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W, AVX512AIi8Base;
12581defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12582                         X86GF2P8affineqb, SchedWriteVecIMul>,
12583                         EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W, AVX512AIi8Base;
12584
12585
12586//===----------------------------------------------------------------------===//
12587// AVX5124FMAPS
12588//===----------------------------------------------------------------------===//
12589
12590let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12591    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12592defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12593                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12594                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12595                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12596                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12597
12598defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12599                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12600                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12601                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12602                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12603
12604defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12605                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12606                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12607                    []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12608                    Sched<[SchedWriteFMA.Scl.Folded]>;
12609
12610defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12611                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12612                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12613                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12614                     Sched<[SchedWriteFMA.Scl.Folded]>;
12615}
12616
12617//===----------------------------------------------------------------------===//
12618// AVX5124VNNIW
12619//===----------------------------------------------------------------------===//
12620
12621let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12622    Constraints = "$src1 = $dst" in {
12623defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12624                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12625                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12626                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12627                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12628
12629defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12630                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12631                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12632                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12633                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12634}
12635
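// Pseudo instructions used to spill and reload a VK16 mask-register pair
// (e.g. the two mask results of vp2intersect); they are expected to be
// expanded into a pair of 16-bit mask moves later.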
12636let hasSideEffects = 0 in {
12637  let mayStore = 1, SchedRW = [WriteFStoreX] in
12638  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12639  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12640  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12641}
12642
12643//===----------------------------------------------------------------------===//
12644// VP2INTERSECT
12645//===----------------------------------------------------------------------===//
12646
12647multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12648  def rr : I<0x68, MRMSrcReg,
12649                  (outs _.KRPC:$dst),
12650                  (ins _.RC:$src1, _.RC:$src2),
12651                  !strconcat("vp2intersect", _.Suffix,
12652                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12653                  [(set _.KRPC:$dst, (X86vp2intersect
12654                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12655                  EVEX, VVVV, T8, XD, Sched<[sched]>;
12656
12657  def rm : I<0x68, MRMSrcMem,
12658                  (outs _.KRPC:$dst),
12659                  (ins  _.RC:$src1, _.MemOp:$src2),
12660                  !strconcat("vp2intersect", _.Suffix,
12661                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12662                  [(set _.KRPC:$dst, (X86vp2intersect
12663                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12664                  EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12665                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12666
12667  def rmb : I<0x68, MRMSrcMem,
12668                  (outs _.KRPC:$dst),
12669                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12670                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12671                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12672                  [(set _.KRPC:$dst, (X86vp2intersect
12673                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12674                  EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12675                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12676}
12677
12678multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12679  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12680    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12681
12682  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12683    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12684    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12685  }
12686}
12687
12688let ExeDomain = SSEPackedInt in {
12689defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12690defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12691}
12692
12693let ExeDomain = SSEPackedSingle in
12694defm VCVTNE2PS2BF16 : avx512_binop_all<0x72, "vcvtne2ps2bf16",
12695                                       SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12696                                       avx512vl_f32_info, avx512vl_bf16_info,
12697                                       X86vfpround2, [HasBF16], [HasVLX, HasBF16]>, T8, XD,
12698                                       EVEX_CD8<32, CD8VF>;
12699
12700// Truncate Float to BFloat16, Float16 to BF8/HF8[,S]
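// Since the 128-bit and 256-bit source forms both write an xmm destination,
// the "{x}"/"{y}" mnemonic suffixes and the InstAliases inside the multiclass
// disambiguate the operand size in assembly.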
12701multiclass avx512_cvt_trunc_ne<bits<8> opc, string OpcodeStr,
12702                             AVX512VLVectorVTInfo vt_dst,
12703                             AVX512VLVectorVTInfo vt_src,
12704                             X86SchedWriteWidths sched,
12705                             SDPatternOperator OpNode,
12706                             SDPatternOperator MaskOpNode,
12707                             list<Predicate> prds, list<Predicate> prds512,
12708                             PatFrag bcast128 = vt_src.info128.BroadcastLdFrag,
12709                             PatFrag loadVT128 = vt_src.info128.LdFrag,
12710                             RegisterClass maskRC128 = vt_src.info128.KRCWM> {
12711  let ExeDomain = SSEPackedSingle in {
12712  let Predicates = prds512, Uses = []<Register>, mayRaiseFPException = 0 in {
12713    defm Z : avx512_vcvt_fp<opc, OpcodeStr, vt_dst.info256, vt_src.info512,
12714                            OpNode, OpNode, sched.ZMM>, EVEX_V512;
12715  }
12716  let Predicates = prds in {
12717    let Uses = []<Register>, mayRaiseFPException = 0 in {
12718    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, vt_dst.info128, vt_src.info128,
12719                               null_frag, null_frag, sched.XMM, vt_src.info128.BroadcastStr, "{x}", f128mem,
12720                               vt_src.info128.KRCWM>, EVEX_V128;
12721    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, vt_dst.info128, vt_src.info256,
12722                               OpNode, OpNode,
12723                               sched.YMM, vt_src.info256.BroadcastStr, "{y}">, EVEX_V256;
12724    }
12725  } // Predicates = prds
12726  } // ExeDomain = SSEPackedSingle
12727
12728  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12729                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12730                  VR128X:$src), 0>;
12731  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12732                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12733                  f128mem:$src), 0, "intel">;
12734  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12735                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12736                  VR256X:$src), 0>;
12737  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12738                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12739                  f256mem:$src), 0, "intel">;
12740
12741  let Predicates = prds in {
    // Special patterns that allow MaskOpNode to be used for masking the 128-bit
    // version; its generic instruction patterns have been disabled with null_frag.
12744    def : Pat<(vt_dst.info128.VT (OpNode (vt_src.info128.VT VR128X:$src))),
12745              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
12746    def : Pat<(MaskOpNode (vt_src.info128.VT VR128X:$src), (vt_dst.info128.VT VR128X:$src0),
12747                           maskRC128:$mask),
12748              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
12749    def : Pat<(MaskOpNode (vt_src.info128.VT VR128X:$src), vt_dst.info128.ImmAllZerosV,
12750                           maskRC128:$mask),
12751              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
12752
12753    def : Pat<(vt_dst.info128.VT (OpNode (loadVT128 addr:$src))),
12754              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
12755    def : Pat<(MaskOpNode (loadVT128 addr:$src), (vt_dst.info128.VT VR128X:$src0),
12756                           maskRC128:$mask),
12757              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
12758    def : Pat<(MaskOpNode (loadVT128 addr:$src), vt_dst.info128.ImmAllZerosV,
12759                           maskRC128:$mask),
12760              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
12761
12762    def : Pat<(vt_dst.info128.VT (OpNode (vt_src.info128.VT (bcast128 addr:$src)))),
12763              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
12764    def : Pat<(MaskOpNode (vt_src.info128.VT (bcast128 addr:$src)),
12765                            (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
12766              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
12767    def : Pat<(MaskOpNode (vt_src.info128.VT (bcast128 addr:$src)),
12768                            vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
12769              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
12770  }
12771}
12772
12773defm VCVTNEPS2BF16 : avx512_cvt_trunc_ne<0x72, "vcvtneps2bf16",
12774                                         avx512vl_bf16_info, avx512vl_f32_info,
12775                                         SchedWriteCvtPD2PS, X86cvtneps2bf16,
12776                                         X86mcvtneps2bf16, [HasBF16, HasVLX],
12777                                         [HasBF16]>, T8, XS, EVEX_CD8<32, CD8VF>;
12778
12779let Predicates = [HasBF16, HasVLX] in {
12780  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12781            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12782  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12783            (VCVTNEPS2BF16Z128rm addr:$src)>;
12784
12785  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12786            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12787  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12788            (VCVTNEPS2BF16Z256rm addr:$src)>;
12789
12790  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12791            (VPBROADCASTWZ128rm addr:$src)>;
12792  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12793            (VPBROADCASTWZ256rm addr:$src)>;
12794
12795  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12796            (VPBROADCASTWZ128rr VR128X:$src)>;
12797  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12798            (VPBROADCASTWZ256rr VR128X:$src)>;
12799
12800  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12801            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12802  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12803            (VCVTNEPS2BF16Z256rm addr:$src)>;
12804
  // TODO: No scalar broadcast patterns because scalar bf16 is not a legal type yet.
12806}
12807
12808let Predicates = [HasBF16] in {
12809  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12810            (VPBROADCASTWZrm addr:$src)>;
12811
12812  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12813            (VPBROADCASTWZrr VR128X:$src)>;
12814
12815  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12816            (VCVTNEPS2BF16Zrr VR512:$src)>;
12817  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12818            (VCVTNEPS2BF16Zrm addr:$src)>;
  // TODO: No scalar broadcast patterns because scalar bf16 is not a legal type yet.
12820}
12821
12822let Constraints = "$src1 = $dst" in {
12823multiclass avx512_dpf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12824                             X86FoldableSchedWrite sched,
12825                             X86VectorVTInfo _, X86VectorVTInfo src_v> {
12826  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12827                           (ins src_v.RC:$src2, src_v.RC:$src3),
12828                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12829                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12830                           EVEX, VVVV, Sched<[sched]>;
12831
12832  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12833                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12834                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12835                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12836                               (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12837                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12838
12839  let mayLoad = 1, hasSideEffects = 0 in
12840  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12841                  (ins src_v.RC:$src2, f32mem:$src3),
12842                  OpcodeStr,
12843                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12844                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12845                  (null_frag)>,
12846                  EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12847
12848}
12849} // Constraints = "$src1 = $dst"
12850
12851multiclass avx512_dpf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12852                                AVX512VLVectorVTInfo _, list<Predicate> prds,
12853                                list<Predicate> prds512> {
12854  let Predicates = prds512 in {
12855    defm Z    : avx512_dpf16ps_rm<opc, OpcodeStr, OpNode, WriteFMAZ,
12856                                  avx512vl_f32_info.info512, _.info512>, EVEX_V512;
12857  }
12858  let Predicates = prds in {
12859    defm Z256 : avx512_dpf16ps_rm<opc, OpcodeStr, OpNode, WriteFMAY,
12860                                  v8f32x_info, _.info256>, EVEX_V256;
12861    defm Z128 : avx512_dpf16ps_rm<opc, OpcodeStr, OpNode, WriteFMAX,
12862                                  v4f32x_info, _.info128>, EVEX_V128;
12863  }
12864}
12865
12866let ExeDomain = SSEPackedSingle in
12867defm VDPBF16PS : avx512_dpf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, avx512vl_bf16_info,
12868                                      [HasVLX, HasBF16], [HasBF16]>,
12869                      T8, XS, EVEX_CD8<32, CD8VF>;
12870
12871//===----------------------------------------------------------------------===//
12872// AVX512FP16
12873//===----------------------------------------------------------------------===//
12874
12875let Predicates = [HasFP16] in {
// Move word (r/m16) to packed word
12877def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12878                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12879def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12880                      "vmovw\t{$src, $dst|$dst, $src}",
12881                      [(set VR128X:$dst,
12882                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12883                      T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12884
12885def : Pat<(f16 (bitconvert GR16:$src)),
12886          (f16 (COPY_TO_REGCLASS
12887                (VMOVW2SHrr
12888                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12889                FR16X))>;
12890def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12891          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12892def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12893          (VMOVW2SHrr GR32:$src)>;
12894// FIXME: We should really find a way to improve these patterns.
12895def : Pat<(v8i32 (X86vzmovl
12896                  (insert_subvector undef,
12897                                    (v4i32 (scalar_to_vector
12898                                            (and GR32:$src, 0xffff))),
12899                                    (iPTR 0)))),
12900          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12901def : Pat<(v16i32 (X86vzmovl
12902                   (insert_subvector undef,
12903                                     (v4i32 (scalar_to_vector
12904                                             (and GR32:$src, 0xffff))),
12905                                     (iPTR 0)))),
12906          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12907
12908def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12909          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12910
12911// The 128-bit vmovw instruction writes zeros in the high bits of the register.
12912def : Pat<(v8i16 (X86vzload16 addr:$src)),
12913          (VMOVWrm addr:$src)>;
12914def : Pat<(v16i16 (X86vzload16 addr:$src)),
12915          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12916
12917// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12918def : Pat<(v32i16 (X86vzload16 addr:$src)),
12919          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12920
12921def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12922          (VMOVWrm addr:$src)>;
12923def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12924          (VMOVWrm addr:$src)>;
12925def : Pat<(v8i32 (X86vzmovl
12926                  (insert_subvector undef,
12927                                    (v4i32 (scalar_to_vector
12928                                            (i32 (zextloadi16 addr:$src)))),
12929                                    (iPTR 0)))),
12930          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12931def : Pat<(v16i32 (X86vzmovl
12932                   (insert_subvector undef,
12933                                     (v4i32 (scalar_to_vector
12934                                             (i32 (zextloadi16 addr:$src)))),
12935                                     (iPTR 0)))),
12936          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12937
12938// Move word from xmm register to r/m16
12939def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12940                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12941def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12942                       (ins i16mem:$dst, VR128X:$src),
12943                       "vmovw\t{$src, $dst|$dst, $src}",
12944                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12945                                     (iPTR 0))), addr:$dst)]>,
12946                       T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12947
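// vmovw writes a full GR32 destination, so the i16 results below are extracted
// from the low 16 bits with EXTRACT_SUBREG.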
12948def : Pat<(i16 (bitconvert FR16X:$src)),
12949          (i16 (EXTRACT_SUBREG
12950                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12951                sub_16bit))>;
12952def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12953          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12954
12955// Allow "vmovw" to use GR64
12956let hasSideEffects = 0 in {
12957  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12958                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12959  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12960                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12961}
12962}
12963
12964// Convert 16-bit float to i16/u16
12965multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12966                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12967                          AVX512VLVectorVTInfo _Dst,
12968                          AVX512VLVectorVTInfo _Src,
12969                          X86SchedWriteWidths sched> {
12970  let Predicates = [HasFP16] in {
12971    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12972                            OpNode, MaskOpNode, sched.ZMM>,
12973             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12974                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12975  }
12976  let Predicates = [HasFP16, HasVLX] in {
12977    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12978                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12979    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12980                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12981  }
12982}
12983
12984// Convert 16-bit float to i16/u16 with truncation
12985multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12986                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12987                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12988                           X86SchedWriteWidths sched> {
12989  let Predicates = [HasFP16] in {
12990    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12991                            OpNode, MaskOpNode, sched.ZMM>,
12992             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12993                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12994  }
12995  let Predicates = [HasFP16, HasVLX] in {
12996    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12997                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12998    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12999                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13000  }
13001}
13002
13003defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13004                                X86cvtp2UIntRnd, avx512vl_i16_info,
13005                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13006                                T_MAP5, EVEX_CD8<16, CD8VF>;
13007defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13008                                X86VUintToFpRnd, avx512vl_f16_info,
13009                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13010                                T_MAP5, XD, EVEX_CD8<16, CD8VF>;
13011defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13012                                X86cvttp2si, X86cvttp2siSAE,
13013                                avx512vl_i16_info, avx512vl_f16_info,
13014                                SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13015defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13016                                X86cvttp2ui, X86cvttp2uiSAE,
13017                                avx512vl_i16_info, avx512vl_f16_info,
13018                                SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
13019defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13020                                X86cvtp2IntRnd, avx512vl_i16_info,
13021                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13022                                T_MAP5, PD, EVEX_CD8<16, CD8VF>;
13023defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13024                                X86VSintToFpRnd, avx512vl_f16_info,
13025                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13026                                T_MAP5, XS, EVEX_CD8<16, CD8VF>;
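// Illustrative AT&T examples for the 512-bit forms above (not taken from this
// file): "vcvtph2w %zmm1, %zmm0" converts 32 packed f16 elements to 32 packed
// i16 elements, and "vcvtph2w {rn-sae}, %zmm1, %zmm0" additionally selects an
// embedded rounding mode.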
13027
13028// Convert Half to Signed/Unsigned Doubleword
13029multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13030                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13031                           X86SchedWriteWidths sched> {
13032  let Predicates = [HasFP16] in {
13033    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13034                            MaskOpNode, sched.ZMM>,
13035             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13036                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13037  }
13038  let Predicates = [HasFP16, HasVLX] in {
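    // Z128 consumes only the low 4 f16 elements of the v8f16 source, so use
    // f64mem and an explicit "{1to4}" broadcast string (see also the comments
    // in avx512_cvtph2qq below).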
13039    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13040                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13041    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13042                               MaskOpNode, sched.YMM>, EVEX_V256;
13043  }
13044}
13045
13046// Convert Half to Signed/Unsigned Doubleword with truncation
13047multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13048                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13049                            X86SchedWriteWidths sched> {
13050  let Predicates = [HasFP16] in {
13051    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13052                            MaskOpNode, sched.ZMM>,
13053             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13054                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13055  }
13056  let Predicates = [HasFP16, HasVLX] in {
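    // As in avx512_cvtph2dq above: Z128 reads only the low 4 f16 elements,
    // hence f64mem and the explicit "{1to4}" broadcast string.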
13057    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13058                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13059    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13060                               MaskOpNode, sched.YMM>, EVEX_V256;
13061  }
13062}
13063
13064
13065defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13066                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13067                                 EVEX_CD8<16, CD8VH>;
13068defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13069                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
13070                                 EVEX_CD8<16, CD8VH>;
13071
13072defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13073                                X86cvttp2si, X86cvttp2siSAE,
13074                                SchedWriteCvtPS2DQ>, T_MAP5, XS,
13075                                EVEX_CD8<16, CD8VH>;
13076
13077defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13078                                 X86cvttp2ui, X86cvttp2uiSAE,
13079                                 SchedWriteCvtPS2DQ>, T_MAP5,
13080                                 EVEX_CD8<16, CD8VH>;
13081
13082// Convert Half to Signed/Unsigned Quadword
13083multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13084                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13085                           X86SchedWriteWidths sched> {
13086  let Predicates = [HasFP16] in {
13087    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13088                            MaskOpNode, sched.ZMM>,
13089             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13090                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13091  }
13092  let Predicates = [HasFP16, HasVLX] in {
13093    // Explicitly specified broadcast string, since we take only 2 elements
13094    // from v8f16x_info source
13095    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13096                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13097                               EVEX_V128;
13098    // Explicitly specified broadcast string, since we take only 4 elements
13099    // from v8f16x_info source
13100    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13101                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13102                               EVEX_V256;
13103  }
13104}
13105
13106// Convert Half to Signed/Unsigned Quadword with truncation
13107multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13108                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13109                            X86SchedWriteWidths sched> {
13110  let Predicates = [HasFP16] in {
13111    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13112                            MaskOpNode, sched.ZMM>,
13113             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13114                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13115  }
13116  let Predicates = [HasFP16, HasVLX] in {
13117    // Explicitly specified broadcast string, since we take only 2 elements
13118    // from v8f16x_info source
13119    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13120                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13121    // Explicitly specified broadcast string, since we take only 4 elements
13122    // from v8f16x_info source
13123    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13124                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13125  }
13126}
13127
13128defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13129                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13130                                 EVEX_CD8<16, CD8VQ>;
13131
13132defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13133                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13134                                 EVEX_CD8<16, CD8VQ>;
13135
13136defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13137                                 X86cvttp2si, X86cvttp2siSAE,
13138                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13139                                 EVEX_CD8<16, CD8VQ>;
13140
13141defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13142                                 X86cvttp2ui, X86cvttp2uiSAE,
13143                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13144                                 EVEX_CD8<16, CD8VQ>;
13145
13146// Convert Signed/Unsigned Quadword to Half
13147multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13148                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13149                           X86SchedWriteWidths sched> {
13150  // We need "x"/"y"/"z" suffixes in order to distinguish between 128-, 256- and
13151  // 512-bit memory forms of these instructions in the asm parser, since they all
13152  // have the same destination type, 'v8f16x_info'. The broadcast string is also
13153  // specified explicitly for the same reason.
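  // For example (illustrative AT&T syntax; the aliases below also allow the
  // suffixed forms for register operands):
  //   vcvtqq2phx (%rax), %xmm0    // 128-bit memory source
  //   vcvtqq2phy (%rax), %xmm0    // 256-bit memory source
  //   vcvtqq2phz (%rax), %xmm0    // 512-bit memory source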
13154  let Predicates = [HasFP16] in {
13155    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13156                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13157             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13158                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13159  }
13160  let Predicates = [HasFP16, HasVLX] in {
13161    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13162                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13163                               i128mem, VK2WM>, EVEX_V128;
13164    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13165                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13166                               i256mem, VK4WM>, EVEX_V256;
13167  }
13168
13169  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13170                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13171                  VR128X:$src), 0, "att">;
13172  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13173                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13174                  VK2WM:$mask, VR128X:$src), 0, "att">;
13175  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13176                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13177                  VK2WM:$mask, VR128X:$src), 0, "att">;
13178  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13179                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13180                  i64mem:$src), 0, "att">;
13181  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13182                  "$dst {${mask}}, ${src}{1to2}}",
13183                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13184                  VK2WM:$mask, i64mem:$src), 0, "att">;
13185  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13186                  "$dst {${mask}} {z}, ${src}{1to2}}",
13187                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13188                  VK2WM:$mask, i64mem:$src), 0, "att">;
13189
13190  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13191                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13192                  VR256X:$src), 0, "att">;
13193  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13194                  "$dst {${mask}}, $src}",
13195                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13196                  VK4WM:$mask, VR256X:$src), 0, "att">;
13197  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13198                  "$dst {${mask}} {z}, $src}",
13199                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13200                  VK4WM:$mask, VR256X:$src), 0, "att">;
13201  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13202                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13203                  i64mem:$src), 0, "att">;
13204  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13205                  "$dst {${mask}}, ${src}{1to4}}",
13206                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13207                  VK4WM:$mask, i64mem:$src), 0, "att">;
13208  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13209                  "$dst {${mask}} {z}, ${src}{1to4}}",
13210                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13211                  VK4WM:$mask, i64mem:$src), 0, "att">;
13212
13213  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13214                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13215                  VR512:$src), 0, "att">;
13216  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13217                  "$dst {${mask}}, $src}",
13218                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13219                  VK8WM:$mask, VR512:$src), 0, "att">;
13220  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13221                  "$dst {${mask}} {z}, $src}",
13222                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13223                  VK8WM:$mask, VR512:$src), 0, "att">;
13224  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13225                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13226                  i64mem:$src), 0, "att">;
13227  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13228                  "$dst {${mask}}, ${src}{1to8}}",
13229                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13230                  VK8WM:$mask, i64mem:$src), 0, "att">;
13231  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13232                  "$dst {${mask}} {z}, ${src}{1to8}}",
13233                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13234                  VK8WM:$mask, i64mem:$src), 0, "att">;
13235}
13236
13237defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13238                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13239                            EVEX_CD8<64, CD8VF>;
13240
13241defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13242                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13243                            EVEX_CD8<64, CD8VF>;
13244
13245// Convert half to signed/unsigned int 32/64
13246defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13247                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13248                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13249defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13250                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13251                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13252defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13253                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13254                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13255defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13256                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13257                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13258
13259defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13260                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13261                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13262defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13263                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13264                        "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13265defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13266                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13267                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13268defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13269                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13270                        "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13271
13272let Predicates = [HasFP16] in {
13273  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13274                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13275                                   T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13276  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13277                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13278                                   T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13279  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13280                                    v8f16x_info, i32mem, loadi32,
13281                                    "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13282  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13283                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13284                                    T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
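  // These aliases let AT&T-syntax vcvtsi2sh/vcvtusi2sh with an unsized memory
  // operand default to the 32-bit (l) form.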
13285  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13286              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13287
13288  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13289              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13290
13291
13292  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13293            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13294  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13295            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13296
13297  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13298            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13299  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13300            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13301
13302  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13303            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13304  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13305            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13306
13307  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13308            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13309  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13310            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13311
13312  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang,
13313  // which would otherwise produce unnecessary vmovsh instructions.
13314  def : Pat<(v8f16 (X86Movsh
13315                     (v8f16 VR128X:$dst),
13316                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13317            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13318
13319  def : Pat<(v8f16 (X86Movsh
13320                     (v8f16 VR128X:$dst),
13321                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13322            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13323
13324  def : Pat<(v8f16 (X86Movsh
13325                     (v8f16 VR128X:$dst),
13326                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13327            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13328
13329  def : Pat<(v8f16 (X86Movsh
13330                     (v8f16 VR128X:$dst),
13331                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13332            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13333
13334  def : Pat<(v8f16 (X86Movsh
13335                     (v8f16 VR128X:$dst),
13336                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13337            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13338
13339  def : Pat<(v8f16 (X86Movsh
13340                     (v8f16 VR128X:$dst),
13341                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13342            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13343
13344  def : Pat<(v8f16 (X86Movsh
13345                     (v8f16 VR128X:$dst),
13346                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13347            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13348
13349  def : Pat<(v8f16 (X86Movsh
13350                     (v8f16 VR128X:$dst),
13351                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13352            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13353} // Predicates = [HasFP16]
13354
13355let Predicates = [HasFP16, HasVLX] in {
13356  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13357  // patterns have been disabled with null_frag.
13358  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13359            (VCVTQQ2PHZ256rr VR256X:$src)>;
13360  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13361                           VK4WM:$mask),
13362            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13363  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13364                           VK4WM:$mask),
13365            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13366
13367  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13368            (VCVTQQ2PHZ256rm addr:$src)>;
13369  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13370                           VK4WM:$mask),
13371            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13372  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13373                           VK4WM:$mask),
13374            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13375
13376  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13377            (VCVTQQ2PHZ256rmb addr:$src)>;
13378  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13379                           (v8f16 VR128X:$src0), VK4WM:$mask),
13380            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13381  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13382                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13383            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13384
13385  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13386            (VCVTQQ2PHZ128rr VR128X:$src)>;
13387  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13388                           VK2WM:$mask),
13389            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13390  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13391                           VK2WM:$mask),
13392            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13393
13394  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13395            (VCVTQQ2PHZ128rm addr:$src)>;
13396  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13397                           VK2WM:$mask),
13398            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13399  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13400                           VK2WM:$mask),
13401            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13402
13403  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13404            (VCVTQQ2PHZ128rmb addr:$src)>;
13405  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13406                           (v8f16 VR128X:$src0), VK2WM:$mask),
13407            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13408  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13409                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13410            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13411
13412  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13413  // patterns have been disabled with null_frag.
13414  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13415            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13416  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13417                           VK4WM:$mask),
13418            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13419  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13420                           VK4WM:$mask),
13421            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13422
13423  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13424            (VCVTUQQ2PHZ256rm addr:$src)>;
13425  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13426                           VK4WM:$mask),
13427            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13428  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13429                           VK4WM:$mask),
13430            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13431
13432  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13433            (VCVTUQQ2PHZ256rmb addr:$src)>;
13434  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13435                           (v8f16 VR128X:$src0), VK4WM:$mask),
13436            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13437  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13438                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13439            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13440
13441  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13442            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13443  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13444                           VK2WM:$mask),
13445            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13446  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13447                           VK2WM:$mask),
13448            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13449
13450  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13451            (VCVTUQQ2PHZ128rm addr:$src)>;
13452  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13453                           VK2WM:$mask),
13454            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13455  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13456                           VK2WM:$mask),
13457            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13458
13459  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13460            (VCVTUQQ2PHZ128rmb addr:$src)>;
13461  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13462                           (v8f16 VR128X:$src0), VK2WM:$mask),
13463            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13464  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13465                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13466            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13467}
13468
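// The complex FP16 FMA instructions do not allow the destination register to
// overlap the untied source operands, so $dst is marked @earlyclobber to keep
// the register allocator from reusing src2/src3 for it.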
13469let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13470  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13471    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13472            (ins _.RC:$src2, _.RC:$src3),
13473            OpcodeStr, "$src3, $src2", "$src2, $src3",
13474            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
13475
13476    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13477            (ins _.RC:$src2, _.MemOp:$src3),
13478            OpcodeStr, "$src3, $src2", "$src2, $src3",
13479            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
13480
13481    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13482            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13483            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13484            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13485  }
13486} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13487
13488multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13489                                 X86VectorVTInfo _> {
13490  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13491  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13492          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13493          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13494          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13495          EVEX, VVVV, EVEX_B, EVEX_RC;
13496}
13497
13498
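// The packed complex FP16 operations are modeled on f32 vector types
// (v16f32/v8f32/v4f32): each complex FP16 value occupies 32 bits and write
// masking applies per complex element.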
13499multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13500  let Predicates = [HasFP16] in {
13501    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13502                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13503                      EVEX_V512, Sched<[WriteFMAZ]>;
13504  }
13505  let Predicates = [HasVLX, HasFP16] in {
13506    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13507    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13508  }
13509}
13510
13511multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13512                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13513  let Predicates = [HasFP16] in {
13514    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13515                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13516                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13517                                       "", "@earlyclobber $dst">, EVEX_V512;
13518  }
13519  let Predicates = [HasVLX, HasFP16] in {
13520    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13521                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13522    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13523                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13524  }
13525}
13526
13527
13528let Uses = [MXCSR] in {
13529  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13530                                    T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13531  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13532                                    T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13533
13534  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13535                                         x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13536  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13537                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13538}
13539
13540
13541multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13542                                   bit IsCommutable> {
13543  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13544    defm r : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13545                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13546                        "$src3, $src2", "$src2, $src3",
13547                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13548                        Sched<[WriteFMAX]>;
13549    defm m : AVX512_maskable_3src_scalar<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13550                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13551                        "$src3, $src2", "$src2, $src3",
13552                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13553                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13554    defm rb : AVX512_maskable_3src_scalar<opc,  MRMSrcReg, f32x_info, (outs VR128X:$dst),
13555                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13556                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13557                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13558                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13559  }
13560}
13561
13562multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13563                                     SDNode OpNodeRnd, bit IsCommutable> {
13564  let Predicates = [HasFP16] in {
13565    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13566                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13567                        "$src2, $src1", "$src1, $src2",
13568                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13569                        IsCommutable, IsCommutable, IsCommutable,
13570                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13571    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13572                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13573                        "$src2, $src1", "$src1, $src2",
13574                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13575                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13576                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13577    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13578                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13579                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13580                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13581                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13582                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13583  }
13584}
13585
13586let Uses = [MXCSR] in {
13587  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13588                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13589  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13590                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13591
13592  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13593                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13594  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13595                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13596}
13597