//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Three records are emitted per instantiation:
//   <Suffix>   - the unmasked form, using Ins/Pattern.
//   k<Suffix>  - the merge-masking form (EVEX_K), using MaskingIns/
//                MaskingPattern and writing "{${mask}}" into the asm string.
//   kz<Suffix> - the zero-masking form (EVEX_KZ), using ZeroMaskingIns/
//                ZeroMaskingPattern and writing "{${mask}} {z}".
// MaskingConstraint (e.g. "$src0 = $dst") ties the pass-through operand to
// the destination for the merge-masking form; ClobberConstraint is applied
// to all three forms and merged with MaskingConstraint for the k-form.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = "",
                                  string Suffix = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def Suffix: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def k#Suffix: AVX512<O, F, Outs, MaskingIns,
                  OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                "$dst {${mask}}, "#IntelSrcAsm#"}",
                  MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      // Combine the clobber and masking constraints, joining with ", " only
      // when both are non-empty.
      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def kz#Suffix: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
                EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists from RHS/MaskingRHS: the zero-masking
// pattern is derived here as (Select mask, RHS, ImmAllZerosV), while the
// merge-masking pattern (MaskingRHS) is supplied by the subclass.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = "",
                                  string Suffix = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint,
                         Suffix>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
// Like AVX512_maskable below, but the masked patterns use MaskRHS rather
// than RHS, so the masked and unmasked forms can select different nodes
// (e.g. a SAE/rounding variant for one of them).
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         // Merge-masking ins: prepend the $src0 pass-through
                         // operand (tied to $dst below) and the mask.
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         // Zero-masking ins: only the mask is prepended.
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                             (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                             (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = "",
                           string Suffix = ""> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint, Suffix>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar ops use X86selects_mask (element-0 select) instead of vselect_mask
// and are never marked commutable here.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, string Suffix = ""> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask, "", Suffix>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0, string Suffix = ""> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          // MaskOnly disables the unmasked pattern entirely.
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable,
                          IsCommutable, "", Suffix>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          // null_frag: no unmasked pattern (see NOTE above).
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                                        (bitconvert InVT.RC:$src1)),
                          vselect_mask, "", IsCommutable>;

// Scalar flavor of AVX512_maskable_3src: same tied-$src1 scheme but selects
// with X86selects_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0, string Suffix = ""> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly, Suffix>;

// Assembly-only variant: generates the unmasked/k/kz records with the given
// Pattern on the unmasked form and no masking patterns.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

// Assembly-only variant of the 3src (tied $src1) scheme; note the empty
// masking constraint since $src1 already provides the pass-through.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Only unmasked and merge-masked (EVEX_K) forms exist; there is no
// zero-masked form for mask-register destinations.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0,
                                  string Suffix = ""> {
  let isCommutable = IsCommutable in {
    def Suffix: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def k#Suffix: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Sets the result in the mask register class (_.KRC) rather than _.RC.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0,
                                  string Suffix = ""> :
   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                              AttSrcAsm, IntelSrcAsm,
                              [(set _.KRC:$dst, RHS)],
                              [(set _.KRC:$dst, MaskingRHS)], IsCommutable, Suffix>;

// RHS_su is the "same-user" variant of RHS used under the mask AND; the
// masked result is (and mask, RHS_su).
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0,
                               string Suffix = ""> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                              !con((ins _.KRCWM:$mask), Ins),
                              OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                              (and _.KRCWM:$mask, RHS_su), IsCommutable, Suffix>;

// Used by conversion instructions.
262multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _, 263 dag Outs, 264 dag Ins, dag MaskingIns, dag ZeroMaskingIns, 265 string OpcodeStr, 266 string AttSrcAsm, string IntelSrcAsm, 267 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> : 268 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, 269 AttSrcAsm, IntelSrcAsm, 270 [(set _.RC:$dst, RHS)], 271 [(set _.RC:$dst, MaskingRHS)], 272 [(set _.RC:$dst, ZeroMaskingRHS)], 273 "$src0 = $dst">; 274 275multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _, 276 dag Outs, dag NonTiedIns, string OpcodeStr, 277 string AttSrcAsm, string IntelSrcAsm, 278 dag RHS, dag MaskingRHS, bit IsCommutable, 279 bit IsKCommutable> : 280 AVX512_maskable_custom<O, F, Outs, 281 !con((ins _.RC:$src1), NonTiedIns), 282 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 283 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), 284 OpcodeStr, AttSrcAsm, IntelSrcAsm, 285 [(set _.RC:$dst, RHS)], 286 [(set _.RC:$dst, 287 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))], 288 [(set _.RC:$dst, 289 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))], 290 "", IsCommutable, IsKCommutable>; 291 292// Alias instruction that maps zero vector to pxor / xorp* for AVX-512. 293// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then 294// swizzled by ExecutionDomainFix to pxor. 295// We set canFoldAsLoad because this can be converted to a constant-pool 296// load of an all-zeros value if folding it would be beneficial. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Map every other 512-bit all-zeros type onto the single v16i32 pseudo.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                           [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                      (v8i64 immAllOnesV),
                                                      (v8i64 immAllZerosV)))]>;
}

// 128/256-bit zero pseudos, same expansion scheme as AVX512_512_SET0 above.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
373multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, 374 X86VectorVTInfo To, 375 SDPatternOperator vinsert_insert, 376 SDPatternOperator vinsert_for_mask, 377 X86FoldableSchedWrite sched> { 378 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 379 defm rri : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst), 380 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3), 381 "vinsert" # From.EltTypeName # "x" # From.NumElts, 382 "$src3, $src2, $src1", "$src1, $src2, $src3", 383 (vinsert_insert:$src3 (To.VT To.RC:$src1), 384 (From.VT From.RC:$src2), 385 (iPTR imm)), 386 (vinsert_for_mask:$src3 (To.VT To.RC:$src1), 387 (From.VT From.RC:$src2), 388 (iPTR imm))>, 389 AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>; 390 let mayLoad = 1 in 391 defm rmi : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst), 392 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3), 393 "vinsert" # From.EltTypeName # "x" # From.NumElts, 394 "$src3, $src2, $src1", "$src1, $src2, $src3", 395 (vinsert_insert:$src3 (To.VT To.RC:$src1), 396 (From.VT (From.LdFrag addr:$src2)), 397 (iPTR imm)), 398 (vinsert_for_mask:$src3 (To.VT To.RC:$src1), 399 (From.VT (From.LdFrag addr:$src2)), 400 (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV, 401 EVEX_CD8<From.EltSize, From.CD8TupleForm>, 402 Sched<[sched.Folded, sched.ReadAfterFold]>; 403 } 404} 405 406// Passes the same pattern operator for masked and unmasked ops. 
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Selection patterns mapping an insert of alternative element types onto an
// already-defined instruction (looked up by InstrStr # "rri"/"rmi").
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rri")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rmi")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates the full family for one element-type pair:
//   32X4 (128->256 under VLX, 128->512), 64X4 (256->512), and the
//   DQI-only 64X2 / 32X8 variants, which are kept mask-only.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32X4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32X4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64X4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64X2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64X2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32X8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32X4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32X4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32X4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32X4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64X4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64X4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32X4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32X4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32X4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32X4Z256", v8bf16x_info, v16bf16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32X4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32X4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32X4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32X4Z", v8bf16x_info, v32bf16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64X4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64X4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64X4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64X4Z", v16bf16x_info, v32bf16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Masked-insert patterns where the mask element type (Cast) differs from the
// instruction's native To/From types, requiring a bitconvert of the
// pass-through operand. Covers rrik/rmik (merge) and rrikz/rmikz (zeroing).
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrik")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmik")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrikz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmikz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32X4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32X4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32X4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32X4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64X2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32X4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64X2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32X4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32X4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32X4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64X2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64X2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64X2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32X8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64X4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32X8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32X8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32X8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64X4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64X4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64X4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrri : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrmi : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
681multiclass vextract_for_size_split<int Opcode, 682 X86VectorVTInfo From, X86VectorVTInfo To, 683 SDPatternOperator vextract_extract, 684 SDPatternOperator vextract_for_mask, 685 SchedWrite SchedRR, SchedWrite SchedMR> { 686 687 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 688 defm rri : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst), 689 (ins From.RC:$src1, u8imm:$idx), 690 "vextract" # To.EltTypeName # "x" # To.NumElts, 691 "$idx, $src1", "$src1, $idx", 692 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)), 693 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>, 694 AVX512AIi8Base, EVEX, Sched<[SchedRR]>; 695 696 def mri : AVX512AIi8<Opcode, MRMDestMem, (outs), 697 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx), 698 "vextract" # To.EltTypeName # "x" # To.NumElts # 699 "\t{$idx, $src1, $dst|$dst, $src1, $idx}", 700 [(store (To.VT (vextract_extract:$idx 701 (From.VT From.RC:$src1), (iPTR imm))), 702 addr:$dst)]>, EVEX, 703 Sched<[SchedMR]>; 704 705 let mayStore = 1, hasSideEffects = 0 in 706 def mrik : AVX512AIi8<Opcode, MRMDestMem, (outs), 707 (ins To.MemOp:$dst, To.KRCWM:$mask, 708 From.RC:$src1, u8imm:$idx), 709 "vextract" # To.EltTypeName # "x" # To.NumElts # 710 "\t{$idx, $src1, $dst {${mask}}|" 711 "$dst {${mask}}, $src1, $idx}", []>, 712 EVEX_K, EVEX, Sched<[SchedMR]>; 713 } 714} 715 716// Passes the same pattern operator for masked and unmasked ops. 
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
// Maps register and store extracts onto an existing instruction, looked up
// by InstrStr # "rri" / "mri".
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rri")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                            (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mri") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates the extract family for one element-type pair, mirroring
// vinsert_for_type: 32X4 (512->128, and 256->128 under VLX), 64X4
// (512->256), plus DQI-only 64X2 / 32X8 variants kept mask-only.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32X4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64X4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32X4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64X2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64X2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32X8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// Map extracts of the "other" element types onto the 32x4/64x4 encodings,
// which exist on plain AVX512 (no DQI required).
defm : vextract_for_size_lowering<"VEXTRACTF32X4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32X4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64X4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64X4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32X4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32X4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32X4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32X4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32X4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32X4Z256", v16bf16x_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32X4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32X4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32X4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32X4Z", v32bf16_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64X4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64X4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64X4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64X4Z", v32bf16_info, v16bf16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX, extract lane 1 of the low YMM with the VEX-encoded
// VEXTRACT{I,F}128 so the EVEX->VEX compression pass can kick in.
let Predicates = [NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rri
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rri
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rri
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rri
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rri
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rri
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rri
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX, the 256-bit VEXTRACT*32X4Z256 forms serve the same purpose:
// extract lane 1 of the low YMM instead of using a 512-bit source encoding.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32X4Z256rri
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32X4Z256rri
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32X4Z256rri
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32X4Z256rri
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32X4Z256rri
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32X4Z256rri
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32X4Z256rri
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Matches a masked extract whose result is bitcast before the vselect_mask:
// the mask type (Cast) differs from the natural extract type (To), e.g. a
// v2f64 extract masked as v4f32.  Generates merge- and zero-masked patterns
// targeting the existing rrik/rrikz instructions.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masked: previous value comes from $src0.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrik")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masked: masked-off elements become zero.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrikz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

// 128-bit extracts from 256-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32X4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64X2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32X4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32X4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32X4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extracts from 512-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32X4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64X2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32X4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32X4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32X4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// 256-bit extracts from 512-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32X8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64X4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32X8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32X8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32X8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64X4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrri : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmri : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// ISel patterns that broadcast from a scalar FP register class (FRC) by
// first copying it to the corresponding vector class, then using the
// register-form broadcast instruction (unmasked, merge- and zero-masked).
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Register source, unmasked.  UnmaskedOp can be null_frag to suppress the
  // unmasked pattern (see avx512_common_broadcast_32x2 users).
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
  // Register source, zero-masked.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Register source, merge-masked ($src0 tied to $dst).
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Memory source, unmasked.  Rematerializable/foldable: the broadcast load
  // has no side effects.
  let hasSideEffects = 0, mayLoad = 1, isReMaterializable = 1, canFoldAsLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, zero-masked.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, merge-masked.
  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

// VBROADCASTSD: f64 broadcasts exist only at 256/512-bit widths.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                 EVEX_V256;
  }
}

// VBROADCASTSS: f32 broadcasts additionally have a 128-bit form.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                 EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, REX_W;

// Broadcast from a GPR (VPBROADCASTD/Q r32/r64 forms).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"#_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                         /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                         T8, PD, EVEX, Sched<[SchedRR]>;
}

// Byte/word GPR broadcasts: there is no r8/r16 encoding, so the instruction
// takes GR32 and the patterns widen the source with INSERT_SUBREG.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                         (outs _.RC:$dst), (ins GR32:$src),
                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                         "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

// Instantiates the byte/word GPR broadcast at 512/256/128-bit widths.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

// Instantiates the dword/qword GPR broadcast at 512/256/128-bit widths.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
// Note: D and Q forms share opcode 0x7C; REX_W selects the qword encoding.
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;

// Element broadcasts from XMM/memory at 512/256/128-bit widths.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd,
                                        bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128,
                                   IsConvertibleToThreeAddress>,
                                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, REX_W;

// Subvector broadcast from memory (vbroadcasti/f 32x4, 64x4, ...),
// unmasked pattern enabled.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// DQ-only subvector broadcast: null_frag suppresses the unmasked ISel
// pattern, so these encodings are selected only when masking is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
// f16 broadcasts reuse the integer word broadcast instructions.
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4Z : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4Z : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4Z : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

// Unmasked subvector broadcasts of the alternative element types reuse the
// 32x4/64x4 encodings (no DQI needed).
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4Zrm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Zrm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

// bf16 subvector broadcasts reuse the f32/f64 encodings.
let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4Zrm addr:$src)>;
  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Zrm addr:$src)>;
}

let Predicates = [HasBF16, HasVLX] in
  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z256 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
defm VBROADCASTF64X2Z256 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2Z : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
           X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
           EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8Z : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
           X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
           EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2Z : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
           X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
           EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8Z : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
           X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
           EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
1523def : Pat<(vselect_mask VK16WM:$mask, 1524 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1525 (v16f32 immAllZerosV)), 1526 (VBROADCASTF32X8Zrmkz VK16WM:$mask, addr:$src)>; 1527def : Pat<(vselect_mask VK16WM:$mask, 1528 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1529 VR512:$src0), 1530 (VBROADCASTF32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1531def : Pat<(vselect_mask VK16WM:$mask, 1532 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1533 (v16i32 immAllZerosV)), 1534 (VBROADCASTI32X8Zrmkz VK16WM:$mask, addr:$src)>; 1535def : Pat<(vselect_mask VK16WM:$mask, 1536 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1537 VR512:$src0), 1538 (VBROADCASTI32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1539 1540def : Pat<(vselect_mask VK8WM:$mask, 1541 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1542 (v8f64 immAllZerosV)), 1543 (VBROADCASTF64X2Zrmkz VK8WM:$mask, addr:$src)>; 1544def : Pat<(vselect_mask VK8WM:$mask, 1545 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1546 VR512:$src0), 1547 (VBROADCASTF64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1548def : Pat<(vselect_mask VK8WM:$mask, 1549 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1550 (v8i64 immAllZerosV)), 1551 (VBROADCASTI64X2Zrmkz VK8WM:$mask, addr:$src)>; 1552def : Pat<(vselect_mask VK8WM:$mask, 1553 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1554 VR512:$src0), 1555 (VBROADCASTI64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1556} 1557 1558multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, 1559 AVX512VLVectorVTInfo _Dst, 1560 AVX512VLVectorVTInfo _Src> { 1561 let Predicates = [HasDQI] in 1562 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256, 1563 WriteShuffle256Ld, _Dst.info512, 1564 _Src.info512, _Src.info128, 0, null_frag, null_frag>, 1565 EVEX_V512; 1566 let Predicates = [HasDQI, HasVLX] in 1567 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256, 1568 WriteShuffle256Ld, 
_Dst.info256, 1569 _Src.info256, _Src.info128, 0, null_frag, null_frag>, 1570 EVEX_V256; 1571} 1572 1573multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, 1574 AVX512VLVectorVTInfo _Dst, 1575 AVX512VLVectorVTInfo _Src> : 1576 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { 1577 1578 let Predicates = [HasDQI, HasVLX] in 1579 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle, 1580 WriteShuffleXLd, _Dst.info128, 1581 _Src.info128, _Src.info128, 0, null_frag, null_frag>, 1582 EVEX_V128; 1583} 1584 1585defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", 1586 avx512vl_i32_info, avx512vl_i64_info>; 1587defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", 1588 avx512vl_f32_info, avx512vl_f64_info>; 1589 1590//===----------------------------------------------------------------------===// 1591// AVX-512 BROADCAST MASK TO VECTOR REGISTER 1592//--- 1593multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, 1594 X86VectorVTInfo _, RegisterClass KRC> { 1595 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src), 1596 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1597 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, 1598 EVEX, Sched<[WriteShuffle]>; 1599} 1600 1601multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 1602 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { 1603 let Predicates = [HasCDI] in 1604 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512; 1605 let Predicates = [HasCDI, HasVLX] in { 1606 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256; 1607 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128; 1608 } 1609} 1610 1611defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", 1612 avx512vl_i32_info, VK16>; 1613defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", 1614 avx512vl_i64_info, VK8>, REX_W; 

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// VPERMI2*: two-table permute where the index vector ($src1) is tied to the
// destination and is overwritten with the result. The index operand may have
// a different VT than the data (fp data / integer indices), hence the _cast
// maskable helper and the separate IdxVT parameter.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
// $src1 (indices) is tied to $dst; patterns have no side effects beyond the
// register result.
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  // Register-register form.
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;

  // Register-memory form: second table operand folded from memory.
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (_.LdFrag addr:$src3)))), 1>,
            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory (rmb) form of VPERMI2*, kept separate because the
// byte/word variants (VPERMI2B/W) have no embedded-broadcast encoding.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
                     IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX, VVVV, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// All three vector lengths, rr/rm/rmb forms. 512-bit is unconditional here
// (callers are AVX512F instructions); 128/256-bit require VLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>,
               avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                                ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                     ShuffleMask.info128>, EVEX_V128;
    defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                     ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word variant: gated on an extra feature predicate (BWI or VBMI) and
// provides no broadcast (rmb) form.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                  Idx.info128>, EVEX_V128;
    defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                  Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// For masked fp VPERMI2, the merge passthru is the (fp-typed) index operand
// $src1; when it reaches the select bitcast to CastVT (e.g. vXi64 from the
// ABI), the plain patterns don't match, so these look through the bitconvert
// on both the index and the passthru and select the rrk/rmk/rmbk forms.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
1736defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>; 1737defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>; 1738defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>; 1739 1740// VPERMT2 1741multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, 1742 X86FoldableSchedWrite sched, 1743 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1744let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 1745 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 1746 (ins IdxVT.RC:$src2, _.RC:$src3), 1747 OpcodeStr, "$src3, $src2", "$src2, $src3", 1748 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, 1749 EVEX, VVVV, AVX5128IBase, Sched<[sched]>; 1750 1751 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1752 (ins IdxVT.RC:$src2, _.MemOp:$src3), 1753 OpcodeStr, "$src3, $src2", "$src2, $src3", 1754 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, 1755 (_.LdFrag addr:$src3))), 1>, 1756 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1757 } 1758} 1759multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, 1760 X86FoldableSchedWrite sched, 1761 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1762 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in 1763 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1764 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), 1765 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1766 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1767 (_.VT (X86VPermt2 _.RC:$src1, 1768 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1769 AVX5128IBase, EVEX, VVVV, EVEX_B, 1770 Sched<[sched.Folded, sched.ReadAfterFold]>; 1771} 1772 1773multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, 1774 X86FoldableSchedWrite sched, 1775 AVX512VLVectorVTInfo VTInfo, 1776 AVX512VLVectorVTInfo ShuffleMask> { 1777 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, 
VTInfo.info512, 1778 ShuffleMask.info512>, 1779 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, 1780 ShuffleMask.info512>, EVEX_V512; 1781 let Predicates = [HasVLX] in { 1782 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1783 ShuffleMask.info128>, 1784 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, 1785 ShuffleMask.info128>, EVEX_V128; 1786 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1787 ShuffleMask.info256>, 1788 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, 1789 ShuffleMask.info256>, EVEX_V256; 1790 } 1791} 1792 1793multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, 1794 X86FoldableSchedWrite sched, 1795 AVX512VLVectorVTInfo VTInfo, 1796 AVX512VLVectorVTInfo Idx, Predicate Prd> { 1797 let Predicates = [Prd] in 1798 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1799 Idx.info512>, EVEX_V512; 1800 let Predicates = [Prd, HasVLX] in { 1801 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1802 Idx.info128>, EVEX_V128; 1803 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1804 Idx.info256>, EVEX_V256; 1805 } 1806} 1807 1808defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, 1809 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1810defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, 1811 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1812defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, 1813 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1814 REX_W, EVEX_CD8<16, CD8VF>; 1815defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, 1816 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1817 EVEX_CD8<8, CD8VF>; 1818defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, 1819 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1820defm VPERMT2PD : avx512_perm_t_sizes<0x7F, 
"vpermt2pd", WriteFVarShuffle256, 1821 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1822 1823//===----------------------------------------------------------------------===// 1824// AVX-512 - BLEND using mask 1825// 1826 1827multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, 1828 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1829 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 1830 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1831 (ins _.RC:$src1, _.RC:$src2), 1832 !strconcat(OpcodeStr, 1833 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, 1834 EVEX, VVVV, Sched<[sched]>; 1835 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1836 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1837 !strconcat(OpcodeStr, 1838 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1839 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; 1840 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1841 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1842 !strconcat(OpcodeStr, 1843 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1844 []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>; 1845 let mayLoad = 1 in { 1846 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1847 (ins _.RC:$src1, _.MemOp:$src2), 1848 !strconcat(OpcodeStr, 1849 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), 1850 []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 1851 Sched<[sched.Folded, sched.ReadAfterFold]>; 1852 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1853 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1854 !strconcat(OpcodeStr, 1855 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1856 []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, 1857 Sched<[sched.Folded, sched.ReadAfterFold]>; 1858 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1859 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1860 !strconcat(OpcodeStr, 1861 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 
1862 []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, 1863 Sched<[sched.Folded, sched.ReadAfterFold]>; 1864 } 1865 } 1866} 1867multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, 1868 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1869 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { 1870 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1871 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1872 !strconcat(OpcodeStr, 1873 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 1874 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1875 EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1876 Sched<[sched.Folded, sched.ReadAfterFold]>; 1877 1878 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1879 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1880 !strconcat(OpcodeStr, 1881 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", 1882 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1883 EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1884 Sched<[sched.Folded, sched.ReadAfterFold]>; 1885 1886 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1887 (ins _.RC:$src1, _.ScalarMemOp:$src2), 1888 !strconcat(OpcodeStr, 1889 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 1890 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1891 EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1892 Sched<[sched.Folded, sched.ReadAfterFold]>; 1893 } 1894} 1895 1896multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1897 AVX512VLVectorVTInfo VTInfo> { 1898 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1899 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1900 EVEX_V512; 1901 1902 let Predicates = [HasVLX] in { 1903 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1904 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1905 EVEX_V256; 1906 defm Z128 : WriteFVarBlendask<opc, 
OpcodeStr, sched.XMM, VTInfo.info128>, 1907 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1908 EVEX_V128; 1909 } 1910} 1911 1912multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1913 AVX512VLVectorVTInfo VTInfo> { 1914 let Predicates = [HasBWI] in 1915 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1916 EVEX_V512; 1917 1918 let Predicates = [HasBWI, HasVLX] in { 1919 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1920 EVEX_V256; 1921 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1922 EVEX_V128; 1923 } 1924} 1925 1926defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, 1927 avx512vl_f32_info>; 1928defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, 1929 avx512vl_f64_info>, REX_W; 1930defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, 1931 avx512vl_i32_info>; 1932defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, 1933 avx512vl_i64_info>, REX_W; 1934defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, 1935 avx512vl_i8_info>; 1936defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, 1937 avx512vl_i16_info>, REX_W; 1938 1939//===----------------------------------------------------------------------===// 1940// Compare Instructions 1941//===----------------------------------------------------------------------===// 1942 1943// avx512_cmp_scalar - AVX512 CMPSS and CMPSD 1944 1945multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, 1946 PatFrag OpNode_su, PatFrag OpNodeSAE_su, 1947 X86FoldableSchedWrite sched> { 1948 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 1949 (outs _.KRC:$dst), 1950 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 1951 "vcmp"#_.Suffix, 1952 "$cc, $src2, $src1", "$src1, $src2, $cc", 1953 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1954 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 
timm:$cc), 0, "_Int">, 1955 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; 1956 let mayLoad = 1 in 1957 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 1958 (outs _.KRC:$dst), 1959 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), 1960 "vcmp"#_.Suffix, 1961 "$cc, $src2, $src1", "$src1, $src2, $cc", 1962 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 1963 timm:$cc), 1964 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 1965 timm:$cc), 0, "_Int">, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 1966 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1967 1968 let Uses = [MXCSR] in 1969 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 1970 (outs _.KRC:$dst), 1971 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 1972 "vcmp"#_.Suffix, 1973 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", 1974 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 1975 timm:$cc), 1976 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 1977 timm:$cc), 0, "_Int">, 1978 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; 1979 1980 let isCodeGenOnly = 1 in { 1981 let isCommutable = 1 in 1982 def rri : AVX512Ii8<0xC2, MRMSrcReg, 1983 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc), 1984 !strconcat("vcmp", _.Suffix, 1985 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 1986 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 1987 _.FRC:$src2, 1988 timm:$cc))]>, 1989 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; 1990 def rmi : AVX512Ii8<0xC2, MRMSrcMem, 1991 (outs _.KRC:$dst), 1992 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 1993 !strconcat("vcmp", _.Suffix, 1994 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 1995 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 1996 (_.ScalarLdFrag addr:$src2), 1997 timm:$cc))]>, 1998 EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 1999 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 2000 } 2001} 2002 2003let Predicates = [HasAVX512] in { 2004 let ExeDomain = SSEPackedSingle in 2005 defm VCMPSSZ : 
avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE, 2006 X86cmpms_su, X86cmpmsSAE_su, 2007 SchedWriteFCmp.Scl>, AVX512XSIi8Base; 2008 let ExeDomain = SSEPackedDouble in 2009 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE, 2010 X86cmpms_su, X86cmpmsSAE_su, 2011 SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W; 2012} 2013let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in 2014 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE, 2015 X86cmpms_su, X86cmpmsSAE_su, 2016 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA; 2017 2018multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, 2019 X86FoldableSchedWrite sched, 2020 X86VectorVTInfo _, bit IsCommutable> { 2021 let isCommutable = IsCommutable, hasSideEffects = 0 in 2022 def rr : AVX512BI<opc, MRMSrcReg, 2023 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), 2024 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2025 []>, EVEX, VVVV, Sched<[sched]>; 2026 let mayLoad = 1, hasSideEffects = 0 in 2027 def rm : AVX512BI<opc, MRMSrcMem, 2028 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), 2029 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2030 []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 2031 let isCommutable = IsCommutable, hasSideEffects = 0 in 2032 def rrk : AVX512BI<opc, MRMSrcReg, 2033 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 2034 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2035 "$dst {${mask}}, $src1, $src2}"), 2036 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; 2037 let mayLoad = 1, hasSideEffects = 0 in 2038 def rmk : AVX512BI<opc, MRMSrcMem, 2039 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2040 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2041 "$dst {${mask}}, $src1, $src2}"), 2042 []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2043} 2044 2045multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, 2046 X86FoldableSchedWrite 
sched, X86VectorVTInfo _, 2047 bit IsCommutable> : 2048 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> { 2049 let mayLoad = 1, hasSideEffects = 0 in { 2050 def rmb : AVX512BI<opc, MRMSrcMem, 2051 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), 2052 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", 2053 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2054 []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2055 def rmbk : AVX512BI<opc, MRMSrcMem, 2056 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2057 _.ScalarMemOp:$src2), 2058 !strconcat(OpcodeStr, 2059 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2060 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2061 []>, EVEX, VVVV, EVEX_K, EVEX_B, 2062 Sched<[sched.Folded, sched.ReadAfterFold]>; 2063 } 2064} 2065 2066multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, 2067 X86SchedWriteWidths sched, 2068 AVX512VLVectorVTInfo VTInfo, Predicate prd, 2069 bit IsCommutable = 0> { 2070 let Predicates = [prd] in 2071 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, 2072 VTInfo.info512, IsCommutable>, EVEX_V512; 2073 2074 let Predicates = [prd, HasVLX] in { 2075 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, 2076 VTInfo.info256, IsCommutable>, EVEX_V256; 2077 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, 2078 VTInfo.info128, IsCommutable>, EVEX_V128; 2079 } 2080} 2081 2082multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, 2083 X86SchedWriteWidths sched, 2084 AVX512VLVectorVTInfo VTInfo, 2085 Predicate prd, bit IsCommutable = 0> { 2086 let Predicates = [prd] in 2087 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM, 2088 VTInfo.info512, IsCommutable>, EVEX_V512; 2089 2090 let Predicates = [prd, HasVLX] in { 2091 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, 2092 VTInfo.info256, IsCommutable>, EVEX_V256; 2093 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, 
2094 VTInfo.info128, IsCommutable>, EVEX_V128; 2095 } 2096} 2097 2098// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 2099// increase the pattern complexity the way an immediate would. 2100let AddedComplexity = 2 in { 2101// FIXME: Is there a better scheduler class for VPCMP? 2102defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", 2103 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, 2104 EVEX_CD8<8, CD8VF>, WIG; 2105 2106defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", 2107 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, 2108 EVEX_CD8<16, CD8VF>, WIG; 2109 2110defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", 2111 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, 2112 EVEX_CD8<32, CD8VF>; 2113 2114defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", 2115 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, 2116 T8, REX_W, EVEX_CD8<64, CD8VF>; 2117 2118defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", 2119 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2120 EVEX_CD8<8, CD8VF>, WIG; 2121 2122defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", 2123 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2124 EVEX_CD8<16, CD8VF>, WIG; 2125 2126defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", 2127 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, 2128 EVEX_CD8<32, CD8VF>; 2129 2130defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", 2131 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, 2132 T8, REX_W, EVEX_CD8<64, CD8VF>; 2133} 2134 2135multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, 2136 PatFrag Frag_su, 2137 X86FoldableSchedWrite sched, 2138 X86VectorVTInfo _, string Name> { 2139 let isCommutable = 1 in 2140 def rri : AVX512AIi8<opc, MRMSrcReg, 2141 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2142 !strconcat("vpcmp", Suffix, 2143 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2144 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2145 (_.VT 
_.RC:$src2), 2146 cond)))]>, 2147 EVEX, VVVV, Sched<[sched]>; 2148 def rmi : AVX512AIi8<opc, MRMSrcMem, 2149 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2150 !strconcat("vpcmp", Suffix, 2151 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2152 [(set _.KRC:$dst, (_.KVT 2153 (Frag:$cc 2154 (_.VT _.RC:$src1), 2155 (_.VT (_.LdFrag addr:$src2)), 2156 cond)))]>, 2157 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 2158 let isCommutable = 1 in 2159 def rrik : AVX512AIi8<opc, MRMSrcReg, 2160 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2161 u8imm:$cc), 2162 !strconcat("vpcmp", Suffix, 2163 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2164 "$dst {${mask}}, $src1, $src2, $cc}"), 2165 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2166 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), 2167 (_.VT _.RC:$src2), 2168 cond))))]>, 2169 EVEX, VVVV, EVEX_K, Sched<[sched]>; 2170 def rmik : AVX512AIi8<opc, MRMSrcMem, 2171 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2172 u8imm:$cc), 2173 !strconcat("vpcmp", Suffix, 2174 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2175 "$dst {${mask}}, $src1, $src2, $cc}"), 2176 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2177 (_.KVT 2178 (Frag_su:$cc 2179 (_.VT _.RC:$src1), 2180 (_.VT (_.LdFrag addr:$src2)), 2181 cond))))]>, 2182 EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2183 2184 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), 2185 (_.VT _.RC:$src1), cond)), 2186 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2187 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2188 2189 def : Pat<(and _.KRCWM:$mask, 2190 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2), 2191 (_.VT _.RC:$src1), cond))), 2192 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2193 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2194 (X86pcmpm_imm_commute $cc))>; 2195} 2196 2197multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2198 PatFrag Frag_su, X86FoldableSchedWrite sched, 2199 X86VectorVTInfo _, string Name> : 2200 
avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  // Broadcast-memory form: compare a vector register against a broadcast
  // scalar load, selected by the immediate condition code $cc.
  def rmbi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.BroadcastLdFrag addr:$src2),
                                       cond)))]>,
             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked variant of the broadcast-memory form (result ANDed with $mask).
  def rmbik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                      _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (_.BroadcastLdFrag addr:$src2),
                                             cond))))]>,
              EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Select the broadcast form when the load is the *first* operand by
  // commuting the immediate condition code.
  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

// Instantiates the 512-bit form unconditionally and the 128/256-bit forms
// under AVX512VL.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

// Same as avx512_icmp_cc_vl, but also instantiates the broadcast-memory
// (rmb) forms — only valid for 32/64-bit element types.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 REX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;

// VCMP: fp compare with immediate predicate, producing a mask register.
// rri/rmi/rmbi = reg-reg, reg-mem, and reg-broadcast forms; all may raise
// fp exceptions and read MXCSR.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                   1>, Sched<[sched]>;

  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                              timm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc)>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86any_cmpm (_.VT _.RC:$src1),
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand.
  // Loads/broadcasts in the first operand are handled by commuting the
  // immediate predicate with X86cmpm_imm_commute.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
                                                       _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
                                                       addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
                                                        addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;
}

// SAE (suppress-all-exceptions) register-register form, 512-bit only.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  let Uses = [MXCSR] in
  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $cc",
                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
                     EVEX_B, Sched<[sched]>;
}

// Instantiates all vector widths; SAE only exists for the 512-bit form.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [Pred,HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  // Scalar compares with the load on the LHS: commute the predicate so the
  // memory operand lands in the rm form.
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrmi FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrmi FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrmi FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

//handle fpclass instruction  mask =  op(reg_scalar,imm)
//                                    op(mem_scalar,imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def ri : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                            (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rik : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def mi : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def mik : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                             (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem, list<Register> _Uses = [MXCSR]>{
  let ExeDomain = _.ExeDomain, Uses = _Uses in {
    def ri : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                    (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rik : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclass_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def mi : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def mik : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                       (_.VT (_.LdFrag addr:$src1)),
                                       (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def mbi : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                                       _.BroadcastStr#", $dst|$dst, ${src1}"
                                                  #_.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
                                      (i32 timm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def mbik : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                            _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                                                 _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                        (_.VT (_.BroadcastLdFrag addr:$src1)),
                                        (i32 timm:$src2))))]>,
                      EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  // AT&T-syntax aliases: accept the width-suffixed mnemonic (x/y/z) for the
  // register and broadcast forms too.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"ri")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rik")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"mbi")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"mbik")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

// Instantiates the 512-bit form unconditionally and the 128/256-bit forms
// under AVX512VL, passing the z/x/y disambiguation suffix.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z          : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                            _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

// Instantiates vector and scalar forms for all element types; fp16 requires
// FP16, fp32/fp64 require DQI.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
                                      sched, HasFP16>,
            EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f16x_info, HasFP16>,
             EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      sched, HasDQI>,
            EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      sched, HasDQI>,
            EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
             EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
             EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
                           X86MemOperand x86memop, string Suffix = ""> {
  // The "_EVEX" suffixed variants are the EGPR (APX) encodings.
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                  Sched<[WriteMove]>;
  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
                  Sched<[WriteLoad]>, NoCD8;
  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(store KRC:$src, addr:$dst)]>,
                  Sched<[WriteStore]>, NoCD8;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr, RegisterClass KRC,
                               RegisterClass GRC, string Suffix = ""> {
  let hasSideEffects = 0 in {
    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                    Sched<[WriteMove]>;
    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                    Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI, NoEGPR] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, TB, PD;
let Predicates = [HasDQI, HasEGPR, In64BitMode] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
               EVEX, TB, PD;

let Predicates = [HasAVX512, NoEGPR] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, TB;
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
               EVEX, TB;

let Predicates = [HasBWI, NoEGPR] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, TB, XD, REX_W;
}
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
               EVEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
               EVEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
               EVEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
               EVEX, TB, XD, REX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  // Lowers GPR <-> small mask-vector copies through GR32.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Materialize a single bit into a zeroed v16i1: isolate bit 0 in a GPR,
  // then move it into a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}

// Instantiates b/w/d/q width variants under their respective feature
// predicates (b: DQI, w: AVX512F, d/q: BWI).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, TB, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, TB;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, TB, PD, REX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, TB, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Mask types narrower than 8 bits are always promoted through VK16 and the
// result is copied back to the source's own register class.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// BUGFIX: the result of a v1i1 vnot must be copied back to VK1 (the input's
// register class), not VK2 as previously written.
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def kk : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

// Instantiates b/w/d/q width variants (b: DQI, w: prdW, d/q: BWI).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

multiclass avx512_binop_pat<SDPatternOperator VOpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   KANDWkk>;
defm : avx512_binop_pat<vandn, KANDNWkk>;
defm : avx512_binop_pat<or,    KORWkk>;
defm : avx512_binop_pat<vxnor, KXNORWkk>;
defm : avx512_binop_pat<xor,   KXORWkk>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def kk : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX, VVVV, VEX_L, Sched<[sched]>;

    // Note the operand swap: $src2 ends up in the low half of the result.
    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#kk) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;

// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def kk : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

// Instantiates b/w/d/q width variants (b: DQI, w: prdW, d/q: BWI).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                              VEX, TB, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                              VEX, TB;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                              VEX, TB, REX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                              VEX, TB, PD, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ki : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
                 Sched<[sched]>;
}

// KSHIFT uses two opcodes: opc1 for the w/b forms and opc2 for the q/d forms.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TA, PD, REX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TA, PD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TA, PD, REX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TA, PD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Lower a compare-with-condition-code on a narrow (128/256-bit) integer
// vector when VLX is unavailable: widen both operands into a 512-bit (Wide)
// vector via INSERT_SUBREG over IMPLICIT_DEF, run the 512-bit compare, then
// COPY_TO_REGCLASS the resulting mask back down to Narrow.KRC. The upper
// lanes come from IMPLICIT_DEF, so their mask bits are undefined; only the
// low Narrow-width mask bits are meaningful after the re-class.
// NOTE(review): the "axv512_" prefix (vs "avx512_") looks like a typo; kept
// as-is since other uses may exist outside this region.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked compare: InstStr#"Zrri" with the condition code converted by
// X86pcmpm_imm.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Masked compare: an explicit 'and' with a mask is folded into the
// zero-masked compare (Zrrik); the narrow mask is re-classed up to Wide.KRC
// to serve as the write-mask.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (X86pcmpm_imm $cc)), Narrow.KRC)>;
}

// Same widening trick for the register-memory-broadcast (rmb) forms: the
// broadcast operand stays a memory operand of the 512-bit instruction, so
// only the register operand needs INSERT_SUBREG widening.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load. The broadcast appears as the LHS in the DAG,
// so the condition code is translated with X86pcmpm_imm_commute to keep the
// memory operand on the RHS of the instruction.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                         (Narrow.VT Narrow.RC:$src1),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
// Register-register, broadcast, and commuted-broadcast forms, each with a
// masked (and-with-mask folded into Zrrik/Zrmbik) companion pattern.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load. The immediate is rewritten with
// X86cmpm_imm_commute so the broadcast can remain the memory operand.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

// D/Q and PS/PD compares need only AVX512 (without VLX); B/W compares need
// BWI as well, hence the two predicate groups below.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
// A rematerializable, as-cheap-as-a-move pseudo producing an all-zero or
// all-one mask of the given width; expanded after ISel (isPseudo) and
// scheduled as a zero idiom (WriteZero).
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

// W/D/Q widths only; narrower mask widths are handled by re-classing the W
// result (see the patterns that follow).
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // All-zero/all-one masks narrower than 16 bits are materialized by the
  // 16-bit KSET0W/KSET1W pseudo and re-classing the result register.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are pure register-class copies: at index 0 the narrow mask
// occupies the low bits of the wide mask register, so no shift is needed.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Every (narrower, wider) pair of mask widths from v1i1 up to v64i1.
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// One vector-load opcode with all its masked forms:
//   rr      - plain register move (no pattern, isMoveReg);
//   rrkz    - zero-masking register form, selected from SelectOprr;
//   rm      - memory load; foldable and rematerializable; its ISel pattern is
//             suppressed when NoRMPattern is set;
//   rrk/rmk - merge-masking forms; $src0 is tied to $dst (Constraints) as the
//             pass-through value;
//   rmkz    - zero-masking memory load.
// The trailing Pats select masked_load (mload) with undef / all-zero /
// register pass-through onto the rmkz/rmk instruction of this width
// (Name # _.ZSuffix resolves to the Z/Z256/Z128 instantiation).
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
             EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;

  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
               EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                                         (vselect_mask _.KRCWM:$mask,
                                          (_.VT (ld_frag addr:$src1)),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
               EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                      "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Instantiates aligned loads: 512-bit under prd alone, 256/128-bit under
// prd + VLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned counterpart of avx512_alignedload_vl (plain LdFrag/masked_load),
// additionally forwarding SelectOprr to the register forms.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

// One vector-store opcode:
//   rr_REV/rrk_REV/rrkz_REV - register forms in MRMDestReg operand order;
//     isCodeGenOnly + ForceDisassemble, reachable in assembly only via the
//     ".s" InstAliases at the bottom;
//   mr  - memory store; pattern suppressed when NoMRPattern is set;
//   mrk - masked store (no pattern; selected by the mstore Pat below).
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
               Sched<[Sched.RR]>;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                 Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;

  // Masked store is selected against the Z-suffixed mrk of this width.
  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
            _.KRCWM:$mask, _.RC:$src)>;

  // ".s" mnemonic aliases expose the store-form (reversed-operand) register
  // encodings to the assembler.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Instantiates unaligned stores: 512-bit under prd, 256/128-bit under
// prd + VLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
  }
}

// Aligned counterpart of avx512_store_vl (alignedstore /
// masked_store_aligned).
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS>,
               TB, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS>,
               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

// The unaligned fp moves pass null_frag as SelectOprr, disabling the plain
// vselect patterns on their register forms.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS>,
               TB, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS>,
               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

// Most integer moves set NoRMPattern/NoMRPattern = 1: their unmasked
// load/store patterns are provided elsewhere.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS, 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS, 1>,
                 TB, PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS>,
                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, 1>,
                TB, XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, 1>,
                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, 1>,
                 TB, XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS>,
                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
// XMM/YMM spill-reload pseudos used without VLX; expanded post-RA into the
// corresponding ZMM load (see the comment above). No patterns: these are
// created directly by the spiller.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Store counterparts of the spill pseudos above.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// select(mask, 0, src) has the opposite sense from the zero-masking move
// (which keeps the src lanes where the mask is set), so the mask is inverted
// with KNOT first. The v8i64 case re-classes the 8-bit mask up to VK16 to
// use KNOTWkk, then back down.
// NOTE(review): presumably VK16 is used because the byte-width KNOT needs
// DQI, which isn't implied here — confirm against the KNOT definitions.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWkk (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWkk VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// If the mask is already inverted (vnot), fold the inversion into the
// zero-masking move directly instead of emitting a second KNOT.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lowers a masked select on a narrow (128/256-bit) vector without VLX:
// widen pass-through and source into a 512-bit register with INSERT_SUBREG,
// widen the mask with COPY_TO_REGCLASS, perform the 512-bit masked move
// (InstrStr#"rrk" / "rrkz"), and EXTRACT_SUBREG the narrow result back out.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // Merge-masking: select(mask, src1, src0).
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  // Zero-masking: select(mask, src1, 0).
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
3516let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 3517 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3518 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3519 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3520 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3521 3522 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3523 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3524 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3525 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3526} 3527 3528let Predicates = [HasBWI, NoVLX, HasEVEX512] in { 3529 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3530 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3531 3532 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3533 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3534 3535 defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>; 3536 defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>; 3537 3538 defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>; 3539 defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>; 3540} 3541 3542let Predicates = [HasAVX512] in { 3543 // 512-bit load. 
3544 def : Pat<(alignedloadv16i32 addr:$src), 3545 (VMOVDQA64Zrm addr:$src)>; 3546 def : Pat<(alignedloadv32i16 addr:$src), 3547 (VMOVDQA64Zrm addr:$src)>; 3548 def : Pat<(alignedloadv32f16 addr:$src), 3549 (VMOVAPSZrm addr:$src)>; 3550 def : Pat<(alignedloadv32bf16 addr:$src), 3551 (VMOVAPSZrm addr:$src)>; 3552 def : Pat<(alignedloadv64i8 addr:$src), 3553 (VMOVDQA64Zrm addr:$src)>; 3554 def : Pat<(loadv16i32 addr:$src), 3555 (VMOVDQU64Zrm addr:$src)>; 3556 def : Pat<(loadv32i16 addr:$src), 3557 (VMOVDQU64Zrm addr:$src)>; 3558 def : Pat<(loadv32f16 addr:$src), 3559 (VMOVUPSZrm addr:$src)>; 3560 def : Pat<(loadv32bf16 addr:$src), 3561 (VMOVUPSZrm addr:$src)>; 3562 def : Pat<(loadv64i8 addr:$src), 3563 (VMOVDQU64Zrm addr:$src)>; 3564 3565 // 512-bit store. 3566 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3567 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3568 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3569 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3570 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), 3571 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3572 def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst), 3573 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3574 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3575 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3576 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3577 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3578 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3579 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3580 def : Pat<(store (v32f16 VR512:$src), addr:$dst), 3581 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3582 def : Pat<(store (v32bf16 VR512:$src), addr:$dst), 3583 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3584 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3585 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3586} 3587 3588let Predicates = [HasVLX] in { 3589 // 128-bit load. 
3590 def : Pat<(alignedloadv4i32 addr:$src), 3591 (VMOVDQA64Z128rm addr:$src)>; 3592 def : Pat<(alignedloadv8i16 addr:$src), 3593 (VMOVDQA64Z128rm addr:$src)>; 3594 def : Pat<(alignedloadv8f16 addr:$src), 3595 (VMOVAPSZ128rm addr:$src)>; 3596 def : Pat<(alignedloadv8bf16 addr:$src), 3597 (VMOVAPSZ128rm addr:$src)>; 3598 def : Pat<(alignedloadv16i8 addr:$src), 3599 (VMOVDQA64Z128rm addr:$src)>; 3600 def : Pat<(loadv4i32 addr:$src), 3601 (VMOVDQU64Z128rm addr:$src)>; 3602 def : Pat<(loadv8i16 addr:$src), 3603 (VMOVDQU64Z128rm addr:$src)>; 3604 def : Pat<(loadv8f16 addr:$src), 3605 (VMOVUPSZ128rm addr:$src)>; 3606 def : Pat<(loadv8bf16 addr:$src), 3607 (VMOVUPSZ128rm addr:$src)>; 3608 def : Pat<(loadv16i8 addr:$src), 3609 (VMOVDQU64Z128rm addr:$src)>; 3610 3611 // 128-bit store. 3612 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3613 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3614 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3615 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3616 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), 3617 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3618 def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst), 3619 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3620 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3621 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3622 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3623 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3624 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3625 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3626 def : Pat<(store (v8f16 VR128X:$src), addr:$dst), 3627 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3628 def : Pat<(store (v8bf16 VR128X:$src), addr:$dst), 3629 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3630 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3631 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3632 3633 // 256-bit load. 
3634 def : Pat<(alignedloadv8i32 addr:$src), 3635 (VMOVDQA64Z256rm addr:$src)>; 3636 def : Pat<(alignedloadv16i16 addr:$src), 3637 (VMOVDQA64Z256rm addr:$src)>; 3638 def : Pat<(alignedloadv16f16 addr:$src), 3639 (VMOVAPSZ256rm addr:$src)>; 3640 def : Pat<(alignedloadv16bf16 addr:$src), 3641 (VMOVAPSZ256rm addr:$src)>; 3642 def : Pat<(alignedloadv32i8 addr:$src), 3643 (VMOVDQA64Z256rm addr:$src)>; 3644 def : Pat<(loadv8i32 addr:$src), 3645 (VMOVDQU64Z256rm addr:$src)>; 3646 def : Pat<(loadv16i16 addr:$src), 3647 (VMOVDQU64Z256rm addr:$src)>; 3648 def : Pat<(loadv16f16 addr:$src), 3649 (VMOVUPSZ256rm addr:$src)>; 3650 def : Pat<(loadv16bf16 addr:$src), 3651 (VMOVUPSZ256rm addr:$src)>; 3652 def : Pat<(loadv32i8 addr:$src), 3653 (VMOVDQU64Z256rm addr:$src)>; 3654 3655 // 256-bit store. 3656 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3657 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3658 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3659 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3660 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), 3661 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3662 def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst), 3663 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3664 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3665 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3666 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3667 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3668 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3669 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3670 def : Pat<(store (v16f16 VR256X:$src), addr:$dst), 3671 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3672 def : Pat<(store (v16bf16 VR256X:$src), addr:$dst), 3673 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3674 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3675 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3676} 3677 3678multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> { 3679let Predicates = [HasBWI] in { 3680 def : 
Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))), 3681 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; 3682 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)), 3683 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>; 3684 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3685 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))), 3686 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3687 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3688 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)), 3689 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3690 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3691 (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))), 3692 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3693 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3694 (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)), 3695 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3696 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))), 3697 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3698 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)), 3699 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3700 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)), 3701 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3702 3703 def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask), 3704 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>; 3705} 3706let Predicates = [HasBWI, HasVLX] in { 3707 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))), 3708 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>; 3709 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)), 3710 (VMOVDQU16Z256rrkz 
VK16WM:$mask, VR256X:$src1)>; 3711 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3712 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))), 3713 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3714 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3715 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)), 3716 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3717 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3718 (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))), 3719 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3720 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3721 (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)), 3722 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3723 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))), 3724 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3725 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)), 3726 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3727 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)), 3728 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3729 3730 def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask), 3731 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>; 3732 3733 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))), 3734 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>; 3735 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)), 3736 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>; 3737 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3738 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))), 3739 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3740 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3741 (_.info128.VT 
(_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)), 3742 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3743 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3744 (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))), 3745 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3746 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3747 (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)), 3748 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3749 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))), 3750 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3751 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)), 3752 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3753 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)), 3754 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3755 3756 def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask), 3757 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>; 3758} 3759} 3760 3761defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>; 3762defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>; 3763 3764// Move Int Doubleword to Packed Double Int 3765// 3766let ExeDomain = SSEPackedInt in { 3767def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3768 "vmovd\t{$src, $dst|$dst, $src}", 3769 [(set VR128X:$dst, 3770 (v4i32 (scalar_to_vector GR32:$src)))]>, 3771 EVEX, Sched<[WriteVecMoveFromGpr]>; 3772def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3773 "vmovd\t{$src, $dst|$dst, $src}", 3774 [(set VR128X:$dst, 3775 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3776 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3777def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3778 "vmovq\t{$src, $dst|$dst, $src}", 3779 [(set VR128X:$dst, 3780 (v2i64 (scalar_to_vector GR64:$src)))]>, 3781 
EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 3782let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3783def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3784 (ins i64mem:$src), 3785 "vmovq\t{$src, $dst|$dst, $src}", []>, 3786 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3787let isCodeGenOnly = 1 in { 3788def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3789 "vmovq\t{$src, $dst|$dst, $src}", 3790 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3791 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 3792def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3793 "vmovq\t{$src, $dst|$dst, $src}", 3794 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3795 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 3796} 3797} // ExeDomain = SSEPackedInt 3798 3799// Move Int Doubleword to Single Scalar 3800// 3801let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3802def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3803 "vmovd\t{$src, $dst|$dst, $src}", 3804 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3805 EVEX, Sched<[WriteVecMoveFromGpr]>; 3806} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3807 3808// Move doubleword from xmm register to r/m32 3809// 3810let ExeDomain = SSEPackedInt in { 3811def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3812 "vmovd\t{$src, $dst|$dst, $src}", 3813 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3814 (iPTR 0)))]>, 3815 EVEX, Sched<[WriteVecMoveToGpr]>; 3816def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3817 (ins i32mem:$dst, VR128X:$src), 3818 "vmovd\t{$src, $dst|$dst, $src}", 3819 [(store (i32 (extractelt (v4i32 VR128X:$src), 3820 (iPTR 0))), addr:$dst)]>, 3821 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3822} // ExeDomain = SSEPackedInt 3823 3824// Move quadword from xmm1 register to r/m64 3825// 3826let ExeDomain = SSEPackedInt in { 3827def VMOVPQIto64Zrr : 
I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3828 "vmovq\t{$src, $dst|$dst, $src}", 3829 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 3830 (iPTR 0)))]>, 3831 TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>, 3832 Requires<[HasAVX512]>; 3833 3834let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3835def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3836 "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD, 3837 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>, 3838 Requires<[HasAVX512, In64BitMode]>; 3839 3840def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3841 (ins i64mem:$dst, VR128X:$src), 3842 "vmovq\t{$src, $dst|$dst, $src}", 3843 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3844 addr:$dst)]>, 3845 EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>, 3846 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3847 3848let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3849def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3850 (ins VR128X:$src), 3851 "vmovq\t{$src, $dst|$dst, $src}", []>, 3852 EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>; 3853} // ExeDomain = SSEPackedInt 3854 3855def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3856 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3857 3858let Predicates = [HasAVX512] in { 3859 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 3860 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 3861} 3862 3863// Move Scalar Single to Double Int 3864// 3865let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3866def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3867 (ins FR32X:$src), 3868 "vmovd\t{$src, $dst|$dst, $src}", 3869 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3870 EVEX, Sched<[WriteVecMoveToGpr]>; 3871} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3872 3873// Move Quadword Int to Packed Quadword Int 3874// 3875let ExeDomain = SSEPackedInt in { 3876def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, 
(outs VR128X:$dst), 3877 (ins i64mem:$src), 3878 "vmovq\t{$src, $dst|$dst, $src}", 3879 [(set VR128X:$dst, 3880 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3881 EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3882} // ExeDomain = SSEPackedInt 3883 3884// Allow "vmovd" but print "vmovq". 3885def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3886 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3887def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3888 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3889 3890// Conversions between masks and scalar fp. 3891def : Pat<(v32i1 (bitconvert FR32X:$src)), 3892 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>; 3893def : Pat<(f32 (bitconvert VK32:$src)), 3894 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>; 3895 3896def : Pat<(v64i1 (bitconvert FR64X:$src)), 3897 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>; 3898def : Pat<(f64 (bitconvert VK64:$src)), 3899 (VMOV64toSDZrr (KMOVQrk VK64:$src))>; 3900 3901//===----------------------------------------------------------------------===// 3902// AVX-512 MOVSH, MOVSS, MOVSD 3903//===----------------------------------------------------------------------===// 3904 3905multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, 3906 X86VectorVTInfo _, Predicate prd = HasAVX512> { 3907 let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in 3908 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3909 (ins _.RC:$src1, _.RC:$src2), 3910 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3911 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 3912 _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>; 3913 let Predicates = [prd] in { 3914 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3915 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3916 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 3917 "$dst {${mask}} {z}, $src1, $src2}"), 3918 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3919 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3920 
_.ImmAllZerosV)))], 3921 _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 3922 let Constraints = "$src0 = $dst" in 3923 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3924 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3925 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 3926 "$dst {${mask}}, $src1, $src2}"), 3927 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3928 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3929 (_.VT _.RC:$src0))))], 3930 _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 3931 let canFoldAsLoad = 1, isReMaterializable = 1 in { 3932 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), 3933 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3934 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))], 3935 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3936 // _alt version uses FR32/FR64 register class. 3937 let isCodeGenOnly = 1 in 3938 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), 3939 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3940 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], 3941 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3942 } 3943 let mayLoad = 1, hasSideEffects = 0 in { 3944 let Constraints = "$src0 = $dst" in 3945 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3946 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), 3947 !strconcat(asm, "\t{$src, $dst {${mask}}|", 3948 "$dst {${mask}}, $src}"), 3949 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; 3950 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3951 (ins _.KRCWM:$mask, _.ScalarMemOp:$src), 3952 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", 3953 "$dst {${mask}} {z}, $src}"), 3954 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; 3955 } 3956 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), 3957 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3958 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, 3959 EVEX, Sched<[WriteFStore]>; 
3960 let mayStore = 1, hasSideEffects = 0 in 3961 def mrk: AVX512PI<0x11, MRMDestMem, (outs), 3962 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src), 3963 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), 3964 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>; 3965 } 3966} 3967 3968defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, 3969 VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>; 3970 3971defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, 3972 VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 3973 3974defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info, 3975 HasFP16>, 3976 VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 3977 3978multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, 3979 PatLeaf ZeroFP, X86VectorVTInfo _> { 3980 3981def : Pat<(_.VT (OpNode _.RC:$src0, 3982 (_.VT (scalar_to_vector 3983 (_.EltVT (X86selects VK1WM:$mask, 3984 (_.EltVT _.FRC:$src1), 3985 (_.EltVT _.FRC:$src2))))))), 3986 (!cast<Instruction>(InstrStr#rrk) 3987 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)), 3988 VK1WM:$mask, 3989 (_.VT _.RC:$src0), 3990 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 3991 3992def : Pat<(_.VT (OpNode _.RC:$src0, 3993 (_.VT (scalar_to_vector 3994 (_.EltVT (X86selects VK1WM:$mask, 3995 (_.EltVT _.FRC:$src1), 3996 (_.EltVT ZeroFP))))))), 3997 (!cast<Instruction>(InstrStr#rrkz) 3998 VK1WM:$mask, 3999 (_.VT _.RC:$src0), 4000 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4001} 4002 4003multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4004 dag Mask, RegisterClass MaskRC> { 4005 4006def : Pat<(masked_store 4007 (_.info512.VT (insert_subvector undef, 4008 (_.info128.VT _.info128.RC:$src), 4009 (iPTR 0))), addr:$dst, Mask), 4010 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4011 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4012 _.info128.RC:$src)>; 4013 4014} 4015 4016multiclass avx512_store_scalar_lowering_subreg<string InstrStr, 4017 
AVX512VLVectorVTInfo _, 4018 dag Mask, RegisterClass MaskRC, 4019 SubRegIndex subreg> { 4020 4021def : Pat<(masked_store 4022 (_.info512.VT (insert_subvector undef, 4023 (_.info128.VT _.info128.RC:$src), 4024 (iPTR 0))), addr:$dst, Mask), 4025 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4026 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4027 _.info128.RC:$src)>; 4028 4029} 4030 4031// This matches the more recent codegen from clang that avoids emitting a 512 4032// bit masked store directly. Codegen will widen 128-bit masked store to 512 4033// bits on AVX512F only targets. 4034multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4035 AVX512VLVectorVTInfo _, 4036 dag Mask512, dag Mask128, 4037 RegisterClass MaskRC, 4038 SubRegIndex subreg> { 4039 4040// AVX512F pattern. 4041def : Pat<(masked_store 4042 (_.info512.VT (insert_subvector undef, 4043 (_.info128.VT _.info128.RC:$src), 4044 (iPTR 0))), addr:$dst, Mask512), 4045 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4046 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4047 _.info128.RC:$src)>; 4048 4049// AVX512VL pattern. 
4050def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4051 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4052 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4053 _.info128.RC:$src)>; 4054} 4055 4056multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4057 dag Mask, RegisterClass MaskRC> { 4058 4059def : Pat<(_.info128.VT (extract_subvector 4060 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4061 _.info512.ImmAllZerosV)), 4062 (iPTR 0))), 4063 (!cast<Instruction>(InstrStr#rmkz) 4064 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4065 addr:$srcAddr)>; 4066 4067def : Pat<(_.info128.VT (extract_subvector 4068 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4069 (_.info512.VT (insert_subvector undef, 4070 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4071 (iPTR 0))))), 4072 (iPTR 0))), 4073 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4074 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4075 addr:$srcAddr)>; 4076 4077} 4078 4079multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4080 AVX512VLVectorVTInfo _, 4081 dag Mask, RegisterClass MaskRC, 4082 SubRegIndex subreg> { 4083 4084def : Pat<(_.info128.VT (extract_subvector 4085 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4086 _.info512.ImmAllZerosV)), 4087 (iPTR 0))), 4088 (!cast<Instruction>(InstrStr#rmkz) 4089 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4090 addr:$srcAddr)>; 4091 4092def : Pat<(_.info128.VT (extract_subvector 4093 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4094 (_.info512.VT (insert_subvector undef, 4095 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4096 (iPTR 0))))), 4097 (iPTR 0))), 4098 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4099 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4100 addr:$srcAddr)>; 4101 4102} 4103 4104// This matches the more recent codegen from clang that avoids emitting a 512 4105// 
bit masked load directly. Codegen will widen 128-bit masked load to 512 4106// bits on AVX512F only targets. 4107multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4108 AVX512VLVectorVTInfo _, 4109 dag Mask512, dag Mask128, 4110 RegisterClass MaskRC, 4111 SubRegIndex subreg> { 4112// AVX512F patterns. 4113def : Pat<(_.info128.VT (extract_subvector 4114 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4115 _.info512.ImmAllZerosV)), 4116 (iPTR 0))), 4117 (!cast<Instruction>(InstrStr#rmkz) 4118 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4119 addr:$srcAddr)>; 4120 4121def : Pat<(_.info128.VT (extract_subvector 4122 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4123 (_.info512.VT (insert_subvector undef, 4124 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4125 (iPTR 0))))), 4126 (iPTR 0))), 4127 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4128 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4129 addr:$srcAddr)>; 4130 4131// AVX512Vl patterns. 
// Zero-passthru 128-bit masked load -> zeroing-masked (rmkz) load. The
// single-bit mask arrives in a GPR; it is inserted into a 32-bit register
// via INSERT_SUBREG and then copied into the VK1WM mask class.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;

// X86vzmovl passthru (element 0 from $src, upper elements zero) -> merging
// (rmk) load with $src as the merge source.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
           addr:$srcAddr)>;
}

// Instantiate the masked scalar select lowering for f32 (VMOVSS) and
// f64 (VMOVSD).
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar store lowering. The Mask dag spells out how the one-bit
// mask is materialized from a GPR: bit 0 isolated with an AND, then
// bitconverted to the vXi1 mask type (truncated/widened as needed).
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Same lowerings for f16 (VMOVSH), gated on FP16 support.
let Predicates = [HasFP16] in {
defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm :
avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4172 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4173defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4174 (v32i1 (insert_subvector 4175 (v32i1 immAllZerosV), 4176 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4177 (iPTR 0))), 4178 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4179 GR8, sub_8bit>; 4180 4181def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))), 4182 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk 4183 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)), 4184 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4185 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4186 4187def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)), 4188 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4189 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4190} 4191 4192defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4193 (v16i1 (insert_subvector 4194 (v16i1 immAllZerosV), 4195 (v4i1 (extract_subvector 4196 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4197 (iPTR 0))), 4198 (iPTR 0))), 4199 (v4i1 (extract_subvector 4200 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4201 (iPTR 0))), GR8, sub_8bit>; 4202defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4203 (v8i1 4204 (extract_subvector 4205 (v16i1 4206 (insert_subvector 4207 (v16i1 immAllZerosV), 4208 (v2i1 (extract_subvector 4209 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4210 (iPTR 0))), 4211 (iPTR 0))), 4212 (iPTR 0))), 4213 (v2i1 (extract_subvector 4214 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4215 (iPTR 0))), GR8, sub_8bit>; 4216 4217defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4218 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4219defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4220 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 
1))))), GR16, sub_16bit>; 4221defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4222 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4223 4224defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4225 (v16i1 (insert_subvector 4226 (v16i1 immAllZerosV), 4227 (v4i1 (extract_subvector 4228 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4229 (iPTR 0))), 4230 (iPTR 0))), 4231 (v4i1 (extract_subvector 4232 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4233 (iPTR 0))), GR8, sub_8bit>; 4234defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4235 (v8i1 4236 (extract_subvector 4237 (v16i1 4238 (insert_subvector 4239 (v16i1 immAllZerosV), 4240 (v2i1 (extract_subvector 4241 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4242 (iPTR 0))), 4243 (iPTR 0))), 4244 (iPTR 0))), 4245 (v2i1 (extract_subvector 4246 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4247 (iPTR 0))), GR8, sub_8bit>; 4248 4249def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), 4250 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk 4251 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), 4252 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4253 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4254 4255def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), 4256 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4257 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4258 4259def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), 4260 (COPY_TO_REGCLASS 4261 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), 4262 VK1WM:$mask, addr:$src)), 4263 FR32X)>; 4264def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), 4265 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; 4266 4267def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), 4268 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk 
4269 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), 4270 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4271 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4272 4273def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), 4274 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4275 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4276 4277def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), 4278 (COPY_TO_REGCLASS 4279 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), 4280 VK1WM:$mask, addr:$src)), 4281 FR64X)>; 4282def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), 4283 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; 4284 4285 4286def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))), 4287 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4288def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))), 4289 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4290 4291def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))), 4292 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4293def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))), 4294 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4295 4296let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 4297 let Predicates = [HasFP16] in { 4298 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4299 (ins VR128X:$src1, VR128X:$src2), 4300 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4301 []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG, 4302 Sched<[SchedWriteFShuffle.XMM]>; 4303 4304 let Constraints = "$src0 = $dst" in 4305 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4306 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask, 4307 VR128X:$src1, VR128X:$src2), 4308 
// (continuation of VMOVSHZrrk_REV, whose def line is above this chunk)
                        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
                        "$dst {${mask}}, $src1, $src2}",
                        []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
                        Sched<[SchedWriteFShuffle.XMM]>;

  // Zero-masked reversed-encoding form of VMOVSH; no ISel pattern (empty
  // pattern list) -- these _REV defs exist for the assembler/disassembler.
  def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
                        "$dst {${mask}} {z}, $src1, $src2}",
                        []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
                        Sched<[SchedWriteFShuffle.XMM]>;
  }

  // Reversed (MRMDestReg, i.e. "store form") encodings of VMOVSS. Pattern
  // lists are empty: selection always uses the normal forms, these only give
  // the alternate encoding a name.
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>, TB, XS, EVEX, VVVV, VEX_LIG,
                         Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked form: $src0 provides the preserved elements, tied to $dst.
  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                         "$dst {${mask}}, $src1, $src2}",
                         []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                         "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
                         Sched<[SchedWriteFShuffle.XMM]>;

  // Reversed encodings of VMOVSD (same structure as the VMOVSS defs above,
  // but XD prefix and REX_W for the 64-bit element).
  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins VR128X:$src1, VR128X:$src2),
                         "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
                         Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                         "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                         "$dst {${mask}}, $src1, $src2}",
                         []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
                         REX_W, Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                              VR128X:$src2),
                         "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                         "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
                         REX_W, Sched<[SchedWriteFShuffle.XMM]>;
}

// ".s" assembler aliases that force the reversed (_REV) encodings of
// vmovsh/vmovss/vmovsd; emit-priority 0 so the disassembler never prints them.
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;

// When optimizing for size, lower "move low element and zero the rest"
// (X86vzmovl) via VMOVSS against a zeroed register; the wider types go
// through the low xmm subregister and SUBREG_TO_REG to assert zero uppers.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
             (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
             (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
             (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
             (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Speed-optimized lowering of X86vzmovl for 512-bit types: blend the low
// element against a zero register (VEX blends, so limited to XMM0-15).
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

// Scalar load -> vector and zero-extending-load patterns for VMOVSS/VMOVSD.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

// Same lowering idioms for the FP16 scalar move (VMOVSH).
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
             (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
             (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
             (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
             (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

// vmovq xmm, xmm: copies the low i64 and zeroes the upper bits (X86vzmovl).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                    (v2i64 VR128X:$src))))]>,
                                  EVEX, REX_W;
}

// GPR<->XMM moves and zero-extending load patterns built on movd/movq.
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                      GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

// Non-temporal aligned vector load, 512-bit. Patterns are added separately
// below (empty pattern list here).
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.ZMM.RM]>,
                            EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

// 256/128-bit non-temporal loads require VLX.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.YMM.RM]>,
                                 EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.XMM.RM]>,
                                 EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// One non-temporal store instruction (mr form only) for a given vector type.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

// Instantiates the non-temporal store at 512/256/128 bit widths.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, TB, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, TB;

// Map the remaining 512-bit element types onto the i64-typed NT instructions.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

// Same mapping for the 256/128-bit VLX variants.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Register-register and register-memory forms of a binary integer op,
// including the masked/zero-masked variants via AVX512_maskable.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX, VVVV,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// As avx512_binop_rm, plus the broadcast-from-memory (rmb, EVEX.b) form.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX, VVVV, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates avx512_binop_rm at 512/256/128 bit widths (no broadcast).
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Instantiates avx512_binop_rmb (with broadcast) at all three widths.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Element-size wrappers: q/d get broadcast forms, w/b do not (no EVEX.b
// broadcast for 8/16-bit elements).
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  REX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 WIG;
}

// Combined d+q and b+w instantiators (suffix appended to the mnemonic).
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

// All four element sizes; b/w need BWI, d/q only need AVX512.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

// Binary op whose source and destination types differ; the broadcast form
// uses a third VT (_Brdct) for the memory operand.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX, VVVV,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Brdct.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                     (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX, VVVV, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Basic add/sub and the saturating b/w variants.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
// Vector multiplies (low/high halves, doubling widening forms).
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// Width instantiator around avx512_binop_rm2 with separate 512-bit and
// VL predicate lists and overridable broadcast VTs per width.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, list<Predicate> prds512,
                            list<Predicate> prds,
                            X86VectorVTInfo _VTInfo512 = _SrcVTInfo.info512,
                            X86VectorVTInfo _VTInfo256 = _SrcVTInfo.info256,
                            X86VectorVTInfo _VTInfo128 = _SrcVTInfo.info128> {
  let Predicates = prds512 in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 _VTInfo512>, EVEX_V512;
  let Predicates = prds in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _VTInfo256>, EVEX_V256;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _VTInfo128>, EVEX_V128;
  }
}

// VBMI multishift: i8 source/destination with i64-typed broadcast operand.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, [HasVBMI], [HasVLX, HasVBMI],
                                v8i64_info, v4i64x_info, v2i64x_info>, T8,
                                EVEX_CD8<64, CD8VF>, REX_W;

// Broadcast-memory form for the pack instructions (d->w packs only).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                            OpcodeStr,
                            "${src2}"#_Src.BroadcastStr#", $src1",
                            "$src1, ${src2}"#_Src.BroadcastStr,
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                            EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Register and memory forms for pack-style ops (src and dst VTs differ).
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// i32->i16 packs at all widths (includes the broadcast form).
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16->i8 packs at all widths (no broadcast form for 16-bit elements).
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, WIG;
  }
}

// pmadd-style ops reuse the packs_rm shape (widening src->dst VTs).
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;

// Signed/unsigned min/max for all element sizes.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}

// Lower 128/256-bit i64 min/max through the 512-bit instruction when VLX
// is unavailable: widen sources via INSERT_SUBREG, extract the result.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
                sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
                sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512  Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

// Only d/q-typed logical instructions exist; map the byte/word element
// types onto the q-typed forms.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

// Same b/w-to-q mapping for the 512-bit logical instructions.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

// Same idea as avx512_logical_lowering, but for the embedded-broadcast memory
// forms (rmbk/rmbkz): the broadcast is done in IntInfo's element type.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

// Instantiate the masked-lowering patterns for each vector width, gated on
// the feature predicate that provides that width (VLX for 128/256 bit,
// plain AVX512 for 512 bit).
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

// Broadcast-form counterpart of avx512_logical_lowering_sizes.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

// Cross product of select element type x logic element type. The "D"/"Q"
// suffix picks the dword/qword instruction family the vselect type maps to.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm :
       avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                     avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  // Broadcast forms only exist where select and logic element widths match.
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;

//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binop. Produces the masked "_Int" (intrinsic) rr/rm variants plus
// codegen-only FRC-register forms used when selecting plain scalar fadd/fsub
// etc. All forms read MXCSR and may raise FP exceptions.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">,
                           Sched<[sched]>;

  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                  (_.ScalarIntMemFrags addr:$src2))), "_Int">,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only FRC forms: matched directly against the scalar OpNode.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                         Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                          (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Scalar FP binop with explicit static rounding control ($rc, EVEX.b set).
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc)), "_Int">,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Scalar FP binop with a {sae} (suppress-all-exceptions) variant in addition
// to the normal rr/rm/_Int and codegen-only forms.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                SDPatternOperator OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">,
                           Sched<[sched]>, SIMD_EXC;

  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                  (_.ScalarIntMemFrags addr:$src2))), "_Int">,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                         Sched<[sched]> {
    let
        isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                          (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // {sae} form: exceptions suppressed, EVEX.b set.
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                          "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                          (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), "_Int">,
                          EVEX_B, Sched<[sched]>;
  }
}

// Scalar binop with rounding: instantiates ss/sd (and sh under FP16) variants
// of avx512_fp_scalar plus the embedded-rounding form.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl>,
             TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl>,
             TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                      sched.PH.Scl>,
               T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

// Scalar binop with {sae}: ss/sd (and sh under FP16) variants.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
             TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
             TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                    VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
               T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86any_fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86any_fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                         Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                          (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

multiclass
           avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable,
                            string suffix = _.Suffix,
                            string ClobberConstraint = "",
                            bit MayRaiseFPException = 1> {
  // Packed FP binop: rr, rm, and embedded-broadcast (rmb) forms. OpNode is
  // used for the unmasked pattern, MaskOpNode for the masked ones (split so
  // e.g. a strict/"any" node can be used unmasked only).
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                    (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                    ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Packed FP binop with embedded rounding control ($rc, EVEX.b + EVEX_RC).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  string suffix = _.Suffix,
                                  string ClobberConstraint = ""> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  // The three 0s disable commuting in all mask variants; vselect_mask is
  // passed explicitly only to reach the trailing ClobberConstraint argument.
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                  0, 0, 0, vselect_mask, ClobberConstraint>,
                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP binop with {sae} (suppress-all-exceptions, EVEX.b).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
}

// Instantiate ps/pd packed forms at 512-bit (prd) and 128/256-bit (prd+VLX).
// IsPD128Commutable allows the 2xf64 form to differ (used by instructions
// whose v2f64 variant has different commutability).
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
  defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
                                 EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
                                 EVEX_CD8<32, CD8VF>;
  defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                 sched.PD.XMM, IsPD128Commutable,
                                 IsCommutable>, EVEX_V128, TB, PD, REX_W,
                                 EVEX_CD8<64, CD8VF>;
  defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                 sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
                                 EVEX_CD8<64, CD8VF>;
  }
}

// FP16 (ph) counterpart of avx512_fp_binop_p, gated on HasFP16.
multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                              sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
                              EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
  defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
                                 EVEX_CD8<16, CD8VF>;
  defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
                                 EVEX_CD8<16, CD8VF>;
  }
}

// Embedded-rounding variants exist only at 512-bit vector width.
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
             EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
             EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
             EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

// {sae} variants also exist only at 512-bit vector width.
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                  v32f16_info>,
             EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
             EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
             EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86any_fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86any_fmin, X86fmin, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86any_fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86any_fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants (X86fminc/X86fmaxc) — codegen-only.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
// FP logic ops never touch MXCSR and cannot raise FP exceptions.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                             SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
}

// Packed VSCALEF-style op: rr, rm, and embedded-broadcast forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                           EVEX, VVVV, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                           EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                           "${src2}"#_.BroadcastStr#", $src1",
                           "$src1, ${src2}"#_.BroadcastStr,
                           (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                           EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar VSCALEF-style op (intrinsic memory form only).
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// All VSCALEF forms: packed (opc) and scalar (opcScaler) across ph/ps/pd and
// sh/ss/sd, including 512-bit rounding variants and VLX 128/256-bit forms.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                  EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                  EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;

//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
5816//===----------------------------------------------------------------------===// 5817 5818multiclass avx512_vptest<bits<8> opc, string OpcodeStr, 5819 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5820 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. 5821 // There are just too many permutations due to commutability and bitcasts. 5822 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 5823 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 5824 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5825 "$src2, $src1", "$src1, $src2", 5826 (null_frag), (null_frag), 1>, 5827 EVEX, VVVV, Sched<[sched]>; 5828 let mayLoad = 1 in 5829 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5830 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 5831 "$src2, $src1", "$src1, $src2", 5832 (null_frag), (null_frag)>, 5833 EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 5834 Sched<[sched.Folded, sched.ReadAfterFold]>; 5835 } 5836} 5837 5838multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, 5839 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5840 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 5841 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 5842 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 5843 "${src2}"#_.BroadcastStr#", $src1", 5844 "$src1, ${src2}"#_.BroadcastStr, 5845 (null_frag), (null_frag)>, 5846 EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 5847 Sched<[sched.Folded, sched.ReadAfterFold]>; 5848} 5849 5850multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, 5851 X86SchedWriteWidths sched, 5852 AVX512VLVectorVTInfo _> { 5853 let Predicates = [HasAVX512] in 5854 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>, 5855 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512; 5856 5857 let Predicates = [HasAVX512, HasVLX] in { 5858 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>, 5859 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, 
_.info256>, EVEX_V256; 5860 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>, 5861 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 5862 } 5863} 5864 5865multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 5866 X86SchedWriteWidths sched> { 5867 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 5868 avx512vl_i32_info>; 5869 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 5870 avx512vl_i64_info>, REX_W; 5871} 5872 5873multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 5874 X86SchedWriteWidths sched> { 5875 let Predicates = [HasBWI] in { 5876 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 5877 v32i16_info>, EVEX_V512, REX_W; 5878 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 5879 v64i8_info>, EVEX_V512; 5880 } 5881 5882 let Predicates = [HasVLX, HasBWI] in { 5883 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 5884 v16i16x_info>, EVEX_V256, REX_W; 5885 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 5886 v8i16x_info>, EVEX_V128, REX_W; 5887 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 5888 v32i8x_info>, EVEX_V256; 5889 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 5890 v16i8x_info>, EVEX_V128; 5891 } 5892} 5893 5894multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 5895 X86SchedWriteWidths sched> : 5896 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 5897 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 5898 5899defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 5900 SchedWriteVecLogic>, T8, PD; 5901defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 5902 SchedWriteVecLogic>, T8, XS; 5903 5904//===----------------------------------------------------------------------===// 5905// AVX-512 Shift instructions 5906//===----------------------------------------------------------------------===// 5907 5908multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, 5909 string OpcodeStr, 
SDNode OpNode, 5910 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5911 let ExeDomain = _.ExeDomain in { 5912 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 5913 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 5914 "$src2, $src1", "$src1, $src2", 5915 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, 5916 Sched<[sched]>; 5917 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5918 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 5919 "$src2, $src1", "$src1, $src2", 5920 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), 5921 (i8 timm:$src2)))>, 5922 Sched<[sched.Folded]>; 5923 } 5924} 5925 5926multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 5927 string OpcodeStr, SDNode OpNode, 5928 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5929 let ExeDomain = _.ExeDomain in 5930 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 5931 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 5932 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2", 5933 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, 5934 EVEX_B, Sched<[sched.Folded]>; 5935} 5936 5937multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5938 X86FoldableSchedWrite sched, ValueType SrcVT, 5939 X86VectorVTInfo _> { 5940 // src2 is always 128-bit 5941 let ExeDomain = _.ExeDomain in { 5942 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5943 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 5944 "$src2, $src1", "$src1, $src2", 5945 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 5946 AVX512BIBase, EVEX, VVVV, Sched<[sched]>; 5947 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5948 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 5949 "$src2, $src1", "$src1, $src2", 5950 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, 5951 AVX512BIBase, 5952 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 5953 } 5954} 5955 5956multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 5957 
X86SchedWriteWidths sched, ValueType SrcVT, 5958 AVX512VLVectorVTInfo VTInfo, 5959 Predicate prd> { 5960 let Predicates = [prd] in 5961 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 5962 VTInfo.info512>, EVEX_V512, 5963 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 5964 let Predicates = [prd, HasVLX] in { 5965 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 5966 VTInfo.info256>, EVEX_V256, 5967 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 5968 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 5969 VTInfo.info128>, EVEX_V128, 5970 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 5971 } 5972} 5973 5974multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 5975 string OpcodeStr, SDNode OpNode, 5976 X86SchedWriteWidths sched> { 5977 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 5978 avx512vl_i32_info, HasAVX512>; 5979 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 5980 avx512vl_i64_info, HasAVX512>, REX_W; 5981 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 5982 avx512vl_i16_info, HasBWI>; 5983} 5984 5985multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 5986 string OpcodeStr, SDNode OpNode, 5987 X86SchedWriteWidths sched, 5988 AVX512VLVectorVTInfo VTInfo> { 5989 let Predicates = [HasAVX512] in 5990 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5991 sched.ZMM, VTInfo.info512>, 5992 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 5993 VTInfo.info512>, EVEX_V512; 5994 let Predicates = [HasAVX512, HasVLX] in { 5995 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 5996 sched.YMM, VTInfo.info256>, 5997 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 5998 VTInfo.info256>, EVEX_V256; 5999 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6000 sched.XMM, VTInfo.info128>, 6001 avx512_shift_rmbi<opc, ImmFormM, 
OpcodeStr, OpNode, sched.XMM, 6002 VTInfo.info128>, EVEX_V128; 6003 } 6004} 6005 6006multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 6007 string OpcodeStr, SDNode OpNode, 6008 X86SchedWriteWidths sched> { 6009 let Predicates = [HasBWI] in 6010 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6011 sched.ZMM, v32i16_info>, EVEX_V512, WIG; 6012 let Predicates = [HasVLX, HasBWI] in { 6013 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6014 sched.YMM, v16i16x_info>, EVEX_V256, WIG; 6015 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6016 sched.XMM, v8i16x_info>, EVEX_V128, WIG; 6017 } 6018} 6019 6020multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 6021 Format ImmFormR, Format ImmFormM, 6022 string OpcodeStr, SDNode OpNode, 6023 X86SchedWriteWidths sched> { 6024 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 6025 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 6026 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 6027 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; 6028} 6029 6030defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 6031 SchedWriteVecShiftImm>, 6032 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 6033 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6034 6035defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 6036 SchedWriteVecShiftImm>, 6037 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 6038 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6039 6040defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 6041 SchedWriteVecShiftImm>, 6042 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 6043 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6044 6045defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 6046 SchedWriteVecShiftImm>, 
AVX512BIi8Base, EVEX, VVVV; 6047defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 6048 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6049 6050defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 6051 SchedWriteVecShift>; 6052defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 6053 SchedWriteVecShift>; 6054defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 6055 SchedWriteVecShift>; 6056 6057// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 6058let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6059 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 6060 (EXTRACT_SUBREG (v8i64 6061 (VPSRAQZrr 6062 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6063 VR128X:$src2)), sub_ymm)>; 6064 6065 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6066 (EXTRACT_SUBREG (v8i64 6067 (VPSRAQZrr 6068 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6069 VR128X:$src2)), sub_xmm)>; 6070 6071 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 6072 (EXTRACT_SUBREG (v8i64 6073 (VPSRAQZri 6074 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6075 timm:$src2)), sub_ymm)>; 6076 6077 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 6078 (EXTRACT_SUBREG (v8i64 6079 (VPSRAQZri 6080 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6081 timm:$src2)), sub_xmm)>; 6082} 6083 6084//===-------------------------------------------------------------------===// 6085// Variable Bit Shifts 6086//===-------------------------------------------------------------------===// 6087 6088multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 6089 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6090 let ExeDomain = _.ExeDomain in { 6091 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 6092 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 6093 "$src2, 
$src1", "$src1, $src2", 6094 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 6095 AVX5128IBase, EVEX, VVVV, Sched<[sched]>; 6096 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6097 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6098 "$src2, $src1", "$src1, $src2", 6099 (_.VT (OpNode _.RC:$src1, 6100 (_.VT (_.LdFrag addr:$src2))))>, 6101 AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6102 Sched<[sched.Folded, sched.ReadAfterFold]>; 6103 } 6104} 6105 6106multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 6107 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6108 let ExeDomain = _.ExeDomain in 6109 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6110 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6111 "${src2}"#_.BroadcastStr#", $src1", 6112 "$src1, ${src2}"#_.BroadcastStr, 6113 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 6114 AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6115 Sched<[sched.Folded, sched.ReadAfterFold]>; 6116} 6117 6118multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6119 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6120 let Predicates = [HasAVX512] in 6121 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6122 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6123 6124 let Predicates = [HasAVX512, HasVLX] in { 6125 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6126 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6127 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6128 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6129 } 6130} 6131 6132multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6133 SDNode OpNode, X86SchedWriteWidths sched> { 6134 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6135 
avx512vl_i32_info>; 6136 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6137 avx512vl_i64_info>, REX_W; 6138} 6139 6140// Use 512bit version to implement 128/256 bit in case NoVLX. 6141multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6142 SDNode OpNode, list<Predicate> p> { 6143 let Predicates = p in { 6144 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6145 (_.info256.VT _.info256.RC:$src2))), 6146 (EXTRACT_SUBREG 6147 (!cast<Instruction>(OpcodeStr#"Zrr") 6148 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6149 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6150 sub_ymm)>; 6151 6152 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6153 (_.info128.VT _.info128.RC:$src2))), 6154 (EXTRACT_SUBREG 6155 (!cast<Instruction>(OpcodeStr#"Zrr") 6156 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6157 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6158 sub_xmm)>; 6159 } 6160} 6161multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6162 SDNode OpNode, X86SchedWriteWidths sched> { 6163 let Predicates = [HasBWI] in 6164 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6165 EVEX_V512, REX_W; 6166 let Predicates = [HasVLX, HasBWI] in { 6167 6168 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6169 EVEX_V256, REX_W; 6170 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6171 EVEX_V128, REX_W; 6172 } 6173} 6174 6175defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6176 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6177 6178defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6179 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6180 6181defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, 
SchedWriteVarVecShift>, 6182 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6183 6184defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6185defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6186 6187defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>; 6188defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>; 6189defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>; 6190defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>; 6191 6192 6193// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6194let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6195 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6196 (EXTRACT_SUBREG (v8i64 6197 (VPROLVQZrr 6198 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6199 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6200 sub_xmm)>; 6201 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6202 (EXTRACT_SUBREG (v8i64 6203 (VPROLVQZrr 6204 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6205 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6206 sub_ymm)>; 6207 6208 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6209 (EXTRACT_SUBREG (v16i32 6210 (VPROLVDZrr 6211 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6212 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6213 sub_xmm)>; 6214 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6215 (EXTRACT_SUBREG (v16i32 6216 (VPROLVDZrr 6217 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6218 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6219 sub_ymm)>; 6220 6221 def : 
Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6222 (EXTRACT_SUBREG (v8i64 6223 (VPROLQZri 6224 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6225 timm:$src2)), sub_xmm)>; 6226 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6227 (EXTRACT_SUBREG (v8i64 6228 (VPROLQZri 6229 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6230 timm:$src2)), sub_ymm)>; 6231 6232 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6233 (EXTRACT_SUBREG (v16i32 6234 (VPROLDZri 6235 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6236 timm:$src2)), sub_xmm)>; 6237 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6238 (EXTRACT_SUBREG (v16i32 6239 (VPROLDZri 6240 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6241 timm:$src2)), sub_ymm)>; 6242} 6243 6244// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6245let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6246 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6247 (EXTRACT_SUBREG (v8i64 6248 (VPRORVQZrr 6249 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6250 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6251 sub_xmm)>; 6252 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6253 (EXTRACT_SUBREG (v8i64 6254 (VPRORVQZrr 6255 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6256 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6257 sub_ymm)>; 6258 6259 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6260 (EXTRACT_SUBREG (v16i32 6261 (VPRORVDZrr 6262 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6263 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6264 sub_xmm)>; 6265 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6266 (EXTRACT_SUBREG (v16i32 6267 (VPRORVDZrr 6268 (v16i32 (INSERT_SUBREG 
(IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6269 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6270 sub_ymm)>; 6271 6272 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6273 (EXTRACT_SUBREG (v8i64 6274 (VPRORQZri 6275 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6276 timm:$src2)), sub_xmm)>; 6277 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6278 (EXTRACT_SUBREG (v8i64 6279 (VPRORQZri 6280 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6281 timm:$src2)), sub_ymm)>; 6282 6283 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6284 (EXTRACT_SUBREG (v16i32 6285 (VPRORDZri 6286 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6287 timm:$src2)), sub_xmm)>; 6288 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6289 (EXTRACT_SUBREG (v16i32 6290 (VPRORDZri 6291 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6292 timm:$src2)), sub_ymm)>; 6293} 6294 6295//===-------------------------------------------------------------------===// 6296// 1-src variable permutation VPERMW/D/Q 6297//===-------------------------------------------------------------------===// 6298 6299multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6300 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6301 let Predicates = [HasAVX512] in 6302 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6303 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6304 6305 let Predicates = [HasAVX512, HasVLX] in 6306 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6307 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6308} 6309 6310multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6311 string OpcodeStr, SDNode OpNode, 6312 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6313 let Predicates = 
[HasAVX512] in 6314 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6315 sched, VTInfo.info512>, 6316 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6317 sched, VTInfo.info512>, EVEX_V512; 6318 let Predicates = [HasAVX512, HasVLX] in 6319 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6320 sched, VTInfo.info256>, 6321 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6322 sched, VTInfo.info256>, EVEX_V256; 6323} 6324 6325multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6326 Predicate prd, SDNode OpNode, 6327 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6328 let Predicates = [prd] in 6329 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6330 EVEX_V512 ; 6331 let Predicates = [HasVLX, prd] in { 6332 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6333 EVEX_V256 ; 6334 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6335 EVEX_V128 ; 6336 } 6337} 6338 6339defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6340 WriteVarShuffle256, avx512vl_i16_info>, REX_W; 6341defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6342 WriteVarShuffle256, avx512vl_i8_info>; 6343 6344defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6345 WriteVarShuffle256, avx512vl_i32_info>; 6346defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6347 WriteVarShuffle256, avx512vl_i64_info>, REX_W; 6348defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6349 WriteFVarShuffle256, avx512vl_f32_info>; 6350defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6351 WriteFVarShuffle256, avx512vl_f64_info>, REX_W; 6352 6353defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6354 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6355 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6356defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6357 X86VPermi, 
WriteFShuffle256, avx512vl_f64_info>, 6358 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6359 6360//===----------------------------------------------------------------------===// 6361// AVX-512 - VPERMIL 6362//===----------------------------------------------------------------------===// 6363 6364multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6365 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6366 X86VectorVTInfo Ctrl> { 6367 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6368 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6369 "$src2, $src1", "$src1, $src2", 6370 (_.VT (OpNode _.RC:$src1, 6371 (Ctrl.VT Ctrl.RC:$src2)))>, 6372 T8, PD, EVEX, VVVV, Sched<[sched]>; 6373 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6374 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6375 "$src2, $src1", "$src1, $src2", 6376 (_.VT (OpNode 6377 _.RC:$src1, 6378 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6379 T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6380 Sched<[sched.Folded, sched.ReadAfterFold]>; 6381 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6382 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6383 "${src2}"#_.BroadcastStr#", $src1", 6384 "$src1, ${src2}"#_.BroadcastStr, 6385 (_.VT (OpNode 6386 _.RC:$src1, 6387 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6388 T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6389 Sched<[sched.Folded, sched.ReadAfterFold]>; 6390} 6391 6392multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6393 X86SchedWriteWidths sched, 6394 AVX512VLVectorVTInfo _, 6395 AVX512VLVectorVTInfo Ctrl> { 6396 let Predicates = [HasAVX512] in { 6397 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6398 _.info512, Ctrl.info512>, EVEX_V512; 6399 } 6400 let Predicates = [HasAVX512, HasVLX] in { 6401 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6402 _.info128, Ctrl.info128>, EVEX_V128; 6403 defm Z256 : 
avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6404 _.info256, Ctrl.info256>, EVEX_V256; 6405 } 6406} 6407 6408multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, 6409 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ 6410 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle, 6411 _, Ctrl>; 6412 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, 6413 X86VPermilpi, SchedWriteFShuffle, _>, 6414 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; 6415} 6416 6417let ExeDomain = SSEPackedSingle in 6418defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, 6419 avx512vl_i32_info>; 6420let ExeDomain = SSEPackedDouble in 6421defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, 6422 avx512vl_i64_info>, REX_W; 6423 6424//===----------------------------------------------------------------------===// 6425// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW 6426//===----------------------------------------------------------------------===// 6427 6428defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", 6429 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, 6430 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; 6431defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", 6432 X86PShufhw, SchedWriteShuffle>, 6433 EVEX, AVX512XSIi8Base; 6434defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", 6435 X86PShuflw, SchedWriteShuffle>, 6436 EVEX, AVX512XDIi8Base; 6437 6438//===----------------------------------------------------------------------===// 6439// AVX-512 - VPSHUFB 6440//===----------------------------------------------------------------------===// 6441 6442multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6443 X86SchedWriteWidths sched> { 6444 let Predicates = [HasBWI] in 6445 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>, 6446 EVEX_V512; 6447 6448 
let Predicates = [HasVLX, HasBWI] in { 6449 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>, 6450 EVEX_V256; 6451 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>, 6452 EVEX_V128; 6453 } 6454} 6455 6456defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, 6457 SchedWriteVarShuffle>, WIG; 6458 6459//===----------------------------------------------------------------------===// 6460// Move Low to High and High to Low packed FP Instructions 6461//===----------------------------------------------------------------------===// 6462 6463def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), 6464 (ins VR128X:$src1, VR128X:$src2), 6465 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6466 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, 6467 Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV; 6468let isCommutable = 1 in 6469def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), 6470 (ins VR128X:$src1, VR128X:$src2), 6471 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6472 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, 6473 Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV; 6474 6475//===----------------------------------------------------------------------===// 6476// VMOVHPS/PD VMOVLPS Instructions 6477// All patterns was taken from SSS implementation. 
6478//===----------------------------------------------------------------------===// 6479 6480multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6481 SDPatternOperator OpNode, 6482 X86VectorVTInfo _> { 6483 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6484 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6485 (ins _.RC:$src1, f64mem:$src2), 6486 !strconcat(OpcodeStr, 6487 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6488 [(set _.RC:$dst, 6489 (OpNode _.RC:$src1, 6490 (_.VT (bitconvert 6491 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6492 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV; 6493} 6494 6495// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6496// SSE1. And MOVLPS pattern is even more complex. 6497defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6498 v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; 6499defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6500 v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; 6501defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6502 v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; 6503defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6504 v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; 6505 6506let Predicates = [HasAVX512] in { 6507 // VMOVHPD patterns 6508 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6509 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6510 6511 // VMOVLPD patterns 6512 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6513 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6514} 6515 6516let SchedRW = [WriteFStore] in { 6517let mayStore = 1, hasSideEffects = 0 in 6518def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6519 (ins f64mem:$dst, VR128X:$src), 6520 "vmovhps\t{$src, $dst|$dst, $src}", 6521 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6522def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 
6523 (ins f64mem:$dst, VR128X:$src), 6524 "vmovhpd\t{$src, $dst|$dst, $src}", 6525 [(store (f64 (extractelt 6526 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6527 (iPTR 0))), addr:$dst)]>, 6528 EVEX, EVEX_CD8<64, CD8VT1>, REX_W; 6529let mayStore = 1, hasSideEffects = 0 in 6530def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6531 (ins f64mem:$dst, VR128X:$src), 6532 "vmovlps\t{$src, $dst|$dst, $src}", 6533 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6534def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6535 (ins f64mem:$dst, VR128X:$src), 6536 "vmovlpd\t{$src, $dst|$dst, $src}", 6537 [(store (f64 (extractelt (v2f64 VR128X:$src), 6538 (iPTR 0))), addr:$dst)]>, 6539 EVEX, EVEX_CD8<64, CD8VT1>, REX_W; 6540} // SchedRW 6541 6542let Predicates = [HasAVX512] in { 6543 // VMOVHPD patterns 6544 def : Pat<(store (f64 (extractelt 6545 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6546 (iPTR 0))), addr:$dst), 6547 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6548} 6549//===----------------------------------------------------------------------===// 6550// FMA - Fused Multiply Operations 6551// 6552 6553multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6554 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6555 X86VectorVTInfo _> { 6556 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6557 Uses = [MXCSR], mayRaiseFPException = 1 in { 6558 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6559 (ins _.RC:$src2, _.RC:$src3), 6560 OpcodeStr, "$src3, $src2", "$src2, $src3", 6561 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 6562 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6563 EVEX, VVVV, Sched<[sched]>; 6564 6565 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6566 (ins _.RC:$src2, _.MemOp:$src3), 6567 OpcodeStr, "$src3, $src2", "$src2, $src3", 6568 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 6569 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, 
(_.LdFrag addr:$src3))), 1, 0>, 6570 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6571 sched.ReadAfterFold]>; 6572 6573 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6574 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6575 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6576 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6577 (OpNode _.RC:$src2, 6578 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 6579 (MaskOpNode _.RC:$src2, 6580 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6581 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, 6582 sched.ReadAfterFold]>; 6583 } 6584} 6585 6586multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6587 X86FoldableSchedWrite sched, 6588 X86VectorVTInfo _> { 6589 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6590 Uses = [MXCSR] in 6591 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6592 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6593 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6594 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 6595 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6596 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6597} 6598 6599multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6600 SDNode MaskOpNode, SDNode OpNodeRnd, 6601 X86SchedWriteWidths sched, 6602 AVX512VLVectorVTInfo _, 6603 Predicate prd = HasAVX512> { 6604 let Predicates = [prd] in { 6605 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6606 sched.ZMM, _.info512>, 6607 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6608 _.info512>, 6609 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6610 } 6611 let Predicates = [HasVLX, prd] in { 6612 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6613 sched.YMM, _.info256>, 6614 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6615 defm Z128 
: avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6616 sched.XMM, _.info128>, 6617 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6618 } 6619} 6620 6621multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6622 SDNode MaskOpNode, SDNode OpNodeRnd> { 6623 defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6624 OpNodeRnd, SchedWriteFMA, 6625 avx512vl_f16_info, HasFP16>, T_MAP6, PD; 6626 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6627 OpNodeRnd, SchedWriteFMA, 6628 avx512vl_f32_info>, T8, PD; 6629 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6630 OpNodeRnd, SchedWriteFMA, 6631 avx512vl_f64_info>, T8, PD, REX_W; 6632} 6633 6634defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, 6635 fma, X86FmaddRnd>; 6636defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, 6637 X86Fmsub, X86FmsubRnd>; 6638defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, 6639 X86Fmaddsub, X86FmaddsubRnd>; 6640defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, 6641 X86Fmsubadd, X86FmsubaddRnd>; 6642defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, 6643 X86Fnmadd, X86FnmaddRnd>; 6644defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, 6645 X86Fnmsub, X86FnmsubRnd>; 6646 6647 6648multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6649 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6650 X86VectorVTInfo _> { 6651 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6652 Uses = [MXCSR], mayRaiseFPException = 1 in { 6653 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6654 (ins _.RC:$src2, _.RC:$src3), 6655 OpcodeStr, "$src3, $src2", "$src2, $src3", 6656 (null_frag), 6657 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6658 EVEX, VVVV, Sched<[sched]>; 6659 6660 defm m: 
AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6661 (ins _.RC:$src2, _.MemOp:$src3), 6662 OpcodeStr, "$src3, $src2", "$src2, $src3", 6663 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 6664 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6665 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6666 sched.ReadAfterFold]>; 6667 6668 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6669 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6670 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6671 "$src2, ${src3}"#_.BroadcastStr, 6672 (_.VT (OpNode _.RC:$src2, 6673 (_.VT (_.BroadcastLdFrag addr:$src3)), 6674 _.RC:$src1)), 6675 (_.VT (MaskOpNode _.RC:$src2, 6676 (_.VT (_.BroadcastLdFrag addr:$src3)), 6677 _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B, 6678 Sched<[sched.Folded, sched.ReadAfterFold, 6679 sched.ReadAfterFold]>; 6680 } 6681} 6682 6683multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6684 X86FoldableSchedWrite sched, 6685 X86VectorVTInfo _> { 6686 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6687 Uses = [MXCSR] in 6688 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6689 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6690 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6691 (null_frag), 6692 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6693 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6694} 6695 6696multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6697 SDNode MaskOpNode, SDNode OpNodeRnd, 6698 X86SchedWriteWidths sched, 6699 AVX512VLVectorVTInfo _, 6700 Predicate prd = HasAVX512> { 6701 let Predicates = [prd] in { 6702 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6703 sched.ZMM, _.info512>, 6704 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6705 _.info512>, 6706 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6707 } 6708 let 
Predicates = [HasVLX, prd] in { 6709 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6710 sched.YMM, _.info256>, 6711 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6712 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6713 sched.XMM, _.info128>, 6714 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6715 } 6716} 6717 6718multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6719 SDNode MaskOpNode, SDNode OpNodeRnd > { 6720 defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6721 OpNodeRnd, SchedWriteFMA, 6722 avx512vl_f16_info, HasFP16>, T_MAP6, PD; 6723 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6724 OpNodeRnd, SchedWriteFMA, 6725 avx512vl_f32_info>, T8, PD; 6726 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6727 OpNodeRnd, SchedWriteFMA, 6728 avx512vl_f64_info>, T8, PD, REX_W; 6729} 6730 6731defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, 6732 fma, X86FmaddRnd>; 6733defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub, 6734 X86Fmsub, X86FmsubRnd>; 6735defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, 6736 X86Fmaddsub, X86FmaddsubRnd>; 6737defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, 6738 X86Fmsubadd, X86FmsubaddRnd>; 6739defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd, 6740 X86Fnmadd, X86FnmaddRnd>; 6741defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub, 6742 X86Fnmsub, X86FnmsubRnd>; 6743 6744multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6745 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6746 X86VectorVTInfo _> { 6747 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6748 Uses = [MXCSR], mayRaiseFPException = 1 in { 6749 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6750 (ins _.RC:$src2, 
_.RC:$src3), 6751 OpcodeStr, "$src3, $src2", "$src2, $src3", 6752 (null_frag), 6753 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>, 6754 EVEX, VVVV, Sched<[sched]>; 6755 6756 // Pattern is 312 order so that the load is in a different place from the 6757 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6758 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6759 (ins _.RC:$src2, _.MemOp:$src3), 6760 OpcodeStr, "$src3, $src2", "$src2, $src3", 6761 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 6762 (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, 6763 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6764 sched.ReadAfterFold]>; 6765 6766 // Pattern is 312 order so that the load is in a different place from the 6767 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6768 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6769 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6770 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6771 "$src2, ${src3}"#_.BroadcastStr, 6772 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6773 _.RC:$src1, _.RC:$src2)), 6774 (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6775 _.RC:$src1, _.RC:$src2)), 1, 0>, 6776 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, 6777 sched.ReadAfterFold]>; 6778 } 6779} 6780 6781multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6782 X86FoldableSchedWrite sched, 6783 X86VectorVTInfo _> { 6784 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6785 Uses = [MXCSR] in 6786 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6787 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6788 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6789 (null_frag), 6790 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), 6791 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6792} 6793 
// 132-form packed FMA: instantiates the 512-bit variant (with its
// embedded-rounding sibling) under `prd`, and the 256/128-bit variants
// under [HasVLX, prd].  EVEX_CD8 is derived from the element size so the
// compressed displacement scaling matches the element type.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                  EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out for the 132 forms: PH (fp16, gated on HasFP16 through
// the common multiclass's predicate parameter), PS (fp32) and PD (fp64),
// each with the matching opcode map / prefix bits.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

// 132-order packed FMA instruction families.  Each takes a non-strict node
// (for unmasked selection), a strict/plain node (for masked selection) and a
// rounding-mode node (for the EVEX.B register forms).
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Common scaffolding for one scalar FMA instruction (one opcode of one
// operand order).  Emits:
//   * masking-capable "_Int" variants (register, memory and embedded-rounding
//     forms) via AVX512_maskable_3src_scalar.  These carry (null_frag) - they
//     are selected by the explicit Pat<>s in avx512_scalar_fma_patterns below.
//   * isCodeGenOnly FRC-register variants that carry the RHS_r/RHS_m/RHS_b
//     patterns.  When MaskOnlyReg is set the register and rounding patterns
//     are suppressed (empty pattern list), leaving only the memory pattern.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                             SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  // Embedded-rounding form reads MXCSR explicitly rather than using SIMD_EXC.
  let Uses = [MXCSR] in
  defm rb: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1, 0, "_Int">,
           EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                                    SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Instantiates all three operand orders (213/231/132) of one scalar FMA
// family for a single element type.  The register/memory/rounding DAGs handed
// to avx512_fma3s_common differ per order so each instruction gets the
// operand placement its encoding implies.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// Element-type fan-out for one scalar FMA family: SS/SD under HasAVX512,
// SH (fp16) under HasFP16, with matching CD8 scaling and prefix bits.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

// Selection patterns for the "_Int" scalar FMA instructions.  Each pattern
// matches a scalar FMA whose result is merged back into element 0 of a
// vector through Move/scalar_to_vector, and selects the 213/231/132 variant
// whose operand placement matches.  Covered forms: unmasked (Op), merge-
// masked (X86selects_mask with the passthru element), zero-masked (ZeroFP
// fallthrough) and embedded-rounding (RndOp), each in register and, where the
// load position permits, memory flavors.
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked forms: the masked-off lane keeps element 0 of $src1.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zmk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zmk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zmk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked forms: the masked-off lane becomes ZeroFP.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zmkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zmkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zmkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrbk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrbk_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrbkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrbkz_Int")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// Register, memory, and broadcast-memory forms of one VPMADD52 instruction,
// with $src1 as the tied accumulator.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8, PD, EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                                     sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                              sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"
7209multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 7210 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 7211 let Predicates = [HasIFMA] in { 7212 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 7213 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7214 } 7215 let Predicates = [HasVLX, HasIFMA] in { 7216 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 7217 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7218 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 7219 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7220 } 7221} 7222 7223defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 7224 SchedWriteVecIMul, avx512vl_i64_info>, 7225 REX_W; 7226defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 7227 SchedWriteVecIMul, avx512vl_i64_info>, 7228 REX_W; 7229 7230//===----------------------------------------------------------------------===// 7231// AVX-512 Scalar convert from sign integer to float/double 7232//===----------------------------------------------------------------------===// 7233 7234multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, 7235 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7236 X86MemOperand x86memop, PatFrag ld_frag, string asm, 7237 string mem, list<Register> _Uses = [MXCSR], 7238 bit _mayRaiseFPException = 1> { 7239let ExeDomain = DstVT.ExeDomain, Uses = _Uses, 7240 mayRaiseFPException = _mayRaiseFPException in { 7241 let hasSideEffects = 0, isCodeGenOnly = 1 in { 7242 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), 7243 (ins DstVT.FRC:$src1, SrcRC:$src), 7244 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7245 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7246 let mayLoad = 1 in 7247 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), 7248 (ins DstVT.FRC:$src1, x86memop:$src), 7249 asm#"{"#mem#"}\t{$src, $src1, 
$dst|$dst, $src1, $src}", []>, 7250 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 7251 } // hasSideEffects = 0 7252 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7253 (ins DstVT.RC:$src1, SrcRC:$src2), 7254 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7255 [(set DstVT.RC:$dst, 7256 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>, 7257 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7258 7259 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), 7260 (ins DstVT.RC:$src1, x86memop:$src2), 7261 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7262 [(set DstVT.RC:$dst, 7263 (OpNode (DstVT.VT DstVT.RC:$src1), 7264 (ld_frag addr:$src2)))]>, 7265 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 7266} 7267 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7268 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, 7269 DstVT.RC:$src1, SrcRC:$src2), 0, "att">; 7270} 7271 7272multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7273 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7274 X86VectorVTInfo DstVT, string asm, 7275 string mem> { 7276 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in 7277 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7278 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7279 !strconcat(asm, 7280 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7281 [(set DstVT.RC:$dst, 7282 (OpNode (DstVT.VT DstVT.RC:$src1), 7283 SrcRC:$src2, 7284 (i32 timm:$rc)))]>, 7285 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7286 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", 7287 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 7288 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; 7289} 7290 7291multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd, 7292 X86FoldableSchedWrite sched, 7293 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7294 X86MemOperand x86memop, PatFrag ld_frag, 7295 
string asm, string mem> { 7296 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>, 7297 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7298 ld_frag, asm, mem>, VEX_LIG; 7299} 7300 7301let Predicates = [HasAVX512] in { 7302defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7303 WriteCvtI2SS, GR32, 7304 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, 7305 TB, XS, EVEX_CD8<32, CD8VT1>; 7306defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7307 WriteCvtI2SS, GR64, 7308 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, 7309 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; 7310defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, 7311 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, 7312 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7313defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7314 WriteCvtI2SD, GR64, 7315 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, 7316 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7317 7318def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7319 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7320def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7321 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7322 7323def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), 7324 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7325def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), 7326 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7327def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), 7328 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7329def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), 7330 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7331 7332def : Pat<(f32 (any_sint_to_fp GR32:$src)), 7333 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7334def : Pat<(f32 (any_sint_to_fp GR64:$src)), 7335 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7336def : 
Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned GPR -> scalar FP conversions. These encodings (vcvtusi2ss/sd)
// exist only with AVX-512.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
// i32 -> f64 uses null_frag (no ISel patterns from the multiclass); selection
// is done by the explicit Pat<>s below. NOTE(review): presumably because the
// conversion is exact, so no rounding-control variant is needed -- confirm.
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

// Scalar FP -> int with a register form, an embedded-rounding (EVEX.b + RC)
// form, and a folded-load form. The "_Int" names indicate the operands use the
// full XMM register class (SrcVT.RC) rather than the scalar FP class.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr, Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // Explicit rounding-control operand; reads MXCSR only for the exception
    // state, rounding mode comes from $rc.
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [prd]

  // AT&T aliases carry an explicit operand-size suffix (aliasStr, e.g. "{l}").
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

// Scalar FP -> int on the scalar FP register class (FRC); isCodeGenOnly
// forms used to select plain ISD nodes such as lrint/llrint below.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}

// lrint/llrint select onto the same cvtss2si/cvtsd2si encodings (round using
// the current MXCSR rounding mode).
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                              lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                                llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                              lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                                llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;

// i64-valued lrint also maps to the 64-bit forms.
let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  // isCodeGenOnly forms on the scalar FP class, for plain fp_to_s/uint.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  // "_Int" forms take the full vector register class.
  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
             [(set _DstRC.RC:$dst, (OpNodeSAE
             (_SrcRC.VT _SrcRC.RC:$src)))]>,
             EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
             (ins _SrcRC.IntScalarMemOp:$src),
             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
             [(set _DstRC.RC:$dst,
               (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
             EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]

  // AT&T aliases with explicit size suffix (aliasStr).
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                            any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                            "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                            any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                            "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                            any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                            "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                            any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                            "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                            any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                            "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                            any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                            "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                            any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                            "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                            any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                            "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

// Scalar FP width conversion: masked "_Int" forms on XMM plus
// isCodeGenOnly forms on the scalar FP register classes.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2))), "_Int">,
                         EVEX, VVVV, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.ScalarIntMemFrags addr:$src2))), "_Int">,
                         EVEX, VVVV, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern-less forms on the scalar FP classes; selected via the Pat<>s
  // that follow the instantiations below.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX, VVVV, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))), "_Int">,
                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc))), "_Int">,
                        EVEX, VVVV, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// Truncating conversions get a rounding-control variant.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

// Extending conversions get an SAE variant instead of rounding control.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate
prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
// Scalar FP width conversions: f64<->f32, and with FP16 also f64/f32<->f16.
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>, TB, XD, REX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                          f64x_info>, TB, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f16x_info, HasFP16>, T_MAP5, XD, REX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f64x_info, HasFP16>, T_MAP5, XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f32x_info,
                                            f16x_info, HasFP16>, T_MAP5;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f32x_info, HasFP16>, T_MAP6;

// Select scalar extend/round nodes onto the isCodeGenOnly FRC forms; the
// destination register is merged via IMPLICIT_DEF.
def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasFP16]>;

// Fold a convert feeding a movss/movsd element insert into the _Int form.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
//         and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Packed conversion: register, memory, and broadcast forms, each with
// merge-masking and zero-masking variants (via AVX512_maskable_cvt).
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  // Broadcast-from-scalar-memory form (EVEX.b).
  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                                )),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDPatternOperator OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _Src.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                            EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                            "$rc, $src", "$src, $rc",
                            (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                            EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  // AT&T-syntax aliases; the "x"/"y" mnemonic suffix disambiguates the
  // 128-bit vs 256-bit source when the destination is XMM either way.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                                  REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
                                  avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                                  TB, EVEX_CD8<32, CD8VH>;

// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
              (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
                                     f32mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
                                     f64mem>, EVEX_V256;
  }
}

// Truncate Double to Half
multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  // ISel patterns disabled with null_frag here; masked selection is handled
  // by the explicit X86vmfpround Pat<>s that follow the instantiation.
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
                               VK4WM>, EVEX_V256;
  }
  // AT&T aliases with "x"/"y" source-width suffixes.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask,
i64mem:$src), 0, "att">; 8079 8080 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 8081 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 8082 VR512:$src), 0, "att">; 8083 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 8084 "$dst {${mask}}, $src}", 8085 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 8086 VK8WM:$mask, VR512:$src), 0, "att">; 8087 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 8088 "$dst {${mask}} {z}, $src}", 8089 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 8090 VK8WM:$mask, VR512:$src), 0, "att">; 8091 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 8092 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 8093 i64mem:$src), 0, "att">; 8094 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 8095 "$dst {${mask}}, ${src}{1to8}}", 8096 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 8097 VK8WM:$mask, i64mem:$src), 0, "att">; 8098 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 8099 "$dst {${mask}} {z}, ${src}{1to8}}", 8100 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 8101 VK8WM:$mask, i64mem:$src), 0, "att">; 8102} 8103 8104defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, 8105 avx512vl_f32_info, SchedWriteCvtPD2PS, 8106 HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>; 8107defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, 8108 avx512vl_f16_info, SchedWriteCvtPS2PD, 8109 HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>; 8110defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, 8111 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>; 8112defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, 8113 T_MAP5, EVEX_CD8<16, CD8VQ>; 8114 8115let Predicates = [HasFP16, HasVLX] in { 8116 // Special patterns to allow use of X86vmfpround for masking. Instruction 8117 // patterns have been disabled with null_frag. 
8118 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))), 8119 (VCVTPD2PHZ256rr VR256X:$src)>; 8120 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0), 8121 VK4WM:$mask)), 8122 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 8123 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV, 8124 VK4WM:$mask), 8125 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 8126 8127 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))), 8128 (VCVTPD2PHZ256rm addr:$src)>; 8129 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0), 8130 VK4WM:$mask), 8131 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8132 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV, 8133 VK4WM:$mask), 8134 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>; 8135 8136 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))), 8137 (VCVTPD2PHZ256rmb addr:$src)>; 8138 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8139 (v8f16 VR128X:$src0), VK4WM:$mask), 8140 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8141 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8142 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 8143 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 8144 8145 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))), 8146 (VCVTPD2PHZ128rr VR128X:$src)>; 8147 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0), 8148 VK2WM:$mask), 8149 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8150 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV, 8151 VK2WM:$mask), 8152 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 8153 8154 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))), 8155 (VCVTPD2PHZ128rm addr:$src)>; 8156 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0), 8157 VK2WM:$mask), 8158 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8159 def : Pat<(X86vmfpround (loadv2f64 addr:$src), 
v8f16x_info.ImmAllZerosV, 8160 VK2WM:$mask), 8161 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>; 8162 8163 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))), 8164 (VCVTPD2PHZ128rmb addr:$src)>; 8165 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8166 (v8f16 VR128X:$src0), VK2WM:$mask), 8167 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8168 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8169 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 8170 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 8171} 8172 8173// Convert Signed/Unsigned Doubleword to Double 8174let Uses = []<Register>, mayRaiseFPException = 0 in 8175multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8176 SDNode MaskOpNode, SDPatternOperator OpNode128, 8177 SDNode MaskOpNode128, 8178 X86SchedWriteWidths sched> { 8179 // No rounding in this op 8180 let Predicates = [HasAVX512] in 8181 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 8182 MaskOpNode, sched.ZMM>, EVEX_V512; 8183 8184 let Predicates = [HasVLX] in { 8185 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 8186 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 8187 "", i64mem, VK2WM, 8188 (v2f64 (OpNode128 (bc_v4i32 8189 (v2i64 8190 (scalar_to_vector (loadi64 addr:$src)))))), 8191 (v2f64 (MaskOpNode128 (bc_v4i32 8192 (v2i64 8193 (scalar_to_vector (loadi64 addr:$src))))))>, 8194 EVEX_V128; 8195 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 8196 MaskOpNode, sched.YMM>, EVEX_V256; 8197 } 8198} 8199 8200// Convert Signed/Unsigned Doubleword to Float 8201multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8202 SDNode MaskOpNode, SDNode OpNodeRnd, 8203 X86SchedWriteWidths sched> { 8204 let Predicates = [HasAVX512] in 8205 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 8206 MaskOpNode, sched.ZMM>, 8207 avx512_vcvt_fp_rc<opc, OpcodeStr, 
v16f32_info, v16i32_info, 8208 OpNodeRnd, sched.ZMM>, EVEX_V512; 8209 8210 let Predicates = [HasVLX] in { 8211 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 8212 MaskOpNode, sched.XMM>, EVEX_V128; 8213 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 8214 MaskOpNode, sched.YMM>, EVEX_V256; 8215 } 8216} 8217 8218// Convert Float to Signed/Unsigned Doubleword with truncation 8219multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8220 SDNode MaskOpNode, 8221 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8222 let Predicates = [HasAVX512] in { 8223 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8224 MaskOpNode, sched.ZMM>, 8225 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 8226 OpNodeSAE, sched.ZMM>, EVEX_V512; 8227 } 8228 let Predicates = [HasVLX] in { 8229 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8230 MaskOpNode, sched.XMM>, EVEX_V128; 8231 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8232 MaskOpNode, sched.YMM>, EVEX_V256; 8233 } 8234} 8235 8236// Convert Float to Signed/Unsigned Doubleword 8237multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8238 SDNode MaskOpNode, SDNode OpNodeRnd, 8239 X86SchedWriteWidths sched> { 8240 let Predicates = [HasAVX512] in { 8241 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8242 MaskOpNode, sched.ZMM>, 8243 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 8244 OpNodeRnd, sched.ZMM>, EVEX_V512; 8245 } 8246 let Predicates = [HasVLX] in { 8247 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8248 MaskOpNode, sched.XMM>, EVEX_V128; 8249 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8250 MaskOpNode, sched.YMM>, EVEX_V256; 8251 } 8252} 8253 8254// Convert Double to Signed/Unsigned Doubleword with truncation 8255multiclass 
avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8256 SDNode MaskOpNode, SDNode OpNodeSAE, 8257 X86SchedWriteWidths sched> { 8258 let Predicates = [HasAVX512] in { 8259 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8260 MaskOpNode, sched.ZMM>, 8261 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 8262 OpNodeSAE, sched.ZMM>, EVEX_V512; 8263 } 8264 let Predicates = [HasVLX] in { 8265 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8266 // memory forms of these instructions in Asm Parser. They have the same 8267 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8268 // due to the same reason. 8269 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8270 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8271 VK2WM>, EVEX_V128; 8272 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8273 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8274 } 8275 8276 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8277 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8278 VR128X:$src), 0, "att">; 8279 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8280 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8281 VK2WM:$mask, VR128X:$src), 0, "att">; 8282 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8283 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8284 VK2WM:$mask, VR128X:$src), 0, "att">; 8285 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8286 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8287 f64mem:$src), 0, "att">; 8288 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8289 "$dst {${mask}}, ${src}{1to2}}", 8290 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8291 VK2WM:$mask, f64mem:$src), 0, "att">; 8292 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8293 "$dst {${mask}} 
{z}, ${src}{1to2}}", 8294 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8295 VK2WM:$mask, f64mem:$src), 0, "att">; 8296 8297 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8298 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8299 VR256X:$src), 0, "att">; 8300 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8301 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8302 VK4WM:$mask, VR256X:$src), 0, "att">; 8303 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8304 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8305 VK4WM:$mask, VR256X:$src), 0, "att">; 8306 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8307 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8308 f64mem:$src), 0, "att">; 8309 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8310 "$dst {${mask}}, ${src}{1to4}}", 8311 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8312 VK4WM:$mask, f64mem:$src), 0, "att">; 8313 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8314 "$dst {${mask}} {z}, ${src}{1to4}}", 8315 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8316 VK4WM:$mask, f64mem:$src), 0, "att">; 8317} 8318 8319// Convert Double to Signed/Unsigned Doubleword 8320multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8321 SDNode MaskOpNode, SDNode OpNodeRnd, 8322 X86SchedWriteWidths sched> { 8323 let Predicates = [HasAVX512] in { 8324 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8325 MaskOpNode, sched.ZMM>, 8326 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, 8327 OpNodeRnd, sched.ZMM>, EVEX_V512; 8328 } 8329 let Predicates = [HasVLX] in { 8330 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8331 // memory forms of these instructions in Asm Parcer. They have the same 8332 // dest type - 'v4i32x_info'. 
We also specify the broadcast string explicitly 8333 // due to the same reason. 8334 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8335 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8336 VK2WM>, EVEX_V128; 8337 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8338 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8339 } 8340 8341 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8342 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 8343 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8344 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8345 VK2WM:$mask, VR128X:$src), 0, "att">; 8346 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8347 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8348 VK2WM:$mask, VR128X:$src), 0, "att">; 8349 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8350 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8351 f64mem:$src), 0, "att">; 8352 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8353 "$dst {${mask}}, ${src}{1to2}}", 8354 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8355 VK2WM:$mask, f64mem:$src), 0, "att">; 8356 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8357 "$dst {${mask}} {z}, ${src}{1to2}}", 8358 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8359 VK2WM:$mask, f64mem:$src), 0, "att">; 8360 8361 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8362 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 8363 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8364 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8365 VK4WM:$mask, VR256X:$src), 0, "att">; 8366 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8367 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8368 VK4WM:$mask, VR256X:$src), 0, "att">; 
 8369 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8370 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8371 f64mem:$src), 0, "att">; 8372 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8373 "$dst {${mask}}, ${src}{1to4}}", 8374 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8375 VK4WM:$mask, f64mem:$src), 0, "att">; 8376 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8377 "$dst {${mask}} {z}, ${src}{1to4}}", 8378 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8379 VK4WM:$mask, f64mem:$src), 0, "att">; 8380} 8381 8382// Convert Double to Signed/Unsigned Quadword 8383multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8384 SDNode MaskOpNode, SDNode OpNodeRnd, 8385 X86SchedWriteWidths sched> { 8386 let Predicates = [HasDQI] in { 8387 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8388 MaskOpNode, sched.ZMM>, 8389 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, 8390 OpNodeRnd, sched.ZMM>, EVEX_V512; 8391 } 8392 let Predicates = [HasDQI, HasVLX] in { 8393 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8394 MaskOpNode, sched.XMM>, EVEX_V128; 8395 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8396 MaskOpNode, sched.YMM>, EVEX_V256; 8397 } 8398} 8399 8400// Convert Double to Signed/Unsigned Quadword with truncation 8401multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8402 SDNode MaskOpNode, SDNode OpNodeRnd, 8403 X86SchedWriteWidths sched> { 8404 let Predicates = [HasDQI] in { 8405 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8406 MaskOpNode, sched.ZMM>, 8407 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 8408 OpNodeRnd, sched.ZMM>, EVEX_V512; 8409 } 8410 let Predicates = [HasDQI, HasVLX] in { 8411 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8412 MaskOpNode, sched.XMM>, 
EVEX_V128; 8413 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8414 MaskOpNode, sched.YMM>, EVEX_V256; 8415 } 8416} 8417 8418// Convert Signed/Unsigned Quadword to Double 8419multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8420 SDNode MaskOpNode, SDNode OpNodeRnd, 8421 X86SchedWriteWidths sched> { 8422 let Predicates = [HasDQI] in { 8423 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, 8424 MaskOpNode, sched.ZMM>, 8425 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, 8426 OpNodeRnd, sched.ZMM>, EVEX_V512; 8427 } 8428 let Predicates = [HasDQI, HasVLX] in { 8429 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, 8430 MaskOpNode, sched.XMM>, EVEX_V128; 8431 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, 8432 MaskOpNode, sched.YMM>, EVEX_V256; 8433 } 8434} 8435 8436// Convert Float to Signed/Unsigned Quadword 8437multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8438 SDNode MaskOpNode, SDNode OpNodeRnd, 8439 X86SchedWriteWidths sched> { 8440 let Predicates = [HasDQI] in { 8441 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8442 MaskOpNode, sched.ZMM>, 8443 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, 8444 OpNodeRnd, sched.ZMM>, EVEX_V512; 8445 } 8446 let Predicates = [HasDQI, HasVLX] in { 8447 // Explicitly specified broadcast string, since we take only 2 elements 8448 // from v4f32x_info source 8449 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8450 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8451 (v2i64 (OpNode (bc_v4f32 8452 (v2f64 8453 (scalar_to_vector (loadf64 addr:$src)))))), 8454 (v2i64 (MaskOpNode (bc_v4f32 8455 (v2f64 8456 (scalar_to_vector (loadf64 addr:$src))))))>, 8457 EVEX_V128; 8458 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8459 MaskOpNode, sched.YMM>, EVEX_V256; 
 8460 } 8461} 8462 8463// Convert Float to Signed/Unsigned Quadword with truncation 8464multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8465 SDNode MaskOpNode, SDNode OpNodeRnd, 8466 X86SchedWriteWidths sched> { 8467 let Predicates = [HasDQI] in { 8468 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8469 MaskOpNode, sched.ZMM>, 8470 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 8471 OpNodeRnd, sched.ZMM>, EVEX_V512; 8472 } 8473 let Predicates = [HasDQI, HasVLX] in { 8474 // Explicitly specified broadcast string, since we take only 2 elements 8475 // from v4f32x_info source 8476 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8477 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8478 (v2i64 (OpNode (bc_v4f32 8479 (v2f64 8480 (scalar_to_vector (loadf64 addr:$src)))))), 8481 (v2i64 (MaskOpNode (bc_v4f32 8482 (v2f64 8483 (scalar_to_vector (loadf64 addr:$src))))))>, 8484 EVEX_V128; 8485 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8486 MaskOpNode, sched.YMM>, EVEX_V256; 8487 } 8488} 8489 8490// Convert Signed/Unsigned Quadword to Float 8491// Also Convert Signed/Unsigned Doubleword to Half 8492multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8493 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128, 8494 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd, 8495 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src, 8496 X86SchedWriteWidths sched, Predicate prd = HasDQI> { 8497 let Predicates = [prd] in { 8498 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode, 8499 MaskOpNode, sched.ZMM>, 8500 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512, 8501 OpNodeRnd, sched.ZMM>, EVEX_V512; 8502 } 8503 let Predicates = [prd, HasVLX] in { 8504 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8505 // memory forms of these instructions 
in Asm Parser. They have the same 8506 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8507 // due to the same reason. 8508 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag, 8509 null_frag, sched.XMM, _src.info128.BroadcastStr, 8510 "{x}", i128mem, _src.info128.KRCWM>, 8511 EVEX_V128; 8512 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode, 8513 MaskOpNode, sched.YMM, _src.info256.BroadcastStr, 8514 "{y}">, EVEX_V256; 8515 8516 // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction 8517 // patterns have been disabled with null_frag. 8518 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))), 8519 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 8520 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0), 8521 _src.info128.KRCWM:$mask), 8522 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>; 8523 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV, 8524 _src.info128.KRCWM:$mask), 8525 (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>; 8526 8527 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))), 8528 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 8529 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0), 8530 _src.info128.KRCWM:$mask), 8531 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8532 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV, 8533 _src.info128.KRCWM:$mask), 8534 (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>; 8535 8536 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))), 8537 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 8538 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)), 
8539 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask), 8540 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8541 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)), 8542 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask), 8543 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>; 8544 } 8545 8546 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8547 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8548 VR128X:$src), 0, "att">; 8549 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8550 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8551 VK2WM:$mask, VR128X:$src), 0, "att">; 8552 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8553 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8554 VK2WM:$mask, VR128X:$src), 0, "att">; 8555 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8556 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8557 i64mem:$src), 0, "att">; 8558 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8559 "$dst {${mask}}, ${src}{1to2}}", 8560 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8561 VK2WM:$mask, i64mem:$src), 0, "att">; 8562 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8563 "$dst {${mask}} {z}, ${src}{1to2}}", 8564 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8565 VK2WM:$mask, i64mem:$src), 0, "att">; 8566 8567 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8568 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8569 VR256X:$src), 0, "att">; 8570 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8571 "$dst {${mask}}, $src}", 8572 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8573 VK4WM:$mask, VR256X:$src), 0, "att">; 8574 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8575 "$dst {${mask}} {z}, $src}", 8576 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8577 VK4WM:$mask, 
VR256X:$src), 0, "att">; 8578 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8579 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8580 i64mem:$src), 0, "att">; 8581 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8582 "$dst {${mask}}, ${src}{1to4}}", 8583 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8584 VK4WM:$mask, i64mem:$src), 0, "att">; 8585 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8586 "$dst {${mask}} {z}, ${src}{1to4}}", 8587 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8588 VK4WM:$mask, i64mem:$src), 0, "att">; 8589} 8590 8591defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8592 X86any_VSintToFP, X86VSintToFP, 8593 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8594 8595defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8596 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8597 TB, EVEX_CD8<32, CD8VF>; 8598 8599defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8600 X86cvttp2si, X86cvttp2siSAE, 8601 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>; 8602 8603defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8604 X86cvttp2si, X86cvttp2siSAE, 8605 SchedWriteCvtPD2DQ>, 8606 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 8607 8608defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8609 X86cvttp2ui, X86cvttp2uiSAE, 8610 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>; 8611 8612defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8613 X86cvttp2ui, X86cvttp2uiSAE, 8614 SchedWriteCvtPD2DQ>, 8615 TB, REX_W, EVEX_CD8<64, CD8VF>; 8616 8617defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8618 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8619 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8620 8621defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8622 uint_to_fp, X86VUintToFpRnd, 8623 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, 
CD8VF>; 8624 8625defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8626 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8627 EVEX_CD8<32, CD8VF>; 8628 8629defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8630 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD, 8631 REX_W, EVEX_CD8<64, CD8VF>; 8632 8633defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8634 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8635 TB, EVEX_CD8<32, CD8VF>; 8636 8637defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8638 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8639 TB, EVEX_CD8<64, CD8VF>; 8640 8641defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8642 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W, 8643 TB, PD, EVEX_CD8<64, CD8VF>; 8644 8645defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8646 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8647 EVEX_CD8<32, CD8VH>; 8648 8649defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8650 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8651 TB, PD, EVEX_CD8<64, CD8VF>; 8652 8653defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8654 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8655 EVEX_CD8<32, CD8VH>; 8656 8657defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8658 X86cvttp2si, X86cvttp2siSAE, 8659 SchedWriteCvtPD2DQ>, REX_W, 8660 TB, PD, EVEX_CD8<64, CD8VF>; 8661 8662defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8663 X86cvttp2si, X86cvttp2siSAE, 8664 SchedWriteCvtPS2DQ>, TB, PD, 8665 EVEX_CD8<32, CD8VH>; 8666 8667defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8668 X86cvttp2ui, X86cvttp2uiSAE, 8669 SchedWriteCvtPD2DQ>, REX_W, 8670 TB, PD, EVEX_CD8<64, CD8VF>; 8671 8672defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8673 X86cvttp2ui, 
X86cvttp2uiSAE, 8674 SchedWriteCvtPS2DQ>, TB, PD, 8675 EVEX_CD8<32, CD8VH>; 8676 8677defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8678 sint_to_fp, X86VSintToFpRnd, 8679 SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8680 8681defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8682 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8683 REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8684 8685defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8686 X86any_VSintToFP, X86VMSintToFP, 8687 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8688 SchedWriteCvtDQ2PS, HasFP16>, 8689 T_MAP5, EVEX_CD8<32, CD8VF>; 8690 8691defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8692 X86any_VUintToFP, X86VMUintToFP, 8693 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8694 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD, 8695 EVEX_CD8<32, CD8VF>; 8696 8697defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8698 X86any_VSintToFP, X86VMSintToFP, 8699 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8700 SchedWriteCvtDQ2PS>, REX_W, TB, 8701 EVEX_CD8<64, CD8VF>; 8702 8703defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8704 X86any_VUintToFP, X86VMUintToFP, 8705 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8706 SchedWriteCvtDQ2PS>, REX_W, TB, XD, 8707 EVEX_CD8<64, CD8VF>; 8708 8709let Predicates = [HasVLX] in { 8710 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8711 // patterns have been disabled with null_frag. 
8712 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), 8713 (VCVTPD2DQZ128rr VR128X:$src)>; 8714 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8715 VK2WM:$mask), 8716 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8717 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8718 VK2WM:$mask), 8719 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8720 8721 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))), 8722 (VCVTPD2DQZ128rm addr:$src)>; 8723 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8724 VK2WM:$mask), 8725 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8726 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8727 VK2WM:$mask), 8728 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8729 8730 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), 8731 (VCVTPD2DQZ128rmb addr:$src)>; 8732 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8733 (v4i32 VR128X:$src0), VK2WM:$mask), 8734 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8735 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8736 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8737 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8738 8739 // Special patterns to allow use of X86mcvttp2si for masking. Instruction 8740 // patterns have been disabled with null_frag. 
8741 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))), 8742 (VCVTTPD2DQZ128rr VR128X:$src)>; 8743 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8744 VK2WM:$mask), 8745 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8746 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8747 VK2WM:$mask), 8748 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8749 8750 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))), 8751 (VCVTTPD2DQZ128rm addr:$src)>; 8752 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8753 VK2WM:$mask), 8754 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8755 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8756 VK2WM:$mask), 8757 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8758 8759 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), 8760 (VCVTTPD2DQZ128rmb addr:$src)>; 8761 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8762 (v4i32 VR128X:$src0), VK2WM:$mask), 8763 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8764 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8765 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8766 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8767 8768 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction 8769 // patterns have been disabled with null_frag. 
8770 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8771 (VCVTPD2UDQZ128rr VR128X:$src)>;
8772 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8773 VK2WM:$mask),
8774 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8775 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8776 VK2WM:$mask),
8777 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8778 
8779 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8780 (VCVTPD2UDQZ128rm addr:$src)>;
8781 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8782 VK2WM:$mask),
8783 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8784 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8785 VK2WM:$mask),
8786 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8787 
8788 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8789 (VCVTPD2UDQZ128rmb addr:$src)>;
8790 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8791 (v4i32 VR128X:$src0), VK2WM:$mask),
8792 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8793 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8794 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8795 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8796 
8797 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8798 // patterns have been disabled with null_frag.
8799 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), 8800 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8801 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8802 VK2WM:$mask), 8803 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8804 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8805 VK2WM:$mask), 8806 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8807 8808 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), 8809 (VCVTTPD2UDQZ128rm addr:$src)>; 8810 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8811 VK2WM:$mask), 8812 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8813 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8814 VK2WM:$mask), 8815 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8816 8817 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), 8818 (VCVTTPD2UDQZ128rmb addr:$src)>; 8819 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8820 (v4i32 VR128X:$src0), VK2WM:$mask), 8821 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8822 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8823 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8824 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8825 8826 def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>; 8827 def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>; 8828 def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>; 8829 def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>; 8830 def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>; 8831 def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>; 8832} 8833def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>; 8834def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>; 8835def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>; 
8836def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>;
8837 
// DQI+VL: f32->i64 conversions where the v4f32 source comes from a 64-bit
// zero-extending scalar load (X86vzload64) folded into the memory form.
// Each unmasked pattern is followed by merge-masked (rmk) and zero-masked
// (rmkz) variants selected through vselect_mask.
8838let Predicates = [HasDQI, HasVLX] in {
8839 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8840 (VCVTPS2QQZ128rm addr:$src)>;
8841 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8842 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8843 VR128X:$src0)),
8844 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8845 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8846 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8847 v2i64x_info.ImmAllZerosV)),
8848 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8849 
8850 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8851 (VCVTPS2UQQZ128rm addr:$src)>;
8852 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8853 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8854 VR128X:$src0)),
8855 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8856 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8857 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8858 v2i64x_info.ImmAllZerosV)),
8859 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8860 
8861 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8862 (VCVTTPS2QQZ128rm addr:$src)>;
8863 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8864 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8865 VR128X:$src0)),
8866 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8867 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8868 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8869 v2i64x_info.ImmAllZerosV)),
8870 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8871 
8872 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8873 (VCVTTPS2UQQZ128rm addr:$src)>;
8874 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8875 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8876 VR128X:$src0)),
8877 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8878 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8879 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8880 v2i64x_info.ImmAllZerosV)),
8881 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8882 
// lrint/llrint on 128/256-bit vectors lower directly to the DQI
// cvtps2qq/cvtpd2qq register and load forms.
8883 def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8884 def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8885 def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
8886 def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
8887 def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8888 def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8889 def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8890 def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8891 def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
8892 def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
8893 def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
8894 def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
8895}
8896 
// 512-bit lrint/llrint to i64 need only DQI (no VL).
8897let Predicates = [HasDQI] in {
8898 def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8899 def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8900 def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
8901 def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
8902 def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8903 def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8904 def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
8905 def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
8906}
8907 
// VLX: signed/unsigned i32 -> f64 conversions with a folded 64-bit vzload,
// plus masked (rmk) and zero-masked (rmkz) variants.
8908let Predicates = [HasVLX] in {
8909 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8910 (VCVTDQ2PDZ128rm addr:$src)>;
8911 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8912 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8913 VR128X:$src0)),
8914 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8915 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8916 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8917 v2f64x_info.ImmAllZerosV)),
8918 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8919 
8920 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8921 (VCVTUDQ2PDZ128rm addr:$src)>;
8922 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8923 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8924 VR128X:$src0)),
8925 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8926 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8927 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8928 v2f64x_info.ImmAllZerosV)),
8929 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8930}
8931 
8932//===----------------------------------------------------------------------===//
8933// Half precision conversion instructions
8934//===----------------------------------------------------------------------===//
8935 
// vcvtph2ps (half -> single): rr is the register form, rm the memory form;
// ld_dag supplies the load fragment so callers can fold either a full-width
// load or a vzload. Split patterns give any_/strict nodes for the unmasked
// form and the non-strict node for the masked forms.
8936let Uses = [MXCSR], mayRaiseFPException = 1 in
8937multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8938 X86MemOperand x86memop, dag ld_dag,
8939 X86FoldableSchedWrite sched> {
8940 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8941 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8942 (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8943 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8944 T8, PD, Sched<[sched]>;
8945 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8946 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8947 (X86any_cvtph2ps (_src.VT ld_dag)),
8948 (X86cvtph2ps (_src.VT ld_dag))>,
8949 T8, PD, Sched<[sched.Folded]>;
8950}
8951 
// vcvtph2ps with {sae}: register-only form (EVEX_B encodes the SAE bit).
8952multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8953 X86FoldableSchedWrite sched> {
8954 let Uses = [MXCSR] in
8955 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8956 (ins _src.RC:$src), "vcvtph2ps",
8957 "{sae}, $src", "$src, {sae}",
8958 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8959 T8, PD, EVEX_B, Sched<[sched]>;
8960}
8961 
8962let Predicates = [HasAVX512] in
8963 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8964 (load addr:$src), WriteCvtPH2PSZ>,
8965 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8966 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8967 
8968let Predicates = [HasVLX] in {
8969 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8970 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8971 EVEX_CD8<32, CD8VH>;
8972 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8973 (bitconvert (v2i64 (X86vzload64 addr:$src))),
8974 WriteCvtPH2PS>, EVEX, EVEX_V128,
8975 EVEX_CD8<32, CD8VH>;
8976 
8977 // Pattern match vcvtph2ps of a scalar i64 load.
8978 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8979 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8980 (VCVTPH2PSZ128rm addr:$src)>;
8981}
8982 
// vcvtps2ph (single -> half): rr/rrk/rrkz are register forms with
// merge-/zero-masking driven by X86(m)cvtps2ph; mr/mrk are the store
// forms, pattern-less (hasSideEffects = 0, mayStore = 1) and matched by
// the explicit store Pats below.
8983multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8984 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8985let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8986 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8987 (ins _src.RC:$src1, i32u8imm:$src2),
8988 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8989 [(set _dest.RC:$dst,
8990 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8991 Sched<[RR]>;
8992 let Constraints = "$src0 = $dst" in
8993 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8994 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8995 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8996 [(set _dest.RC:$dst,
8997 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8998 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8999 Sched<[RR]>, EVEX_K;
9000 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9001 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9002 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9003 [(set _dest.RC:$dst,
9004 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9005 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9006 Sched<[RR]>, EVEX_KZ;
9007 let hasSideEffects = 0, mayStore = 1 in {
9008 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9009 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9010 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9011 Sched<[MR]>;
9012 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9013 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9014 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9015 EVEX_K, Sched<[MR]>;
9016 }
9017}
9018}
9019 
// vcvtps2ph with {sae}: register-only forms (unmasked / merge-masked /
// zero-masked), encoded with EVEX_B.
9020multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9021 SchedWrite Sched> {
9022 let hasSideEffects = 0, Uses = [MXCSR] in {
9023 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9024 (ins _src.RC:$src1, i32u8imm:$src2),
9025 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9026 [(set _dest.RC:$dst,
9027 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9028 EVEX_B, Sched<[Sched]>;
9029 let Constraints = "$src0 = $dst" in
9030 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9031 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9032 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9033 [(set _dest.RC:$dst,
9034 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9035 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9036 EVEX_B, Sched<[Sched]>, EVEX_K;
9037 def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9038 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9039 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9040 [(set _dest.RC:$dst,
9041 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9042 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9043 EVEX_B, Sched<[Sched]>, EVEX_KZ;
9044}
9045}
9046 
9047let Predicates = [HasAVX512] in {
9048 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9049 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9050 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9051 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9052 
9053 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9054 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9055}
9056 
9057let Predicates = [HasVLX] in {
9058 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9059 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9060 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9061 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9062 WriteCvtPS2PH, WriteCvtPS2PHSt>,
9063 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9064 
// Fold a store of the low 64 bits of a 128-bit cvtps2ph result (seen as
// either f64 or i64 extract of element 0) into the memory form.
9065 def : Pat<(store (f64 (extractelt
9066 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9067 (iPTR 0))), addr:$dst),
9068 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9069 def : Pat<(store (i64 (extractelt
9070 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9071 (iPTR 0))), addr:$dst),
9072 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9073 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9074 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9075}
9076 
9077// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
9078multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9079 string OpcodeStr, Domain d,
9080 X86FoldableSchedWrite sched = WriteFComX> {
9081 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9082 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9083 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9084 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9085}
9086 
// {sae} register-only forms of the scalar compares; pattern-less.
9087let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9088 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9089 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9090 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9091 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9092 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9093 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9094 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9095 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9096}
9097 
// Pattern-bearing scalar compare forms; the isCodeGenOnly *_int variants
// take full XMM operands for intrinsic lowering.
9098let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9099 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9100 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9101 EVEX_CD8<32, CD8VT1>;
9102 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9103 "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9104 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9105 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9106 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9107 EVEX_CD8<32, CD8VT1>;
9108 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9109 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9110 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9111 let isCodeGenOnly = 1 in {
9112 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9113 sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9114 EVEX_CD8<32, CD8VT1>;
9115 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9116 sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9117 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9118 
9119 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9120 sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9121 EVEX_CD8<32, CD8VT1>;
9122 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9123 sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9124 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9125 }
9126}
9127 
// FP16 scalar compares (vucomish/vcomish), mirroring the structure above.
9128let Defs = [EFLAGS], Predicates = [HasFP16] in {
9129 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9130 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9131 EVEX_CD8<16, CD8VT1>;
9132 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9133 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9134 EVEX_CD8<16, CD8VT1>;
9135 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9136 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9137 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9138 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9139 "comish", SSEPackedSingle>, T_MAP5, EVEX,
9140 VEX_LIG, EVEX_CD8<16, CD8VT1>;
9141 let isCodeGenOnly = 1 in {
9142 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9143 sse_load_f16, "ucomish", SSEPackedSingle>,
9144 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9145 
9146 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9147 sse_load_f16, "comish", SSEPackedSingle>,
9148 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9149 }
9150}
9151 
9152/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Scalar approximate reciprocal / reciprocal-sqrt: rr register form and rm
// form folding a scalar intrinsic-style memory operand.
9153multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9154 X86FoldableSchedWrite sched, X86VectorVTInfo _,
9155 Predicate prd = HasAVX512> {
9156 let Predicates = [prd], ExeDomain = _.ExeDomain in {
9157 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9158 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9159 "$src2, $src1", "$src1, $src2",
9160 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9161 EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9162 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9163 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9164 "$src2, $src1", "$src1, $src2",
9165 (OpNode (_.VT _.RC:$src1),
9166 (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9167 Sched<[sched.Folded, sched.ReadAfterFold]>;
9168}
9169}
9170 
9171defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9172 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9173 T_MAP6, PD;
9174defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9175 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9176 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9177let Uses = [MXCSR] in {
9178defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9179 f32x_info>, EVEX_CD8<32, CD8VT1>,
9180 T8, PD;
9181defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9182 f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9183 T8, PD;
9184defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9185 SchedWriteFRsqrt.Scl, f32x_info>,
9186 EVEX_CD8<32, CD8VT1>, T8, PD;
9187defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9188 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9189 EVEX_CD8<64, CD8VT1>, T8, PD;
9190}
9191 
9192/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed approximate reciprocal / reciprocal-sqrt: r register form,
// m full-vector load form, mb broadcast-load form (EVEX_B).
9193multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9194 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9195 let ExeDomain = _.ExeDomain in {
9196 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9197 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9198 (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9199 Sched<[sched]>;
9200 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9201 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9202 (OpNode (_.VT
9203 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9204 Sched<[sched.Folded, sched.ReadAfterFold]>;
9205 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9206 (ins _.ScalarMemOp:$src), OpcodeStr,
9207 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9208 (OpNode (_.VT
9209 (_.BroadcastLdFrag addr:$src)))>,
9210 EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9211 }
9212}
9213 
// Instantiates the packed 14-bit-precision forms at every width:
// 512-bit always, 128/256-bit under HasVLX, FP16 variants under HasFP16.
9214multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9215 X86SchedWriteWidths sched> {
9216 let Uses = [MXCSR] in {
9217 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9218 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9219 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9220 v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9221 }
9222 let Predicates = [HasFP16] in
9223 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9224 v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9225 
9226 // Define only if AVX512VL feature is present.
9227 let Predicates = [HasVLX], Uses = [MXCSR] in {
9228 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9229 OpNode, sched.XMM, v4f32x_info>,
9230 EVEX_V128, EVEX_CD8<32, CD8VF>;
9231 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9232 OpNode, sched.YMM, v8f32x_info>,
9233 EVEX_V256, EVEX_CD8<32, CD8VF>;
9234 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9235 OpNode, sched.XMM, v2f64x_info>,
9236 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9237 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9238 OpNode, sched.YMM, v4f64x_info>,
9239 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9240 }
9241 let Predicates = [HasFP16, HasVLX] in {
9242 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9243 OpNode, sched.XMM, v8f16x_info>,
9244 EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9245 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9246 OpNode, sched.YMM, v16f16x_info>,
9247 EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9248 }
9249}
9250 
9251defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9252defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9253 
9254/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9255multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 9256 SDNode OpNode, SDNode OpNodeSAE, 9257 X86FoldableSchedWrite sched> { 9258 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 9259 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9260 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9261 "$src2, $src1", "$src1, $src2", 9262 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9263 Sched<[sched]>, SIMD_EXC; 9264 9265 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9266 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9267 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9268 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9269 EVEX_B, Sched<[sched]>; 9270 9271 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9272 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9273 "$src2, $src1", "$src1, $src2", 9274 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, 9275 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9276 } 9277} 9278 9279multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9280 X86FoldableSchedWrite sched> { 9281 let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in { 9282 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9283 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9284 "$src2, $src1", "$src1, $src2", 9285 (null_frag)>, Sched<[sched]>, SIMD_EXC; 9286 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9287 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9288 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9289 (null_frag)>, EVEX_B, Sched<[sched]>; 9290 let mayLoad = 1 in 9291 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9292 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9293 "$src2, $src1", "$src1, $src2", 9294 (null_frag)>, 9295 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9296 } 9297} 9298 9299multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr, 9300 
X86FoldableSchedWrite sched> { 9301 defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>, 9302 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV; 9303 defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>, 9304 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV; 9305} 9306 9307defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>; 9308defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>; 9309 9310multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9311 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9312 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 9313 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV; 9314 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 9315 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV; 9316} 9317 9318multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode, 9319 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9320 let Predicates = [HasFP16] in 9321 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>, 9322 EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV; 9323} 9324 9325defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9326 SchedWriteFRnd.Scl>, 9327 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9328 SchedWriteFRnd.Scl>; 9329/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 9330 9331multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9332 SDNode OpNode, X86FoldableSchedWrite sched> { 9333 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9334 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9335 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9336 (OpNode (_.VT _.RC:$src))>, 9337 Sched<[sched]>; 9338 9339 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9340 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9341 (OpNode 
(_.VT 9342 (bitconvert (_.LdFrag addr:$src))))>, 9343 Sched<[sched.Folded, sched.ReadAfterFold]>; 9344 9345 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9346 (ins _.ScalarMemOp:$src), OpcodeStr, 9347 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9348 (OpNode (_.VT 9349 (_.BroadcastLdFrag addr:$src)))>, 9350 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9351 } 9352} 9353multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9354 SDNode OpNode, X86FoldableSchedWrite sched> { 9355 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 9356 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9357 (ins _.RC:$src), OpcodeStr, 9358 "{sae}, $src", "$src, {sae}", 9359 (OpNode (_.VT _.RC:$src))>, 9360 EVEX_B, Sched<[sched]>; 9361} 9362 9363multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9364 X86FoldableSchedWrite sched> { 9365 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1, 9366 hasSideEffects = 0 in { 9367 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9368 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9369 (null_frag)>, Sched<[sched]>; 9370 let mayLoad = 1 in 9371 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9372 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9373 (null_frag)>, 9374 Sched<[sched.Folded, sched.ReadAfterFold]>; 9375 let mayLoad = 1 in 9376 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9377 (ins _.ScalarMemOp:$src), OpcodeStr, 9378 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9379 (null_frag)>, 9380 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9381 } 9382} 9383multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9384 X86FoldableSchedWrite sched> { 9385 let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in 9386 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9387 (ins _.RC:$src), OpcodeStr, 9388 "{sae}, $src", "$src, {sae}", 9389 
(null_frag)>, Sched<[sched]>, EVEX_B; 9390} 9391 9392multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr, 9393 X86SchedWriteWidths sched> { 9394 defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>, 9395 avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>, 9396 T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 9397 defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>, 9398 avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>, 9399 T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; 9400} 9401 9402defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX; 9403defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX; 9404defm VEXP2 : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX; 9405 9406multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 9407 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 9408 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 9409 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, 9410 T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 9411 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 9412 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, 9413 T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; 9414} 9415 9416multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 9417 SDNode OpNode, X86SchedWriteWidths sched> { 9418 // Define only if AVX512VL feature is present. 
9419 let Predicates = [HasVLX] in { 9420 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, 9421 sched.XMM>, 9422 EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>; 9423 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, 9424 sched.YMM>, 9425 EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>; 9426 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, 9427 sched.XMM>, 9428 EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>; 9429 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, 9430 sched.YMM>, 9431 EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>; 9432 } 9433} 9434 9435multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode, 9436 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 9437 let Predicates = [HasFP16] in 9438 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>, 9439 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>, 9440 T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>; 9441 let Predicates = [HasFP16, HasVLX] in { 9442 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>, 9443 EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>; 9444 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>, 9445 EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>; 9446 } 9447} 9448defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 9449 SchedWriteFRnd>, 9450 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 9451 SchedWriteFRnd>, 9452 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, 9453 SchedWriteFRnd>, EVEX; 9454 9455multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, 9456 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9457 let ExeDomain = _.ExeDomain in 9458 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9459 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 9460 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>, 9461 EVEX, EVEX_B, EVEX_RC, 
Sched<[sched]>; 9462} 9463 9464multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 9465 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9466 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9467 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 9468 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9469 (_.VT (any_fsqrt _.RC:$src)), 9470 (_.VT (fsqrt _.RC:$src))>, EVEX, 9471 Sched<[sched]>; 9472 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 9473 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9474 (any_fsqrt (_.VT (_.LdFrag addr:$src))), 9475 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX, 9476 Sched<[sched.Folded, sched.ReadAfterFold]>; 9477 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 9478 (ins _.ScalarMemOp:$src), OpcodeStr, 9479 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9480 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))), 9481 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>, 9482 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9483 } 9484} 9485 9486let Uses = [MXCSR], mayRaiseFPException = 1 in 9487multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 9488 X86SchedWriteSizes sched> { 9489 let Predicates = [HasFP16] in 9490 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9491 sched.PH.ZMM, v32f16_info>, 9492 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; 9493 let Predicates = [HasFP16, HasVLX] in { 9494 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9495 sched.PH.XMM, v8f16x_info>, 9496 EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>; 9497 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9498 sched.PH.YMM, v16f16x_info>, 9499 EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>; 9500 } 9501 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9502 sched.PS.ZMM, v16f32_info>, 9503 EVEX_V512, TB, EVEX_CD8<32, CD8VF>; 9504 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9505 sched.PD.ZMM, v8f64_info>, 
9506 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9507 // Define only if AVX512VL feature is present. 9508 let Predicates = [HasVLX] in { 9509 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9510 sched.PS.XMM, v4f32x_info>, 9511 EVEX_V128, TB, EVEX_CD8<32, CD8VF>; 9512 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9513 sched.PS.YMM, v8f32x_info>, 9514 EVEX_V256, TB, EVEX_CD8<32, CD8VF>; 9515 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9516 sched.PD.XMM, v2f64x_info>, 9517 EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9518 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9519 sched.PD.YMM, v4f64x_info>, 9520 EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9521 } 9522} 9523 9524let Uses = [MXCSR] in 9525multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 9526 X86SchedWriteSizes sched> { 9527 let Predicates = [HasFP16] in 9528 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"), 9529 sched.PH.ZMM, v32f16_info>, 9530 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; 9531 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 9532 sched.PS.ZMM, v16f32_info>, 9533 EVEX_V512, TB, EVEX_CD8<32, CD8VF>; 9534 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 9535 sched.PD.ZMM, v8f64_info>, 9536 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9537} 9538 9539multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 9540 X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> { 9541 let ExeDomain = _.ExeDomain, Predicates = [prd] in { 9542 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9543 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9544 "$src2, $src1", "$src1, $src2", 9545 (X86fsqrts (_.VT _.RC:$src1), 9546 (_.VT _.RC:$src2)), "_Int">, 9547 Sched<[sched]>, SIMD_EXC; 9548 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9549 (ins _.RC:$src1, 
_.IntScalarMemOp:$src2), OpcodeStr, 9550 "$src2, $src1", "$src1, $src2", 9551 (X86fsqrts (_.VT _.RC:$src1), 9552 (_.ScalarIntMemFrags addr:$src2)), "_Int">, 9553 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9554 let Uses = [MXCSR] in 9555 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9556 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 9557 "$rc, $src2, $src1", "$src1, $src2, $rc", 9558 (X86fsqrtRnds (_.VT _.RC:$src1), 9559 (_.VT _.RC:$src2), 9560 (i32 timm:$rc)), "_Int">, 9561 EVEX_B, EVEX_RC, Sched<[sched]>; 9562 9563 let isCodeGenOnly = 1, hasSideEffects = 0 in { 9564 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9565 (ins _.FRC:$src1, _.FRC:$src2), 9566 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9567 Sched<[sched]>, SIMD_EXC; 9568 let mayLoad = 1 in 9569 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9570 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 9571 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9572 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9573 } 9574 } 9575 9576 let Predicates = [prd] in { 9577 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)), 9578 (!cast<Instruction>(Name#Zr) 9579 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 9580 } 9581 9582 let Predicates = [prd, OptForSize] in { 9583 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))), 9584 (!cast<Instruction>(Name#Zm) 9585 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 9586 } 9587} 9588 9589multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 9590 X86SchedWriteSizes sched> { 9591 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>, 9592 EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS; 9593 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 9594 EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS; 9595 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 9596 EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W; 9597} 9598 9599defm VSQRT : 
avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 9600 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 9601 9602defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 9603 9604multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 9605 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9606 let ExeDomain = _.ExeDomain in { 9607 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9608 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9609 "$src3, $src2, $src1", "$src1, $src2, $src3", 9610 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9611 (i32 timm:$src3))), "_Int">, 9612 Sched<[sched]>, SIMD_EXC; 9613 9614 let Uses = [MXCSR] in 9615 defm rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9616 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9617 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 9618 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9619 (i32 timm:$src3))), "_Int">, EVEX_B, 9620 Sched<[sched]>; 9621 9622 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9623 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 9624 OpcodeStr, 9625 "$src3, $src2, $src1", "$src1, $src2, $src3", 9626 (_.VT (X86RndScales _.RC:$src1, 9627 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3))), "_Int">, 9628 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9629 9630 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 9631 def rri : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9632 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 9633 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9634 []>, Sched<[sched]>, SIMD_EXC; 9635 9636 let mayLoad = 1 in 9637 def rmi : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9638 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9639 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9640 []>, Sched<[sched.Folded, 
sched.ReadAfterFold]>, SIMD_EXC; 9641 } 9642 } 9643 9644 let Predicates = [HasAVX512] in { 9645 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2), 9646 (_.EltVT (!cast<Instruction>(NAME#rri) (_.EltVT (IMPLICIT_DEF)), 9647 _.FRC:$src1, timm:$src2))>; 9648 } 9649 9650 let Predicates = [HasAVX512, OptForSize] in { 9651 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), 9652 (_.EltVT (!cast<Instruction>(NAME#rmi) (_.EltVT (IMPLICIT_DEF)), 9653 addr:$src1, timm:$src2))>; 9654 } 9655} 9656 9657let Predicates = [HasFP16] in 9658defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh", 9659 SchedWriteFRnd.Scl, f16x_info>, 9660 AVX512PSIi8Base, TA, EVEX, VVVV, 9661 EVEX_CD8<16, CD8VT1>; 9662 9663defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 9664 SchedWriteFRnd.Scl, f32x_info>, 9665 AVX512AIi8Base, EVEX, VVVV, VEX_LIG, 9666 EVEX_CD8<32, CD8VT1>; 9667 9668defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 9669 SchedWriteFRnd.Scl, f64x_info>, 9670 REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG, 9671 EVEX_CD8<64, CD8VT1>; 9672 9673multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9674 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9675 dag OutMask, Predicate BasePredicate> { 9676 let Predicates = [BasePredicate] in { 9677 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9678 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9679 (extractelt _.VT:$dst, (iPTR 0))))), 9680 (!cast<Instruction>("V"#OpcPrefix#rk_Int) 9681 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9682 9683 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9684 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9685 ZeroFP))), 9686 (!cast<Instruction>("V"#OpcPrefix#rkz_Int) 9687 OutMask, _.VT:$src2, _.VT:$src1)>; 9688 } 9689} 9690 9691defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh, 9692 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info, 9693 fp16imm0, (COPY_TO_REGCLASS $mask, 
VK1WM), HasFP16>; 9694defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 9695 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 9696 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9697defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 9698 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 9699 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9700 9701 9702//------------------------------------------------- 9703// Integer truncate and extend operations 9704//------------------------------------------------- 9705 9706multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 9707 SDPatternOperator MaskNode, 9708 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, 9709 X86VectorVTInfo DestInfo, X86MemOperand x86memop> { 9710 let ExeDomain = DestInfo.ExeDomain in { 9711 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9712 (ins SrcInfo.RC:$src), 9713 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9714 [(set DestInfo.RC:$dst, 9715 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>, 9716 EVEX, Sched<[sched]>; 9717 let Constraints = "$src0 = $dst" in 9718 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9719 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9720 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9721 [(set DestInfo.RC:$dst, 9722 (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9723 (DestInfo.VT DestInfo.RC:$src0), 9724 SrcInfo.KRCWM:$mask))]>, 9725 EVEX, EVEX_K, Sched<[sched]>; 9726 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9727 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9728 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 9729 [(set DestInfo.RC:$dst, 9730 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9731 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>, 9732 EVEX, EVEX_KZ, Sched<[sched]>; 9733 } 9734 9735 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { 9736 
def mr : AVX512XS8I<opc, MRMDestMem, (outs), 9737 (ins x86memop:$dst, SrcInfo.RC:$src), 9738 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, 9739 EVEX, Sched<[sched.Folded]>; 9740 9741 def mrk : AVX512XS8I<opc, MRMDestMem, (outs), 9742 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9743 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, 9744 EVEX, EVEX_K, Sched<[sched.Folded]>; 9745 }//mayStore = 1, hasSideEffects = 0 9746} 9747 9748multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, 9749 PatFrag truncFrag, PatFrag mtruncFrag, 9750 string Name> { 9751 9752 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), 9753 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr) 9754 addr:$dst, SrcInfo.RC:$src)>; 9755 9756 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, 9757 SrcInfo.KRCWM:$mask), 9758 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk) 9759 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9760} 9761 9762multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9763 SDNode OpNode256, SDNode OpNode512, 9764 SDPatternOperator MaskNode128, 9765 SDPatternOperator MaskNode256, 9766 SDPatternOperator MaskNode512, 9767 X86SchedWriteWidths sched, 9768 AVX512VLVectorVTInfo VTSrcInfo, 9769 X86VectorVTInfo DestInfoZ128, 9770 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9771 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9772 X86MemOperand x86memopZ, PatFrag truncFrag, 9773 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9774 9775 let Predicates = [HasVLX, prd] in { 9776 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM, 9777 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9778 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag, 9779 mtruncFrag, NAME>, EVEX_V128; 9780 9781 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM, 9782 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9783 avx512_trunc_mr_lowering<VTSrcInfo.info256, 
truncFrag, 9784 mtruncFrag, NAME>, EVEX_V256; 9785 } 9786 let Predicates = [prd] in 9787 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM, 9788 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9789 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag, 9790 mtruncFrag, NAME>, EVEX_V512; 9791} 9792 9793multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, 9794 X86SchedWriteWidths sched, PatFrag StoreNode, 9795 PatFrag MaskedStoreNode, SDNode InVecNode, 9796 SDPatternOperator InVecMaskNode> { 9797 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9798 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9799 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9800 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9801 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9802} 9803 9804multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9805 SDPatternOperator MaskNode, 9806 X86SchedWriteWidths sched, PatFrag StoreNode, 9807 PatFrag MaskedStoreNode, SDNode InVecNode, 9808 SDPatternOperator InVecMaskNode> { 9809 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9810 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9811 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9812 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9813 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9814} 9815 9816multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9817 SDPatternOperator MaskNode, 9818 X86SchedWriteWidths sched, PatFrag StoreNode, 9819 PatFrag MaskedStoreNode, SDNode InVecNode, 9820 SDPatternOperator InVecMaskNode> { 9821 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9822 InVecMaskNode, MaskNode, MaskNode, sched, 9823 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9824 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9825 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9826} 9827 9828multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9829 SDPatternOperator MaskNode, 9830 
X86SchedWriteWidths sched, PatFrag StoreNode, 9831 PatFrag MaskedStoreNode, SDNode InVecNode, 9832 SDPatternOperator InVecMaskNode> { 9833 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9834 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9835 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9836 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9837 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9838} 9839 9840multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9841 SDPatternOperator MaskNode, 9842 X86SchedWriteWidths sched, PatFrag StoreNode, 9843 PatFrag MaskedStoreNode, SDNode InVecNode, 9844 SDPatternOperator InVecMaskNode> { 9845 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9846 InVecMaskNode, MaskNode, MaskNode, sched, 9847 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9848 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9849 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9850} 9851 9852multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9853 SDPatternOperator MaskNode, 9854 X86SchedWriteWidths sched, PatFrag StoreNode, 9855 PatFrag MaskedStoreNode, SDNode InVecNode, 9856 SDPatternOperator InVecMaskNode> { 9857 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9858 InVecMaskNode, MaskNode, MaskNode, sched, 9859 avx512vl_i16_info, v16i8x_info, v16i8x_info, 9860 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9861 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9862} 9863 9864defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", 9865 SchedWriteVecTruncate, truncstorevi8, 9866 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9867defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", 9868 SchedWriteVecTruncate, truncstore_s_vi8, 9869 masked_truncstore_s_vi8, X86vtruncs, 9870 X86vmtruncs>; 9871defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", 9872 SchedWriteVecTruncate, truncstore_us_vi8, 9873 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; 9874 9875defm VPMOVQW : avx512_trunc_qw<0x34, 
"vpmovqw", trunc, select_trunc, 9876 SchedWriteVecTruncate, truncstorevi16, 9877 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9878defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, 9879 SchedWriteVecTruncate, truncstore_s_vi16, 9880 masked_truncstore_s_vi16, X86vtruncs, 9881 X86vmtruncs>; 9882defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, 9883 select_truncus, SchedWriteVecTruncate, 9884 truncstore_us_vi16, masked_truncstore_us_vi16, 9885 X86vtruncus, X86vmtruncus>; 9886 9887defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, 9888 SchedWriteVecTruncate, truncstorevi32, 9889 masked_truncstorevi32, X86vtrunc, X86vmtrunc>; 9890defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, 9891 SchedWriteVecTruncate, truncstore_s_vi32, 9892 masked_truncstore_s_vi32, X86vtruncs, 9893 X86vmtruncs>; 9894defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, 9895 select_truncus, SchedWriteVecTruncate, 9896 truncstore_us_vi32, masked_truncstore_us_vi32, 9897 X86vtruncus, X86vmtruncus>; 9898 9899defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, 9900 SchedWriteVecTruncate, truncstorevi8, 9901 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9902defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, 9903 SchedWriteVecTruncate, truncstore_s_vi8, 9904 masked_truncstore_s_vi8, X86vtruncs, 9905 X86vmtruncs>; 9906defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, 9907 select_truncus, SchedWriteVecTruncate, 9908 truncstore_us_vi8, masked_truncstore_us_vi8, 9909 X86vtruncus, X86vmtruncus>; 9910 9911defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, 9912 SchedWriteVecTruncate, truncstorevi16, 9913 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9914defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, 9915 SchedWriteVecTruncate, truncstore_s_vi16, 9916 masked_truncstore_s_vi16, X86vtruncs, 9917 X86vmtruncs>; 
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

// Without VLX, widen the 256-bit source to ZMM, truncate, and extract the
// low XMM result.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Select a masked-truncate node onto the rrk/rrkz register forms.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// Register and memory forms of one sign/zero-extend at one vector width.
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                            EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
                            EVEX, Sched<[sched.Folded]>;
  }
}

multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                                   v16i8x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                                 v32i8x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
                                 EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v8i16x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i16x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v4i32x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i32x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.

// One gather instruction.  $dst is early-clobber and tied to the
// pass-through source $src1; the mask is both read and written back
// ($mask tied to the $mask_wb output).  No ISel pattern is attached here;
// gathers are selected elsewhere.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

// 64-bit-element gathers: dword-index (D) and qword-index (Q) forms at each
// EVEX vector length.  The memory operand encodes the index register class
// and the 64-bit data element size (vx/vy/vz = xmm/ymm/zmm index).
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy64xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz64mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx64xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy64xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx64xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem>, EVEX_V128, REX_W;
}
}

// 32-bit-element gathers.  The qword-index (Q) forms fill only half as many
// data elements as there are indices, hence the narrower data-register infos
// (info256/info128) than the D forms at the same EVEX length, and the
// explicit VK2WM mask for the 2-element Q form at 128 bits.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz32mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz32mem>,
                                    EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy32xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy32xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx32xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx32xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// One scatter instruction.  Only the mask register is an architectural
// output ($mask tied to $mask_wb); the vector source is stored to memory.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteStore]>;
}

// 64-bit-element scatters; operand choices mirror avx512_gather_q_pd.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy64xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz64mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx64xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy64xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx64xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem>, EVEX_V128, REX_W;
}
}

// 32-bit-element scatters; operand choices mirror avx512_gather_d_ps.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz32mem>,
                                     EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz32mem>,
                                     EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy32xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy32xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx32xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx32xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Gather/scatter prefetches: mask plus a vector memory operand, no register
// results.  The opcode extension (Format F, MRM1m/MRM2m/MRM5m/MRM6m below)
// selects the hint level and gather-vs-scatter variant.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
                   EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy64xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz64mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// Mask -> vector (vpmovm2*): selected via a sign-extend of the i1 mask
// vector, i.e. each element becomes all-ones/all-zeros from its mask bit.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rk : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                    !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                    [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                    EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, REX_W;

// Vector -> mask (vpmov*2m): selected from the (0 > x) sign-bit compare
// (X86pcmpgtm with an all-zeros LHS).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def kr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                      EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into an undef zmm (INSERT_SUBREG) and the
// resulting wide mask is copied back into the narrow mask register class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zkr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  // Without VLX (but with 512-bit support), lower the narrow forms through
  // the zmm instruction.
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, REX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrk VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rk VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  // The register form carries no pattern (null_frag); selection happens via
  // the compress_by_vec_width_lowering patterns below.
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>, AVX5128IBase,
                            Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
                    (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    []>, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                     []>,
                     EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                     Sched<[sched.Folded]>;
}

// Selection patterns: masked compressing store -> mrk; X86compress /
// vector_compress with a zero/undef passthru -> zero-masked (rrkz) form,
// with an explicit passthru -> merge-masked (rrk) form.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                                addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.RC:$passthru)),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                                _.RC:$passthru, _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
10585defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 10586 avx512vl_i32_info>, EVEX; 10587defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 10588 avx512vl_i64_info>, EVEX, REX_W; 10589defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 10590 avx512vl_f32_info>, EVEX; 10591defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 10592 avx512vl_f64_info>, EVEX, REX_W; 10593 10594// expand 10595multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 10596 string OpcodeStr, X86FoldableSchedWrite sched> { 10597 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10598 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 10599 (null_frag)>, AVX5128IBase, 10600 Sched<[sched]>; 10601 10602 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10603 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 10604 (null_frag)>, 10605 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 10606 Sched<[sched.Folded, sched.ReadAfterFold]>; 10607} 10608 10609multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10610 10611 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 10612 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10613 _.KRCWM:$mask, addr:$src)>; 10614 10615 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 10616 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10617 _.KRCWM:$mask, addr:$src)>; 10618 10619 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 10620 (_.VT _.RC:$src0))), 10621 (!cast<Instruction>(Name#_.ZSuffix#rmk) 10622 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 10623 10624 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10625 (!cast<Instruction>(Name#_.ZSuffix#rrk) 10626 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10627 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10628 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10629 
_.KRCWM:$mask, _.RC:$src)>; 10630} 10631 10632multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 10633 X86FoldableSchedWrite sched, 10634 AVX512VLVectorVTInfo VTInfo, 10635 Predicate Pred = HasAVX512> { 10636 let Predicates = [Pred] in 10637 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 10638 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10639 10640 let Predicates = [Pred, HasVLX] in { 10641 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, 10642 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10643 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, 10644 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10645 } 10646} 10647 10648// FIXME: Is there a better scheduler class for VPEXPAND? 10649defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, 10650 avx512vl_i32_info>, EVEX; 10651defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, 10652 avx512vl_i64_info>, EVEX, REX_W; 10653defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, 10654 avx512vl_f32_info>, EVEX; 10655defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, 10656 avx512vl_f64_info>, EVEX, REX_W; 10657 10658//handle instruction reg_vec1 = op(reg_vec,imm) 10659// op(mem_vec,imm) 10660// op(broadcast(eltVt),imm) 10661//all instruction created with FROUND_CURRENT 10662multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, 10663 SDPatternOperator OpNode, 10664 SDPatternOperator MaskOpNode, 10665 X86FoldableSchedWrite sched, 10666 X86VectorVTInfo _> { 10667 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10668 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 10669 (ins _.RC:$src1, i32u8imm:$src2), 10670 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10671 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)), 10672 (MaskOpNode (_.VT 
_.RC:$src1), (i32 timm:$src2))>, 10673 Sched<[sched]>; 10674 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10675 (ins _.MemOp:$src1, i32u8imm:$src2), 10676 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10677 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10678 (i32 timm:$src2)), 10679 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10680 (i32 timm:$src2))>, 10681 Sched<[sched.Folded, sched.ReadAfterFold]>; 10682 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10683 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 10684 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr, 10685 "${src1}"#_.BroadcastStr#", $src2", 10686 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10687 (i32 timm:$src2)), 10688 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10689 (i32 timm:$src2))>, EVEX_B, 10690 Sched<[sched.Folded, sched.ReadAfterFold]>; 10691 } 10692} 10693 10694//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10695multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10696 SDNode OpNode, X86FoldableSchedWrite sched, 10697 X86VectorVTInfo _> { 10698 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10699 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10700 (ins _.RC:$src1, i32u8imm:$src2), 10701 OpcodeStr#_.Suffix, "$src2, {sae}, $src1", 10702 "$src1, {sae}, $src2", 10703 (OpNode (_.VT _.RC:$src1), 10704 (i32 timm:$src2))>, 10705 EVEX_B, Sched<[sched]>; 10706} 10707 10708multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, 10709 AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode, 10710 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched, 10711 Predicate prd>{ 10712 let Predicates = [prd] in { 10713 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10714 sched.ZMM, _.info512>, 10715 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, 10716 sched.ZMM, _.info512>, EVEX_V512; 10717 } 10718 let 
Predicates = [prd, HasVLX] in { 10719 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10720 sched.XMM, _.info128>, EVEX_V128; 10721 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10722 sched.YMM, _.info256>, EVEX_V256; 10723 } 10724} 10725 10726//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10727// op(reg_vec2,mem_vec,imm) 10728// op(reg_vec2,broadcast(eltVt),imm) 10729//all instruction created with FROUND_CURRENT 10730multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10731 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10732 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10733 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10734 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10735 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10736 (OpNode (_.VT _.RC:$src1), 10737 (_.VT _.RC:$src2), 10738 (i32 timm:$src3))>, 10739 Sched<[sched]>; 10740 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10741 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), 10742 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10743 (OpNode (_.VT _.RC:$src1), 10744 (_.VT (bitconvert (_.LdFrag addr:$src2))), 10745 (i32 timm:$src3))>, 10746 Sched<[sched.Folded, sched.ReadAfterFold]>; 10747 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10748 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 10749 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10750 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10751 (OpNode (_.VT _.RC:$src1), 10752 (_.VT (_.BroadcastLdFrag addr:$src2)), 10753 (i32 timm:$src3))>, EVEX_B, 10754 Sched<[sched.Folded, sched.ReadAfterFold]>; 10755 } 10756} 10757 10758//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10759// op(reg_vec2,mem_vec,imm) 10760multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10761 X86FoldableSchedWrite sched, X86VectorVTInfo 
DestInfo, 10762 X86VectorVTInfo SrcInfo>{ 10763 let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in { 10764 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 10765 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), 10766 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10767 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10768 (SrcInfo.VT SrcInfo.RC:$src2), 10769 (i8 timm:$src3)))>, 10770 Sched<[sched]>; 10771 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 10772 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), 10773 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10774 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10775 (SrcInfo.VT (bitconvert 10776 (SrcInfo.LdFrag addr:$src2))), 10777 (i8 timm:$src3)))>, 10778 Sched<[sched.Folded, sched.ReadAfterFold]>; 10779 } 10780} 10781 10782//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10783// op(reg_vec2,mem_vec,imm) 10784// op(reg_vec2,broadcast(eltVt),imm) 10785multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10786 X86FoldableSchedWrite sched, X86VectorVTInfo _>: 10787 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ 10788 10789 let ExeDomain = _.ExeDomain, ImmT = Imm8 in 10790 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10791 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10792 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10793 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10794 (OpNode (_.VT _.RC:$src1), 10795 (_.VT (_.BroadcastLdFrag addr:$src2)), 10796 (i8 timm:$src3))>, EVEX_B, 10797 Sched<[sched.Folded, sched.ReadAfterFold]>; 10798} 10799 10800//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10801// op(reg_vec2,mem_scalar,imm) 10802multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10803 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10804 let ExeDomain = _.ExeDomain, Uses = [MXCSR], 
mayRaiseFPException = 1 in { 10805 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10806 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10807 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10808 (OpNode (_.VT _.RC:$src1), 10809 (_.VT _.RC:$src2), 10810 (i32 timm:$src3))>, 10811 Sched<[sched]>; 10812 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 10813 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 10814 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10815 (OpNode (_.VT _.RC:$src1), 10816 (_.ScalarIntMemFrags addr:$src2), 10817 (i32 timm:$src3))>, 10818 Sched<[sched.Folded, sched.ReadAfterFold]>; 10819 } 10820} 10821 10822//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10823multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10824 SDNode OpNode, X86FoldableSchedWrite sched, 10825 X86VectorVTInfo _> { 10826 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10827 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10828 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10829 OpcodeStr, "$src3, {sae}, $src2, $src1", 10830 "$src1, $src2, {sae}, $src3", 10831 (OpNode (_.VT _.RC:$src1), 10832 (_.VT _.RC:$src2), 10833 (i32 timm:$src3))>, 10834 EVEX_B, Sched<[sched]>; 10835} 10836 10837//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10838multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10839 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10840 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10841 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10842 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10843 OpcodeStr, "$src3, {sae}, $src2, $src1", 10844 "$src1, $src2, {sae}, $src3", 10845 (OpNode (_.VT _.RC:$src1), 10846 (_.VT _.RC:$src2), 10847 (i32 timm:$src3))>, 10848 EVEX_B, Sched<[sched]>; 10849} 10850 10851multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, 10852 
                                          AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                                          SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  // 512-bit form: carries both the normal and the SAE (suppress-all-exceptions)
  // immediate variants; gated only on the feature predicate.
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                EVEX_V512;

  }
  // 128/256-bit forms additionally require VLX and have no SAE variant.
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

// Instantiate EVEX 512/256/128-bit forms of a three-source-operand + imm8
// instruction where destination and source vectors may use different element
// types (DestInfo vs. SrcInfo). VL forms require HasVLX on top of Pred.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
  }
}

// Same as above but destination and sources share a single VT info
// (used e.g. for VSHUFPS/VSHUFPD below).
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

// Scalar imm8 instruction with both a normal and an SAE variant; scalar ops
// only have the XMM-width form.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

// Expand a unary packed-FP + imm8 op over all three FP element types.
// NOTE: the PH (f16) variant is always gated on HasFP16 regardless of prd,
// and uses the PS opcode with the TA map; PS/PD use prd.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

// Packed range instructions (DQI).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;

// Scalar range instructions (DQI).
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

// Scalar reduce instructions; SD/SS require DQI, SH requires FP16.
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

// Scalar get-mantissa instructions; SH requires FP16.
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

// 128-bit lane shuffle (VSHUFF*/VSHUFI*). The select pattern is written on
// CastInfo's type (where X86Shuf128 applies) and bitconverted back to the
// instruction's own type.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>;
  // Full-vector memory form; the load is done in CastInfo's type.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast-from-scalar memory form (EVEX.b).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Only 512-bit and 256-bit variants exist: the shuffle moves whole 128-bit
// lanes, so a 128-bit form would be meaningless.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;

// VALIGND/VALIGNQ: concatenate two sources and shift right by imm8 elements.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast memory form (EVEX.b).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   REX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr. The immediate is rescaled from source element count to
// destination element (or byte) count.
// valignq imm -> valignd imm: one qword element = two dword elements.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
// valignq imm -> vpalignr imm: one qword element = eight bytes.
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
// valignd imm -> vpalignr imm: one dword element = four bytes.
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

// Re-select an alignment node of type From, whose result is masked with a
// mask of type To's width, as the masked instruction named
// OpcodeStr#"rri"/"rmi" with k/kz suffixes, rescaling the immediate.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // reg-reg, merge masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  // reg-reg, zero masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  // reg-mem, merge masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // reg-mem, zero masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

// Adds the broadcast-memory ("rmb*") forms on top of the patterns above.
// The broadcast load is performed in To's element type and bitconverted to
// From's type for the node match.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // broadcast-mem, unmasked.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // broadcast-mem, merge masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  // broadcast-mem, zero masking.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;

// Unary op with register and full-vector memory forms (maskable).
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
// Extends avx512_unary_rm with a broadcast-from-scalar memory form (EVEX.b).
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}

// Instantiate all three vector lengths of a unary op (no broadcast form).
// VL forms require HasVLX on top of prd.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Same, but with the broadcast memory form included.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Dword/qword element variants; broadcast forms exist, so use _rmb_vl.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

// Byte/word element variants; no broadcast form for 8/16-bit elements.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, WIG;
}

// All four element widths: D/Q gated on HasAVX512, B/W on HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow value is widened through IMPLICIT_DEF/INSERT_SUBREG, the 512-bit
// instruction is used, and the low subregister is extracted.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
// Generic version of the widening trick above: lower a 256/128-bit unary op
// via the 512-bit instruction named InstrStr#"Zrr" when VLX is unavailable.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

// Leading-zero count (CDI).
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Use the 512-bit form for 128/256-bit popcount when VLX is unavailable.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, TB, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// The 128-bit MOVDDUP is special: it is a broadcast of the low element, and
// its memory form loads only a scalar (CD8VH tuple).
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

// 512/256-bit forms use the generic unary expansion with the X86Movddup node;
// the 128-bit form uses the broadcast-style multiclass above.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, TB, XD, REX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

// Select VMOVDDUP for a broadcast of a scalar FP64 register, including the
// merge- and zero-masked variants.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// FP unpacks reuse the FP binop expansion but are pure shuffles: clear the
// implicit MXCSR use and FP-exception flag inherited from avx512_fp_binop_p.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

// Integer unpacks: byte/word forms need BWI, dword/qword need only AVX512.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Store-to-memory form of PEXTRB/PEXTRW: the extracted element is truncated
// to the element type and stored.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mri : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rri : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                        (ins _.RC:$src1, u8imm:$src2),
                        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set GR32orGR64:$dst,
                              (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                        EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    // Register form uses the legacy 0xC5 encoding (TB map).
    def rri : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                        (ins _.RC:$src1, u8imm:$src2),
                        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set GR32orGR64:$dst,
                              (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                        EVEX, TB, PD, Sched<[WriteVecExtract]>;

    // Alternate 0x15 (TA map) register encoding, kept for the disassembler
    // only (no patterns, not selected by codegen).
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rri_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
  }
}

// PEXTRD/PEXTRQ (DQI): both register and store forms match plain extractelt.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                         RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rri : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                        (ins _.RC:$src1, u8imm:$src2),
                        OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set GRC:$dst,
                              (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                        EVEX, TA, PD, Sched<[WriteVecExtract]>;

    def mri : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
                Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;

// Memory (load-and-insert) form shared by the insert-element multiclasses.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag,
                                            SDPatternOperator immoperator> {
  def rmi : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

// PINSRB/PINSRW (BWI): source element comes from a 32-bit GPR.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rri : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
              (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

// PINSRD/PINSRQ (DQI): matches generic insertelt directly.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rri : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TA, PD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TA, PD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, TB, PD, WIG;
// VPINSRD and VPINSRQ share opcode 0x22; REX_W selects the qword form.
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;

// Insert of an 8-bit mask value: move the mask to a GPR first. Without BWI
// only the VEX-encoded VPINSRB is available.
let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBrri VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrri VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                     GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrri VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                         timm:$src3)>;
}

// Always select FP16 instructions if available.
// f16 load/store/bitcast are implemented via VPINSRW/VPEXTRW on a v8i16
// vector; AddedComplexity = -10 lets real FP16 instructions win when present.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrmi (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmri addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrri (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrri (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             TA, EVEX, VVVV;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// Whole-vector byte shifts (VPSLLDQ/VPSRLDQ). These are not maskable; the
// register/memory forms share one opcode, distinguished by the Format
// (ModRM reg field) passed in.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
  defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                               sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// VPSLLDQ and VPSRLDQ share opcode 0x73; /7 vs /3 selects the operation.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;

// PSADBW: byte sources, qword accumulator destinations (hence the separate
// _dst/_src VT infos). Not maskable.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT (bitconvert
                                                  (_src.LdFrag addr:$src2))))))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// The VPTERNLOG imm8 is a truth table indexed by (op0,op1,op2) bits, so
// permuting operands permutes the table's bit positions accordingly.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// VPTERNLOG: three-source ternary logic; $src1 is tied to $dst.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for
matching passthru operand in other positions. 11774 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11775 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11776 _.RC:$src1)), 11777 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11778 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11779 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11780 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), 11781 _.RC:$src1)), 11782 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11783 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11784 11785 // Additional patterns for matching zero masking with loads in other 11786 // positions. 11787 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11788 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11789 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11790 _.ImmAllZerosV)), 11791 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11792 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11793 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11794 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11795 _.RC:$src2, (i8 timm:$src4)), 11796 _.ImmAllZerosV)), 11797 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11798 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11799 11800 // Additional patterns for matching masked loads with different 11801 // operand orders. 
11802 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11803 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11804 _.RC:$src2, (i8 timm:$src4)), 11805 _.RC:$src1)), 11806 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11807 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11808 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11809 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11810 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11811 _.RC:$src1)), 11812 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11813 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11814 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11815 (OpNode _.RC:$src2, _.RC:$src1, 11816 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11817 _.RC:$src1)), 11818 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11819 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11820 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11821 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11822 _.RC:$src1, (i8 timm:$src4)), 11823 _.RC:$src1)), 11824 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11825 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11826 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11827 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11828 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11829 _.RC:$src1)), 11830 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11831 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11832 11833 // Additional patterns for matching zero masking with broadcasts in other 11834 // positions. 
11835 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11836 (OpNode (_.BroadcastLdFrag addr:$src3), 11837 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11838 _.ImmAllZerosV)), 11839 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11840 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11841 (VPTERNLOG321_imm8 timm:$src4))>; 11842 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11843 (OpNode _.RC:$src1, 11844 (_.BroadcastLdFrag addr:$src3), 11845 _.RC:$src2, (i8 timm:$src4)), 11846 _.ImmAllZerosV)), 11847 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11848 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11849 (VPTERNLOG132_imm8 timm:$src4))>; 11850 11851 // Additional patterns for matching masked broadcasts with different 11852 // operand orders. 11853 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11854 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), 11855 _.RC:$src2, (i8 timm:$src4)), 11856 _.RC:$src1)), 11857 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11858 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11859 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11860 (OpNode (_.BroadcastLdFrag addr:$src3), 11861 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11862 _.RC:$src1)), 11863 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11864 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11865 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11866 (OpNode _.RC:$src2, _.RC:$src1, 11867 (_.BroadcastLdFrag addr:$src3), 11868 (i8 timm:$src4)), _.RC:$src1)), 11869 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11870 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11871 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11872 (OpNode _.RC:$src2, 11873 (_.BroadcastLdFrag addr:$src3), 11874 _.RC:$src1, (i8 timm:$src4)), 11875 _.RC:$src1)), 11876 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11877 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11878 def : Pat<(_.VT (vselect_mask 
_.KRCWM:$mask, 11879 (OpNode (_.BroadcastLdFrag addr:$src3), 11880 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11881 _.RC:$src1)), 11882 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11883 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11884} 11885 11886multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11887 AVX512VLVectorVTInfo _> { 11888 let Predicates = [HasAVX512] in 11889 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11890 _.info512, NAME>, EVEX_V512; 11891 let Predicates = [HasAVX512, HasVLX] in { 11892 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11893 _.info128, NAME>, EVEX_V128; 11894 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11895 _.info256, NAME>, EVEX_V256; 11896 } 11897} 11898 11899defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11900 avx512vl_i32_info>; 11901defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11902 avx512vl_i64_info>, REX_W; 11903 11904// Patterns to implement vnot using vpternlog instead of creating all ones 11905// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11906// so that the result is only dependent on src0. But we use the same source 11907// for all operands to prevent a false dependency. 11908// TODO: We should maybe have a more generalized algorithm for folding to 11909// vpternlog. 
// 512-bit vnot via VPTERNLOG with immediate 15 (truth table NOT src0).
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX, widen the 128/256-bit vnot to 512 bits and extract the low
// subregister of the result.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

// With VLX the narrow VPTERNLOG forms are available directly.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

// VFIXUPIMM packed forms: reg/reg, reg/mem and broadcast; TblVT is the
// integer table-operand type paired with the FP vector type _.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                      (i32 timm:$src4))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

// {sae} (suppress-all-exceptions) register form of packed VFIXUPIMM.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

// Scalar VFIXUPIMM (SS/SD): normal, {sae} and memory forms.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT (scalar_to_vector
                                               (_src3VT.ScalarLdFrag addr:$src3))),
                                     (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", sched.ZMM, _Vec.info512, _Tbl.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX, VVVV, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                                EVEX, VVVV, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                                EVEX, VVVV, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {c[0], a[1], a[2], a[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Select the *_Int scalar-math instructions for (move (op (extractelt ...)))
// shapes, including merge- and zero-masked variants via X86selects_mask.
// NOTE: normalized the two zero-masking patterns to use !cast<Instruction>
// like every other pattern in this multiclass (they previously used
// !cast<I>, which resolves to the same instruction records but is
// inconsistent with the surrounding code).
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrrk_Int")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrmk_Int")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with insert via movss, zero masking
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrrkz_Int")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrmkz_Int") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;

// Same idea for unary scalar math (sqrt): fold the op + move into *_Int.
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded VAES at 128/256 (VLX+VAES) and 512 bits (AVX512+VAES).
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                                 EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

// VPSHLDV/VPSHRDV variable funnel-shift: register and memory source forms.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                T8, PD, EVEX, VVVV, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
                T8, PD, EVEX, VVVV,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Adds the broadcast-memory form (dword/qword element types only).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"#VTI.BroadcastStr#", $src2",
              "$src2, ${src3}"#VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              T8, PD, EVEX, VVVV, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                     EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256   : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                     EVEX_V256;
    defm Z128   : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                     EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                      EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256   : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                      EVEX_V256;
    defm Z128   : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                      EVEX_V128;
  }
}
// Word forms have no broadcast; dword/qword forms do.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
             avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
             avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}

// VPSHLD/VPSHRD immediate funnel-shift forms.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             REX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// VNNI dot-product accumulate: register, memory and broadcast source forms.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX, VVVV, T8, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
                                   Sched<[sched.Folded, sched.ReadAfterFold,
                                          sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8, Sched<[sched.Folded, sched.ReadAfterFold,
                                              sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable,
                       list<Predicate> prds, list<Predicate> prds512> {
  let Predicates = prds512 in
    defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                             IsCommutable>, EVEX_V512;
  let Predicates = prds in {
    defm Z256   :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                             IsCommutable>, EVEX_V256;
    defm Z128   :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                             IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
// AVX512-VNNI dot-product instruction families (EVEX-encoded, opcodes
// 0x50-0x53).  Note the IsCommutable template argument: the byte forms
// (VPDPBUSD[S]) are instantiated with 0 (non-commutable), the word forms
// (VPDPWSSD[S]) with 1 (commutable).
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0,
                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0,
                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1,
                              [HasVNNI, HasVLX], [HasVNNI]>, PD;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1,
                              [HasVNNI, HasVLX], [HasVNNI]>, PD;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// These fold add(acc, X86vpmaddwd_su(x, y)) into a single VPDPWSSD, for the
// register-register and register-memory forms at each vector width.  (The
// "_su" fragment presumably restricts matching to single-use vpmaddwd nodes
// -- see its definition elsewhere in this file; TODO confirm.)
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
// The 256-/128-bit folds additionally require VLX.
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG per-element population count on bytes and words.  Both forms share
// opcode 0x54 and are distinguished by the REX_W bit on the word variant.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, REX_W;

// Extra lowering patterns for ctpop at the narrower widths.
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// VPSHUFBITQMB register/memory forms at a single vector width.  Unlike most
// instructions in this file the destination is a mask register (VTI.KRC);
// AVX512_maskable_cmp also generates the masked variant, which selects on the
// "_su" node.  (The _su fragment is presumably a single-use/masking-safe
// restriction -- see its definition elsewhere in this file; TODO confirm.)
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates VPSHUFBITQMB_rm at 512/256/128 bits: ZMM needs only BITALG,
// the YMM/XMM forms additionally require VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

// EVEX-encoded GF(2^8) multiply at 512/256/128 bits.  The ZMM form requires
// GFNI+AVX512F; the YMM/XMM forms require GFNI+VLX.  The trailing '1'
// argument marks the operation commutable in avx512_binop_rm.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8;

// Adds the broadcast-memory + imm8 form (rmbi) on top of the reg/mem + imm8
// forms inherited from avx512_3Op_rm_imm8.  BcstVTI supplies the 64-bit
// element type used for the broadcast load, which is then bitconverted to
// the byte-vector operand type.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
              OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
              "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
              (OpNode (VTI.VT VTI.RC:$src1),
               (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
               (i8 timm:$src3))>, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates the affine-transform forms at all three vector widths, pairing
// each byte-vector type with the matching 64-bit broadcast type.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
12571 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 12572 v32i8x_info, v4i64x_info>, EVEX_V256; 12573 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 12574 v16i8x_info, v2i64x_info>, EVEX_V128; 12575 } 12576} 12577 12578defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 12579 X86GF2P8affineinvqb, SchedWriteVecIMul>, 12580 EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W, AVX512AIi8Base; 12581defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 12582 X86GF2P8affineqb, SchedWriteVecIMul>, 12583 EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W, AVX512AIi8Base; 12584 12585 12586//===----------------------------------------------------------------------===// 12587// AVX5124FMAPS 12588//===----------------------------------------------------------------------===// 12589 12590let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 12591 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 12592defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 12593 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12594 "v4fmaddps", "$src3, $src2", "$src2, $src3", 12595 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12596 Sched<[SchedWriteFMA.ZMM.Folded]>; 12597 12598defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 12599 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12600 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 12601 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12602 Sched<[SchedWriteFMA.ZMM.Folded]>; 12603 12604defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 12605 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12606 "v4fmaddss", "$src3, $src2", "$src2, $src3", 12607 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, 12608 Sched<[SchedWriteFMA.Scl.Folded]>; 12609 12610defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 12611 (outs 
VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12612 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 12613 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, 12614 Sched<[SchedWriteFMA.Scl.Folded]>; 12615} 12616 12617//===----------------------------------------------------------------------===// 12618// AVX5124VNNIW 12619//===----------------------------------------------------------------------===// 12620 12621let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 12622 Constraints = "$src1 = $dst" in { 12623defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 12624 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12625 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 12626 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12627 Sched<[SchedWriteFMA.ZMM.Folded]>; 12628 12629defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 12630 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12631 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 12632 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12633 Sched<[SchedWriteFMA.ZMM.Folded]>; 12634} 12635 12636let hasSideEffects = 0 in { 12637 let mayStore = 1, SchedRW = [WriteFStoreX] in 12638 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12639 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12640 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12641} 12642 12643//===----------------------------------------------------------------------===// 12644// VP2INTERSECT 12645//===----------------------------------------------------------------------===// 12646 12647multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12648 def rr : I<0x68, MRMSrcReg, 12649 (outs _.KRPC:$dst), 12650 (ins _.RC:$src1, _.RC:$src2), 12651 !strconcat("vp2intersect", _.Suffix, 12652 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12653 [(set _.KRPC:$dst, (X86vp2intersect 12654 _.RC:$src1, (_.VT 
_.RC:$src2)))]>, 12655 EVEX, VVVV, T8, XD, Sched<[sched]>; 12656 12657 def rm : I<0x68, MRMSrcMem, 12658 (outs _.KRPC:$dst), 12659 (ins _.RC:$src1, _.MemOp:$src2), 12660 !strconcat("vp2intersect", _.Suffix, 12661 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12662 [(set _.KRPC:$dst, (X86vp2intersect 12663 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12664 EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>, 12665 Sched<[sched.Folded, sched.ReadAfterFold]>; 12666 12667 def rmb : I<0x68, MRMSrcMem, 12668 (outs _.KRPC:$dst), 12669 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12670 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12671 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12672 [(set _.KRPC:$dst, (X86vp2intersect 12673 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12674 EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12675 Sched<[sched.Folded, sched.ReadAfterFold]>; 12676} 12677 12678multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12679 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12680 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12681 12682 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12683 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12684 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12685 } 12686} 12687 12688let ExeDomain = SSEPackedInt in { 12689defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12690defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W; 12691} 12692 12693let ExeDomain = SSEPackedSingle in 12694defm VCVTNE2PS2BF16 : avx512_binop_all<0x72, "vcvtne2ps2bf16", 12695 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12696 avx512vl_f32_info, avx512vl_bf16_info, 12697 X86vfpround2, [HasBF16], [HasVLX, HasBF16]>, T8, XD, 12698 EVEX_CD8<32, CD8VF>; 12699 12700// Truncate Float to BFloat16, Float16 to 
BF8/HF8[,S] 12701multiclass avx512_cvt_trunc_ne<bits<8> opc, string OpcodeStr, 12702 AVX512VLVectorVTInfo vt_dst, 12703 AVX512VLVectorVTInfo vt_src, 12704 X86SchedWriteWidths sched, 12705 SDPatternOperator OpNode, 12706 SDPatternOperator MaskOpNode, 12707 list<Predicate> prds, list<Predicate> prds512, 12708 PatFrag bcast128 = vt_src.info128.BroadcastLdFrag, 12709 PatFrag loadVT128 = vt_src.info128.LdFrag, 12710 RegisterClass maskRC128 = vt_src.info128.KRCWM> { 12711 let ExeDomain = SSEPackedSingle in { 12712 let Predicates = prds512, Uses = []<Register>, mayRaiseFPException = 0 in { 12713 defm Z : avx512_vcvt_fp<opc, OpcodeStr, vt_dst.info256, vt_src.info512, 12714 OpNode, OpNode, sched.ZMM>, EVEX_V512; 12715 } 12716 let Predicates = prds in { 12717 let Uses = []<Register>, mayRaiseFPException = 0 in { 12718 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, vt_dst.info128, vt_src.info128, 12719 null_frag, null_frag, sched.XMM, vt_src.info128.BroadcastStr, "{x}", f128mem, 12720 vt_src.info128.KRCWM>, EVEX_V128; 12721 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, vt_dst.info128, vt_src.info256, 12722 OpNode, OpNode, 12723 sched.YMM, vt_src.info256.BroadcastStr, "{y}">, EVEX_V256; 12724 } 12725 } // Predicates = prds 12726 } // ExeDomain = SSEPackedSingle 12727 12728 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12729 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12730 VR128X:$src), 0>; 12731 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12732 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12733 f128mem:$src), 0, "intel">; 12734 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12735 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12736 VR256X:$src), 0>; 12737 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12738 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12739 f256mem:$src), 0, "intel">; 12740 12741 let Predicates = prds in { 12742 // Special patterns to allow use of MaskOpNode for masking 128 version. 
Instruction 12743 // patterns have been disabled with null_frag. 12744 def : Pat<(vt_dst.info128.VT (OpNode (vt_src.info128.VT VR128X:$src))), 12745 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 12746 def : Pat<(MaskOpNode (vt_src.info128.VT VR128X:$src), (vt_dst.info128.VT VR128X:$src0), 12747 maskRC128:$mask), 12748 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>; 12749 def : Pat<(MaskOpNode (vt_src.info128.VT VR128X:$src), vt_dst.info128.ImmAllZerosV, 12750 maskRC128:$mask), 12751 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>; 12752 12753 def : Pat<(vt_dst.info128.VT (OpNode (loadVT128 addr:$src))), 12754 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 12755 def : Pat<(MaskOpNode (loadVT128 addr:$src), (vt_dst.info128.VT VR128X:$src0), 12756 maskRC128:$mask), 12757 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 12758 def : Pat<(MaskOpNode (loadVT128 addr:$src), vt_dst.info128.ImmAllZerosV, 12759 maskRC128:$mask), 12760 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>; 12761 12762 def : Pat<(vt_dst.info128.VT (OpNode (vt_src.info128.VT (bcast128 addr:$src)))), 12763 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 12764 def : Pat<(MaskOpNode (vt_src.info128.VT (bcast128 addr:$src)), 12765 (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask), 12766 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 12767 def : Pat<(MaskOpNode (vt_src.info128.VT (bcast128 addr:$src)), 12768 vt_dst.info128.ImmAllZerosV, maskRC128:$mask), 12769 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>; 12770 } 12771} 12772 12773defm VCVTNEPS2BF16 : avx512_cvt_trunc_ne<0x72, "vcvtneps2bf16", 12774 avx512vl_bf16_info, avx512vl_f32_info, 12775 SchedWriteCvtPD2PS, X86cvtneps2bf16, 12776 X86mcvtneps2bf16, [HasBF16, HasVLX], 12777 [HasBF16]>, T8, XS, EVEX_CD8<32, CD8VF>; 12778 12779let Predicates = [HasBF16, HasVLX] in { 12780 
def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))), 12781 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12782 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))), 12783 (VCVTNEPS2BF16Z128rm addr:$src)>; 12784 12785 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))), 12786 (VCVTNEPS2BF16Z256rr VR256X:$src)>; 12787 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))), 12788 (VCVTNEPS2BF16Z256rm addr:$src)>; 12789 12790 def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)), 12791 (VPBROADCASTWZ128rm addr:$src)>; 12792 def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)), 12793 (VPBROADCASTWZ256rm addr:$src)>; 12794 12795 def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12796 (VPBROADCASTWZ128rr VR128X:$src)>; 12797 def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12798 (VPBROADCASTWZ256rr VR128X:$src)>; 12799 12800 def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))), 12801 (VCVTNEPS2BF16Z256rr VR256X:$src)>; 12802 def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), 12803 (VCVTNEPS2BF16Z256rm addr:$src)>; 12804 12805 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 12806} 12807 12808let Predicates = [HasBF16] in { 12809 def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)), 12810 (VPBROADCASTWZrm addr:$src)>; 12811 12812 def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12813 (VPBROADCASTWZrr VR128X:$src)>; 12814 12815 def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))), 12816 (VCVTNEPS2BF16Zrr VR512:$src)>; 12817 def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))), 12818 (VCVTNEPS2BF16Zrm addr:$src)>; 12819 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 
12820} 12821 12822let Constraints = "$src1 = $dst" in { 12823multiclass avx512_dpf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 12824 X86FoldableSchedWrite sched, 12825 X86VectorVTInfo _, X86VectorVTInfo src_v> { 12826 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12827 (ins src_v.RC:$src2, src_v.RC:$src3), 12828 OpcodeStr, "$src3, $src2", "$src2, $src3", 12829 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 12830 EVEX, VVVV, Sched<[sched]>; 12831 12832 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12833 (ins src_v.RC:$src2, src_v.MemOp:$src3), 12834 OpcodeStr, "$src3, $src2", "$src2, $src3", 12835 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12836 (src_v.LdFrag addr:$src3)))>, EVEX, VVVV, 12837 Sched<[sched.Folded, sched.ReadAfterFold]>; 12838 12839 let mayLoad = 1, hasSideEffects = 0 in 12840 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12841 (ins src_v.RC:$src2, f32mem:$src3), 12842 OpcodeStr, 12843 !strconcat("${src3}", _.BroadcastStr,", $src2"), 12844 !strconcat("$src2, ${src3}", _.BroadcastStr), 12845 (null_frag)>, 12846 EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 12847 12848} 12849} // Constraints = "$src1 = $dst" 12850 12851multiclass avx512_dpf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 12852 AVX512VLVectorVTInfo _, list<Predicate> prds, 12853 list<Predicate> prds512> { 12854 let Predicates = prds512 in { 12855 defm Z : avx512_dpf16ps_rm<opc, OpcodeStr, OpNode, WriteFMAZ, 12856 avx512vl_f32_info.info512, _.info512>, EVEX_V512; 12857 } 12858 let Predicates = prds in { 12859 defm Z256 : avx512_dpf16ps_rm<opc, OpcodeStr, OpNode, WriteFMAY, 12860 v8f32x_info, _.info256>, EVEX_V256; 12861 defm Z128 : avx512_dpf16ps_rm<opc, OpcodeStr, OpNode, WriteFMAX, 12862 v4f32x_info, _.info128>, EVEX_V128; 12863 } 12864} 12865 12866let ExeDomain = SSEPackedSingle in 12867defm VDPBF16PS : avx512_dpf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, avx512vl_bf16_info, 
12868 [HasVLX, HasBF16], [HasBF16]>, 12869 T8, XS, EVEX_CD8<32, CD8VF>; 12870 12871//===----------------------------------------------------------------------===// 12872// AVX512FP16 12873//===----------------------------------------------------------------------===// 12874 12875let Predicates = [HasFP16] in { 12876// Move word ( r/m16) to Packed word 12877def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 12878 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>; 12879def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), 12880 "vmovw\t{$src, $dst|$dst, $src}", 12881 [(set VR128X:$dst, 12882 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>, 12883 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; 12884 12885def : Pat<(f16 (bitconvert GR16:$src)), 12886 (f16 (COPY_TO_REGCLASS 12887 (VMOVW2SHrr 12888 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), 12889 FR16X))>; 12890def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))), 12891 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12892def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))), 12893 (VMOVW2SHrr GR32:$src)>; 12894// FIXME: We should really find a way to improve these patterns. 12895def : Pat<(v8i32 (X86vzmovl 12896 (insert_subvector undef, 12897 (v4i32 (scalar_to_vector 12898 (and GR32:$src, 0xffff))), 12899 (iPTR 0)))), 12900 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12901def : Pat<(v16i32 (X86vzmovl 12902 (insert_subvector undef, 12903 (v4i32 (scalar_to_vector 12904 (and GR32:$src, 0xffff))), 12905 (iPTR 0)))), 12906 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12907 12908def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))), 12909 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12910 12911// AVX 128-bit movw instruction write zeros in the high 128-bit part. 
12912def : Pat<(v8i16 (X86vzload16 addr:$src)), 12913 (VMOVWrm addr:$src)>; 12914def : Pat<(v16i16 (X86vzload16 addr:$src)), 12915 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12916 12917// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. 12918def : Pat<(v32i16 (X86vzload16 addr:$src)), 12919 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12920 12921def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))), 12922 (VMOVWrm addr:$src)>; 12923def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))), 12924 (VMOVWrm addr:$src)>; 12925def : Pat<(v8i32 (X86vzmovl 12926 (insert_subvector undef, 12927 (v4i32 (scalar_to_vector 12928 (i32 (zextloadi16 addr:$src)))), 12929 (iPTR 0)))), 12930 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12931def : Pat<(v16i32 (X86vzmovl 12932 (insert_subvector undef, 12933 (v4i32 (scalar_to_vector 12934 (i32 (zextloadi16 addr:$src)))), 12935 (iPTR 0)))), 12936 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12937 12938// Move word from xmm register to r/m16 12939def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 12940 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>; 12941def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs), 12942 (ins i16mem:$dst, VR128X:$src), 12943 "vmovw\t{$src, $dst|$dst, $src}", 12944 [(store (i16 (extractelt (v8i16 VR128X:$src), 12945 (iPTR 0))), addr:$dst)]>, 12946 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; 12947 12948def : Pat<(i16 (bitconvert FR16X:$src)), 12949 (i16 (EXTRACT_SUBREG 12950 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)), 12951 sub_16bit))>; 12952def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), 12953 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>; 12954 12955// Allow "vmovw" to use GR64 12956let hasSideEffects = 0 in { 12957 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins 
GR64:$src), 12958 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 12959 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 12960 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>; 12961} 12962} 12963 12964// Convert 16-bit float to i16/u16 12965multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12966 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12967 AVX512VLVectorVTInfo _Dst, 12968 AVX512VLVectorVTInfo _Src, 12969 X86SchedWriteWidths sched> { 12970 let Predicates = [HasFP16] in { 12971 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 12972 OpNode, MaskOpNode, sched.ZMM>, 12973 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512, 12974 OpNodeRnd, sched.ZMM>, EVEX_V512; 12975 } 12976 let Predicates = [HasFP16, HasVLX] in { 12977 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 12978 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 12979 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 12980 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 12981 } 12982} 12983 12984// Convert 16-bit float to i16/u16 truncate 12985multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12986 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12987 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src, 12988 X86SchedWriteWidths sched> { 12989 let Predicates = [HasFP16] in { 12990 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 12991 OpNode, MaskOpNode, sched.ZMM>, 12992 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512, 12993 OpNodeRnd, sched.ZMM>, EVEX_V512; 12994 } 12995 let Predicates = [HasFP16, HasVLX] in { 12996 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 12997 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 12998 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 12999 
OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 13000 } 13001} 13002 13003defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt, 13004 X86cvtp2UIntRnd, avx512vl_i16_info, 13005 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13006 T_MAP5, EVEX_CD8<16, CD8VF>; 13007defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp, 13008 X86VUintToFpRnd, avx512vl_f16_info, 13009 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13010 T_MAP5, XD, EVEX_CD8<16, CD8VF>; 13011defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, 13012 X86cvttp2si, X86cvttp2siSAE, 13013 avx512vl_i16_info, avx512vl_f16_info, 13014 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>; 13015defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, 13016 X86cvttp2ui, X86cvttp2uiSAE, 13017 avx512vl_i16_info, avx512vl_f16_info, 13018 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>; 13019defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int, 13020 X86cvtp2IntRnd, avx512vl_i16_info, 13021 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13022 T_MAP5, PD, EVEX_CD8<16, CD8VF>; 13023defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp, 13024 X86VSintToFpRnd, avx512vl_f16_info, 13025 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13026 T_MAP5, XS, EVEX_CD8<16, CD8VF>; 13027 13028// Convert Half to Signed/Unsigned Doubleword 13029multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13030 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13031 X86SchedWriteWidths sched> { 13032 let Predicates = [HasFP16] in { 13033 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13034 MaskOpNode, sched.ZMM>, 13035 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info, 13036 OpNodeRnd, sched.ZMM>, EVEX_V512; 13037 } 13038 let Predicates = [HasFP16, HasVLX] in { 13039 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13040 MaskOpNode, sched.XMM, "{1to4}", "", 
f64mem>, EVEX_V128; 13041 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 13042 MaskOpNode, sched.YMM>, EVEX_V256; 13043 } 13044} 13045 13046// Convert Half to Signed/Unsigned Doubleword with truncation 13047multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13048 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13049 X86SchedWriteWidths sched> { 13050 let Predicates = [HasFP16] in { 13051 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13052 MaskOpNode, sched.ZMM>, 13053 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info, 13054 OpNodeRnd, sched.ZMM>, EVEX_V512; 13055 } 13056 let Predicates = [HasFP16, HasVLX] in { 13057 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13058 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 13059 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 13060 MaskOpNode, sched.YMM>, EVEX_V256; 13061 } 13062} 13063 13064 13065defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int, 13066 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, 13067 EVEX_CD8<16, CD8VH>; 13068defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt, 13069 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, 13070 EVEX_CD8<16, CD8VH>; 13071 13072defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si, 13073 X86cvttp2si, X86cvttp2siSAE, 13074 SchedWriteCvtPS2DQ>, T_MAP5, XS, 13075 EVEX_CD8<16, CD8VH>; 13076 13077defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui, 13078 X86cvttp2ui, X86cvttp2uiSAE, 13079 SchedWriteCvtPS2DQ>, T_MAP5, 13080 EVEX_CD8<16, CD8VH>; 13081 13082// Convert Half to Signed/Unsigned Quardword 13083multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13084 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13085 X86SchedWriteWidths sched> { 13086 let Predicates = [HasFP16] in { 13087 defm 
Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode, 13088 MaskOpNode, sched.ZMM>, 13089 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info, 13090 OpNodeRnd, sched.ZMM>, EVEX_V512; 13091 } 13092 let Predicates = [HasFP16, HasVLX] in { 13093 // Explicitly specified broadcast string, since we take only 2 elements 13094 // from v8f16x_info source 13095 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode, 13096 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, 13097 EVEX_V128; 13098 // Explicitly specified broadcast string, since we take only 4 elements 13099 // from v8f16x_info source 13100 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode, 13101 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, 13102 EVEX_V256; 13103 } 13104} 13105 13106// Convert Half to Signed/Unsigned Quardword with truncation 13107multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13108 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13109 X86SchedWriteWidths sched> { 13110 let Predicates = [HasFP16] in { 13111 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode, 13112 MaskOpNode, sched.ZMM>, 13113 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info, 13114 OpNodeRnd, sched.ZMM>, EVEX_V512; 13115 } 13116 let Predicates = [HasFP16, HasVLX] in { 13117 // Explicitly specified broadcast string, since we take only 2 elements 13118 // from v8f16x_info source 13119 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode, 13120 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128; 13121 // Explicitly specified broadcast string, since we take only 4 elements 13122 // from v8f16x_info source 13123 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode, 13124 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256; 13125 } 13126} 13127 13128defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int, 13129 X86cvtp2IntRnd, 
SchedWriteCvtPS2DQ>, T_MAP5, PD, 13130 EVEX_CD8<16, CD8VQ>; 13131 13132defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt, 13133 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, 13134 EVEX_CD8<16, CD8VQ>; 13135 13136defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si, 13137 X86cvttp2si, X86cvttp2siSAE, 13138 SchedWriteCvtPS2DQ>, T_MAP5, PD, 13139 EVEX_CD8<16, CD8VQ>; 13140 13141defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui, 13142 X86cvttp2ui, X86cvttp2uiSAE, 13143 SchedWriteCvtPS2DQ>, T_MAP5, PD, 13144 EVEX_CD8<16, CD8VQ>; 13145 13146// Convert Signed/Unsigned Quardword to Half 13147multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13148 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13149 X86SchedWriteWidths sched> { 13150 // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and 13151 // 512 memory forms of these instructions in Asm Parcer. They have the same 13152 // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly 13153 // due to the same reason. 
13154 let Predicates = [HasFP16] in { 13155 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode, 13156 MaskOpNode, sched.ZMM, "{1to8}", "{z}">, 13157 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info, 13158 OpNodeRnd, sched.ZMM>, EVEX_V512; 13159 } 13160 let Predicates = [HasFP16, HasVLX] in { 13161 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info, 13162 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", 13163 i128mem, VK2WM>, EVEX_V128; 13164 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info, 13165 null_frag, null_frag, sched.YMM, "{1to4}", "{y}", 13166 i256mem, VK4WM>, EVEX_V256; 13167 } 13168 13169 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 13170 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 13171 VR128X:$src), 0, "att">; 13172 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 13173 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 13174 VK2WM:$mask, VR128X:$src), 0, "att">; 13175 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 13176 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 13177 VK2WM:$mask, VR128X:$src), 0, "att">; 13178 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 13179 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 13180 i64mem:$src), 0, "att">; 13181 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 13182 "$dst {${mask}}, ${src}{1to2}}", 13183 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 13184 VK2WM:$mask, i64mem:$src), 0, "att">; 13185 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 13186 "$dst {${mask}} {z}, ${src}{1to2}}", 13187 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 13188 VK2WM:$mask, i64mem:$src), 0, "att">; 13189 13190 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 13191 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 13192 VR256X:$src), 0, "att">; 13193 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 
13194 "$dst {${mask}}, $src}", 13195 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 13196 VK4WM:$mask, VR256X:$src), 0, "att">; 13197 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 13198 "$dst {${mask}} {z}, $src}", 13199 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 13200 VK4WM:$mask, VR256X:$src), 0, "att">; 13201 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 13202 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 13203 i64mem:$src), 0, "att">; 13204 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 13205 "$dst {${mask}}, ${src}{1to4}}", 13206 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 13207 VK4WM:$mask, i64mem:$src), 0, "att">; 13208 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 13209 "$dst {${mask}} {z}, ${src}{1to4}}", 13210 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 13211 VK4WM:$mask, i64mem:$src), 0, "att">; 13212 13213 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 13214 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 13215 VR512:$src), 0, "att">; 13216 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 13217 "$dst {${mask}}, $src}", 13218 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 13219 VK8WM:$mask, VR512:$src), 0, "att">; 13220 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 13221 "$dst {${mask}} {z}, $src}", 13222 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 13223 VK8WM:$mask, VR512:$src), 0, "att">; 13224 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 13225 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 13226 i64mem:$src), 0, "att">; 13227 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 13228 "$dst {${mask}}, ${src}{1to8}}", 13229 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 13230 VK8WM:$mask, i64mem:$src), 0, "att">; 13231 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 13232 "$dst {${mask}} {z}, ${src}{1to8}}", 13233 (!cast<Instruction>(NAME # "Zrmbkz") 
VR128X:$dst, 13234 VK8WM:$mask, i64mem:$src), 0, "att">; 13235} 13236 13237defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp, 13238 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, 13239 EVEX_CD8<64, CD8VF>; 13240 13241defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp, 13242 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD, 13243 EVEX_CD8<64, CD8VF>; 13244 13245// Convert half to signed/unsigned int 32/64 13246defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si, 13247 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>, 13248 T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13249defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si, 13250 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>, 13251 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; 13252defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi, 13253 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>, 13254 T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13255defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi, 13256 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>, 13257 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; 13258 13259defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info, 13260 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13261 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13262defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info, 13263 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13264 "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13265defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info, 13266 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13267 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13268defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info, 13269 
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13270 "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; 13271 13272let Predicates = [HasFP16] in { 13273 defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32, 13274 v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">, 13275 T_MAP5, XS, EVEX_CD8<32, CD8VT1>; 13276 defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64, 13277 v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">, 13278 T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>; 13279 defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32, 13280 v8f16x_info, i32mem, loadi32, 13281 "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>; 13282 defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64, 13283 v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">, 13284 T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>; 13285 def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}", 13286 (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 13287 13288 def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}", 13289 (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 13290 13291 13292 def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))), 13293 (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13294 def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))), 13295 (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13296 13297 def : Pat<(f16 (any_sint_to_fp GR32:$src)), 13298 (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>; 13299 def : Pat<(f16 (any_sint_to_fp GR64:$src)), 13300 (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>; 13301 13302 def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))), 13303 (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13304 def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))), 13305 (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>; 13306 13307 def : 
Pat<(f16 (any_uint_to_fp GR32:$src)), 13308 (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>; 13309 def : Pat<(f16 (any_uint_to_fp GR64:$src)), 13310 (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>; 13311 13312 // Patterns used for matching vcvtsi2sh intrinsic sequences from clang 13313 // which produce unnecessary vmovsh instructions 13314 def : Pat<(v8f16 (X86Movsh 13315 (v8f16 VR128X:$dst), 13316 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))), 13317 (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>; 13318 13319 def : Pat<(v8f16 (X86Movsh 13320 (v8f16 VR128X:$dst), 13321 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))), 13322 (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>; 13323 13324 def : Pat<(v8f16 (X86Movsh 13325 (v8f16 VR128X:$dst), 13326 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))), 13327 (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>; 13328 13329 def : Pat<(v8f16 (X86Movsh 13330 (v8f16 VR128X:$dst), 13331 (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))), 13332 (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>; 13333 13334 def : Pat<(v8f16 (X86Movsh 13335 (v8f16 VR128X:$dst), 13336 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))), 13337 (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>; 13338 13339 def : Pat<(v8f16 (X86Movsh 13340 (v8f16 VR128X:$dst), 13341 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))), 13342 (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>; 13343 13344 def : Pat<(v8f16 (X86Movsh 13345 (v8f16 VR128X:$dst), 13346 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))), 13347 (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>; 13348 13349 def : Pat<(v8f16 (X86Movsh 13350 (v8f16 VR128X:$dst), 13351 (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))), 13352 (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>; 13353} // Predicates = [HasFP16] 13354 13355let Predicates = [HasFP16, HasVLX] in { 13356 // Special patterns to allow use of 
X86VMSintToFP for masking. Instruction 13357 // patterns have been disabled with null_frag. 13358 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))), 13359 (VCVTQQ2PHZ256rr VR256X:$src)>; 13360 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0), 13361 VK4WM:$mask), 13362 (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 13363 def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV, 13364 VK4WM:$mask), 13365 (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 13366 13367 def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))), 13368 (VCVTQQ2PHZ256rm addr:$src)>; 13369 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0), 13370 VK4WM:$mask), 13371 (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13372 def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV, 13373 VK4WM:$mask), 13374 (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>; 13375 13376 def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))), 13377 (VCVTQQ2PHZ256rmb addr:$src)>; 13378 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13379 (v8f16 VR128X:$src0), VK4WM:$mask), 13380 (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13381 def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13382 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 13383 (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 13384 13385 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))), 13386 (VCVTQQ2PHZ128rr VR128X:$src)>; 13387 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0), 13388 VK2WM:$mask), 13389 (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 13390 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV, 13391 VK2WM:$mask), 13392 (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 13393 13394 def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))), 13395 (VCVTQQ2PHZ128rm addr:$src)>; 13396 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0), 
13397 VK2WM:$mask), 13398 (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13399 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV, 13400 VK2WM:$mask), 13401 (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>; 13402 13403 def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 13404 (VCVTQQ2PHZ128rmb addr:$src)>; 13405 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13406 (v8f16 VR128X:$src0), VK2WM:$mask), 13407 (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13408 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13409 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 13410 (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 13411 13412 // Special patterns to allow use of X86VMUintToFP for masking. Instruction 13413 // patterns have been disabled with null_frag. 13414 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))), 13415 (VCVTUQQ2PHZ256rr VR256X:$src)>; 13416 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0), 13417 VK4WM:$mask), 13418 (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 13419 def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV, 13420 VK4WM:$mask), 13421 (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 13422 13423 def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))), 13424 (VCVTUQQ2PHZ256rm addr:$src)>; 13425 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0), 13426 VK4WM:$mask), 13427 (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13428 def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV, 13429 VK4WM:$mask), 13430 (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>; 13431 13432 def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))), 13433 (VCVTUQQ2PHZ256rmb addr:$src)>; 13434 def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13435 (v8f16 VR128X:$src0), VK4WM:$mask), 13436 (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 13437 def : 
Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13438 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 13439 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 13440 13441 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))), 13442 (VCVTUQQ2PHZ128rr VR128X:$src)>; 13443 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0), 13444 VK2WM:$mask), 13445 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 13446 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV, 13447 VK2WM:$mask), 13448 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 13449 13450 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))), 13451 (VCVTUQQ2PHZ128rm addr:$src)>; 13452 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0), 13453 VK2WM:$mask), 13454 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13455 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV, 13456 VK2WM:$mask), 13457 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>; 13458 13459 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 13460 (VCVTUQQ2PHZ128rmb addr:$src)>; 13461 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13462 (v8f16 VR128X:$src0), VK2WM:$mask), 13463 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13464 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13465 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 13466 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 13467} 13468 13469let Constraints = "@earlyclobber $dst, $src1 = $dst" in { 13470 multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> { 13471 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13472 (ins _.RC:$src2, _.RC:$src3), 13473 OpcodeStr, "$src3, $src2", "$src2, $src3", 13474 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV; 13475 13476 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13477 (ins 
_.RC:$src2, _.MemOp:$src3), 13478 OpcodeStr, "$src3, $src2", "$src2, $src3", 13479 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV; 13480 13481 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13482 (ins _.RC:$src2, _.ScalarMemOp:$src3), 13483 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), 13484 (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV; 13485 } 13486} // Constraints = "@earlyclobber $dst, $src1 = $dst" 13487 13488multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 13489 X86VectorVTInfo _> { 13490 let Constraints = "@earlyclobber $dst, $src1 = $dst" in 13491 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13492 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 13493 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 13494 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>, 13495 EVEX, VVVV, EVEX_B, EVEX_RC; 13496} 13497 13498 13499multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> { 13500 let Predicates = [HasFP16] in { 13501 defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>, 13502 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>, 13503 EVEX_V512, Sched<[WriteFMAZ]>; 13504 } 13505 let Predicates = [HasVLX, HasFP16] in { 13506 defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>; 13507 defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>; 13508 } 13509} 13510 13511multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 13512 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> { 13513 let Predicates = [HasFP16] in { 13514 defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 13515 WriteFMAZ, 
IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, 13516 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info, 13517 "", "@earlyclobber $dst">, EVEX_V512; 13518 } 13519 let Predicates = [HasVLX, HasFP16] in { 13520 defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 13521 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256; 13522 defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 13523 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128; 13524 } 13525} 13526 13527 13528let Uses = [MXCSR] in { 13529 defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>, 13530 T_MAP6, XS, EVEX_CD8<32, CD8VF>; 13531 defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>, 13532 T_MAP6, XD, EVEX_CD8<32, CD8VF>; 13533 13534 defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc, 13535 x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>; 13536 defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc, 13537 x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>; 13538} 13539 13540 13541multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, 13542 bit IsCommutable> { 13543 let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in { 13544 defm r : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13545 (ins VR128X:$src2, VR128X:$src3), OpcodeStr, 13546 "$src3, $src2", "$src2, $src3", 13547 (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>, 13548 Sched<[WriteFMAX]>; 13549 defm m : AVX512_maskable_3src_scalar<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst), 13550 (ins VR128X:$src2, ssmem:$src3), OpcodeStr, 13551 "$src3, $src2", "$src2, $src3", 13552 (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>, 13553 
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; 13554 defm rb : AVX512_maskable_3src_scalar<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13555 (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr, 13556 "$rc, $src3, $src2", "$src2, $src3, $rc", 13557 (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>, 13558 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>; 13559 } 13560} 13561 13562multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 13563 SDNode OpNodeRnd, bit IsCommutable> { 13564 let Predicates = [HasFP16] in { 13565 defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13566 (ins VR128X:$src1, VR128X:$src2), OpcodeStr, 13567 "$src2, $src1", "$src1, $src2", 13568 (v4f32 (OpNode VR128X:$src1, VR128X:$src2)), 13569 IsCommutable, IsCommutable, IsCommutable, 13570 X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>; 13571 defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst), 13572 (ins VR128X:$src1, ssmem:$src2), OpcodeStr, 13573 "$src2, $src1", "$src1, $src2", 13574 (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))), 13575 0, 0, 0, X86selects, "@earlyclobber $dst">, 13576 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; 13577 defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13578 (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr, 13579 "$rc, $src2, $src1", "$src1, $src2, $rc", 13580 (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)), 13581 0, 0, 0, X86selects, "@earlyclobber $dst">, 13582 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>; 13583 } 13584} 13585 13586let Uses = [MXCSR] in { 13587 defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>, 13588 T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV; 13589 defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>, 13590 T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV; 13591 
13592 defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>, 13593 T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV; 13594 defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>, 13595 T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV; 13596} 13597