//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Non-instruction patterns
//===----------------------------------------------------------------------===//

let Predicates = [NoAVX512] in {
  // A vector extract of the first f16/f32/f64 position is a subregister copy
  def : Pat<(f16 (extractelt (v8f16 VR128:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v8f16 VR128:$src), FR16)>;
  def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
  def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
}

let Predicates = [HasAVX512] in {
  // A vector extract of the first f16/f32/f64 position is a subregister copy
  def : Pat<(f16 (extractelt (v8f16 VR128X:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v8f16 VR128X:$src), FR16X)>;
  def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
  def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
            (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X)>;
}

let Predicates = [NoVLX] in {
  // Implicitly promote a 16-bit scalar to a vector.
  def : Pat<(v8f16 (scalar_to_vector FR16:$src)),
            (COPY_TO_REGCLASS FR16:$src, VR128)>;
  // Implicitly promote a 32-bit scalar to a vector.
  def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
            (COPY_TO_REGCLASS FR32:$src, VR128)>;
  // Implicitly promote a 64-bit scalar to a vector.
  def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
            (COPY_TO_REGCLASS FR64:$src, VR128)>;
}

let Predicates = [HasVLX] in {
  // Implicitly promote a 16-bit scalar to a vector.
  def : Pat<(v8f16 (scalar_to_vector FR16X:$src)),
            (COPY_TO_REGCLASS FR16X:$src, VR128X)>;
  // Implicitly promote a 32-bit scalar to a vector.
  def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
            (COPY_TO_REGCLASS FR32X:$src, VR128X)>;
  // Implicitly promote a 64-bit scalar to a vector.
  def : Pat<(v2f64 (scalar_to_vector FR64X:$src)),
            (COPY_TO_REGCLASS FR64X:$src, VR128X)>;
}

//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//

// Patterns for insert_subvector/extract_subvector to/from index=0
//
//   subRC / subVT - register class and value type of the narrow subvector.
//   RC / VT       - register class and value type of the wide vector.
//   subIdx        - subregister index naming the narrow part inside RC.
//
// Two anonymous patterns are produced per instantiation:
//   * extract_subvector at index 0  -> EXTRACT_SUBREG
//   * insert_subvector at index 0 into a vector matched by
//     undef_or_freeze_undef        -> INSERT_SUBREG into IMPLICIT_DEF
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
                                     RegisterClass RC, ValueType VT,
                                     SubRegIndex subIdx> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;

  def : Pat<(VT (insert_subvector undef_or_freeze_undef, subRC:$src, (iPTR 0))),
            (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}

// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
// VR128 <-> VR256 at index 0, through sub_xmm.
defm : subvector_subreg_lowering<VR128, v4i32,  VR256, v8i32,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32,  VR256, v8f32,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64,  VR256, v4i64,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64,  VR256, v4f64,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16,  VR256, v16i16,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8,  VR256, v32i8,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8f16,  VR256, v16f16,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>;

// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
// VR128 <-> VR512 at index 0, through sub_xmm.
defm : subvector_subreg_lowering<VR128, v4i32,  VR512, v16i32,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32,  VR512, v16f32,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64,  VR512, v8i64,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64,  VR512, v8f64,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16,  VR512, v32i16,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8,  VR512, v64i8,   sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8f16,  VR512, v32f16,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>;

// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
// VR256 <-> VR512 at index 0, through sub_ymm.
defm : subvector_subreg_lowering<VR256, v8i32,   VR512, v16i32,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32,   VR512, v16f32,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64,   VR512, v8i64,   sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64,   VR512, v8f64,   sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16,  VR512, v32i16,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8,   VR512, v64i8,   sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16f16,  VR512, v32f16,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>;


// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
// Lower an index-0 insert_subvector into an all-zeros vector.
//
//   MoveStr - suffix spliced into "VMOV" # MoveStr # "rr" to name the
//             register-to-register move instruction that is emitted.
//   RC      - register class of the narrow source vector.
//   DstTy   - value type of the wide result.
//   SrcTy   - value type of the narrow source.
//   SubIdx  - subregister index at which the moved value sits in the result.
//
// The move result is wrapped in SUBREG_TO_REG to form the wide value.
multiclass subvec_zero_lowering<string MoveStr,
                                RegisterClass RC, ValueType DstTy,
                                ValueType SrcTy, SubRegIndex SubIdx> {
  def : Pat<(DstTy (insert_subvector immAllZerosV,
                                     (SrcTy RC:$src), (iPTR 0))),
            (SUBREG_TO_REG (i64 0),
             (SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
}

let Predicates = [HasAVXNECONVERT, NoVLX] in
  defm : subvec_zero_lowering<"DQA", VR128, v16bf16, v8bf16, sub_xmm>;

let Predicates = [HasVLX] in {
  // 128-bit source into a 256-bit zero vector.
  defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, sub_xmm>;

  // 128-bit source into a 512-bit zero vector.
  defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, sub_xmm>;

  // 256-bit source into a 512-bit zero vector.
  defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, sub_ymm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, sub_ymm>;
}

let Predicates = [HasAVX512, NoVLX] in {
  // 128-bit source into a 512-bit zero vector.
  defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, sub_xmm>;

  // 256-bit source into a 512-bit zero vector.
  defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, sub_ymm>;
  defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, sub_ymm>;
}

let Predicates = [HasFP16, HasVLX] in {
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16f16, v8f16, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v32f16, v8f16, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}

let Predicates = [HasBF16, HasVLX] in {
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16bf16, v8bf16, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v32bf16, v8bf16, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v32bf16, v16bf16, sub_ymm>;
}

// PatLeaf over a mask value of type vt in register class RC that matches only
// when the C++ predicate isMaskZeroExtended(N) returns true.
class maskzeroupper<ValueType vt, RegisterClass RC> :
  PatLeaf<(vt RC:$src), [{
    return isMaskZeroExtended(N);
  }]>;

def maskzeroupperv1i1  : maskzeroupper<v1i1,  VK1>;
def maskzeroupperv2i1  : maskzeroupper<v2i1,  VK2>;
def maskzeroupperv4i1  : maskzeroupper<v4i1,  VK4>;
def maskzeroupperv8i1  : maskzeroupper<v8i1,  VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;

// The patterns determine if we can depend on the upper bits of a mask register
// being zeroed by the previous operation so that we can skip explicit
// zeroing.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv32i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK64)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK16)>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}

let Predicates = [HasVLX, HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK8)>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK8)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK16)>;
}

// Zero-upper-bits copies of v2i1/v4i1 masks into v32i1/v64i1 (no explicit
// zeroing when the maskzeroupper leaf matches).
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK32)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK64)>;
}

// If the bits are not zero we have to fall back to explicitly zeroing by
// using shifts.
// In the shift-based patterns below the mask is shifted left and then right by
// the same amount, (destination width - source width), so only the low
// source-width bits remain.
let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
                                    (i8 15)), (i8 15))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
                                    (i8 14)), (i8 14))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
                                    (i8 12)), (i8 12))>;
}

let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
                                    (i8 8)), (i8 8))>;
}

let Predicates = [HasDQI] in {
  // NOTE(review): presumably relies on KMOVB clearing the bits above the low
  // 8 of the destination mask register -- confirm against the ISA reference.
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;

  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
                                    (i8 7)), (i8 7))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
                                    (i8 6)), (i8 6))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
                                    (i8 4)), (i8 4))>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v32i1 VK32:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}

let Predicates = [HasBWI, NoDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
                                    (i8 24)), (i8 24))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
                                    (i8 56)), (i8 56))>;
}

let Predicates = [HasBWI, HasDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
                                    (i8 31)), (i8 31))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
                                    (i8 30)), (i8 30))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
                                    (i8 28)), (i8 28))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
                                    (i8 63)), (i8 63))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
                                    (i8 62)), (i8 62))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
                                    (i8 60)), (i8 60))>;
}

//===----------------------------------------------------------------------===//
// Extra selection patterns for f128, f128mem

// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
let Predicates = [NoAVX] in {
def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f128 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>;

def : Pat<(alignedloadf128 addr:$src),
          (MOVAPSrm addr:$src)>;
def : Pat<(loadf128 addr:$src),
          (MOVUPSrm addr:$src)>;
}

let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
          (VMOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f128 VR128:$src), addr:$dst),
          (VMOVUPSmr addr:$dst, VR128:$src)>;

def : Pat<(alignedloadf128 addr:$src),
          (VMOVAPSrm addr:$src)>;
def : Pat<(loadf128 addr:$src),
          (VMOVUPSrm addr:$src)>;
}

// f128 aligned/unaligned store and load through the EVEX 128-bit PS moves.
let Predicates = [HasVLX] in {
def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
          (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (f128 VR128X:$src), addr:$dst),
          (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;

def : Pat<(alignedloadf128 addr:$src),
          (VMOVAPSZ128rm addr:$src)>;
def : Pat<(loadf128 addr:$src),
          (VMOVUPSZ128rm addr:$src)>;
}

let Predicates = [UseSSE1] in {
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
          (ANDPSrm VR128:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
          (ANDPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
          (ORPSrm VR128:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
          (ORPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
          (XORPSrm VR128:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
          (XORPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [HasAVX, NoVLX] in {
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
          (VANDPSrm VR128:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
          (VANDPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(f128 (X86for VR128:$src1, (loadf128 addr:$src2))),
          (VORPSrm VR128:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
          (VORPSrr VR128:$src1, VR128:$src2)>;

def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
          (VXORPSrm VR128:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
          (VXORPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [HasVLX] in {
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
def : Pat<(f128 (X86fand VR128X:$src1, (loadf128 addr:$src2))),
          (VANDPSZ128rm VR128X:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86fand VR128X:$src1, VR128X:$src2)),
          (VANDPSZ128rr VR128X:$src1, VR128X:$src2)>;

def : Pat<(f128 (X86for VR128X:$src1, (loadf128 addr:$src2))),
          (VORPSZ128rm VR128X:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86for VR128X:$src1, VR128X:$src2)),
          (VORPSZ128rr VR128X:$src1, VR128X:$src2)>;

def : Pat<(f128 (X86fxor VR128X:$src1, (loadf128 addr:$src2))),
          (VXORPSZ128rm VR128X:$src1, f128mem:$src2)>;

def : Pat<(f128 (X86fxor VR128X:$src1, VR128X:$src2)),
          (VXORPSZ128rr VR128X:$src1, VR128X:$src2)>;
}