; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw,avx512vl,avx512f | FileCheck %s --check-prefix=AVX512

; PR37427 - https://bugs.llvm.org/show_bug.cgi?id=37427

define <8 x i32> @eq_zero(ptr %p, <8 x i32> %x, <8 x i32> %y) {
; AVX1-LABEL: eq_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: eq_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: eq_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX512-NEXT:    vptestnmb %xmm2, %xmm2, %k1
; AVX512-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, ptr %p
  %cmp = icmp eq <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %sel
}

define <4 x i64> @ne_zero(ptr %p, <4 x i64> %x, <4 x i64> %y) {
; AVX1-LABEL: ne_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxwq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; AVX1-NEXT:    vpmovsxwq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ne_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ne_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX512-NEXT:    vptestmw %xmm2, %xmm2, %k1
; AVX512-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i16>, ptr %p
  %cmp = icmp ne <4 x i16> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %sel
}

define <16 x i16> @sgt_zero(ptr %p, <16 x i16> %x, <16 x i16> %y) {
; AVX1-LABEL: sgt_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbw %xmm2, %xmm3
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sgt_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm2
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sgt_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpltb (%rdi), %xmm2, %k1
; AVX512-NEXT:    vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <16 x i8>, ptr %p
  %cmp = icmp sgt <16 x i8> %load, zeroinitializer
  %sel = select <16 x i1> %cmp, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %sel
}

define <8 x i32> @slt_zero(ptr %p, <8 x i32> %x, <8 x i32> %y) {
; AVX1-LABEL: slt_zero:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: slt_zero:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbd (%rdi), %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: slt_zero:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX512-NEXT:    vpmovb2m %xmm2, %k1
; AVX512-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, ptr %p
  %cmp = icmp slt <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %sel
}

define <4 x double> @eq_zero_fp_select(ptr %p, <4 x double> %x, <4 x double> %y) {
; AVX1-LABEL: eq_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbq %xmm2, %xmm3
; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: eq_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: eq_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512-NEXT:    vptestnmb %xmm2, %xmm2, %k1
; AVX512-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i8>, ptr %p
  %cmp = icmp eq <4 x i8> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y
  ret <4 x double> %sel
}

define <8 x float> @ne_zero_fp_select(ptr %p, <8 x float> %x, <8 x float> %y) {
; AVX1-LABEL: ne_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ne_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ne_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX512-NEXT:    vptestmb %xmm2, %xmm2, %k1
; AVX512-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i8>, ptr %p
  %cmp = icmp ne <8 x i8> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}

define <4 x double> @sgt_zero_fp_select(ptr %p, <4 x double> %x, <4 x double> %y) {
; AVX1-LABEL: sgt_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbq %xmm2, %xmm3
; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
; AVX1-NEXT:    vpmovsxbq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sgt_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbq (%rdi), %ymm2
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sgt_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtb %xmm3, %xmm2, %k1
; AVX512-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <4 x i8>, ptr %p
  %cmp = icmp sgt <4 x i8> %load, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x double> %x, <4 x double> %y
  ret <4 x double> %sel
}

define <8 x float> @slt_zero_fp_select(ptr %p, <8 x float> %x, <8 x float> %y) {
; AVX1-LABEL: slt_zero_fp_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm2
; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: slt_zero_fp_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd (%rdi), %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: slt_zero_fp_select:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vpcmpgtw (%rdi), %xmm2, %k1
; AVX512-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
  %load = load <8 x i16>, ptr %p
  %cmp = icmp slt <8 x i16> %load, zeroinitializer
  %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}