; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

declare void @use_v8i1(<8 x i1>)
declare void @use_v8i8(<8 x i8>)

define <8 x i16> @cmp_ne_load_const(ptr %x) nounwind {
; SSE-LABEL: cmp_ne_load_const:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_ne_load_const:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %loadx = load <8 x i8>, ptr %x
  %icmp = icmp ne <8 x i8> %loadx, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; negative test - simple loads only

define <8 x i16> @cmp_ne_load_const_volatile(ptr %x) nounwind {
; SSE-LABEL: cmp_ne_load_const_volatile:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ne_load_const_volatile:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ne_load_const_volatile:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %loadx = load volatile <8 x i8>, ptr %x
  %icmp = icmp ne <8 x i8> %loadx, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; negative test - don't create extra load

define <8 x i16> @cmp_ne_load_const_extra_use1(ptr %x) nounwind {
; SSE-LABEL: cmp_ne_load_const_extra_use1:
; SSE:       # %bb.0:
; SSE-NEXT:    subq $24, %rsp
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT:    callq use_v8i8@PLT
; SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    addq $24, %rsp
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ne_load_const_extra_use1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    subq $24, %rsp
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT:    callq use_v8i8@PLT
; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    addq $24, %rsp
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ne_load_const_extra_use1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    subq $24, %rsp
; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX512-NEXT:    callq use_v8i8@PLT
; AVX512-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vpcmpeqb (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX512-NEXT:    addq $24, %rsp
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %loadx = load <8 x i8>, ptr %x
  call void @use_v8i8(<8 x i8> %loadx)
  %icmp = icmp ne <8 x i8> %loadx, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; negative test - don't create extra compare

define <8 x i16> @cmp_ne_load_const_extra_use2(ptr %x) nounwind {
; SSE-LABEL: cmp_ne_load_const_extra_use2:
; SSE:       # %bb.0:
; SSE-NEXT:    subq $24, %rsp
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    callq use_v8i1@PLT
; SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    addq $24, %rsp
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ne_load_const_extra_use2:
; AVX2:       # %bb.0:
; AVX2-NEXT:    subq $24, %rsp
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    callq use_v8i1@PLT
; AVX2-NEXT:    vpmovsxbw (%rsp), %xmm0 # 16-byte Folded Reload
; AVX2-NEXT:    addq $24, %rsp
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ne_load_const_extra_use2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    subq $72, %rsp
; AVX512-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vmovdqu64 %zmm0, (%rsp) # 64-byte Spill
; AVX512-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    callq use_v8i1@PLT
; AVX512-NEXT:    vpmovsxbw (%rsp), %xmm0 # 16-byte Folded Reload
; AVX512-NEXT:    addq $72, %rsp
; AVX512-NEXT:    retq
  %loadx = load <8 x i8>, ptr %x
  %icmp = icmp ne <8 x i8> %loadx, zeroinitializer
  call void @use_v8i1(<8 x i1> %icmp)
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; negative test - not free extend

define <8 x i16> @cmp_ne_no_load_const(i64 %x) nounwind {
; SSE-LABEL: cmp_ne_no_load_const:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ne_no_load_const:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ne_no_load_const:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovq %rdi, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %t = bitcast i64 %x to <8 x i8>
  %icmp = icmp ne <8 x i8> %t, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

define <4 x i32> @cmp_ult_load_const(ptr %x) nounwind {
; SSE-LABEL: cmp_ult_load_const:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movd {{.*#+}} xmm1 = [42,214,0,255,0,0,0,0,0,0,0,0,0,0,0,0]
; SSE-NEXT:    pmaxub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_ult_load_const:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = [42,214,0,255]
; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %loadx = load <4 x i8>, ptr %x
  %icmp = icmp ult <4 x i8> %loadx, <i8 42, i8 -42, i8 0, i8 -1>
  %sext = sext <4 x i1> %icmp to <4 x i32>
  ret <4 x i32> %sext
}

; negative test - type must be legal

define <3 x i32> @cmp_ult_load_const_bad_type(ptr %x) nounwind {
; SSE-LABEL: cmp_ult_load_const_bad_type:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movd {{.*#+}} xmm1 = [42,214,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; SSE-NEXT:    pmaxub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ult_load_const_bad_type:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ult_load_const_bad_type:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %loadx = load <3 x i8>, ptr %x
  %icmp = icmp ult <3 x i8> %loadx, <i8 42, i8 -42, i8 0>
  %sext = sext <3 x i1> %icmp to <3 x i32>
  ret <3 x i32> %sext
}

; Signed compare needs signed extend.

define <4 x i32> @cmp_slt_load_const(ptr %x) nounwind {
; SSE-LABEL: cmp_slt_load_const:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [10794,10794,54998,54998,0,0,65535,65535]
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_slt_load_const:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
; AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [42,4294967254,0,4294967295]
; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %loadx = load <4 x i8>, ptr %x
  %icmp = icmp slt <4 x i8> %loadx, <i8 42, i8 -42, i8 0, i8 -1>
  %sext = sext <4 x i1> %icmp to <4 x i32>
  ret <4 x i32> %sext
}

define <2 x i64> @cmp_ne_zextload(ptr %x, ptr %y) nounwind {
; SSE-LABEL: cmp_ne_zextload:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ne_zextload:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ne_zextload:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; AVX512-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %loadx = load <2 x i32>, ptr %x
  %loady = load <2 x i32>, ptr %y
  %icmp = icmp ne <2 x i32> %loadx, %loady
  %sext = sext <2 x i1> %icmp to <2 x i64>
  ret <2 x i64> %sext
}

define <8 x i16> @cmp_ugt_zextload(ptr %x, ptr %y) nounwind {
; SSE-LABEL: cmp_ugt_zextload:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_ugt_zextload:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %loadx = load <8 x i8>, ptr %x
  %loady = load <8 x i8>, ptr %y
  %icmp = icmp ugt <8 x i8> %loadx, %loady
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; Signed compare needs signed extends.

define <8 x i16> @cmp_sgt_zextload(ptr %x, ptr %y) nounwind {
; SSE-LABEL: cmp_sgt_zextload:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sgt_zextload:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
; AVX-NEXT:    vpmovsxbw (%rsi), %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %loadx = load <8 x i8>, ptr %x
  %loady = load <8 x i8>, ptr %y
  %icmp = icmp sgt <8 x i8> %loadx, %loady
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; negative test - don't change a legal op
; TODO: Or should we? We can eliminate the vpmovsxwd at the cost of a 256-bit ymm vpcmpeqw.

define <8 x i32> @cmp_ne_zextload_from_legal_op(ptr %x, ptr %y) {
; SSE-LABEL: cmp_ne_zextload_from_legal_op:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    pcmpeqw (%rsi), %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX2-LABEL: cmp_ne_zextload_from_legal_op:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %xmm0
; AVX2-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmp_ne_zextload_from_legal_op:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa (%rdi), %xmm0
; AVX512-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; AVX512-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT:    retq
  %loadx = load <8 x i16>, ptr %x
  %loady = load <8 x i16>, ptr %y
  %icmp = icmp ne <8 x i16> %loadx, %loady
  %sext = sext <8 x i1> %icmp to <8 x i32>
  ret <8 x i32> %sext
}

; Both uses of the load can be absorbed by the zext-load, so we eliminate the explicit casts.

define <8 x i32> @PR50055(ptr %src, ptr %dst) nounwind {
; SSE-LABEL: PR50055:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pcmpeqb %xmm3, %xmm2
; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm3
; SSE-NEXT:    movdqa %xmm3, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE-NEXT:    movdqa %xmm3, 16(%rsi)
; SSE-NEXT:    movdqa %xmm2, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR50055:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm1
; AVX-NEXT:    vmovdqa %ymm1, (%rsi)
; AVX-NEXT:    retq
  %load = load <8 x i8>, ptr %src
  %zext = zext <8 x i8> %load to <8 x i32>
  %icmp = icmp ne <8 x i8> %load, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i32>
  store <8 x i32> %sext, ptr %dst
  ret <8 x i32> %zext
}

; negative test - extra uses must be absorbable by a zext-load.

define <8 x i16> @multi_use_narrower_size(ptr %src, ptr %dst) nounwind {
; SSE-LABEL: multi_use_narrower_size:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE-NEXT:    movdqa %xmm1, 16(%rsi)
; SSE-NEXT:    movdqa %xmm2, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: multi_use_narrower_size:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpmovsxbd %xmm1, %ymm1
; AVX-NEXT:    vmovdqa %ymm1, (%rsi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %load = load <8 x i8>, ptr %src
  %zext = zext <8 x i8> %load to <8 x i16>
  %icmp = icmp eq <8 x i8> %load, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i32>
  store <8 x i32> %sext, ptr %dst
  ret <8 x i16> %zext
}

; negative test - extra uses must be absorbable by a zext-load.

define <8 x i32> @multi_use_wider_size(ptr %src, ptr %dst) nounwind {
; SSE-LABEL: multi_use_wider_size:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pcmpeqb %xmm3, %xmm2
; SSE-NEXT:    movdqa %xmm2, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: multi_use_wider_size:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX-NEXT:    vmovdqa %xmm1, (%rsi)
; AVX-NEXT:    retq
  %load = load <8 x i8>, ptr %src
  %zext = zext <8 x i8> %load to <8 x i32>
  %icmp = icmp eq <8 x i8> %load, zeroinitializer
  %sext = sext <8 x i1> %icmp to <8 x i16>
  store <8 x i16> %sext, ptr %dst
  ret <8 x i32> %zext
}

define <4 x i64> @PR50055_signed(ptr %src, ptr %dst) {
; SSE-LABEL: PR50055_signed:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    psrad $24, %xmm0
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE-NEXT:    psrad $24, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    pcmpgtb %xmm3, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE-NEXT:    movdqa %xmm2, 16(%rsi)
; SSE-NEXT:    movdqa %xmm3, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR50055_signed:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd (%rdi), %ymm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm1
; AVX-NEXT:    vmovdqa %ymm1, (%rsi)
; AVX-NEXT:    retq
  %t1 = load <8 x i8>, ptr %src, align 1
  %conv = sext <8 x i8> %t1 to <8 x i32>
  %t2 = bitcast <8 x i32> %conv to <4 x i64>
  %cmp = icmp sgt <8 x i8> %t1, zeroinitializer
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, ptr %dst, align 32
  ret <4 x i64> %t2
}

define <8 x i32> @PR63946(<8 x i32> %a0, <8 x i32> %b0) nounwind {
; SSE-LABEL: PR63946:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm1, %xmm4
; SSE-NEXT:    movdqa %xmm0, %xmm13
; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,2,3,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm3[1,2,3,0]
; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm2[2,3,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[3,0,1,2]
; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[3,0,1,2]
; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT:    movdqa %xmm9, %xmm11
; SSE-NEXT:    pcmpeqd %xmm4, %xmm11
; SSE-NEXT:    movdqa %xmm7, %xmm12
; SSE-NEXT:    movdqa %xmm8, %xmm10
; SSE-NEXT:    movdqa %xmm5, %xmm15
; SSE-NEXT:    pcmpeqd %xmm4, %xmm15
; SSE-NEXT:    movdqa %xmm1, %xmm14
; SSE-NEXT:    pcmpeqd %xmm4, %xmm14
; SSE-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE-NEXT:    pcmpeqd %xmm4, %xmm7
; SSE-NEXT:    pcmpeqd %xmm4, %xmm8
; SSE-NEXT:    movdqa %xmm6, %xmm0
; SSE-NEXT:    pcmpeqd %xmm4, %xmm6
; SSE-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE-NEXT:    por %xmm4, %xmm11
; SSE-NEXT:    pcmpeqd %xmm13, %xmm12
; SSE-NEXT:    por {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload
; SSE-NEXT:    pcmpeqd %xmm13, %xmm10
; SSE-NEXT:    pcmpeqd %xmm13, %xmm0
; SSE-NEXT:    por %xmm15, %xmm2
; SSE-NEXT:    por %xmm11, %xmm2
; SSE-NEXT:    pcmpeqd %xmm13, %xmm3
; SSE-NEXT:    por %xmm3, %xmm10
; SSE-NEXT:    por %xmm12, %xmm10
; SSE-NEXT:    por %xmm14, %xmm7
; SSE-NEXT:    pcmpeqd %xmm13, %xmm9
; SSE-NEXT:    por %xmm0, %xmm9
; SSE-NEXT:    pcmpeqd %xmm13, %xmm5
; SSE-NEXT:    por %xmm9, %xmm5
; SSE-NEXT:    por %xmm10, %xmm5
; SSE-NEXT:    por %xmm7, %xmm8
; SSE-NEXT:    por %xmm2, %xmm8
; SSE-NEXT:    packssdw %xmm8, %xmm5
; SSE-NEXT:    pcmpeqd %xmm13, %xmm1
; SSE-NEXT:    packssdw %xmm6, %xmm1
; SSE-NEXT:    por %xmm5, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE-NEXT:    pslld $31, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    retq
;
; AVX2-LABEL: PR63946:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,2,3,0,5,6,7,4]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm1[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm4 = ymm1[3,0,1,2,7,4,5,6]
; AVX2-NEXT:    vpermq {{.*#+}} ymm5 = ymm1[2,3,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm6 = ymm2[2,3,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm7 = ymm3[2,3,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm8 = ymm4[2,3,0,1]
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm2
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm3, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm4, %ymm3
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm5, %ymm4
; AVX2-NEXT:    vpor %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm6, %ymm2
; AVX2-NEXT:    vpor %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm7, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm8, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR63946:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,2,3,0,5,6,7,4]
; AVX512-NEXT:    vpshufd {{.*#+}} ymm3 = ymm1[2,3,0,1,6,7,4,5]
; AVX512-NEXT:    vpshufd {{.*#+}} ymm4 = ymm1[3,0,1,2,7,4,5,6]
; AVX512-NEXT:    vpermq {{.*#+}} ymm5 = ymm1[2,3,0,1]
; AVX512-NEXT:    vpermq {{.*#+}} ymm6 = ymm2[2,3,0,1]
; AVX512-NEXT:    vpermq {{.*#+}} ymm7 = ymm3[2,3,0,1]
; AVX512-NEXT:    vpermq {{.*#+}} ymm8 = ymm4[2,3,0,1]
; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm2, %k1
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm3, %k3
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm4, %k2
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm5, %k4
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm6, %k5
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm7, %k6
; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm8, %k7
; AVX512-NEXT:    korw %k0, %k1, %k0
; AVX512-NEXT:    korw %k3, %k0, %k0
; AVX512-NEXT:    korw %k4, %k0, %k0
; AVX512-NEXT:    korw %k2, %k0, %k0
; AVX512-NEXT:    korw %k5, %k0, %k0
; AVX512-NEXT:    korw %k6, %k0, %k0
; AVX512-NEXT:    korw %k7, %k0, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x i32> %b0, <8 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
  %shuffle1 = shufflevector <8 x i32> %b0, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  %shuffle2 = shufflevector <8 x i32> %shuffle, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
  %shuffle3 = shufflevector <8 x i32> %b0, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %shuffle4 = shufflevector <8 x i32> %shuffle, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %shuffle5 = shufflevector <8 x i32> %shuffle1, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %shuffle6 = shufflevector <8 x i32> %shuffle2, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i32> %a0, %b0
  %cmp7 = icmp eq <8 x i32> %shuffle, %a0
  %cmp9 = icmp eq <8 x i32> %shuffle1, %a0
  %cmp11 = icmp eq <8 x i32> %shuffle2, %a0
  %cmp13 = icmp eq <8 x i32> %shuffle3, %a0
  %cmp15 = icmp eq <8 x i32> %shuffle4, %a0
  %cmp17 = icmp eq <8 x i32> %shuffle5, %a0
  %cmp19 = icmp eq <8 x i32> %shuffle6, %a0
  %or2365 = or <8 x i1> %cmp7, %cmp
  %or2264 = or <8 x i1> %or2365, %cmp9
  %or2567 = or <8 x i1> %or2264, %cmp13
  %or2163 = or <8 x i1> %or2567, %cmp11
  %or62 = or <8 x i1> %or2163, %cmp15
  %or2466 = or <8 x i1> %or62, %cmp17
  %or2668 = or <8 x i1> %or2466, %cmp19
  %or26 = sext <8 x i1> %or2668 to <8 x i32>
  ret <8 x i32> %or26
}