; NOTE(review): Autogenerated llc/FileCheck regression test (see the NOTE line
; below: utils/update_llc_test_checks.py). It covers unsigned-max horizontal
; reductions -- icmp ugt + select shuffle chains over <2 x i64>, <4 x i32>,
; <8 x i16>, <16 x i8> and their 256-bit counterparts -- for i686/x86_64
; Darwin with SSE2, SSE4.2, AVX, AVX2 and AVX512 feature sets (RUN lines).
; Do not hand-edit the CHECK lines; regenerate them with the script instead.
; NOTE(review): This copy is mangled by extraction -- the original file's line
; numbers are fused into the text and logical lines are split arbitrarily
; across physical lines, so the content below is preserved byte-for-byte.
; The final function (@test_reduce_v16i16) is truncated at the end of this
; chunk; recover the remainder from the original file before regenerating.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2 3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42 4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1 5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2 6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2 7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42 8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1 9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2 10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512 11 12; 13; 128-bit Vectors 14; 15 16define i64 @test_reduce_v2i64(<2 x i64> %a0) { 17; X86-SSE2-LABEL: test_reduce_v2i64: 18; X86-SSE2: ## %bb.0: 19; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 20; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 21; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 22; X86-SSE2-NEXT: pxor %xmm2, %xmm3 23; X86-SSE2-NEXT: pxor %xmm1, %xmm2 24; X86-SSE2-NEXT: movdqa %xmm3, %xmm4 25; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 26; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 27; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 28; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 29; X86-SSE2-NEXT: pand %xmm5, %xmm2 30; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 31; X86-SSE2-NEXT: por %xmm2, %xmm3 32; X86-SSE2-NEXT: pand %xmm3, %xmm0 33; X86-SSE2-NEXT: pandn %xmm1, %xmm3 34; X86-SSE2-NEXT: por %xmm0, %xmm3 35; X86-SSE2-NEXT: movd %xmm3, %eax 36; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm3[1,1,1,1] 37; X86-SSE2-NEXT: movd %xmm0, %edx 38; X86-SSE2-NEXT: retl 39; 40; X86-SSE42-LABEL: test_reduce_v2i64: 41; X86-SSE42: ## %bb.0: 42; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 43; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 44; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648] 45; X86-SSE42-NEXT: pxor %xmm3, %xmm0 46; X86-SSE42-NEXT: pxor %xmm2, %xmm3 47; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 48; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 49; X86-SSE42-NEXT: movd %xmm2, %eax 50; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 51; X86-SSE42-NEXT: retl 52; 53; X86-AVX1-LABEL: test_reduce_v2i64: 54; X86-AVX1: ## %bb.0: 55; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 56; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648] 57; X86-AVX1-NEXT: ## xmm2 = mem[0,0] 58; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3 59; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2 60; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 61; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 62; X86-AVX1-NEXT: vmovd %xmm0, %eax 63; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 64; X86-AVX1-NEXT: retl 65; 66; X86-AVX2-LABEL: test_reduce_v2i64: 67; X86-AVX2: ## %bb.0: 68; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 69; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648] 70; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 71; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 72; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 73; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 74; X86-AVX2-NEXT: vmovd %xmm0, %eax 75; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 76; X86-AVX2-NEXT: retl 77; 78; X64-SSE2-LABEL: test_reduce_v2i64: 79; X64-SSE2: ## %bb.0: 80; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 81; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] 82; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 83; X64-SSE2-NEXT: pxor %xmm2, %xmm3 84; X64-SSE2-NEXT: pxor %xmm1, %xmm2 85; X64-SSE2-NEXT: movdqa %xmm3, %xmm4 86; X64-SSE2-NEXT: pcmpgtd 
%xmm2, %xmm4 87; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 88; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 89; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 90; X64-SSE2-NEXT: pand %xmm5, %xmm2 91; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 92; X64-SSE2-NEXT: por %xmm2, %xmm3 93; X64-SSE2-NEXT: pand %xmm3, %xmm0 94; X64-SSE2-NEXT: pandn %xmm1, %xmm3 95; X64-SSE2-NEXT: por %xmm0, %xmm3 96; X64-SSE2-NEXT: movq %xmm3, %rax 97; X64-SSE2-NEXT: retq 98; 99; X64-SSE42-LABEL: test_reduce_v2i64: 100; X64-SSE42: ## %bb.0: 101; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 102; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 103; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] 104; X64-SSE42-NEXT: pxor %xmm3, %xmm0 105; X64-SSE42-NEXT: pxor %xmm2, %xmm3 106; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 107; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 108; X64-SSE42-NEXT: movq %xmm2, %rax 109; X64-SSE42-NEXT: retq 110; 111; X64-AVX1-LABEL: test_reduce_v2i64: 112; X64-AVX1: ## %bb.0: 113; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 114; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 115; X64-AVX1-NEXT: ## xmm2 = mem[0,0] 116; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3 117; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2 118; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 119; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 120; X64-AVX1-NEXT: vmovq %xmm0, %rax 121; X64-AVX1-NEXT: retq 122; 123; X64-AVX2-LABEL: test_reduce_v2i64: 124; X64-AVX2: ## %bb.0: 125; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 126; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 127; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 128; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2 129; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 130; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 131; X64-AVX2-NEXT: vmovq %xmm0, %rax 132; X64-AVX2-NEXT: retq 133; 134; X64-AVX512-LABEL: test_reduce_v2i64: 
135; X64-AVX512: ## %bb.0: 136; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 137; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 138; X64-AVX512-NEXT: vmovq %xmm0, %rax 139; X64-AVX512-NEXT: retq 140 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 141 %2 = icmp ugt <2 x i64> %a0, %1 142 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1 143 %4 = extractelement <2 x i64> %3, i32 0 144 ret i64 %4 145} 146 147define i32 @test_reduce_v4i32(<4 x i32> %a0) { 148; X86-SSE2-LABEL: test_reduce_v4i32: 149; X86-SSE2: ## %bb.0: 150; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] 151; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 152; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 153; X86-SSE2-NEXT: pxor %xmm2, %xmm1 154; X86-SSE2-NEXT: movdqa %xmm3, %xmm4 155; X86-SSE2-NEXT: pxor %xmm2, %xmm4 156; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 157; X86-SSE2-NEXT: pand %xmm1, %xmm0 158; X86-SSE2-NEXT: pandn %xmm3, %xmm1 159; X86-SSE2-NEXT: por %xmm0, %xmm1 160; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 161; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 162; X86-SSE2-NEXT: pxor %xmm2, %xmm3 163; X86-SSE2-NEXT: pxor %xmm0, %xmm2 164; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 165; X86-SSE2-NEXT: pand %xmm3, %xmm1 166; X86-SSE2-NEXT: pandn %xmm0, %xmm3 167; X86-SSE2-NEXT: por %xmm1, %xmm3 168; X86-SSE2-NEXT: movd %xmm3, %eax 169; X86-SSE2-NEXT: retl 170; 171; X86-SSE42-LABEL: test_reduce_v4i32: 172; X86-SSE42: ## %bb.0: 173; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 174; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1 175; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 176; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0 177; X86-SSE42-NEXT: movd %xmm0, %eax 178; X86-SSE42-NEXT: retl 179; 180; X86-AVX-LABEL: test_reduce_v4i32: 181; X86-AVX: ## %bb.0: 182; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 183; X86-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 184; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 185; X86-AVX-NEXT: 
vpmaxud %xmm1, %xmm0, %xmm0 186; X86-AVX-NEXT: vmovd %xmm0, %eax 187; X86-AVX-NEXT: retl 188; 189; X64-SSE2-LABEL: test_reduce_v4i32: 190; X64-SSE2: ## %bb.0: 191; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 192; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 193; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 194; X64-SSE2-NEXT: pxor %xmm2, %xmm3 195; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 196; X64-SSE2-NEXT: pxor %xmm2, %xmm4 197; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 198; X64-SSE2-NEXT: pand %xmm3, %xmm0 199; X64-SSE2-NEXT: pandn %xmm1, %xmm3 200; X64-SSE2-NEXT: por %xmm0, %xmm3 201; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1] 202; X64-SSE2-NEXT: movdqa %xmm3, %xmm1 203; X64-SSE2-NEXT: pxor %xmm2, %xmm1 204; X64-SSE2-NEXT: pxor %xmm0, %xmm2 205; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 206; X64-SSE2-NEXT: pand %xmm1, %xmm3 207; X64-SSE2-NEXT: pandn %xmm0, %xmm1 208; X64-SSE2-NEXT: por %xmm3, %xmm1 209; X64-SSE2-NEXT: movd %xmm1, %eax 210; X64-SSE2-NEXT: retq 211; 212; X64-SSE42-LABEL: test_reduce_v4i32: 213; X64-SSE42: ## %bb.0: 214; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 215; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1 216; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 217; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0 218; X64-SSE42-NEXT: movd %xmm0, %eax 219; X64-SSE42-NEXT: retq 220; 221; X64-AVX-LABEL: test_reduce_v4i32: 222; X64-AVX: ## %bb.0: 223; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 224; X64-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 225; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 226; X64-AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 227; X64-AVX-NEXT: vmovd %xmm0, %eax 228; X64-AVX-NEXT: retq 229 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 230 %2 = icmp ugt <4 x i32> %a0, %1 231 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1 232 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 233 %5 = icmp ugt <4 x 
i32> %3, %4 234 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4 235 %7 = extractelement <4 x i32> %6, i32 0 236 ret i32 %7 237} 238 239define i16 @test_reduce_v8i16(<8 x i16> %a0) { 240; X86-SSE2-LABEL: test_reduce_v8i16: 241; X86-SSE2: ## %bb.0: 242; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 243; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 244; X86-SSE2-NEXT: paddw %xmm0, %xmm1 245; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 246; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 247; X86-SSE2-NEXT: paddw %xmm1, %xmm0 248; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 249; X86-SSE2-NEXT: psrld $16, %xmm1 250; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 251; X86-SSE2-NEXT: paddw %xmm0, %xmm1 252; X86-SSE2-NEXT: movd %xmm1, %eax 253; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 254; X86-SSE2-NEXT: retl 255; 256; X86-SSE42-LABEL: test_reduce_v8i16: 257; X86-SSE42: ## %bb.0: 258; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 259; X86-SSE42-NEXT: pxor %xmm0, %xmm1 260; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 261; X86-SSE42-NEXT: movd %xmm0, %eax 262; X86-SSE42-NEXT: notl %eax 263; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 264; X86-SSE42-NEXT: retl 265; 266; X86-AVX-LABEL: test_reduce_v8i16: 267; X86-AVX: ## %bb.0: 268; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 269; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 270; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 271; X86-AVX-NEXT: vmovd %xmm0, %eax 272; X86-AVX-NEXT: notl %eax 273; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 274; X86-AVX-NEXT: retl 275; 276; X64-SSE2-LABEL: test_reduce_v8i16: 277; X64-SSE2: ## %bb.0: 278; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 279; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 280; X64-SSE2-NEXT: paddw %xmm0, %xmm1 281; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 282; X64-SSE2-NEXT: psubusw %xmm1, %xmm0 283; X64-SSE2-NEXT: paddw %xmm1, %xmm0 284; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 285; X64-SSE2-NEXT: psrld $16, %xmm1 286; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 287; X64-SSE2-NEXT: paddw 
%xmm0, %xmm1 288; X64-SSE2-NEXT: movd %xmm1, %eax 289; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 290; X64-SSE2-NEXT: retq 291; 292; X64-SSE42-LABEL: test_reduce_v8i16: 293; X64-SSE42: ## %bb.0: 294; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 295; X64-SSE42-NEXT: pxor %xmm0, %xmm1 296; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 297; X64-SSE42-NEXT: movd %xmm0, %eax 298; X64-SSE42-NEXT: notl %eax 299; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 300; X64-SSE42-NEXT: retq 301; 302; X64-AVX1-LABEL: test_reduce_v8i16: 303; X64-AVX1: ## %bb.0: 304; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 305; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 306; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 307; X64-AVX1-NEXT: vmovd %xmm0, %eax 308; X64-AVX1-NEXT: notl %eax 309; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 310; X64-AVX1-NEXT: retq 311; 312; X64-AVX2-LABEL: test_reduce_v8i16: 313; X64-AVX2: ## %bb.0: 314; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 315; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 316; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 317; X64-AVX2-NEXT: vmovd %xmm0, %eax 318; X64-AVX2-NEXT: notl %eax 319; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 320; X64-AVX2-NEXT: retq 321; 322; X64-AVX512-LABEL: test_reduce_v8i16: 323; X64-AVX512: ## %bb.0: 324; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 325; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 326; X64-AVX512-NEXT: vmovd %xmm0, %eax 327; X64-AVX512-NEXT: notl %eax 328; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 329; X64-AVX512-NEXT: retq 330 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 331 %2 = icmp ugt <8 x i16> %a0, %1 332 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1 333 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 334 %5 = icmp ugt <8 x i16> %3, %4 335 %6 = select <8 x i1> %5, <8 x 
i16> %3, <8 x i16> %4 336 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 337 %8 = icmp ugt <8 x i16> %6, %7 338 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7 339 %10 = extractelement <8 x i16> %9, i32 0 340 ret i16 %10 341} 342 343define i8 @test_reduce_v16i8(<16 x i8> %a0) { 344; X86-SSE2-LABEL: test_reduce_v16i8: 345; X86-SSE2: ## %bb.0: 346; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 347; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 348; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 349; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 350; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 351; X86-SSE2-NEXT: psrld $16, %xmm1 352; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 353; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 354; X86-SSE2-NEXT: psrlw $8, %xmm0 355; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 356; X86-SSE2-NEXT: movd %xmm0, %eax 357; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 358; X86-SSE2-NEXT: retl 359; 360; X86-SSE42-LABEL: test_reduce_v16i8: 361; X86-SSE42: ## %bb.0: 362; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 363; X86-SSE42-NEXT: pxor %xmm0, %xmm1 364; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 365; X86-SSE42-NEXT: psrlw $8, %xmm0 366; X86-SSE42-NEXT: pminub %xmm1, %xmm0 367; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 368; X86-SSE42-NEXT: movd %xmm0, %eax 369; X86-SSE42-NEXT: notb %al 370; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 371; X86-SSE42-NEXT: retl 372; 373; X86-AVX-LABEL: test_reduce_v16i8: 374; X86-AVX: ## %bb.0: 375; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 376; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 377; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 378; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 379; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 380; X86-AVX-NEXT: vmovd %xmm0, %eax 381; X86-AVX-NEXT: notb %al 382; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 383; X86-AVX-NEXT: retl 384; 385; X64-SSE2-LABEL: test_reduce_v16i8: 386; X64-SSE2: ## %bb.0: 387; X64-SSE2-NEXT: 
pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 388; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 389; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 390; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 391; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 392; X64-SSE2-NEXT: psrld $16, %xmm1 393; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 394; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 395; X64-SSE2-NEXT: psrlw $8, %xmm0 396; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 397; X64-SSE2-NEXT: movd %xmm0, %eax 398; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 399; X64-SSE2-NEXT: retq 400; 401; X64-SSE42-LABEL: test_reduce_v16i8: 402; X64-SSE42: ## %bb.0: 403; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 404; X64-SSE42-NEXT: pxor %xmm0, %xmm1 405; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 406; X64-SSE42-NEXT: psrlw $8, %xmm0 407; X64-SSE42-NEXT: pminub %xmm1, %xmm0 408; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 409; X64-SSE42-NEXT: movd %xmm0, %eax 410; X64-SSE42-NEXT: notb %al 411; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 412; X64-SSE42-NEXT: retq 413; 414; X64-AVX1-LABEL: test_reduce_v16i8: 415; X64-AVX1: ## %bb.0: 416; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 417; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 418; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 419; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 420; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 421; X64-AVX1-NEXT: vmovd %xmm0, %eax 422; X64-AVX1-NEXT: notb %al 423; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 424; X64-AVX1-NEXT: retq 425; 426; X64-AVX2-LABEL: test_reduce_v16i8: 427; X64-AVX2: ## %bb.0: 428; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 429; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 430; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 431; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 432; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 433; X64-AVX2-NEXT: vmovd %xmm0, %eax 434; X64-AVX2-NEXT: notb %al 435; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 436; X64-AVX2-NEXT: retq 437; 438; X64-AVX512-LABEL: test_reduce_v16i8: 439; X64-AVX512: ## %bb.0: 440; 
X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 441; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 442; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 443; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 444; X64-AVX512-NEXT: vmovd %xmm0, %eax 445; X64-AVX512-NEXT: notb %al 446; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 447; X64-AVX512-NEXT: retq 448 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 449 %2 = icmp ugt <16 x i8> %a0, %1 450 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1 451 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 452 %5 = icmp ugt <16 x i8> %3, %4 453 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 454 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 455 %8 = icmp ugt <16 x i8> %6, %7 456 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7 457 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 458 %11 = icmp ugt <16 x i8> %9, %10 459 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10 460 %13 = extractelement <16 x i8> %12, i32 0 461 ret i8 %13 462} 463 464; 465; 256-bit Vectors 466; 467 468define i64 @test_reduce_v4i64(<4 x i64> %a0) { 469; X86-SSE2-LABEL: test_reduce_v4i64: 470; X86-SSE2: ## %bb.0: 471; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 472; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 
473; X86-SSE2-NEXT: pxor %xmm2, %xmm3 474; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 475; X86-SSE2-NEXT: pxor %xmm2, %xmm4 476; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 477; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 478; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 479; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 480; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 481; X86-SSE2-NEXT: pand %xmm6, %xmm4 482; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 483; X86-SSE2-NEXT: por %xmm4, %xmm3 484; X86-SSE2-NEXT: pand %xmm3, %xmm0 485; X86-SSE2-NEXT: pandn %xmm1, %xmm3 486; X86-SSE2-NEXT: por %xmm0, %xmm3 487; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 488; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 489; X86-SSE2-NEXT: pxor %xmm2, %xmm1 490; X86-SSE2-NEXT: pxor %xmm0, %xmm2 491; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 492; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 493; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 494; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 495; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 496; X86-SSE2-NEXT: pand %xmm5, %xmm1 497; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 498; X86-SSE2-NEXT: por %xmm1, %xmm2 499; X86-SSE2-NEXT: pand %xmm2, %xmm3 500; X86-SSE2-NEXT: pandn %xmm0, %xmm2 501; X86-SSE2-NEXT: por %xmm3, %xmm2 502; X86-SSE2-NEXT: movd %xmm2, %eax 503; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 504; X86-SSE2-NEXT: movd %xmm0, %edx 505; X86-SSE2-NEXT: retl 506; 507; X86-SSE42-LABEL: test_reduce_v4i64: 508; X86-SSE42: ## %bb.0: 509; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 510; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648] 511; X86-SSE42-NEXT: movdqa %xmm1, %xmm4 512; X86-SSE42-NEXT: pxor %xmm3, %xmm4 513; X86-SSE42-NEXT: pxor %xmm3, %xmm0 514; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 515; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 516; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 517; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 518; X86-SSE42-NEXT: pxor %xmm3, %xmm0 519; X86-SSE42-NEXT: pxor %xmm2, %xmm3 520; 
X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 521; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 522; X86-SSE42-NEXT: movd %xmm2, %eax 523; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 524; X86-SSE42-NEXT: retl 525; 526; X86-AVX1-LABEL: test_reduce_v4i64: 527; X86-AVX1: ## %bb.0: 528; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 529; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648] 530; X86-AVX1-NEXT: ## xmm2 = mem[0,0] 531; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3 532; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4 533; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 534; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 535; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 536; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 537; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 538; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 539; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 540; X86-AVX1-NEXT: vmovd %xmm0, %eax 541; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 542; X86-AVX1-NEXT: vzeroupper 543; X86-AVX1-NEXT: retl 544; 545; X86-AVX2-LABEL: test_reduce_v4i64: 546; X86-AVX2: ## %bb.0: 547; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 548; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648] 549; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 550; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4 551; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 552; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 553; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 554; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 555; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 556; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 557; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 558; X86-AVX2-NEXT: vmovd %xmm0, %eax 559; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 560; X86-AVX2-NEXT: vzeroupper 561; X86-AVX2-NEXT: retl 562; 563; X64-SSE2-LABEL: test_reduce_v4i64: 564; X64-SSE2: ## %bb.0: 565; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] 566; X64-SSE2-NEXT: movdqa 
%xmm1, %xmm3 567; X64-SSE2-NEXT: pxor %xmm2, %xmm3 568; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 569; X64-SSE2-NEXT: pxor %xmm2, %xmm4 570; X64-SSE2-NEXT: movdqa %xmm4, %xmm5 571; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 572; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 573; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 574; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 575; X64-SSE2-NEXT: pand %xmm6, %xmm3 576; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 577; X64-SSE2-NEXT: por %xmm3, %xmm4 578; X64-SSE2-NEXT: pand %xmm4, %xmm0 579; X64-SSE2-NEXT: pandn %xmm1, %xmm4 580; X64-SSE2-NEXT: por %xmm0, %xmm4 581; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] 582; X64-SSE2-NEXT: movdqa %xmm4, %xmm1 583; X64-SSE2-NEXT: pxor %xmm2, %xmm1 584; X64-SSE2-NEXT: pxor %xmm0, %xmm2 585; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 586; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 587; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 588; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 589; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 590; X64-SSE2-NEXT: pand %xmm5, %xmm1 591; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 592; X64-SSE2-NEXT: por %xmm1, %xmm2 593; X64-SSE2-NEXT: pand %xmm2, %xmm4 594; X64-SSE2-NEXT: pandn %xmm0, %xmm2 595; X64-SSE2-NEXT: por %xmm4, %xmm2 596; X64-SSE2-NEXT: movq %xmm2, %rax 597; X64-SSE2-NEXT: retq 598; 599; X64-SSE42-LABEL: test_reduce_v4i64: 600; X64-SSE42: ## %bb.0: 601; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 602; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] 603; X64-SSE42-NEXT: movdqa %xmm1, %xmm4 604; X64-SSE42-NEXT: pxor %xmm3, %xmm4 605; X64-SSE42-NEXT: pxor %xmm3, %xmm0 606; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 607; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 608; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 609; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 610; X64-SSE42-NEXT: pxor %xmm3, %xmm0 611; X64-SSE42-NEXT: pxor %xmm2, %xmm3 612; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 613; X64-SSE42-NEXT: blendvpd %xmm0, 
%xmm1, %xmm2 614; X64-SSE42-NEXT: movq %xmm2, %rax 615; X64-SSE42-NEXT: retq 616; 617; X64-AVX1-LABEL: test_reduce_v4i64: 618; X64-AVX1: ## %bb.0: 619; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 620; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 621; X64-AVX1-NEXT: ## xmm2 = mem[0,0] 622; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3 623; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm4 624; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 625; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 626; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 627; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 628; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 629; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 630; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 631; X64-AVX1-NEXT: vmovq %xmm0, %rax 632; X64-AVX1-NEXT: vzeroupper 633; X64-AVX1-NEXT: retq 634; 635; X64-AVX2-LABEL: test_reduce_v4i64: 636; X64-AVX2: ## %bb.0: 637; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 638; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 639; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 640; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4 641; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 642; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 643; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 644; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 645; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 646; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 647; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 648; X64-AVX2-NEXT: vmovq %xmm0, %rax 649; X64-AVX2-NEXT: vzeroupper 650; X64-AVX2-NEXT: retq 651; 652; X64-AVX512-LABEL: test_reduce_v4i64: 653; X64-AVX512: ## %bb.0: 654; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 655; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 656; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 657; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 658; X64-AVX512-NEXT: vmovq %xmm0, %rax 659; X64-AVX512-NEXT: vzeroupper 660; 
X64-AVX512-NEXT: retq 661 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 662 %2 = icmp ugt <4 x i64> %a0, %1 663 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1 664 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 665 %5 = icmp ugt <4 x i64> %3, %4 666 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4 667 %7 = extractelement <4 x i64> %6, i32 0 668 ret i64 %7 669} 670 671define i32 @test_reduce_v8i32(<8 x i32> %a0) { 672; X86-SSE2-LABEL: test_reduce_v8i32: 673; X86-SSE2: ## %bb.0: 674; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 675; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 676; X86-SSE2-NEXT: pxor %xmm2, %xmm4 677; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 678; X86-SSE2-NEXT: pxor %xmm2, %xmm3 679; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 680; X86-SSE2-NEXT: pand %xmm3, %xmm0 681; X86-SSE2-NEXT: pandn %xmm1, %xmm3 682; X86-SSE2-NEXT: por %xmm0, %xmm3 683; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 684; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 685; X86-SSE2-NEXT: pxor %xmm2, %xmm0 686; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 687; X86-SSE2-NEXT: pxor %xmm2, %xmm4 688; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 689; X86-SSE2-NEXT: pand %xmm0, %xmm3 690; X86-SSE2-NEXT: pandn %xmm1, %xmm0 691; X86-SSE2-NEXT: por %xmm3, %xmm0 692; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 693; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 694; X86-SSE2-NEXT: pxor %xmm2, %xmm3 695; X86-SSE2-NEXT: pxor %xmm1, %xmm2 696; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 697; X86-SSE2-NEXT: pand %xmm3, %xmm0 698; X86-SSE2-NEXT: pandn %xmm1, %xmm3 699; X86-SSE2-NEXT: por %xmm0, %xmm3 700; X86-SSE2-NEXT: movd %xmm3, %eax 701; X86-SSE2-NEXT: retl 702; 703; X86-SSE42-LABEL: test_reduce_v8i32: 704; X86-SSE42: ## %bb.0: 705; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0 706; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 707; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1 708; X86-SSE42-NEXT: pshufd 
{{.*#+}} xmm0 = xmm1[1,1,1,1] 709; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0 710; X86-SSE42-NEXT: movd %xmm0, %eax 711; X86-SSE42-NEXT: retl 712; 713; X86-AVX1-LABEL: test_reduce_v8i32: 714; X86-AVX1: ## %bb.0: 715; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 716; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 717; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 718; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 719; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 720; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 721; X86-AVX1-NEXT: vmovd %xmm0, %eax 722; X86-AVX1-NEXT: vzeroupper 723; X86-AVX1-NEXT: retl 724; 725; X86-AVX2-LABEL: test_reduce_v8i32: 726; X86-AVX2: ## %bb.0: 727; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 728; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 729; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 730; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 731; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 732; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 733; X86-AVX2-NEXT: vmovd %xmm0, %eax 734; X86-AVX2-NEXT: vzeroupper 735; X86-AVX2-NEXT: retl 736; 737; X64-SSE2-LABEL: test_reduce_v8i32: 738; X64-SSE2: ## %bb.0: 739; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 740; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 741; X64-SSE2-NEXT: pxor %xmm2, %xmm3 742; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 743; X64-SSE2-NEXT: pxor %xmm2, %xmm4 744; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 745; X64-SSE2-NEXT: pand %xmm4, %xmm0 746; X64-SSE2-NEXT: pandn %xmm1, %xmm4 747; X64-SSE2-NEXT: por %xmm0, %xmm4 748; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] 749; X64-SSE2-NEXT: movdqa %xmm4, %xmm1 750; X64-SSE2-NEXT: pxor %xmm2, %xmm1 751; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 752; X64-SSE2-NEXT: pxor %xmm2, %xmm3 753; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm1 754; X64-SSE2-NEXT: pand %xmm1, %xmm4 755; X64-SSE2-NEXT: pandn %xmm0, %xmm1 756; X64-SSE2-NEXT: por %xmm4, %xmm1 757; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 758; X64-SSE2-NEXT: 
movdqa %xmm1, %xmm3 759; X64-SSE2-NEXT: pxor %xmm2, %xmm3 760; X64-SSE2-NEXT: pxor %xmm0, %xmm2 761; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 762; X64-SSE2-NEXT: pand %xmm3, %xmm1 763; X64-SSE2-NEXT: pandn %xmm0, %xmm3 764; X64-SSE2-NEXT: por %xmm1, %xmm3 765; X64-SSE2-NEXT: movd %xmm3, %eax 766; X64-SSE2-NEXT: retq 767; 768; X64-SSE42-LABEL: test_reduce_v8i32: 769; X64-SSE42: ## %bb.0: 770; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0 771; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 772; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1 773; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 774; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0 775; X64-SSE42-NEXT: movd %xmm0, %eax 776; X64-SSE42-NEXT: retq 777; 778; X64-AVX1-LABEL: test_reduce_v8i32: 779; X64-AVX1: ## %bb.0: 780; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 781; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 782; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 783; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 784; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 785; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 786; X64-AVX1-NEXT: vmovd %xmm0, %eax 787; X64-AVX1-NEXT: vzeroupper 788; X64-AVX1-NEXT: retq 789; 790; X64-AVX2-LABEL: test_reduce_v8i32: 791; X64-AVX2: ## %bb.0: 792; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 793; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 794; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 795; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 796; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 797; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 798; X64-AVX2-NEXT: vmovd %xmm0, %eax 799; X64-AVX2-NEXT: vzeroupper 800; X64-AVX2-NEXT: retq 801; 802; X64-AVX512-LABEL: test_reduce_v8i32: 803; X64-AVX512: ## %bb.0: 804; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 805; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 806; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 807; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 808; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 809; 
X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 810; X64-AVX512-NEXT: vmovd %xmm0, %eax 811; X64-AVX512-NEXT: vzeroupper 812; X64-AVX512-NEXT: retq 813 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 814 %2 = icmp ugt <8 x i32> %a0, %1 815 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1 816 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 817 %5 = icmp ugt <8 x i32> %3, %4 818 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 819 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 820 %8 = icmp ugt <8 x i32> %6, %7 821 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7 822 %10 = extractelement <8 x i32> %9, i32 0 823 ret i32 %10 824} 825 826define i16 @test_reduce_v16i16(<16 x i16> %a0) { 827; X86-SSE2-LABEL: test_reduce_v16i16: 828; X86-SSE2: ## %bb.0: 829; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 830; X86-SSE2-NEXT: paddw %xmm0, %xmm1 831; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 832; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 833; X86-SSE2-NEXT: paddw %xmm1, %xmm0 834; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 835; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 836; X86-SSE2-NEXT: paddw %xmm0, %xmm1 837; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 838; X86-SSE2-NEXT: psrld $16, %xmm0 839; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 840; X86-SSE2-NEXT: paddw %xmm1, %xmm0 841; X86-SSE2-NEXT: movd %xmm0, %eax 842; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 843; X86-SSE2-NEXT: retl 844; 845; X86-SSE42-LABEL: test_reduce_v16i16: 846; X86-SSE42: ## %bb.0: 847; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0 848; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 849; X86-SSE42-NEXT: pxor %xmm0, %xmm1 850; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 851; X86-SSE42-NEXT: movd %xmm0, %eax 852; X86-SSE42-NEXT: notl %eax 853; 
X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 854; X86-SSE42-NEXT: retl 855; 856; X86-AVX1-LABEL: test_reduce_v16i16: 857; X86-AVX1: ## %bb.0: 858; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 859; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 860; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 861; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 862; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 863; X86-AVX1-NEXT: vmovd %xmm0, %eax 864; X86-AVX1-NEXT: notl %eax 865; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 866; X86-AVX1-NEXT: vzeroupper 867; X86-AVX1-NEXT: retl 868; 869; X86-AVX2-LABEL: test_reduce_v16i16: 870; X86-AVX2: ## %bb.0: 871; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 872; X86-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 873; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 874; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 875; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 876; X86-AVX2-NEXT: vmovd %xmm0, %eax 877; X86-AVX2-NEXT: notl %eax 878; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 879; X86-AVX2-NEXT: vzeroupper 880; X86-AVX2-NEXT: retl 881; 882; X64-SSE2-LABEL: test_reduce_v16i16: 883; X64-SSE2: ## %bb.0: 884; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 885; X64-SSE2-NEXT: paddw %xmm0, %xmm1 886; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 887; X64-SSE2-NEXT: psubusw %xmm1, %xmm0 888; X64-SSE2-NEXT: paddw %xmm1, %xmm0 889; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 890; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 891; X64-SSE2-NEXT: paddw %xmm0, %xmm1 892; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 893; X64-SSE2-NEXT: psrld $16, %xmm0 894; X64-SSE2-NEXT: psubusw %xmm1, %xmm0 895; X64-SSE2-NEXT: paddw %xmm1, %xmm0 896; X64-SSE2-NEXT: movd %xmm0, %eax 897; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 898; X64-SSE2-NEXT: retq 899; 900; X64-SSE42-LABEL: test_reduce_v16i16: 901; X64-SSE42: ## %bb.0: 902; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0 903; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 904; X64-SSE42-NEXT: pxor %xmm0, %xmm1 905; X64-SSE42-NEXT: phminposuw 
%xmm1, %xmm0 906; X64-SSE42-NEXT: movd %xmm0, %eax 907; X64-SSE42-NEXT: notl %eax 908; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 909; X64-SSE42-NEXT: retq 910; 911; X64-AVX1-LABEL: test_reduce_v16i16: 912; X64-AVX1: ## %bb.0: 913; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 914; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 915; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 916; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 917; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 918; X64-AVX1-NEXT: vmovd %xmm0, %eax 919; X64-AVX1-NEXT: notl %eax 920; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 921; X64-AVX1-NEXT: vzeroupper 922; X64-AVX1-NEXT: retq 923; 924; X64-AVX2-LABEL: test_reduce_v16i16: 925; X64-AVX2: ## %bb.0: 926; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 927; X64-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 928; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 929; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 930; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 931; X64-AVX2-NEXT: vmovd %xmm0, %eax 932; X64-AVX2-NEXT: notl %eax 933; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 934; X64-AVX2-NEXT: vzeroupper 935; X64-AVX2-NEXT: retq 936; 937; X64-AVX512-LABEL: test_reduce_v16i16: 938; X64-AVX512: ## %bb.0: 939; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 940; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 941; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 942; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 943; X64-AVX512-NEXT: vmovd %xmm0, %eax 944; X64-AVX512-NEXT: notl %eax 945; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 946; X64-AVX512-NEXT: vzeroupper 947; X64-AVX512-NEXT: retq 948 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 949 %2 = icmp ugt <16 x i16> %a0, %1 950 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 951 %4 = shufflevector <16 x i16> %3, <16 x i16> 
undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 952 %5 = icmp ugt <16 x i16> %3, %4 953 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 954 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 955 %8 = icmp ugt <16 x i16> %6, %7 956 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 957 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 958 %11 = icmp ugt <16 x i16> %9, %10 959 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 960 %13 = extractelement <16 x i16> %12, i32 0 961 ret i16 %13 962} 963 964define i8 @test_reduce_v32i8(<32 x i8> %a0) { 965; X86-SSE2-LABEL: test_reduce_v32i8: 966; X86-SSE2: ## %bb.0: 967; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 968; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 969; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 970; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 971; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 972; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 973; X86-SSE2-NEXT: psrld $16, %xmm1 974; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 975; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 976; X86-SSE2-NEXT: psrlw $8, %xmm0 977; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 978; X86-SSE2-NEXT: movd %xmm0, %eax 979; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 980; X86-SSE2-NEXT: retl 981; 982; X86-SSE42-LABEL: test_reduce_v32i8: 983; X86-SSE42: ## %bb.0: 984; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0 985; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 986; X86-SSE42-NEXT: pxor %xmm0, %xmm1 987; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 988; X86-SSE42-NEXT: psrlw $8, %xmm0 989; 
X86-SSE42-NEXT: pminub %xmm1, %xmm0 990; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 991; X86-SSE42-NEXT: movd %xmm0, %eax 992; X86-SSE42-NEXT: notb %al 993; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 994; X86-SSE42-NEXT: retl 995; 996; X86-AVX1-LABEL: test_reduce_v32i8: 997; X86-AVX1: ## %bb.0: 998; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 999; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1000; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1001; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1002; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1003; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1004; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1005; X86-AVX1-NEXT: vmovd %xmm0, %eax 1006; X86-AVX1-NEXT: notb %al 1007; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1008; X86-AVX1-NEXT: vzeroupper 1009; X86-AVX1-NEXT: retl 1010; 1011; X86-AVX2-LABEL: test_reduce_v32i8: 1012; X86-AVX2: ## %bb.0: 1013; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1014; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1015; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1016; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1017; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1018; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1019; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1020; X86-AVX2-NEXT: vmovd %xmm0, %eax 1021; X86-AVX2-NEXT: notb %al 1022; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1023; X86-AVX2-NEXT: vzeroupper 1024; X86-AVX2-NEXT: retl 1025; 1026; X64-SSE2-LABEL: test_reduce_v32i8: 1027; X64-SSE2: ## %bb.0: 1028; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1029; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1030; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1031; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1032; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1033; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1034; X64-SSE2-NEXT: psrld $16, %xmm1 1035; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1036; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1037; X64-SSE2-NEXT: psrlw $8, %xmm0 1038; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1039; X64-SSE2-NEXT: 
movd %xmm0, %eax 1040; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1041; X64-SSE2-NEXT: retq 1042; 1043; X64-SSE42-LABEL: test_reduce_v32i8: 1044; X64-SSE42: ## %bb.0: 1045; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0 1046; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 1047; X64-SSE42-NEXT: pxor %xmm0, %xmm1 1048; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1049; X64-SSE42-NEXT: psrlw $8, %xmm0 1050; X64-SSE42-NEXT: pminub %xmm1, %xmm0 1051; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1052; X64-SSE42-NEXT: movd %xmm0, %eax 1053; X64-SSE42-NEXT: notb %al 1054; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1055; X64-SSE42-NEXT: retq 1056; 1057; X64-AVX1-LABEL: test_reduce_v32i8: 1058; X64-AVX1: ## %bb.0: 1059; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1060; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1061; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1062; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1063; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1064; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1065; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1066; X64-AVX1-NEXT: vmovd %xmm0, %eax 1067; X64-AVX1-NEXT: notb %al 1068; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1069; X64-AVX1-NEXT: vzeroupper 1070; X64-AVX1-NEXT: retq 1071; 1072; X64-AVX2-LABEL: test_reduce_v32i8: 1073; X64-AVX2: ## %bb.0: 1074; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1075; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1076; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1077; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1078; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1079; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1080; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1081; X64-AVX2-NEXT: vmovd %xmm0, %eax 1082; X64-AVX2-NEXT: notb %al 1083; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1084; X64-AVX2-NEXT: vzeroupper 1085; X64-AVX2-NEXT: retq 1086; 1087; X64-AVX512-LABEL: test_reduce_v32i8: 1088; X64-AVX512: ## %bb.0: 1089; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1090; X64-AVX512-NEXT: vpmaxub %xmm1, 
%xmm0, %xmm0 1091; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1092; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1093; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1094; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1095; X64-AVX512-NEXT: vmovd %xmm0, %eax 1096; X64-AVX512-NEXT: notb %al 1097; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1098; X64-AVX512-NEXT: vzeroupper 1099; X64-AVX512-NEXT: retq 1100 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1101 %2 = icmp ugt <32 x i8> %a0, %1 1102 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1103 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1104 %5 = icmp ugt <32 x i8> %3, %4 1105 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1106 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1107 %8 = icmp ugt <32 x i8> %6, %7 1108 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1109 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1110 %11 = icmp ugt <32 x i8> %9, %10 1111 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1112 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1113 %14 = icmp ugt <32 x i8> %12, %13 1114 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1115 %16 = extractelement <32 x i8> %15, i32 0 1116 ret i8 %16 1117} 1118 1119; 1120; 512-bit Vectors 1121; 1122 1123define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1124; X86-SSE2-LABEL: test_reduce_v8i64: 1125; X86-SSE2: ## %bb.0: 1126; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1127; X86-SSE2-NEXT: movdqa %xmm2, %xmm5 1128; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1129; X86-SSE2-NEXT: movdqa %xmm0, %xmm6 1130; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1131; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1132; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1133; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1134; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1135; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1136; X86-SSE2-NEXT: pand %xmm5, %xmm6 1137; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1138; X86-SSE2-NEXT: por %xmm6, %xmm5 1139; X86-SSE2-NEXT: pand %xmm5, %xmm0 1140; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1141; X86-SSE2-NEXT: por %xmm0, %xmm5 1142; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1143; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1144; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1145; 
X86-SSE2-NEXT: pxor %xmm4, %xmm2 1146; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1147; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1148; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1149; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1150; X86-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] 1151; X86-SSE2-NEXT: pand %xmm0, %xmm7 1152; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1153; X86-SSE2-NEXT: por %xmm7, %xmm2 1154; X86-SSE2-NEXT: pand %xmm2, %xmm1 1155; X86-SSE2-NEXT: pandn %xmm3, %xmm2 1156; X86-SSE2-NEXT: por %xmm1, %xmm2 1157; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 1158; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1159; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 1160; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1161; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 1162; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1163; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1164; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1165; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1166; X86-SSE2-NEXT: pand %xmm0, %xmm1 1167; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 1168; X86-SSE2-NEXT: por %xmm1, %xmm0 1169; X86-SSE2-NEXT: pand %xmm0, %xmm5 1170; X86-SSE2-NEXT: pandn %xmm2, %xmm0 1171; X86-SSE2-NEXT: por %xmm5, %xmm0 1172; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1173; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1174; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1175; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1176; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1177; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1178; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1179; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1180; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1181; X86-SSE2-NEXT: pand %xmm2, %xmm4 1182; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1183; X86-SSE2-NEXT: por %xmm4, %xmm2 1184; X86-SSE2-NEXT: pand %xmm2, %xmm0 1185; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1186; X86-SSE2-NEXT: por %xmm0, %xmm2 1187; X86-SSE2-NEXT: movd %xmm2, %eax 1188; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1189; X86-SSE2-NEXT: movd %xmm0, %edx 1190; 
X86-SSE2-NEXT: retl 1191; 1192; X86-SSE42-LABEL: test_reduce_v8i64: 1193; X86-SSE42: ## %bb.0: 1194; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1195; X86-SSE42-NEXT: movdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648] 1196; X86-SSE42-NEXT: movdqa %xmm2, %xmm6 1197; X86-SSE42-NEXT: pxor %xmm5, %xmm6 1198; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1199; X86-SSE42-NEXT: pcmpgtq %xmm6, %xmm0 1200; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1201; X86-SSE42-NEXT: movdqa %xmm3, %xmm4 1202; X86-SSE42-NEXT: pxor %xmm5, %xmm4 1203; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1204; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1205; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1206; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1207; X86-SSE42-NEXT: movapd %xmm3, %xmm1 1208; X86-SSE42-NEXT: xorpd %xmm5, %xmm1 1209; X86-SSE42-NEXT: movapd %xmm2, %xmm0 1210; X86-SSE42-NEXT: xorpd %xmm5, %xmm0 1211; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1212; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1213; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1214; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1215; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1216; X86-SSE42-NEXT: pxor %xmm1, %xmm5 1217; X86-SSE42-NEXT: pcmpgtq %xmm5, %xmm0 1218; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1219; X86-SSE42-NEXT: movd %xmm1, %eax 1220; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1221; X86-SSE42-NEXT: retl 1222; 1223; X86-AVX1-LABEL: test_reduce_v8i64: 1224; X86-AVX1: ## %bb.0: 1225; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648] 1226; X86-AVX1-NEXT: ## xmm2 = mem[0,0] 1227; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3 1228; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4 1229; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1230; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm3 1231; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1232; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm4 1233; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1234; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm5 1235; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 1236; X86-AVX1-NEXT: vblendvpd 
%xmm4, %xmm0, %xmm1, %xmm0 1237; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1 1238; X86-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4 1239; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1 1240; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm0, %xmm0 1241; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1242; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1243; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1244; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1245; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1246; X86-AVX1-NEXT: vmovd %xmm0, %eax 1247; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1248; X86-AVX1-NEXT: vzeroupper 1249; X86-AVX1-NEXT: retl 1250; 1251; X86-AVX2-LABEL: test_reduce_v8i64: 1252; X86-AVX2: ## %bb.0: 1253; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648] 1254; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 1255; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm4 1256; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 1257; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0 1258; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1259; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3 1260; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4 1261; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1262; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1263; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1264; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1265; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1266; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1267; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1268; X86-AVX2-NEXT: vmovd %xmm0, %eax 1269; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1270; X86-AVX2-NEXT: vzeroupper 1271; X86-AVX2-NEXT: retl 1272; 1273; X64-SSE2-LABEL: test_reduce_v8i64: 1274; X64-SSE2: ## %bb.0: 1275; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] 1276; X64-SSE2-NEXT: movdqa %xmm2, %xmm5 1277; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1278; X64-SSE2-NEXT: movdqa %xmm0, %xmm6 1279; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1280; 
X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1281; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1282; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1283; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1284; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1285; X64-SSE2-NEXT: pand %xmm8, %xmm6 1286; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1287; X64-SSE2-NEXT: por %xmm6, %xmm5 1288; X64-SSE2-NEXT: pand %xmm5, %xmm0 1289; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1290; X64-SSE2-NEXT: por %xmm0, %xmm5 1291; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1292; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1293; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1294; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1295; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1296; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1297; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1298; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1299; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 1300; X64-SSE2-NEXT: pand %xmm7, %xmm0 1301; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1302; X64-SSE2-NEXT: por %xmm0, %xmm2 1303; X64-SSE2-NEXT: pand %xmm2, %xmm1 1304; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1305; X64-SSE2-NEXT: por %xmm1, %xmm2 1306; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1307; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1308; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1309; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1310; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 1311; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1312; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 1313; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1314; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1315; X64-SSE2-NEXT: pand %xmm6, %xmm0 1316; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1317; X64-SSE2-NEXT: por %xmm0, %xmm1 1318; X64-SSE2-NEXT: pand %xmm1, %xmm5 1319; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1320; X64-SSE2-NEXT: por %xmm5, %xmm1 1321; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1322; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1323; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1324; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1325; X64-SSE2-NEXT: movdqa 
%xmm2, %xmm3 1326; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1327; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1328; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1329; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1330; X64-SSE2-NEXT: pand %xmm5, %xmm2 1331; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1332; X64-SSE2-NEXT: por %xmm2, %xmm3 1333; X64-SSE2-NEXT: pand %xmm3, %xmm1 1334; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1335; X64-SSE2-NEXT: por %xmm1, %xmm3 1336; X64-SSE2-NEXT: movq %xmm3, %rax 1337; X64-SSE2-NEXT: retq 1338; 1339; X64-SSE42-LABEL: test_reduce_v8i64: 1340; X64-SSE42: ## %bb.0: 1341; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1342; X64-SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808] 1343; X64-SSE42-NEXT: movdqa %xmm2, %xmm6 1344; X64-SSE42-NEXT: pxor %xmm5, %xmm6 1345; X64-SSE42-NEXT: pxor %xmm5, %xmm0 1346; X64-SSE42-NEXT: pcmpgtq %xmm6, %xmm0 1347; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1348; X64-SSE42-NEXT: movdqa %xmm3, %xmm4 1349; X64-SSE42-NEXT: pxor %xmm5, %xmm4 1350; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1351; X64-SSE42-NEXT: pxor %xmm5, %xmm0 1352; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1353; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1354; X64-SSE42-NEXT: movapd %xmm3, %xmm1 1355; X64-SSE42-NEXT: xorpd %xmm5, %xmm1 1356; X64-SSE42-NEXT: movapd %xmm2, %xmm0 1357; X64-SSE42-NEXT: xorpd %xmm5, %xmm0 1358; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1359; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1360; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1361; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1362; X64-SSE42-NEXT: pxor %xmm5, %xmm0 1363; X64-SSE42-NEXT: pxor %xmm1, %xmm5 1364; X64-SSE42-NEXT: pcmpgtq %xmm5, %xmm0 1365; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1366; X64-SSE42-NEXT: movq %xmm1, %rax 1367; X64-SSE42-NEXT: retq 1368; 1369; X64-AVX1-LABEL: test_reduce_v8i64: 1370; X64-AVX1: ## %bb.0: 1371; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 1372; X64-AVX1-NEXT: ## xmm2 = 
mem[0,0] 1373; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3 1374; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm4 1375; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1376; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm3 1377; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1378; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm4 1379; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1380; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm5 1381; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 1382; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm0 1383; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1 1384; X64-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4 1385; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1 1386; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm0, %xmm0 1387; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1388; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1389; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1390; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1391; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1392; X64-AVX1-NEXT: vmovq %xmm0, %rax 1393; X64-AVX1-NEXT: vzeroupper 1394; X64-AVX1-NEXT: retq 1395; 1396; X64-AVX2-LABEL: test_reduce_v8i64: 1397; X64-AVX2: ## %bb.0: 1398; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 1399; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 1400; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm4 1401; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 1402; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0 1403; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1404; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3 1405; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4 1406; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1407; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1408; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1409; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1410; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1411; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1412; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, 
%xmm0 1413; X64-AVX2-NEXT: vmovq %xmm0, %rax 1414; X64-AVX2-NEXT: vzeroupper 1415; X64-AVX2-NEXT: retq 1416; 1417; X64-AVX512-LABEL: test_reduce_v8i64: 1418; X64-AVX512: ## %bb.0: 1419; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1420; X64-AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0 1421; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1422; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 1423; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1424; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 1425; X64-AVX512-NEXT: vmovq %xmm0, %rax 1426; X64-AVX512-NEXT: vzeroupper 1427; X64-AVX512-NEXT: retq 1428 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1429 %2 = icmp ugt <8 x i64> %a0, %1 1430 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1431 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1432 %5 = icmp ugt <8 x i64> %3, %4 1433 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1434 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1435 %8 = icmp ugt <8 x i64> %6, %7 1436 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1437 %10 = extractelement <8 x i64> %9, i32 0 1438 ret i64 %10 1439} 1440 1441define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1442; X86-SSE2-LABEL: test_reduce_v16i32: 1443; X86-SSE2: ## %bb.0: 1444; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1445; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1446; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1447; X86-SSE2-NEXT: movdqa %xmm0, %xmm5 1448; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1449; X86-SSE2-NEXT: pcmpgtd %xmm6, %xmm5 1450; X86-SSE2-NEXT: pand %xmm5, %xmm0 1451; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1452; X86-SSE2-NEXT: por %xmm0, %xmm5 1453; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 1454; X86-SSE2-NEXT: pxor %xmm4, 
%xmm2 1455; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1456; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1457; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1458; X86-SSE2-NEXT: pand %xmm0, %xmm1 1459; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1460; X86-SSE2-NEXT: por %xmm1, %xmm0 1461; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1462; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1463; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 1464; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1465; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1466; X86-SSE2-NEXT: pand %xmm1, %xmm5 1467; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1468; X86-SSE2-NEXT: por %xmm5, %xmm1 1469; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 1470; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1471; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1472; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1473; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1474; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm0 1475; X86-SSE2-NEXT: pand %xmm0, %xmm1 1476; X86-SSE2-NEXT: pandn %xmm2, %xmm0 1477; X86-SSE2-NEXT: por %xmm1, %xmm0 1478; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1479; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1480; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1481; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1482; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm2 1483; X86-SSE2-NEXT: pand %xmm2, %xmm0 1484; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1485; X86-SSE2-NEXT: por %xmm0, %xmm2 1486; X86-SSE2-NEXT: movd %xmm2, %eax 1487; X86-SSE2-NEXT: retl 1488; 1489; X86-SSE42-LABEL: test_reduce_v16i32: 1490; X86-SSE42: ## %bb.0: 1491; X86-SSE42-NEXT: pmaxud %xmm3, %xmm1 1492; X86-SSE42-NEXT: pmaxud %xmm2, %xmm0 1493; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0 1494; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1495; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1 1496; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1497; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0 1498; X86-SSE42-NEXT: movd %xmm0, %eax 1499; X86-SSE42-NEXT: retl 1500; 1501; X86-AVX1-LABEL: test_reduce_v16i32: 1502; X86-AVX1: ## %bb.0: 1503; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1504; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1505; 
X86-AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2 1506; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1507; X86-AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0 1508; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1509; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1510; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1511; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1512; X86-AVX1-NEXT: vmovd %xmm0, %eax 1513; X86-AVX1-NEXT: vzeroupper 1514; X86-AVX1-NEXT: retl 1515; 1516; X86-AVX2-LABEL: test_reduce_v16i32: 1517; X86-AVX2: ## %bb.0: 1518; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 1519; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1520; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1521; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1522; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1523; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1524; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1525; X86-AVX2-NEXT: vmovd %xmm0, %eax 1526; X86-AVX2-NEXT: vzeroupper 1527; X86-AVX2-NEXT: retl 1528; 1529; X64-SSE2-LABEL: test_reduce_v16i32: 1530; X64-SSE2: ## %bb.0: 1531; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1532; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1533; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1534; X64-SSE2-NEXT: movdqa %xmm0, %xmm5 1535; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1536; X64-SSE2-NEXT: pcmpgtd %xmm6, %xmm5 1537; X64-SSE2-NEXT: pand %xmm5, %xmm0 1538; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1539; X64-SSE2-NEXT: por %xmm0, %xmm5 1540; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1541; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1542; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1543; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1544; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1545; X64-SSE2-NEXT: pand %xmm2, %xmm1 1546; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1547; X64-SSE2-NEXT: por %xmm1, %xmm2 1548; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1549; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1550; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1551; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1552; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1553; 
X64-SSE2-NEXT: pand %xmm1, %xmm5 1554; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1555; X64-SSE2-NEXT: por %xmm5, %xmm1 1556; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1557; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1558; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1559; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 1560; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1561; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm2 1562; X64-SSE2-NEXT: pand %xmm2, %xmm1 1563; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1564; X64-SSE2-NEXT: por %xmm1, %xmm2 1565; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1566; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1567; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1568; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1569; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1570; X64-SSE2-NEXT: pand %xmm1, %xmm2 1571; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1572; X64-SSE2-NEXT: por %xmm2, %xmm1 1573; X64-SSE2-NEXT: movd %xmm1, %eax 1574; X64-SSE2-NEXT: retq 1575; 1576; X64-SSE42-LABEL: test_reduce_v16i32: 1577; X64-SSE42: ## %bb.0: 1578; X64-SSE42-NEXT: pmaxud %xmm3, %xmm1 1579; X64-SSE42-NEXT: pmaxud %xmm2, %xmm0 1580; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0 1581; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1582; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1 1583; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1584; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0 1585; X64-SSE42-NEXT: movd %xmm0, %eax 1586; X64-SSE42-NEXT: retq 1587; 1588; X64-AVX1-LABEL: test_reduce_v16i32: 1589; X64-AVX1: ## %bb.0: 1590; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1591; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1592; X64-AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2 1593; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1594; X64-AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0 1595; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1596; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1597; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1598; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1599; X64-AVX1-NEXT: vmovd %xmm0, %eax 1600; X64-AVX1-NEXT: vzeroupper 1601; X64-AVX1-NEXT: retq 1602; 
1603; X64-AVX2-LABEL: test_reduce_v16i32: 1604; X64-AVX2: ## %bb.0: 1605; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 1606; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1607; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1608; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1609; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1610; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1611; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1612; X64-AVX2-NEXT: vmovd %xmm0, %eax 1613; X64-AVX2-NEXT: vzeroupper 1614; X64-AVX2-NEXT: retq 1615; 1616; X64-AVX512-LABEL: test_reduce_v16i32: 1617; X64-AVX512: ## %bb.0: 1618; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1619; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0 1620; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1621; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1622; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1623; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1624; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1625; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1626; X64-AVX512-NEXT: vmovd %xmm0, %eax 1627; X64-AVX512-NEXT: vzeroupper 1628; X64-AVX512-NEXT: retq 1629 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1630 %2 = icmp ugt <16 x i32> %a0, %1 1631 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1632 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1633 %5 = icmp ugt <16 x i32> %3, %4 1634 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1635 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef> 1636 %8 = icmp ugt <16 x i32> %6, %7 1637 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1638 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1639 %11 = icmp ugt <16 x i32> %9, %10 1640 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1641 %13 = extractelement <16 x i32> %12, i32 0 1642 ret i32 %13 1643} 1644 1645define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1646; X86-SSE2-LABEL: test_reduce_v32i16: 1647; X86-SSE2: ## %bb.0: 1648; X86-SSE2-NEXT: psubusw %xmm0, %xmm2 1649; X86-SSE2-NEXT: paddw %xmm0, %xmm2 1650; X86-SSE2-NEXT: psubusw %xmm1, %xmm3 1651; X86-SSE2-NEXT: paddw %xmm1, %xmm3 1652; X86-SSE2-NEXT: psubusw %xmm2, %xmm3 1653; X86-SSE2-NEXT: paddw %xmm2, %xmm3 1654; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 1655; X86-SSE2-NEXT: psubusw %xmm3, %xmm0 1656; X86-SSE2-NEXT: paddw %xmm3, %xmm0 1657; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1658; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 1659; X86-SSE2-NEXT: paddw %xmm0, %xmm1 1660; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1661; X86-SSE2-NEXT: psrld $16, %xmm0 1662; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 1663; X86-SSE2-NEXT: paddw %xmm1, %xmm0 1664; X86-SSE2-NEXT: movd %xmm0, %eax 1665; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1666; X86-SSE2-NEXT: retl 1667; 1668; X86-SSE42-LABEL: test_reduce_v32i16: 1669; X86-SSE42: ## %bb.0: 1670; X86-SSE42-NEXT: pmaxuw %xmm3, %xmm1 1671; X86-SSE42-NEXT: pmaxuw %xmm2, %xmm0 1672; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0 1673; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 1674; X86-SSE42-NEXT: pxor %xmm0, %xmm1 1675; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1676; X86-SSE42-NEXT: movd %xmm0, %eax 1677; X86-SSE42-NEXT: notl %eax 1678; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1679; X86-SSE42-NEXT: retl 1680; 1681; X86-AVX1-LABEL: 
test_reduce_v32i16: 1682; X86-AVX1: ## %bb.0: 1683; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1684; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1685; X86-AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2 1686; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1687; X86-AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0 1688; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1689; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1690; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1691; X86-AVX1-NEXT: vmovd %xmm0, %eax 1692; X86-AVX1-NEXT: notl %eax 1693; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1694; X86-AVX1-NEXT: vzeroupper 1695; X86-AVX1-NEXT: retl 1696; 1697; X86-AVX2-LABEL: test_reduce_v32i16: 1698; X86-AVX2: ## %bb.0: 1699; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 1700; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1701; X86-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1702; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1703; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1704; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1705; X86-AVX2-NEXT: vmovd %xmm0, %eax 1706; X86-AVX2-NEXT: notl %eax 1707; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1708; X86-AVX2-NEXT: vzeroupper 1709; X86-AVX2-NEXT: retl 1710; 1711; X64-SSE2-LABEL: test_reduce_v32i16: 1712; X64-SSE2: ## %bb.0: 1713; X64-SSE2-NEXT: psubusw %xmm0, %xmm2 1714; X64-SSE2-NEXT: paddw %xmm0, %xmm2 1715; X64-SSE2-NEXT: psubusw %xmm1, %xmm3 1716; X64-SSE2-NEXT: paddw %xmm1, %xmm3 1717; X64-SSE2-NEXT: psubusw %xmm2, %xmm3 1718; X64-SSE2-NEXT: paddw %xmm2, %xmm3 1719; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 1720; X64-SSE2-NEXT: psubusw %xmm3, %xmm0 1721; X64-SSE2-NEXT: paddw %xmm3, %xmm0 1722; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1723; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 1724; X64-SSE2-NEXT: paddw %xmm0, %xmm1 1725; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1726; X64-SSE2-NEXT: psrld $16, %xmm0 1727; X64-SSE2-NEXT: psubusw %xmm1, %xmm0 1728; X64-SSE2-NEXT: paddw %xmm1, %xmm0 1729; X64-SSE2-NEXT: movd %xmm0, %eax 1730; 
X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1731; X64-SSE2-NEXT: retq 1732; 1733; X64-SSE42-LABEL: test_reduce_v32i16: 1734; X64-SSE42: ## %bb.0: 1735; X64-SSE42-NEXT: pmaxuw %xmm3, %xmm1 1736; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm0 1737; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0 1738; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 1739; X64-SSE42-NEXT: pxor %xmm0, %xmm1 1740; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1741; X64-SSE42-NEXT: movd %xmm0, %eax 1742; X64-SSE42-NEXT: notl %eax 1743; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1744; X64-SSE42-NEXT: retq 1745; 1746; X64-AVX1-LABEL: test_reduce_v32i16: 1747; X64-AVX1: ## %bb.0: 1748; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1749; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1750; X64-AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2 1751; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1752; X64-AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0 1753; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1754; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1755; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1756; X64-AVX1-NEXT: vmovd %xmm0, %eax 1757; X64-AVX1-NEXT: notl %eax 1758; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1759; X64-AVX1-NEXT: vzeroupper 1760; X64-AVX1-NEXT: retq 1761; 1762; X64-AVX2-LABEL: test_reduce_v32i16: 1763; X64-AVX2: ## %bb.0: 1764; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 1765; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1766; X64-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1767; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1768; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1769; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1770; X64-AVX2-NEXT: vmovd %xmm0, %eax 1771; X64-AVX2-NEXT: notl %eax 1772; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1773; X64-AVX2-NEXT: vzeroupper 1774; X64-AVX2-NEXT: retq 1775; 1776; X64-AVX512-LABEL: test_reduce_v32i16: 1777; X64-AVX512: ## %bb.0: 1778; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1779; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 1780; X64-AVX512-NEXT: 
vextracti128 $1, %ymm0, %xmm1 1781; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1782; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1783; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1784; X64-AVX512-NEXT: vmovd %xmm0, %eax 1785; X64-AVX512-NEXT: notl %eax 1786; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1787; X64-AVX512-NEXT: vzeroupper 1788; X64-AVX512-NEXT: retq 1789 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1790 %2 = icmp ugt <32 x i16> %a0, %1 1791 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1792 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1793 %5 = icmp ugt <32 x i16> %3, %4 1794 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1795 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1796 %8 = icmp ugt <32 x i16> %6, %7 1797 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1798 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1799 %11 = icmp ugt <32 x i16> %9, %10 1800 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1801 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1802 %14 = icmp ugt <32 x i16> %12, %13 1803 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1804 %16 = extractelement <32 x i16> %15, i32 0 1805 ret i16 %16 1806} 1807 1808define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1809; X86-SSE2-LABEL: test_reduce_v64i8: 1810; X86-SSE2: ## %bb.0: 1811; X86-SSE2-NEXT: pmaxub %xmm3, %xmm1 1812; X86-SSE2-NEXT: pmaxub %xmm2, %xmm0 1813; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 1814; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1815; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 1816; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1817; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 1818; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1819; X86-SSE2-NEXT: psrld $16, %xmm1 1820; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 1821; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1822; X86-SSE2-NEXT: psrlw $8, %xmm0 1823; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 1824; X86-SSE2-NEXT: movd %xmm0, %eax 1825; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1826; X86-SSE2-NEXT: retl 1827; 1828; X86-SSE42-LABEL: test_reduce_v64i8: 1829; X86-SSE42: ## %bb.0: 1830; X86-SSE42-NEXT: pmaxub %xmm3, %xmm1 1831; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0 1832; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0 1833; X86-SSE42-NEXT: 
pcmpeqd %xmm1, %xmm1 1834; X86-SSE42-NEXT: pxor %xmm0, %xmm1 1835; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1836; X86-SSE42-NEXT: psrlw $8, %xmm0 1837; X86-SSE42-NEXT: pminub %xmm1, %xmm0 1838; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1839; X86-SSE42-NEXT: movd %xmm0, %eax 1840; X86-SSE42-NEXT: notb %al 1841; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1842; X86-SSE42-NEXT: retl 1843; 1844; X86-AVX1-LABEL: test_reduce_v64i8: 1845; X86-AVX1: ## %bb.0: 1846; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1847; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1848; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2 1849; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1850; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0 1851; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1852; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1853; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1854; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1855; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1856; X86-AVX1-NEXT: vmovd %xmm0, %eax 1857; X86-AVX1-NEXT: notb %al 1858; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1859; X86-AVX1-NEXT: vzeroupper 1860; X86-AVX1-NEXT: retl 1861; 1862; X86-AVX2-LABEL: test_reduce_v64i8: 1863; X86-AVX2: ## %bb.0: 1864; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 1865; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1866; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1867; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1868; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1869; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1870; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1871; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1872; X86-AVX2-NEXT: vmovd %xmm0, %eax 1873; X86-AVX2-NEXT: notb %al 1874; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1875; X86-AVX2-NEXT: vzeroupper 1876; X86-AVX2-NEXT: retl 1877; 1878; X64-SSE2-LABEL: test_reduce_v64i8: 1879; X64-SSE2: ## %bb.0: 1880; X64-SSE2-NEXT: pmaxub %xmm3, %xmm1 1881; X64-SSE2-NEXT: pmaxub %xmm2, %xmm0 1882; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1883; X64-SSE2-NEXT: 
pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1884; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1885; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1886; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1887; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1888; X64-SSE2-NEXT: psrld $16, %xmm1 1889; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1890; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1891; X64-SSE2-NEXT: psrlw $8, %xmm0 1892; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1893; X64-SSE2-NEXT: movd %xmm0, %eax 1894; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1895; X64-SSE2-NEXT: retq 1896; 1897; X64-SSE42-LABEL: test_reduce_v64i8: 1898; X64-SSE42: ## %bb.0: 1899; X64-SSE42-NEXT: pmaxub %xmm3, %xmm1 1900; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0 1901; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0 1902; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 1903; X64-SSE42-NEXT: pxor %xmm0, %xmm1 1904; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1905; X64-SSE42-NEXT: psrlw $8, %xmm0 1906; X64-SSE42-NEXT: pminub %xmm1, %xmm0 1907; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1908; X64-SSE42-NEXT: movd %xmm0, %eax 1909; X64-SSE42-NEXT: notb %al 1910; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1911; X64-SSE42-NEXT: retq 1912; 1913; X64-AVX1-LABEL: test_reduce_v64i8: 1914; X64-AVX1: ## %bb.0: 1915; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1916; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1917; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2 1918; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1919; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0 1920; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1921; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1922; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1923; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1924; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1925; X64-AVX1-NEXT: vmovd %xmm0, %eax 1926; X64-AVX1-NEXT: notb %al 1927; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1928; X64-AVX1-NEXT: vzeroupper 1929; X64-AVX1-NEXT: retq 1930; 1931; X64-AVX2-LABEL: test_reduce_v64i8: 1932; X64-AVX2: ## %bb.0: 1933; X64-AVX2-NEXT: 
vpmaxub %ymm1, %ymm0, %ymm0 1934; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1935; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1936; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1937; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1938; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1939; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1940; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1941; X64-AVX2-NEXT: vmovd %xmm0, %eax 1942; X64-AVX2-NEXT: notb %al 1943; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1944; X64-AVX2-NEXT: vzeroupper 1945; X64-AVX2-NEXT: retq 1946; 1947; X64-AVX512-LABEL: test_reduce_v64i8: 1948; X64-AVX512: ## %bb.0: 1949; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1950; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 1951; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1952; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1953; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1954; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1955; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1956; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1957; X64-AVX512-NEXT: vmovd %xmm0, %eax 1958; X64-AVX512-NEXT: notb %al 1959; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1960; X64-AVX512-NEXT: vzeroupper 1961; X64-AVX512-NEXT: retq 1962 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1963 %2 = icmp ugt <64 x i8> %a0, %1 1964 %3 = select <64 x i1> %2, <64 x i8> 
%a0, <64 x i8> %1 1965 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1966 %5 = icmp ugt <64 x i8> %3, %4 1967 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1968 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1969 %8 = icmp ugt <64 x i8> %6, %7 1970 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1971 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1972 %11 = icmp ugt <64 x i8> %9, %10 1973 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1974 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1975 %14 = icmp ugt <64 x i8> %12, %13 1976 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1977 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1978 %17 = icmp ugt <64 x i8> %15, %16 1979 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1980 %19 = extractelement <64 x i8> %18, i32 0 1981 ret i8 %19 1982} 1983 1984; 1985; Partial Vector Reductions 1986; 1987 1988define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1989; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1990; X86-SSE2: ## %bb.0: 1991; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1992; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 1993; X86-SSE2-NEXT: paddw %xmm0, %xmm1 1994; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1995; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 1996; X86-SSE2-NEXT: paddw %xmm1, %xmm0 1997; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1998; X86-SSE2-NEXT: psrld $16, %xmm1 1999; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 2000; X86-SSE2-NEXT: paddw %xmm0, %xmm1 2001; X86-SSE2-NEXT: movd %xmm1, %eax 2002; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2003; X86-SSE2-NEXT: retl 2004; 2005; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 2006; X86-SSE42: ## %bb.0: 2007; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 2008; X86-SSE42-NEXT: pxor %xmm0, %xmm1 2009; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2010; X86-SSE42-NEXT: movd %xmm0, %eax 2011; X86-SSE42-NEXT: notl %eax 2012; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 2013; X86-SSE42-NEXT: retl 2014; 2015; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 2016; X86-AVX: ## %bb.0: 2017; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 2018; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 2019; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2020; X86-AVX-NEXT: vmovd %xmm0, %eax 2021; X86-AVX-NEXT: notl %eax 2022; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 2023; X86-AVX-NEXT: vzeroupper 2024; X86-AVX-NEXT: retl 2025; 2026; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: 2027; X64-SSE2: ## %bb.0: 2028; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[2,3,2,3] 2029; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 2030; X64-SSE2-NEXT: paddw %xmm0, %xmm1 2031; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2032; X64-SSE2-NEXT: psubusw %xmm1, %xmm0 2033; X64-SSE2-NEXT: paddw %xmm1, %xmm0 2034; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2035; X64-SSE2-NEXT: psrld $16, %xmm1 2036; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 2037; X64-SSE2-NEXT: paddw %xmm0, %xmm1 2038; X64-SSE2-NEXT: movd %xmm1, %eax 2039; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2040; X64-SSE2-NEXT: retq 2041; 2042; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 2043; X64-SSE42: ## %bb.0: 2044; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 2045; X64-SSE42-NEXT: pxor %xmm0, %xmm1 2046; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2047; X64-SSE42-NEXT: movd %xmm0, %eax 2048; X64-SSE42-NEXT: notl %eax 2049; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 2050; X64-SSE42-NEXT: retq 2051; 2052; X64-AVX1-LABEL: test_reduce_v16i16_v8i16: 2053; X64-AVX1: ## %bb.0: 2054; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 2055; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 2056; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 2057; X64-AVX1-NEXT: vmovd %xmm0, %eax 2058; X64-AVX1-NEXT: notl %eax 2059; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 2060; X64-AVX1-NEXT: vzeroupper 2061; X64-AVX1-NEXT: retq 2062; 2063; X64-AVX2-LABEL: test_reduce_v16i16_v8i16: 2064; X64-AVX2: ## %bb.0: 2065; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 2066; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 2067; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 2068; X64-AVX2-NEXT: vmovd %xmm0, %eax 2069; X64-AVX2-NEXT: notl %eax 2070; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 2071; X64-AVX2-NEXT: vzeroupper 2072; X64-AVX2-NEXT: retq 2073; 2074; X64-AVX512-LABEL: test_reduce_v16i16_v8i16: 2075; X64-AVX512: ## %bb.0: 2076; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 2077; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 2078; X64-AVX512-NEXT: vmovd %xmm0, %eax 2079; X64-AVX512-NEXT: notl %eax 2080; 
X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 2081; X64-AVX512-NEXT: vzeroupper 2082; X64-AVX512-NEXT: retq 2083 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2084 %2 = icmp ugt <16 x i16> %a0, %1 2085 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 2086 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2087 %5 = icmp ugt <16 x i16> %3, %4 2088 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 2089 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2090 %8 = icmp ugt <16 x i16> %6, %7 2091 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 2092 %10 = extractelement <16 x i16> %9, i32 0 2093 ret i16 %10 2094} 2095 2096define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 2097; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 2098; X86-SSE2: ## %bb.0: 2099; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2100; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 2101; X86-SSE2-NEXT: paddw %xmm0, %xmm1 2102; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2103; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 2104; X86-SSE2-NEXT: paddw %xmm1, %xmm0 2105; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 2106; X86-SSE2-NEXT: psrld $16, %xmm1 2107; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 2108; X86-SSE2-NEXT: paddw %xmm0, %xmm1 2109; X86-SSE2-NEXT: movd %xmm1, %eax 2110; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2111; X86-SSE2-NEXT: retl 2112; 2113; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: 2114; X86-SSE42: ## %bb.0: 2115; X86-SSE42-NEXT: 
; NOTE(review): this chunk was garbled by text extraction -- the original
; line breaks were lost and the original line numbers were fused into the
; content.  The text below restores the one-statement-per-line layout of
; this autogenerated test (see the update_llc_test_checks.py note at the
; top of the file).  Do not hand-edit the CHECK lines; regenerate them.
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notl %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notl %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notl %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notl %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notl %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <32 x i16> %a0, %1
  %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <32 x i16> %3, %4
  %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <32 x i16> %6, %7
  %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = extractelement <32 x i16> %9, i32 0
  ret i16 %10
}

define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <32 x i8> %a0, %1
  %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}