; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512

;
; 128-bit Vectors
;

define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: movd %xmm3, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm3
; X86-SSE42-NEXT: pxor %xmm0, %xmm3
; X86-SSE42-NEXT: pxor %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm3
; X64-SSE42-NEXT: pxor %xmm0, %xmm3
; X64-SSE42-NEXT: pxor %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT: ## xmm2 = mem[0,0]
; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp ult <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
; X86-SSE2-NEXT: pxor %xmm1, %xmm4
; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm3, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm3
; X86-SSE2-NEXT: pxor %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movd %xmm2, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm6, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
; X86-SSE2-NEXT: por %xmm4, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm3
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm3, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm3
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
; X86-SSE42-NEXT: pxor %xmm2, %xmm4
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm3, %xmm2
; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm2
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; X86-SSE42-NEXT: movd %xmm3, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm3, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm1 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm1, %xmm3, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT: por %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movq %xmm2, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
; X64-SSE42-NEXT: pxor %xmm3, %xmm4
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm2, %xmm3
; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm3
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT: ## xmm1 = mem[0,0]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpxor %xmm1, %xmm3, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm1, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm0
; X86-SSE2-NEXT: pand %xmm0, %xmm3
; X86-SSE2-NEXT: pandn %xmm1, %xmm0
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm3
; X64-SSE2-NEXT: por %xmm4, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm3, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm3
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm3, %xmm2
; X64-SSE2-NEXT: movd %xmm2, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i32> %a0, %1
  %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i32> %3, %4
  %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i32> %6, %7
  %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i16> %a0, %1
  %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i16> %3, %4
  %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i16> %6, %7
  %9 = select <16 x i1>
%8, <16 x i16> %6, <16 x i16> %7 891 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 892 %11 = icmp ult <16 x i16> %9, %10 893 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 894 %13 = extractelement <16 x i16> %12, i32 0 895 ret i16 %13 896} 897 898define i8 @test_reduce_v32i8(<32 x i8> %a0) { 899; X86-SSE2-LABEL: test_reduce_v32i8: 900; X86-SSE2: ## %bb.0: 901; X86-SSE2-NEXT: pminub %xmm1, %xmm0 902; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 903; X86-SSE2-NEXT: pminub %xmm0, %xmm1 904; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 905; X86-SSE2-NEXT: pminub %xmm1, %xmm0 906; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 907; X86-SSE2-NEXT: psrld $16, %xmm1 908; X86-SSE2-NEXT: pminub %xmm0, %xmm1 909; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 910; X86-SSE2-NEXT: psrlw $8, %xmm0 911; X86-SSE2-NEXT: pminub %xmm1, %xmm0 912; X86-SSE2-NEXT: movd %xmm0, %eax 913; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 914; X86-SSE2-NEXT: retl 915; 916; X86-SSE42-LABEL: test_reduce_v32i8: 917; X86-SSE42: ## %bb.0: 918; X86-SSE42-NEXT: pminub %xmm1, %xmm0 919; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 920; X86-SSE42-NEXT: psrlw $8, %xmm1 921; X86-SSE42-NEXT: pminub %xmm0, %xmm1 922; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 923; X86-SSE42-NEXT: movd %xmm0, %eax 924; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 925; X86-SSE42-NEXT: retl 926; 927; X86-AVX1-LABEL: test_reduce_v32i8: 928; X86-AVX1: ## %bb.0: 929; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 930; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 931; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 932; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 933; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 934; X86-AVX1-NEXT: vmovd %xmm0, %eax 935; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 936; X86-AVX1-NEXT: vzeroupper 937; 
X86-AVX1-NEXT: retl 938; 939; X86-AVX2-LABEL: test_reduce_v32i8: 940; X86-AVX2: ## %bb.0: 941; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 942; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 943; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 944; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 945; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 946; X86-AVX2-NEXT: vmovd %xmm0, %eax 947; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 948; X86-AVX2-NEXT: vzeroupper 949; X86-AVX2-NEXT: retl 950; 951; X64-SSE2-LABEL: test_reduce_v32i8: 952; X64-SSE2: ## %bb.0: 953; X64-SSE2-NEXT: pminub %xmm1, %xmm0 954; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 955; X64-SSE2-NEXT: pminub %xmm0, %xmm1 956; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 957; X64-SSE2-NEXT: pminub %xmm1, %xmm0 958; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 959; X64-SSE2-NEXT: psrld $16, %xmm1 960; X64-SSE2-NEXT: pminub %xmm0, %xmm1 961; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 962; X64-SSE2-NEXT: psrlw $8, %xmm0 963; X64-SSE2-NEXT: pminub %xmm1, %xmm0 964; X64-SSE2-NEXT: movd %xmm0, %eax 965; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 966; X64-SSE2-NEXT: retq 967; 968; X64-SSE42-LABEL: test_reduce_v32i8: 969; X64-SSE42: ## %bb.0: 970; X64-SSE42-NEXT: pminub %xmm1, %xmm0 971; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 972; X64-SSE42-NEXT: psrlw $8, %xmm1 973; X64-SSE42-NEXT: pminub %xmm0, %xmm1 974; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 975; X64-SSE42-NEXT: movd %xmm0, %eax 976; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 977; X64-SSE42-NEXT: retq 978; 979; X64-AVX1-LABEL: test_reduce_v32i8: 980; X64-AVX1: ## %bb.0: 981; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 982; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 983; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 984; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 985; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 986; X64-AVX1-NEXT: vmovd %xmm0, %eax 987; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 988; X64-AVX1-NEXT: vzeroupper 989; X64-AVX1-NEXT: retq 
990; 991; X64-AVX2-LABEL: test_reduce_v32i8: 992; X64-AVX2: ## %bb.0: 993; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 994; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 995; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 996; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 997; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 998; X64-AVX2-NEXT: vmovd %xmm0, %eax 999; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1000; X64-AVX2-NEXT: vzeroupper 1001; X64-AVX2-NEXT: retq 1002; 1003; X64-AVX512-LABEL: test_reduce_v32i8: 1004; X64-AVX512: ## %bb.0: 1005; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1006; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1007; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1008; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1009; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1010; X64-AVX512-NEXT: vmovd %xmm0, %eax 1011; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1012; X64-AVX512-NEXT: vzeroupper 1013; X64-AVX512-NEXT: retq 1014 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1015 %2 = icmp ult <32 x i8> %a0, %1 1016 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1017 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1018 %5 = icmp ult <32 x i8> %3, %4 1019 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1020 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, 
i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1021 %8 = icmp ult <32 x i8> %6, %7 1022 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1023 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1024 %11 = icmp ult <32 x i8> %9, %10 1025 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1026 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1027 %14 = icmp ult <32 x i8> %12, %13 1028 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1029 %16 = extractelement <32 x i8> %15, i32 0 1030 ret i8 %16 1031} 1032 1033; 1034; 512-bit Vectors 1035; 1036 1037define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1038; X86-SSE2-LABEL: test_reduce_v8i64: 1039; X86-SSE2: ## %bb.0: 1040; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1041; X86-SSE2-NEXT: movdqa %xmm1, %xmm5 1042; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1043; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1044; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1045; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1046; X86-SSE2-NEXT: pcmpgtd %xmm5, 
%xmm7 1047; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1048; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1049; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1050; X86-SSE2-NEXT: pand %xmm5, %xmm6 1051; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1052; X86-SSE2-NEXT: por %xmm6, %xmm5 1053; X86-SSE2-NEXT: pand %xmm5, %xmm1 1054; X86-SSE2-NEXT: pandn %xmm3, %xmm5 1055; X86-SSE2-NEXT: por %xmm1, %xmm5 1056; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1057; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1058; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1059; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1060; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1061; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1062; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1063; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2] 1064; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1065; X86-SSE2-NEXT: pand %xmm1, %xmm3 1066; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] 1067; X86-SSE2-NEXT: por %xmm3, %xmm1 1068; X86-SSE2-NEXT: pand %xmm1, %xmm0 1069; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1070; X86-SSE2-NEXT: por %xmm0, %xmm1 1071; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1072; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1073; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1074; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1075; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1076; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1077; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1078; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1079; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1080; X86-SSE2-NEXT: pand %xmm0, %xmm2 1081; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 1082; X86-SSE2-NEXT: por %xmm2, %xmm0 1083; X86-SSE2-NEXT: pand %xmm0, %xmm1 1084; X86-SSE2-NEXT: pandn %xmm5, %xmm0 1085; X86-SSE2-NEXT: por %xmm1, %xmm0 1086; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1087; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1088; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1089; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1090; X86-SSE2-NEXT: movdqa %xmm4, %xmm3 1091; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1092; 
X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1093; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1094; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1095; X86-SSE2-NEXT: pand %xmm2, %xmm4 1096; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1097; X86-SSE2-NEXT: por %xmm4, %xmm2 1098; X86-SSE2-NEXT: pand %xmm2, %xmm0 1099; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1100; X86-SSE2-NEXT: por %xmm0, %xmm2 1101; X86-SSE2-NEXT: movd %xmm2, %eax 1102; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1103; X86-SSE2-NEXT: movd %xmm0, %edx 1104; X86-SSE2-NEXT: retl 1105; 1106; X86-SSE42-LABEL: test_reduce_v8i64: 1107; X86-SSE42: ## %bb.0: 1108; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1109; X86-SSE42-NEXT: movdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648] 1110; X86-SSE42-NEXT: movdqa %xmm1, %xmm6 1111; X86-SSE42-NEXT: pxor %xmm5, %xmm6 1112; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1113; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1114; X86-SSE42-NEXT: pcmpgtq %xmm6, %xmm0 1115; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1116; X86-SSE42-NEXT: movdqa %xmm4, %xmm1 1117; X86-SSE42-NEXT: pxor %xmm5, %xmm1 1118; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 1119; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1120; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1121; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1122; X86-SSE42-NEXT: movapd %xmm2, %xmm1 1123; X86-SSE42-NEXT: xorpd %xmm5, %xmm1 1124; X86-SSE42-NEXT: movapd %xmm3, %xmm0 1125; X86-SSE42-NEXT: xorpd %xmm5, %xmm0 1126; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1127; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1128; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1129; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1130; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1131; X86-SSE42-NEXT: pxor %xmm1, %xmm5 1132; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm5 1133; X86-SSE42-NEXT: movdqa %xmm5, %xmm0 1134; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1135; X86-SSE42-NEXT: movd %xmm1, %eax 1136; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1137; X86-SSE42-NEXT: retl 1138; 1139; X86-AVX1-LABEL: 
test_reduce_v8i64: 1140; X86-AVX1: ## %bb.0: 1141; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1142; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648] 1143; X86-AVX1-NEXT: ## xmm2 = mem[0,0] 1144; X86-AVX1-NEXT: vxorps %xmm2, %xmm3, %xmm4 1145; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 1146; X86-AVX1-NEXT: vxorps %xmm2, %xmm5, %xmm6 1147; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4 1148; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm5, %xmm3 1149; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4 1150; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm5 1151; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 1152; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm0 1153; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1 1154; X86-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4 1155; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1 1156; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 1157; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1158; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1159; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1160; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1161; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1162; X86-AVX1-NEXT: vmovd %xmm0, %eax 1163; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1164; X86-AVX1-NEXT: vzeroupper 1165; X86-AVX1-NEXT: retl 1166; 1167; X86-AVX2-LABEL: test_reduce_v8i64: 1168; X86-AVX2: ## %bb.0: 1169; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648] 1170; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 1171; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4 1172; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 1173; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0 1174; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1175; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1176; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4 1177; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1178; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1179; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1180; X86-AVX2-NEXT: vxorpd 
%xmm2, %xmm0, %xmm3 1181; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1182; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1183; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1184; X86-AVX2-NEXT: vmovd %xmm0, %eax 1185; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1186; X86-AVX2-NEXT: vzeroupper 1187; X86-AVX2-NEXT: retl 1188; 1189; X64-SSE2-LABEL: test_reduce_v8i64: 1190; X64-SSE2: ## %bb.0: 1191; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] 1192; X64-SSE2-NEXT: movdqa %xmm1, %xmm5 1193; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1194; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1195; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1196; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1197; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1198; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1199; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1200; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1201; X64-SSE2-NEXT: pand %xmm8, %xmm6 1202; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1203; X64-SSE2-NEXT: por %xmm6, %xmm5 1204; X64-SSE2-NEXT: pand %xmm5, %xmm1 1205; X64-SSE2-NEXT: pandn %xmm3, %xmm5 1206; X64-SSE2-NEXT: por %xmm1, %xmm5 1207; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1208; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1209; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1210; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1211; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1212; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1213; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1214; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1215; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1216; X64-SSE2-NEXT: pand %xmm7, %xmm1 1217; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1218; X64-SSE2-NEXT: por %xmm1, %xmm3 1219; X64-SSE2-NEXT: pand %xmm3, %xmm0 1220; X64-SSE2-NEXT: pandn %xmm2, %xmm3 1221; X64-SSE2-NEXT: por %xmm0, %xmm3 1222; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1223; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1224; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1225; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1226; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1227; 
X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1228; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2] 1229; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1230; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1231; X64-SSE2-NEXT: pand %xmm6, %xmm0 1232; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1233; X64-SSE2-NEXT: por %xmm0, %xmm1 1234; X64-SSE2-NEXT: pand %xmm1, %xmm3 1235; X64-SSE2-NEXT: pandn %xmm5, %xmm1 1236; X64-SSE2-NEXT: por %xmm3, %xmm1 1237; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1238; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1239; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1240; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1241; X64-SSE2-NEXT: movdqa %xmm4, %xmm3 1242; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1243; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1244; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1245; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1246; X64-SSE2-NEXT: pand %xmm5, %xmm2 1247; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1248; X64-SSE2-NEXT: por %xmm2, %xmm3 1249; X64-SSE2-NEXT: pand %xmm3, %xmm1 1250; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1251; X64-SSE2-NEXT: por %xmm1, %xmm3 1252; X64-SSE2-NEXT: movq %xmm3, %rax 1253; X64-SSE2-NEXT: retq 1254; 1255; X64-SSE42-LABEL: test_reduce_v8i64: 1256; X64-SSE42: ## %bb.0: 1257; X64-SSE42-NEXT: movdqa %xmm0, %xmm5 1258; X64-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] 1259; X64-SSE42-NEXT: movdqa %xmm1, %xmm6 1260; X64-SSE42-NEXT: pxor %xmm4, %xmm6 1261; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1262; X64-SSE42-NEXT: pxor %xmm4, %xmm0 1263; X64-SSE42-NEXT: pcmpgtq %xmm6, %xmm0 1264; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1265; X64-SSE42-NEXT: movdqa %xmm5, %xmm1 1266; X64-SSE42-NEXT: pxor %xmm4, %xmm1 1267; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 1268; X64-SSE42-NEXT: pxor %xmm4, %xmm0 1269; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1270; X64-SSE42-NEXT: blendvpd %xmm0, %xmm5, %xmm2 1271; X64-SSE42-NEXT: movapd %xmm2, %xmm1 1272; X64-SSE42-NEXT: xorpd %xmm4, %xmm1 1273; 
X64-SSE42-NEXT: movapd %xmm3, %xmm0 1274; X64-SSE42-NEXT: xorpd %xmm4, %xmm0 1275; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1276; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1277; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1278; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1279; X64-SSE42-NEXT: pxor %xmm4, %xmm0 1280; X64-SSE42-NEXT: pxor %xmm1, %xmm4 1281; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm4 1282; X64-SSE42-NEXT: movdqa %xmm4, %xmm0 1283; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1284; X64-SSE42-NEXT: movq %xmm1, %rax 1285; X64-SSE42-NEXT: retq 1286; 1287; X64-AVX1-LABEL: test_reduce_v8i64: 1288; X64-AVX1: ## %bb.0: 1289; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1290; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] 1291; X64-AVX1-NEXT: ## xmm3 = mem[0,0] 1292; X64-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4 1293; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 1294; X64-AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6 1295; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4 1296; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm5, %xmm2 1297; X64-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4 1298; X64-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5 1299; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 1300; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm0 1301; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm1 1302; X64-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4 1303; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1 1304; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 1305; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1306; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2 1307; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3 1308; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1309; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1310; X64-AVX1-NEXT: vmovq %xmm0, %rax 1311; X64-AVX1-NEXT: vzeroupper 1312; X64-AVX1-NEXT: retq 1313; 1314; X64-AVX2-LABEL: test_reduce_v8i64: 1315; X64-AVX2: ## %bb.0: 1316; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 1317; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3 1318; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4 1319; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 1320; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0 1321; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1322; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1323; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4 1324; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1325; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1326; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1327; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1328; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1329; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1330; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1331; X64-AVX2-NEXT: vmovq %xmm0, %rax 1332; X64-AVX2-NEXT: vzeroupper 1333; X64-AVX2-NEXT: retq 1334; 1335; X64-AVX512-LABEL: test_reduce_v8i64: 1336; X64-AVX512: ## %bb.0: 1337; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1338; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0 1339; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1340; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0 1341; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1342; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0 1343; X64-AVX512-NEXT: vmovq %xmm0, %rax 1344; X64-AVX512-NEXT: vzeroupper 1345; X64-AVX512-NEXT: retq 1346 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1347 %2 = icmp ult <8 x i64> %a0, %1 1348 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1349 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1350 %5 = icmp ult <8 x i64> %3, %4 1351 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1352 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef> 1353 %8 = icmp ult <8 x i64> %6, %7 1354 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1355 %10 = extractelement <8 x i64> %9, i32 0 1356 ret i64 %10 1357} 1358 1359define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1360; X86-SSE2-LABEL: test_reduce_v16i32: 1361; X86-SSE2: ## %bb.0: 1362; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1363; X86-SSE2-NEXT: movdqa %xmm1, %xmm6 1364; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1365; X86-SSE2-NEXT: movdqa %xmm3, %xmm5 1366; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1367; X86-SSE2-NEXT: pcmpgtd %xmm6, %xmm5 1368; X86-SSE2-NEXT: pand %xmm5, %xmm1 1369; X86-SSE2-NEXT: pandn %xmm3, %xmm5 1370; X86-SSE2-NEXT: por %xmm1, %xmm5 1371; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1372; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1373; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1374; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1375; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm1 1376; X86-SSE2-NEXT: pand %xmm1, %xmm0 1377; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1378; X86-SSE2-NEXT: por %xmm0, %xmm1 1379; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1380; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1381; X86-SSE2-NEXT: movdqa %xmm5, %xmm0 1382; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1383; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1384; X86-SSE2-NEXT: pand %xmm0, %xmm1 1385; X86-SSE2-NEXT: pandn %xmm5, %xmm0 1386; X86-SSE2-NEXT: por %xmm1, %xmm0 1387; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1388; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1389; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1390; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1391; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1392; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm1 1393; X86-SSE2-NEXT: pand %xmm1, %xmm0 1394; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1395; X86-SSE2-NEXT: por %xmm0, %xmm1 1396; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1397; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1398; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1399; X86-SSE2-NEXT: pxor %xmm0, %xmm4 1400; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 1401; X86-SSE2-NEXT: pand %xmm4, %xmm1 
1402; X86-SSE2-NEXT: pandn %xmm0, %xmm4 1403; X86-SSE2-NEXT: por %xmm1, %xmm4 1404; X86-SSE2-NEXT: movd %xmm4, %eax 1405; X86-SSE2-NEXT: retl 1406; 1407; X86-SSE42-LABEL: test_reduce_v16i32: 1408; X86-SSE42: ## %bb.0: 1409; X86-SSE42-NEXT: pminud %xmm3, %xmm1 1410; X86-SSE42-NEXT: pminud %xmm2, %xmm0 1411; X86-SSE42-NEXT: pminud %xmm1, %xmm0 1412; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1413; X86-SSE42-NEXT: pminud %xmm0, %xmm1 1414; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1415; X86-SSE42-NEXT: pminud %xmm1, %xmm0 1416; X86-SSE42-NEXT: movd %xmm0, %eax 1417; X86-SSE42-NEXT: retl 1418; 1419; X86-AVX1-LABEL: test_reduce_v16i32: 1420; X86-AVX1: ## %bb.0: 1421; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1422; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1423; X86-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1424; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1425; X86-AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0 1426; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1427; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1428; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1429; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1430; X86-AVX1-NEXT: vmovd %xmm0, %eax 1431; X86-AVX1-NEXT: vzeroupper 1432; X86-AVX1-NEXT: retl 1433; 1434; X86-AVX2-LABEL: test_reduce_v16i32: 1435; X86-AVX2: ## %bb.0: 1436; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1437; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1438; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 1439; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1440; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 1441; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1442; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 1443; X86-AVX2-NEXT: vmovd %xmm0, %eax 1444; X86-AVX2-NEXT: vzeroupper 1445; X86-AVX2-NEXT: retl 1446; 1447; X64-SSE2-LABEL: test_reduce_v16i32: 1448; X64-SSE2: ## %bb.0: 1449; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1450; X64-SSE2-NEXT: movdqa %xmm1, %xmm5 1451; 
X64-SSE2-NEXT: pxor %xmm4, %xmm5 1452; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1453; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1454; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1455; X64-SSE2-NEXT: pand %xmm6, %xmm1 1456; X64-SSE2-NEXT: pandn %xmm3, %xmm6 1457; X64-SSE2-NEXT: por %xmm1, %xmm6 1458; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1459; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1460; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1461; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1462; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1463; X64-SSE2-NEXT: pand %xmm3, %xmm0 1464; X64-SSE2-NEXT: pandn %xmm2, %xmm3 1465; X64-SSE2-NEXT: por %xmm0, %xmm3 1466; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1467; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1468; X64-SSE2-NEXT: movdqa %xmm6, %xmm1 1469; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1470; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1471; X64-SSE2-NEXT: pand %xmm1, %xmm3 1472; X64-SSE2-NEXT: pandn %xmm6, %xmm1 1473; X64-SSE2-NEXT: por %xmm3, %xmm1 1474; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1475; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1476; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1477; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 1478; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1479; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1480; X64-SSE2-NEXT: pand %xmm3, %xmm1 1481; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1482; X64-SSE2-NEXT: por %xmm1, %xmm3 1483; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1] 1484; X64-SSE2-NEXT: movdqa %xmm3, %xmm1 1485; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1486; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1487; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm4 1488; X64-SSE2-NEXT: pand %xmm4, %xmm3 1489; X64-SSE2-NEXT: pandn %xmm0, %xmm4 1490; X64-SSE2-NEXT: por %xmm3, %xmm4 1491; X64-SSE2-NEXT: movd %xmm4, %eax 1492; X64-SSE2-NEXT: retq 1493; 1494; X64-SSE42-LABEL: test_reduce_v16i32: 1495; X64-SSE42: ## %bb.0: 1496; X64-SSE42-NEXT: pminud %xmm3, %xmm1 1497; X64-SSE42-NEXT: pminud %xmm2, %xmm0 1498; X64-SSE42-NEXT: pminud %xmm1, %xmm0 1499; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1500; X64-SSE42-NEXT: pminud %xmm0, %xmm1 1501; 
X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1502; X64-SSE42-NEXT: pminud %xmm1, %xmm0 1503; X64-SSE42-NEXT: movd %xmm0, %eax 1504; X64-SSE42-NEXT: retq 1505; 1506; X64-AVX1-LABEL: test_reduce_v16i32: 1507; X64-AVX1: ## %bb.0: 1508; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1509; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1510; X64-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1511; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1512; X64-AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0 1513; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1514; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1515; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1516; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1517; X64-AVX1-NEXT: vmovd %xmm0, %eax 1518; X64-AVX1-NEXT: vzeroupper 1519; X64-AVX1-NEXT: retq 1520; 1521; X64-AVX2-LABEL: test_reduce_v16i32: 1522; X64-AVX2: ## %bb.0: 1523; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1524; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1525; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 1526; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1527; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 1528; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1529; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 1530; X64-AVX2-NEXT: vmovd %xmm0, %eax 1531; X64-AVX2-NEXT: vzeroupper 1532; X64-AVX2-NEXT: retq 1533; 1534; X64-AVX512-LABEL: test_reduce_v16i32: 1535; X64-AVX512: ## %bb.0: 1536; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1537; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0 1538; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1539; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0 1540; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1541; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0 1542; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1543; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0 1544; X64-AVX512-NEXT: vmovd %xmm0, %eax 1545; X64-AVX512-NEXT: vzeroupper 1546; X64-AVX512-NEXT: retq 1547 %1 = shufflevector <16 x i32> %a0, <16 x i32> 
undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1548 %2 = icmp ult <16 x i32> %a0, %1 1549 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1550 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1551 %5 = icmp ult <16 x i32> %3, %4 1552 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1553 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1554 %8 = icmp ult <16 x i32> %6, %7 1555 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1556 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1557 %11 = icmp ult <16 x i32> %9, %10 1558 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1559 %13 = extractelement <16 x i32> %12, i32 0 1560 ret i32 %13 1561} 1562 1563define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1564; X86-SSE2-LABEL: test_reduce_v32i16: 1565; X86-SSE2: ## %bb.0: 1566; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1567; X86-SSE2-NEXT: psubusw %xmm3, %xmm4 1568; X86-SSE2-NEXT: psubw %xmm4, %xmm1 1569; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1570; X86-SSE2-NEXT: psubusw %xmm2, %xmm3 1571; X86-SSE2-NEXT: psubw %xmm3, %xmm0 1572; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1573; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1574; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1575; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1576; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1577; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1578; X86-SSE2-NEXT: 
psubw %xmm2, %xmm0 1579; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1580; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1581; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1582; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1583; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1584; X86-SSE2-NEXT: psrld $16, %xmm1 1585; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1586; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1587; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1588; X86-SSE2-NEXT: movd %xmm0, %eax 1589; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1590; X86-SSE2-NEXT: retl 1591; 1592; X86-SSE42-LABEL: test_reduce_v32i16: 1593; X86-SSE42: ## %bb.0: 1594; X86-SSE42-NEXT: pminuw %xmm3, %xmm1 1595; X86-SSE42-NEXT: pminuw %xmm2, %xmm0 1596; X86-SSE42-NEXT: pminuw %xmm1, %xmm0 1597; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1598; X86-SSE42-NEXT: movd %xmm0, %eax 1599; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1600; X86-SSE42-NEXT: retl 1601; 1602; X86-AVX1-LABEL: test_reduce_v32i16: 1603; X86-AVX1: ## %bb.0: 1604; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1605; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1606; X86-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2 1607; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1608; X86-AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0 1609; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1610; X86-AVX1-NEXT: vmovd %xmm0, %eax 1611; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1612; X86-AVX1-NEXT: vzeroupper 1613; X86-AVX1-NEXT: retl 1614; 1615; X86-AVX2-LABEL: test_reduce_v32i16: 1616; X86-AVX2: ## %bb.0: 1617; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0 1618; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1619; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1620; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1621; X86-AVX2-NEXT: vmovd %xmm0, %eax 1622; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1623; X86-AVX2-NEXT: vzeroupper 1624; X86-AVX2-NEXT: retl 1625; 1626; X64-SSE2-LABEL: test_reduce_v32i16: 1627; X64-SSE2: ## %bb.0: 1628; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1629; X64-SSE2-NEXT: 
psubusw %xmm3, %xmm4 1630; X64-SSE2-NEXT: psubw %xmm4, %xmm1 1631; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 1632; X64-SSE2-NEXT: psubusw %xmm2, %xmm3 1633; X64-SSE2-NEXT: psubw %xmm3, %xmm0 1634; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1635; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1636; X64-SSE2-NEXT: psubw %xmm2, %xmm0 1637; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1638; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1639; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1640; X64-SSE2-NEXT: psubw %xmm2, %xmm0 1641; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1642; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1643; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1644; X64-SSE2-NEXT: psubw %xmm2, %xmm0 1645; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1646; X64-SSE2-NEXT: psrld $16, %xmm1 1647; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1648; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1649; X64-SSE2-NEXT: psubw %xmm2, %xmm0 1650; X64-SSE2-NEXT: movd %xmm0, %eax 1651; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1652; X64-SSE2-NEXT: retq 1653; 1654; X64-SSE42-LABEL: test_reduce_v32i16: 1655; X64-SSE42: ## %bb.0: 1656; X64-SSE42-NEXT: pminuw %xmm3, %xmm1 1657; X64-SSE42-NEXT: pminuw %xmm2, %xmm0 1658; X64-SSE42-NEXT: pminuw %xmm1, %xmm0 1659; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1660; X64-SSE42-NEXT: movd %xmm0, %eax 1661; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1662; X64-SSE42-NEXT: retq 1663; 1664; X64-AVX1-LABEL: test_reduce_v32i16: 1665; X64-AVX1: ## %bb.0: 1666; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1667; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1668; X64-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2 1669; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1670; X64-AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0 1671; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1672; X64-AVX1-NEXT: vmovd %xmm0, %eax 1673; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1674; X64-AVX1-NEXT: vzeroupper 1675; X64-AVX1-NEXT: retq 1676; 1677; X64-AVX2-LABEL: test_reduce_v32i16: 1678; X64-AVX2: ## %bb.0: 1679; X64-AVX2-NEXT: 
vpminuw %ymm1, %ymm0, %ymm0 1680; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1681; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1682; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1683; X64-AVX2-NEXT: vmovd %xmm0, %eax 1684; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1685; X64-AVX2-NEXT: vzeroupper 1686; X64-AVX2-NEXT: retq 1687; 1688; X64-AVX512-LABEL: test_reduce_v32i16: 1689; X64-AVX512: ## %bb.0: 1690; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1691; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0 1692; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1693; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1694; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1695; X64-AVX512-NEXT: vmovd %xmm0, %eax 1696; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1697; X64-AVX512-NEXT: vzeroupper 1698; X64-AVX512-NEXT: retq 1699 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1700 %2 = icmp ult <32 x i16> %a0, %1 1701 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1702 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1703 %5 = icmp ult <32 x i16> %3, %4 1704 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1705 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1706 %8 = icmp ult <32 x i16> %6, %7 1707 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1708 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1709 %11 = icmp ult <32 x i16> %9, %10 1710 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1711 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1712 %14 = icmp ult <32 x i16> %12, %13 1713 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1714 %16 = extractelement <32 x i16> %15, i32 0 1715 ret i16 %16 1716} 1717 1718define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1719; X86-SSE2-LABEL: test_reduce_v64i8: 1720; X86-SSE2: ## %bb.0: 1721; X86-SSE2-NEXT: pminub %xmm3, %xmm1 1722; X86-SSE2-NEXT: pminub %xmm2, %xmm0 1723; X86-SSE2-NEXT: pminub %xmm1, %xmm0 1724; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1725; X86-SSE2-NEXT: pminub %xmm0, %xmm1 1726; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1727; X86-SSE2-NEXT: pminub %xmm1, %xmm0 1728; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1729; X86-SSE2-NEXT: psrld $16, %xmm1 1730; X86-SSE2-NEXT: pminub %xmm0, %xmm1 1731; X86-SSE2-NEXT: 
movdqa %xmm1, %xmm0 1732; X86-SSE2-NEXT: psrlw $8, %xmm0 1733; X86-SSE2-NEXT: pminub %xmm1, %xmm0 1734; X86-SSE2-NEXT: movd %xmm0, %eax 1735; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1736; X86-SSE2-NEXT: retl 1737; 1738; X86-SSE42-LABEL: test_reduce_v64i8: 1739; X86-SSE42: ## %bb.0: 1740; X86-SSE42-NEXT: pminub %xmm3, %xmm1 1741; X86-SSE42-NEXT: pminub %xmm2, %xmm0 1742; X86-SSE42-NEXT: pminub %xmm1, %xmm0 1743; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 1744; X86-SSE42-NEXT: psrlw $8, %xmm1 1745; X86-SSE42-NEXT: pminub %xmm0, %xmm1 1746; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1747; X86-SSE42-NEXT: movd %xmm0, %eax 1748; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1749; X86-SSE42-NEXT: retl 1750; 1751; X86-AVX1-LABEL: test_reduce_v64i8: 1752; X86-AVX1: ## %bb.0: 1753; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1754; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1755; X86-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2 1756; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1757; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1758; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1759; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1760; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1761; X86-AVX1-NEXT: vmovd %xmm0, %eax 1762; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1763; X86-AVX1-NEXT: vzeroupper 1764; X86-AVX1-NEXT: retl 1765; 1766; X86-AVX2-LABEL: test_reduce_v64i8: 1767; X86-AVX2: ## %bb.0: 1768; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0 1769; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1770; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1771; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1772; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1773; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1774; X86-AVX2-NEXT: vmovd %xmm0, %eax 1775; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1776; X86-AVX2-NEXT: vzeroupper 1777; X86-AVX2-NEXT: retl 1778; 1779; X64-SSE2-LABEL: test_reduce_v64i8: 1780; X64-SSE2: ## %bb.0: 1781; X64-SSE2-NEXT: pminub %xmm3, %xmm1 1782; X64-SSE2-NEXT: 
pminub %xmm2, %xmm0 1783; X64-SSE2-NEXT: pminub %xmm1, %xmm0 1784; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1785; X64-SSE2-NEXT: pminub %xmm0, %xmm1 1786; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1787; X64-SSE2-NEXT: pminub %xmm1, %xmm0 1788; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1789; X64-SSE2-NEXT: psrld $16, %xmm1 1790; X64-SSE2-NEXT: pminub %xmm0, %xmm1 1791; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1792; X64-SSE2-NEXT: psrlw $8, %xmm0 1793; X64-SSE2-NEXT: pminub %xmm1, %xmm0 1794; X64-SSE2-NEXT: movd %xmm0, %eax 1795; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1796; X64-SSE2-NEXT: retq 1797; 1798; X64-SSE42-LABEL: test_reduce_v64i8: 1799; X64-SSE42: ## %bb.0: 1800; X64-SSE42-NEXT: pminub %xmm3, %xmm1 1801; X64-SSE42-NEXT: pminub %xmm2, %xmm0 1802; X64-SSE42-NEXT: pminub %xmm1, %xmm0 1803; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 1804; X64-SSE42-NEXT: psrlw $8, %xmm1 1805; X64-SSE42-NEXT: pminub %xmm0, %xmm1 1806; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1807; X64-SSE42-NEXT: movd %xmm0, %eax 1808; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1809; X64-SSE42-NEXT: retq 1810; 1811; X64-AVX1-LABEL: test_reduce_v64i8: 1812; X64-AVX1: ## %bb.0: 1813; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1814; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1815; X64-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2 1816; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1817; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1818; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1819; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1820; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1821; X64-AVX1-NEXT: vmovd %xmm0, %eax 1822; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1823; X64-AVX1-NEXT: vzeroupper 1824; X64-AVX1-NEXT: retq 1825; 1826; X64-AVX2-LABEL: test_reduce_v64i8: 1827; X64-AVX2: ## %bb.0: 1828; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0 1829; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1830; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1831; X64-AVX2-NEXT: vpsrlw $8, 
%xmm0, %xmm1 1832; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1833; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1834; X64-AVX2-NEXT: vmovd %xmm0, %eax 1835; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1836; X64-AVX2-NEXT: vzeroupper 1837; X64-AVX2-NEXT: retq 1838; 1839; X64-AVX512-LABEL: test_reduce_v64i8: 1840; X64-AVX512: ## %bb.0: 1841; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1842; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0 1843; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1844; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1845; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1846; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1847; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1848; X64-AVX512-NEXT: vmovd %xmm0, %eax 1849; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1850; X64-AVX512-NEXT: vzeroupper 1851; X64-AVX512-NEXT: retq 1852 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1853 %2 = icmp ult <64 x i8> %a0, %1 1854 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1855 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1856 %5 = icmp ult <64 x i8> %3, %4 1857 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1858 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1859 %8 = icmp ult <64 x i8> %6, %7 1860 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1861 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef> 1862 %11 = icmp ult <64 x i8> %9, %10 1863 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1864 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1865 %14 = icmp ult <64 x i8> %12, %13 1866 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1867 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1868 %17 = icmp ult <64 x i8> %15, %16 1869 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1870 %19 = extractelement <64 x i8> %18, i32 0 1871 ret i8 %19 1872} 1873 1874; 1875; Partial Vector Reductions 
1876; 1877 1878define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1879; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1880; X86-SSE2: ## %bb.0: 1881; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1882; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1883; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1884; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1885; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1886; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1887; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1888; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1889; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1890; X86-SSE2-NEXT: psrld $16, %xmm1 1891; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1892; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1893; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1894; X86-SSE2-NEXT: movd %xmm0, %eax 1895; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1896; X86-SSE2-NEXT: retl 1897; 1898; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 1899; X86-SSE42: ## %bb.0: 1900; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1901; X86-SSE42-NEXT: movd %xmm0, %eax 1902; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1903; X86-SSE42-NEXT: retl 1904; 1905; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 1906; X86-AVX: ## %bb.0: 1907; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1908; X86-AVX-NEXT: vmovd %xmm0, %eax 1909; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1910; X86-AVX-NEXT: vzeroupper 1911; X86-AVX-NEXT: retl 1912; 1913; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: 1914; X64-SSE2: ## %bb.0: 1915; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1916; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1917; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1918; X64-SSE2-NEXT: psubw %xmm2, %xmm0 1919; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1920; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1921; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1922; X64-SSE2-NEXT: psubw %xmm2, %xmm0 1923; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1924; X64-SSE2-NEXT: psrld $16, %xmm1 1925; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1926; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1927; X64-SSE2-NEXT: psubw 
%xmm2, %xmm0 1928; X64-SSE2-NEXT: movd %xmm0, %eax 1929; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1930; X64-SSE2-NEXT: retq 1931; 1932; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 1933; X64-SSE42: ## %bb.0: 1934; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1935; X64-SSE42-NEXT: movd %xmm0, %eax 1936; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1937; X64-SSE42-NEXT: retq 1938; 1939; X64-AVX-LABEL: test_reduce_v16i16_v8i16: 1940; X64-AVX: ## %bb.0: 1941; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 1942; X64-AVX-NEXT: vmovd %xmm0, %eax 1943; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1944; X64-AVX-NEXT: vzeroupper 1945; X64-AVX-NEXT: retq 1946 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1947 %2 = icmp ult <16 x i16> %a0, %1 1948 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 1949 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1950 %5 = icmp ult <16 x i16> %3, %4 1951 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 1952 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1953 %8 = icmp ult <16 x i16> %6, %7 1954 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 1955 %10 = extractelement <16 x i16> %9, i32 0 1956 ret i16 %10 1957} 1958 1959define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 1960; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 1961; X86-SSE2: ## %bb.0: 1962; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1963; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1964; X86-SSE2-NEXT: 
psubusw %xmm1, %xmm2 1965; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1966; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1967; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1968; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1969; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1970; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1971; X86-SSE2-NEXT: psrld $16, %xmm1 1972; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1973; X86-SSE2-NEXT: psubusw %xmm1, %xmm2 1974; X86-SSE2-NEXT: psubw %xmm2, %xmm0 1975; X86-SSE2-NEXT: movd %xmm0, %eax 1976; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1977; X86-SSE2-NEXT: retl 1978; 1979; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: 1980; X86-SSE42: ## %bb.0: 1981; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1982; X86-SSE42-NEXT: movd %xmm0, %eax 1983; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1984; X86-SSE42-NEXT: retl 1985; 1986; X86-AVX-LABEL: test_reduce_v32i16_v8i16: 1987; X86-AVX: ## %bb.0: 1988; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1989; X86-AVX-NEXT: vmovd %xmm0, %eax 1990; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1991; X86-AVX-NEXT: vzeroupper 1992; X86-AVX-NEXT: retl 1993; 1994; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: 1995; X64-SSE2: ## %bb.0: 1996; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1997; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1998; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 1999; X64-SSE2-NEXT: psubw %xmm2, %xmm0 2000; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2001; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2002; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 2003; X64-SSE2-NEXT: psubw %xmm2, %xmm0 2004; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2005; X64-SSE2-NEXT: psrld $16, %xmm1 2006; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2007; X64-SSE2-NEXT: psubusw %xmm1, %xmm2 2008; X64-SSE2-NEXT: psubw %xmm2, %xmm0 2009; X64-SSE2-NEXT: movd %xmm0, %eax 2010; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2011; X64-SSE2-NEXT: retq 2012; 2013; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: 2014; X64-SSE42: ## %bb.0: 2015; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 
2016; X64-SSE42-NEXT: movd %xmm0, %eax 2017; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 2018; X64-SSE42-NEXT: retq 2019; 2020; X64-AVX-LABEL: test_reduce_v32i16_v8i16: 2021; X64-AVX: ## %bb.0: 2022; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 2023; X64-AVX-NEXT: vmovd %xmm0, %eax 2024; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 2025; X64-AVX-NEXT: vzeroupper 2026; X64-AVX-NEXT: retq 2027 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2028 %2 = icmp ult <32 x i16> %a0, %1 2029 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 2030 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2031 %5 = icmp ult <32 x i16> %3, %4 2032 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 2033 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2034 %8 = icmp ult <32 x i16> %6, %7 2035 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 2036 %10 = extractelement <32 x i16> %9, i32 0 2037 ret i16 %10 
2038} 2039 2040define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { 2041; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: 2042; X86-SSE2: ## %bb.0: 2043; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2044; X86-SSE2-NEXT: pminub %xmm0, %xmm1 2045; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2046; X86-SSE2-NEXT: pminub %xmm1, %xmm0 2047; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 2048; X86-SSE2-NEXT: psrld $16, %xmm1 2049; X86-SSE2-NEXT: pminub %xmm0, %xmm1 2050; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2051; X86-SSE2-NEXT: psrlw $8, %xmm0 2052; X86-SSE2-NEXT: pminub %xmm1, %xmm0 2053; X86-SSE2-NEXT: movd %xmm0, %eax 2054; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2055; X86-SSE2-NEXT: retl 2056; 2057; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: 2058; X86-SSE42: ## %bb.0: 2059; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 2060; X86-SSE42-NEXT: psrlw $8, %xmm1 2061; X86-SSE42-NEXT: pminub %xmm0, %xmm1 2062; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2063; X86-SSE42-NEXT: movd %xmm0, %eax 2064; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2065; X86-SSE42-NEXT: retl 2066; 2067; X86-AVX-LABEL: test_reduce_v32i8_v16i8: 2068; X86-AVX: ## %bb.0: 2069; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2070; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2071; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2072; X86-AVX-NEXT: vmovd %xmm0, %eax 2073; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 2074; X86-AVX-NEXT: vzeroupper 2075; X86-AVX-NEXT: retl 2076; 2077; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: 2078; X64-SSE2: ## %bb.0: 2079; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2080; X64-SSE2-NEXT: pminub %xmm0, %xmm1 2081; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2082; X64-SSE2-NEXT: pminub %xmm1, %xmm0 2083; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2084; X64-SSE2-NEXT: psrld $16, %xmm1 2085; X64-SSE2-NEXT: pminub %xmm0, %xmm1 2086; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 2087; X64-SSE2-NEXT: psrlw $8, %xmm0 2088; X64-SSE2-NEXT: pminub %xmm1, %xmm0 2089; X64-SSE2-NEXT: movd %xmm0, 
%eax 2090; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2091; X64-SSE2-NEXT: retq 2092; 2093; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: 2094; X64-SSE42: ## %bb.0: 2095; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 2096; X64-SSE42-NEXT: psrlw $8, %xmm1 2097; X64-SSE42-NEXT: pminub %xmm0, %xmm1 2098; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2099; X64-SSE42-NEXT: movd %xmm0, %eax 2100; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2101; X64-SSE42-NEXT: retq 2102; 2103; X64-AVX-LABEL: test_reduce_v32i8_v16i8: 2104; X64-AVX: ## %bb.0: 2105; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2106; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2107; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 2108; X64-AVX-NEXT: vmovd %xmm0, %eax 2109; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax 2110; X64-AVX-NEXT: vzeroupper 2111; X64-AVX-NEXT: retq 2112 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2113 %2 = icmp ult <32 x i8> %a0, %1 2114 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 2115 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2116 %5 = icmp ult <32 x i8> %3, %4 2117 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 2118 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

; Unsigned-min reduction over only the low 16 bytes of a <64 x i8> input: the
; shuffle/icmp ult/select ladder below halves the active element count each
; round (picking elements 8-15, then 4-7, then 2-3, then 1) and extracts the
; result from element 0.  The shuffle masks never reference indices above 15,
; so the upper 48 input bytes are dead and every target reduces within a
; single XMM register: SSE2 lowers to a pshufd/psrld/psrlw + pminub chain,
; while SSE4.2 and AVX fold the tail into psrlw + pminub + phminposuw.
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py — do
; not edit them by hand.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}