1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2 3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42 4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1 5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2 6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2 7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42 8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX1 9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX2 10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512 11 12; 13; 128-bit Vectors 14; 15 16define i64 @test_reduce_v2i64(<2 x i64> %a0) { 17; X86-SSE2-LABEL: test_reduce_v2i64: 18; X86-SSE2: ## %bb.0: 19; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 20; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 21; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 22; X86-SSE2-NEXT: pxor %xmm2, %xmm3 23; X86-SSE2-NEXT: pxor %xmm1, %xmm2 24; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 25; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 26; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 27; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 28; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 29; X86-SSE2-NEXT: pand %xmm5, %xmm2 30; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 31; X86-SSE2-NEXT: por %xmm2, %xmm3 32; X86-SSE2-NEXT: pand %xmm3, %xmm0 33; X86-SSE2-NEXT: pandn %xmm1, %xmm3 34; X86-SSE2-NEXT: por %xmm0, %xmm3 35; X86-SSE2-NEXT: movd %xmm3, %eax 36; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 
= xmm3[1,1,1,1] 37; X86-SSE2-NEXT: movd %xmm0, %edx 38; X86-SSE2-NEXT: retl 39; 40; X86-SSE42-LABEL: test_reduce_v2i64: 41; X86-SSE42: ## %bb.0: 42; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 43; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 44; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 45; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 46; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 47; X86-SSE42-NEXT: movd %xmm2, %eax 48; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 49; X86-SSE42-NEXT: retl 50; 51; X86-AVX-LABEL: test_reduce_v2i64: 52; X86-AVX: ## %bb.0: 53; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 54; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 55; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 56; X86-AVX-NEXT: vmovd %xmm0, %eax 57; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx 58; X86-AVX-NEXT: retl 59; 60; X64-SSE2-LABEL: test_reduce_v2i64: 61; X64-SSE2: ## %bb.0: 62; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 63; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 64; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 65; X64-SSE2-NEXT: pxor %xmm2, %xmm3 66; X64-SSE2-NEXT: pxor %xmm1, %xmm2 67; X64-SSE2-NEXT: movdqa %xmm2, %xmm4 68; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 69; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 70; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 71; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 72; X64-SSE2-NEXT: pand %xmm5, %xmm2 73; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 74; X64-SSE2-NEXT: por %xmm2, %xmm3 75; X64-SSE2-NEXT: pand %xmm3, %xmm0 76; X64-SSE2-NEXT: pandn %xmm1, %xmm3 77; X64-SSE2-NEXT: por %xmm0, %xmm3 78; X64-SSE2-NEXT: movq %xmm3, %rax 79; X64-SSE2-NEXT: retq 80; 81; X64-SSE42-LABEL: test_reduce_v2i64: 82; X64-SSE42: ## %bb.0: 83; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 84; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 85; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 86; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 87; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 88; X64-SSE42-NEXT: movq %xmm2, %rax 89; X64-SSE42-NEXT: retq 90; 91; 
X64-AVX1OR2-LABEL: test_reduce_v2i64: 92; X64-AVX1OR2: ## %bb.0: 93; X64-AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 94; X64-AVX1OR2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 95; X64-AVX1OR2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 96; X64-AVX1OR2-NEXT: vmovq %xmm0, %rax 97; X64-AVX1OR2-NEXT: retq 98; 99; X64-AVX512-LABEL: test_reduce_v2i64: 100; X64-AVX512: ## %bb.0: 101; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 102; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 103; X64-AVX512-NEXT: vmovq %xmm0, %rax 104; X64-AVX512-NEXT: retq 105 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 106 %2 = icmp slt <2 x i64> %a0, %1 107 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1 108 %4 = extractelement <2 x i64> %3, i32 0 109 ret i64 %4 110} 111 112define i32 @test_reduce_v4i32(<4 x i32> %a0) { 113; X86-SSE2-LABEL: test_reduce_v4i32: 114; X86-SSE2: ## %bb.0: 115; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 116; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 117; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 118; X86-SSE2-NEXT: pand %xmm2, %xmm0 119; X86-SSE2-NEXT: pandn %xmm1, %xmm2 120; X86-SSE2-NEXT: por %xmm0, %xmm2 121; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 122; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 123; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 124; X86-SSE2-NEXT: pand %xmm1, %xmm2 125; X86-SSE2-NEXT: pandn %xmm0, %xmm1 126; X86-SSE2-NEXT: por %xmm2, %xmm1 127; X86-SSE2-NEXT: movd %xmm1, %eax 128; X86-SSE2-NEXT: retl 129; 130; X86-SSE42-LABEL: test_reduce_v4i32: 131; X86-SSE42: ## %bb.0: 132; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 133; X86-SSE42-NEXT: pminsd %xmm0, %xmm1 134; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 135; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 136; X86-SSE42-NEXT: movd %xmm0, %eax 137; X86-SSE42-NEXT: retl 138; 139; X86-AVX-LABEL: test_reduce_v4i32: 140; X86-AVX: ## %bb.0: 141; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 142; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 143; 
X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 144; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 145; X86-AVX-NEXT: vmovd %xmm0, %eax 146; X86-AVX-NEXT: retl 147; 148; X64-SSE2-LABEL: test_reduce_v4i32: 149; X64-SSE2: ## %bb.0: 150; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 151; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 152; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 153; X64-SSE2-NEXT: pand %xmm2, %xmm0 154; X64-SSE2-NEXT: pandn %xmm1, %xmm2 155; X64-SSE2-NEXT: por %xmm0, %xmm2 156; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 157; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 158; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 159; X64-SSE2-NEXT: pand %xmm1, %xmm2 160; X64-SSE2-NEXT: pandn %xmm0, %xmm1 161; X64-SSE2-NEXT: por %xmm2, %xmm1 162; X64-SSE2-NEXT: movd %xmm1, %eax 163; X64-SSE2-NEXT: retq 164; 165; X64-SSE42-LABEL: test_reduce_v4i32: 166; X64-SSE42: ## %bb.0: 167; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 168; X64-SSE42-NEXT: pminsd %xmm0, %xmm1 169; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 170; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 171; X64-SSE42-NEXT: movd %xmm0, %eax 172; X64-SSE42-NEXT: retq 173; 174; X64-AVX-LABEL: test_reduce_v4i32: 175; X64-AVX: ## %bb.0: 176; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 177; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 178; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 179; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 180; X64-AVX-NEXT: vmovd %xmm0, %eax 181; X64-AVX-NEXT: retq 182 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 183 %2 = icmp slt <4 x i32> %a0, %1 184 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1 185 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 186 %5 = icmp slt <4 x i32> %3, %4 187 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4 188 %7 = extractelement <4 x i32> %6, i32 0 189 ret i32 %7 190} 191 192define i16 @test_reduce_v8i16(<8 x i16> %a0) { 193; X86-SSE2-LABEL: 
test_reduce_v8i16: 194; X86-SSE2: ## %bb.0: 195; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 196; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 197; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 198; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 199; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 200; X86-SSE2-NEXT: psrld $16, %xmm1 201; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 202; X86-SSE2-NEXT: movd %xmm1, %eax 203; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 204; X86-SSE2-NEXT: retl 205; 206; X86-SSE42-LABEL: test_reduce_v8i16: 207; X86-SSE42: ## %bb.0: 208; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 209; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 210; X86-SSE42-NEXT: movd %xmm0, %eax 211; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 212; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 213; X86-SSE42-NEXT: retl 214; 215; X86-AVX-LABEL: test_reduce_v8i16: 216; X86-AVX: ## %bb.0: 217; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 218; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 219; X86-AVX-NEXT: vmovd %xmm0, %eax 220; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 221; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 222; X86-AVX-NEXT: retl 223; 224; X64-SSE2-LABEL: test_reduce_v8i16: 225; X64-SSE2: ## %bb.0: 226; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 227; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 228; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 229; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 230; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 231; X64-SSE2-NEXT: psrld $16, %xmm1 232; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 233; X64-SSE2-NEXT: movd %xmm1, %eax 234; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 235; X64-SSE2-NEXT: retq 236; 237; X64-SSE42-LABEL: test_reduce_v8i16: 238; X64-SSE42: ## %bb.0: 239; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 240; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 241; X64-SSE42-NEXT: movd %xmm0, %eax 242; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 243; X64-SSE42-NEXT: ## kill: def $ax 
killed $ax killed $eax 244; X64-SSE42-NEXT: retq 245; 246; X64-AVX1OR2-LABEL: test_reduce_v8i16: 247; X64-AVX1OR2: ## %bb.0: 248; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 249; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 250; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 251; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000 252; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax 253; X64-AVX1OR2-NEXT: retq 254; 255; X64-AVX512-LABEL: test_reduce_v8i16: 256; X64-AVX512: ## %bb.0: 257; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 258; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 259; X64-AVX512-NEXT: vmovd %xmm0, %eax 260; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 261; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 262; X64-AVX512-NEXT: retq 263 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 264 %2 = icmp slt <8 x i16> %a0, %1 265 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1 266 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 267 %5 = icmp slt <8 x i16> %3, %4 268 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 269 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 270 %8 = icmp slt <8 x i16> %6, %7 271 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7 272 %10 = extractelement <8 x i16> %9, i32 0 273 ret i16 %10 274} 275 276define i8 @test_reduce_v16i8(<16 x i8> %a0) { 277; X86-SSE2-LABEL: test_reduce_v16i8: 278; X86-SSE2: ## %bb.0: 279; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 280; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 281; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 282; X86-SSE2-NEXT: pand %xmm2, %xmm0 283; X86-SSE2-NEXT: pandn %xmm1, %xmm2 284; X86-SSE2-NEXT: por %xmm0, %xmm2 285; X86-SSE2-NEXT: pshufd 
{{.*#+}} xmm1 = xmm2[1,1,1,1] 286; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 287; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm0 288; X86-SSE2-NEXT: pand %xmm0, %xmm2 289; X86-SSE2-NEXT: pandn %xmm1, %xmm0 290; X86-SSE2-NEXT: por %xmm2, %xmm0 291; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 292; X86-SSE2-NEXT: psrld $16, %xmm2 293; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 294; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 295; X86-SSE2-NEXT: pand %xmm1, %xmm0 296; X86-SSE2-NEXT: pandn %xmm2, %xmm1 297; X86-SSE2-NEXT: por %xmm0, %xmm1 298; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 299; X86-SSE2-NEXT: psrlw $8, %xmm0 300; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 301; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 302; X86-SSE2-NEXT: pand %xmm2, %xmm1 303; X86-SSE2-NEXT: pandn %xmm0, %xmm2 304; X86-SSE2-NEXT: por %xmm1, %xmm2 305; X86-SSE2-NEXT: movd %xmm2, %eax 306; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 307; X86-SSE2-NEXT: retl 308; 309; X86-SSE42-LABEL: test_reduce_v16i8: 310; X86-SSE42: ## %bb.0: 311; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 312; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 313; X86-SSE42-NEXT: psrlw $8, %xmm1 314; X86-SSE42-NEXT: pminub %xmm0, %xmm1 315; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 316; X86-SSE42-NEXT: movd %xmm0, %eax 317; X86-SSE42-NEXT: addb $-128, %al 318; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 319; X86-SSE42-NEXT: retl 320; 321; X86-AVX-LABEL: test_reduce_v16i8: 322; X86-AVX: ## %bb.0: 323; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 324; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 325; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 326; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 327; X86-AVX-NEXT: vmovd %xmm0, %eax 328; X86-AVX-NEXT: addb $-128, %al 329; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 330; X86-AVX-NEXT: retl 331; 332; X64-SSE2-LABEL: test_reduce_v16i8: 333; X64-SSE2: ## %bb.0: 334; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 335; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 336; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 337; X64-SSE2-NEXT: pand 
%xmm2, %xmm0 338; X64-SSE2-NEXT: pandn %xmm1, %xmm2 339; X64-SSE2-NEXT: por %xmm0, %xmm2 340; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 341; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 342; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 343; X64-SSE2-NEXT: pand %xmm1, %xmm2 344; X64-SSE2-NEXT: pandn %xmm0, %xmm1 345; X64-SSE2-NEXT: por %xmm2, %xmm1 346; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 347; X64-SSE2-NEXT: psrld $16, %xmm0 348; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 349; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 350; X64-SSE2-NEXT: pand %xmm2, %xmm1 351; X64-SSE2-NEXT: pandn %xmm0, %xmm2 352; X64-SSE2-NEXT: por %xmm1, %xmm2 353; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 354; X64-SSE2-NEXT: psrlw $8, %xmm0 355; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 356; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 357; X64-SSE2-NEXT: pand %xmm1, %xmm2 358; X64-SSE2-NEXT: pandn %xmm0, %xmm1 359; X64-SSE2-NEXT: por %xmm2, %xmm1 360; X64-SSE2-NEXT: movd %xmm1, %eax 361; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 362; X64-SSE2-NEXT: retq 363; 364; X64-SSE42-LABEL: test_reduce_v16i8: 365; X64-SSE42: ## %bb.0: 366; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 367; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 368; X64-SSE42-NEXT: psrlw $8, %xmm1 369; X64-SSE42-NEXT: pminub %xmm0, %xmm1 370; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 371; X64-SSE42-NEXT: movd %xmm0, %eax 372; X64-SSE42-NEXT: addb $-128, %al 373; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 374; X64-SSE42-NEXT: retq 375; 376; X64-AVX1OR2-LABEL: test_reduce_v16i8: 377; X64-AVX1OR2: ## %bb.0: 378; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 379; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1 380; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0 381; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 382; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 383; X64-AVX1OR2-NEXT: addb $-128, %al 384; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax 385; X64-AVX1OR2-NEXT: retq 386; 387; X64-AVX512-LABEL: test_reduce_v16i8: 388; X64-AVX512: 
## %bb.0: 389; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 390; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 391; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 392; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 393; X64-AVX512-NEXT: vmovd %xmm0, %eax 394; X64-AVX512-NEXT: addb $-128, %al 395; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 396; X64-AVX512-NEXT: retq 397 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 398 %2 = icmp slt <16 x i8> %a0, %1 399 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1 400 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 401 %5 = icmp slt <16 x i8> %3, %4 402 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 403 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 404 %8 = icmp slt <16 x i8> %6, %7 405 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7 406 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 407 %11 = icmp slt <16 x i8> %9, %10 408 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10 409 %13 = extractelement <16 x i8> %12, i32 0 410 ret i8 %13 411} 412 413; 414; 256-bit Vectors 415; 416 417define i64 @test_reduce_v4i64(<4 x i64> %a0) { 418; X86-SSE2-LABEL: test_reduce_v4i64: 419; X86-SSE2: ## %bb.0: 420; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 421; 
X86-SSE2-NEXT: movdqa %xmm0, %xmm3 422; X86-SSE2-NEXT: pxor %xmm2, %xmm3 423; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 424; X86-SSE2-NEXT: pxor %xmm2, %xmm4 425; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 426; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 427; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 428; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 429; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 430; X86-SSE2-NEXT: pand %xmm6, %xmm4 431; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 432; X86-SSE2-NEXT: por %xmm4, %xmm3 433; X86-SSE2-NEXT: pand %xmm3, %xmm0 434; X86-SSE2-NEXT: pandn %xmm1, %xmm3 435; X86-SSE2-NEXT: por %xmm0, %xmm3 436; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 437; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 438; X86-SSE2-NEXT: pxor %xmm2, %xmm1 439; X86-SSE2-NEXT: pxor %xmm0, %xmm2 440; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 441; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm4 442; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 443; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 444; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 445; X86-SSE2-NEXT: pand %xmm5, %xmm1 446; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 447; X86-SSE2-NEXT: por %xmm1, %xmm2 448; X86-SSE2-NEXT: pand %xmm2, %xmm3 449; X86-SSE2-NEXT: pandn %xmm0, %xmm2 450; X86-SSE2-NEXT: por %xmm3, %xmm2 451; X86-SSE2-NEXT: movd %xmm2, %eax 452; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 453; X86-SSE2-NEXT: movd %xmm0, %edx 454; X86-SSE2-NEXT: retl 455; 456; X86-SSE42-LABEL: test_reduce_v4i64: 457; X86-SSE42: ## %bb.0: 458; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 459; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 460; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 461; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 462; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 463; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 464; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 465; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 466; X86-SSE42-NEXT: movd %xmm2, %eax 467; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 468; X86-SSE42-NEXT: retl 469; 470; 
X86-AVX1-LABEL: test_reduce_v4i64: 471; X86-AVX1: ## %bb.0: 472; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 473; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 474; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 475; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 476; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 477; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 478; X86-AVX1-NEXT: vmovd %xmm0, %eax 479; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 480; X86-AVX1-NEXT: vzeroupper 481; X86-AVX1-NEXT: retl 482; 483; X86-AVX2-LABEL: test_reduce_v4i64: 484; X86-AVX2: ## %bb.0: 485; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 486; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 487; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 488; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 489; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 490; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 491; X86-AVX2-NEXT: vmovd %xmm0, %eax 492; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 493; X86-AVX2-NEXT: vzeroupper 494; X86-AVX2-NEXT: retl 495; 496; X64-SSE2-LABEL: test_reduce_v4i64: 497; X64-SSE2: ## %bb.0: 498; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 499; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 500; X64-SSE2-NEXT: pxor %xmm2, %xmm3 501; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 502; X64-SSE2-NEXT: pxor %xmm2, %xmm4 503; X64-SSE2-NEXT: movdqa %xmm4, %xmm5 504; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 505; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 506; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 507; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 508; X64-SSE2-NEXT: pand %xmm6, %xmm3 509; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 510; X64-SSE2-NEXT: por %xmm3, %xmm4 511; X64-SSE2-NEXT: pand %xmm4, %xmm0 512; X64-SSE2-NEXT: pandn %xmm1, %xmm4 513; X64-SSE2-NEXT: por %xmm0, %xmm4 514; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] 515; X64-SSE2-NEXT: movdqa %xmm4, %xmm1 516; X64-SSE2-NEXT: pxor %xmm2, %xmm1 517; X64-SSE2-NEXT: pxor %xmm0, %xmm2 518; 
X64-SSE2-NEXT: movdqa %xmm2, %xmm3 519; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 520; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 521; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 522; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 523; X64-SSE2-NEXT: pand %xmm5, %xmm1 524; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 525; X64-SSE2-NEXT: por %xmm1, %xmm2 526; X64-SSE2-NEXT: pand %xmm2, %xmm4 527; X64-SSE2-NEXT: pandn %xmm0, %xmm2 528; X64-SSE2-NEXT: por %xmm4, %xmm2 529; X64-SSE2-NEXT: movq %xmm2, %rax 530; X64-SSE2-NEXT: retq 531; 532; X64-SSE42-LABEL: test_reduce_v4i64: 533; X64-SSE42: ## %bb.0: 534; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 535; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 536; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 537; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 538; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 539; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 540; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 541; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 542; X64-SSE42-NEXT: movq %xmm2, %rax 543; X64-SSE42-NEXT: retq 544; 545; X64-AVX1-LABEL: test_reduce_v4i64: 546; X64-AVX1: ## %bb.0: 547; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 548; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 549; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 550; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 551; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 552; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 553; X64-AVX1-NEXT: vmovq %xmm0, %rax 554; X64-AVX1-NEXT: vzeroupper 555; X64-AVX1-NEXT: retq 556; 557; X64-AVX2-LABEL: test_reduce_v4i64: 558; X64-AVX2: ## %bb.0: 559; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 560; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 561; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 562; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 563; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 564; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 565; X64-AVX2-NEXT: vmovq %xmm0, %rax 566; X64-AVX2-NEXT: vzeroupper 567; 
X64-AVX2-NEXT: retq 568; 569; X64-AVX512-LABEL: test_reduce_v4i64: 570; X64-AVX512: ## %bb.0: 571; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 572; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 573; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 574; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 575; X64-AVX512-NEXT: vmovq %xmm0, %rax 576; X64-AVX512-NEXT: vzeroupper 577; X64-AVX512-NEXT: retq 578 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 579 %2 = icmp slt <4 x i64> %a0, %1 580 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1 581 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 582 %5 = icmp slt <4 x i64> %3, %4 583 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4 584 %7 = extractelement <4 x i64> %6, i32 0 585 ret i64 %7 586} 587 588define i32 @test_reduce_v8i32(<8 x i32> %a0) { 589; X86-SSE2-LABEL: test_reduce_v8i32: 590; X86-SSE2: ## %bb.0: 591; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 592; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 593; X86-SSE2-NEXT: pand %xmm2, %xmm0 594; X86-SSE2-NEXT: pandn %xmm1, %xmm2 595; X86-SSE2-NEXT: por %xmm0, %xmm2 596; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 597; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 598; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 599; X86-SSE2-NEXT: pand %xmm1, %xmm2 600; X86-SSE2-NEXT: pandn %xmm0, %xmm1 601; X86-SSE2-NEXT: por %xmm2, %xmm1 602; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 603; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 604; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 605; X86-SSE2-NEXT: pand %xmm2, %xmm1 606; X86-SSE2-NEXT: pandn %xmm0, %xmm2 607; X86-SSE2-NEXT: por %xmm1, %xmm2 608; X86-SSE2-NEXT: movd %xmm2, %eax 609; X86-SSE2-NEXT: retl 610; 611; X86-SSE42-LABEL: test_reduce_v8i32: 612; X86-SSE42: ## %bb.0: 613; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 614; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 615; X86-SSE42-NEXT: pminsd %xmm0, %xmm1 616; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[1,1,1,1] 617; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 618; X86-SSE42-NEXT: movd %xmm0, %eax 619; X86-SSE42-NEXT: retl 620; 621; X86-AVX1-LABEL: test_reduce_v8i32: 622; X86-AVX1: ## %bb.0: 623; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 624; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 625; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 626; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 627; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 628; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 629; X86-AVX1-NEXT: vmovd %xmm0, %eax 630; X86-AVX1-NEXT: vzeroupper 631; X86-AVX1-NEXT: retl 632; 633; X86-AVX2-LABEL: test_reduce_v8i32: 634; X86-AVX2: ## %bb.0: 635; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 636; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 637; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 638; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 639; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 640; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 641; X86-AVX2-NEXT: vmovd %xmm0, %eax 642; X86-AVX2-NEXT: vzeroupper 643; X86-AVX2-NEXT: retl 644; 645; X64-SSE2-LABEL: test_reduce_v8i32: 646; X64-SSE2: ## %bb.0: 647; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 648; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 649; X64-SSE2-NEXT: pand %xmm2, %xmm0 650; X64-SSE2-NEXT: pandn %xmm1, %xmm2 651; X64-SSE2-NEXT: por %xmm0, %xmm2 652; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 653; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 654; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 655; X64-SSE2-NEXT: pand %xmm1, %xmm2 656; X64-SSE2-NEXT: pandn %xmm0, %xmm1 657; X64-SSE2-NEXT: por %xmm2, %xmm1 658; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 659; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 660; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 661; X64-SSE2-NEXT: pand %xmm2, %xmm1 662; X64-SSE2-NEXT: pandn %xmm0, %xmm2 663; X64-SSE2-NEXT: por %xmm1, %xmm2 664; X64-SSE2-NEXT: movd %xmm2, %eax 665; X64-SSE2-NEXT: retq 666; 667; X64-SSE42-LABEL: test_reduce_v8i32: 668; X64-SSE42: ## %bb.0: 669; X64-SSE42-NEXT: pminsd 
%xmm1, %xmm0 670; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 671; X64-SSE42-NEXT: pminsd %xmm0, %xmm1 672; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 673; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 674; X64-SSE42-NEXT: movd %xmm0, %eax 675; X64-SSE42-NEXT: retq 676; 677; X64-AVX1-LABEL: test_reduce_v8i32: 678; X64-AVX1: ## %bb.0: 679; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 680; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 681; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 682; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 683; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 684; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 685; X64-AVX1-NEXT: vmovd %xmm0, %eax 686; X64-AVX1-NEXT: vzeroupper 687; X64-AVX1-NEXT: retq 688; 689; X64-AVX2-LABEL: test_reduce_v8i32: 690; X64-AVX2: ## %bb.0: 691; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 692; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 693; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 694; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 695; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 696; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 697; X64-AVX2-NEXT: vmovd %xmm0, %eax 698; X64-AVX2-NEXT: vzeroupper 699; X64-AVX2-NEXT: retq 700; 701; X64-AVX512-LABEL: test_reduce_v8i32: 702; X64-AVX512: ## %bb.0: 703; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 704; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 705; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 706; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 707; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 708; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 709; X64-AVX512-NEXT: vmovd %xmm0, %eax 710; X64-AVX512-NEXT: vzeroupper 711; X64-AVX512-NEXT: retq 712 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 713 %2 = icmp slt <8 x i32> %a0, %1 714 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1 715 %4 = shufflevector <8 x i32> %3, <8 x i32> 
undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 716 %5 = icmp slt <8 x i32> %3, %4 717 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 718 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 719 %8 = icmp slt <8 x i32> %6, %7 720 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7 721 %10 = extractelement <8 x i32> %9, i32 0 722 ret i32 %10 723} 724 725define i16 @test_reduce_v16i16(<16 x i16> %a0) { 726; X86-SSE2-LABEL: test_reduce_v16i16: 727; X86-SSE2: ## %bb.0: 728; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 729; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 730; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 731; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 732; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 733; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 734; X86-SSE2-NEXT: psrld $16, %xmm1 735; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 736; X86-SSE2-NEXT: movd %xmm1, %eax 737; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 738; X86-SSE2-NEXT: retl 739; 740; X86-SSE42-LABEL: test_reduce_v16i16: 741; X86-SSE42: ## %bb.0: 742; X86-SSE42-NEXT: pminsw %xmm1, %xmm0 743; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 744; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 745; X86-SSE42-NEXT: movd %xmm0, %eax 746; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 747; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 748; X86-SSE42-NEXT: retl 749; 750; X86-AVX1-LABEL: test_reduce_v16i16: 751; X86-AVX1: ## %bb.0: 752; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 753; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 754; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 755; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 756; X86-AVX1-NEXT: vmovd %xmm0, %eax 757; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 758; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 759; X86-AVX1-NEXT: vzeroupper 760; X86-AVX1-NEXT: retl 761; 762; X86-AVX2-LABEL: 
test_reduce_v16i16: 763; X86-AVX2: ## %bb.0: 764; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 765; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 766; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 767; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 768; X86-AVX2-NEXT: vmovd %xmm0, %eax 769; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 770; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 771; X86-AVX2-NEXT: vzeroupper 772; X86-AVX2-NEXT: retl 773; 774; X64-SSE2-LABEL: test_reduce_v16i16: 775; X64-SSE2: ## %bb.0: 776; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 777; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 778; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 779; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 780; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 781; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 782; X64-SSE2-NEXT: psrld $16, %xmm1 783; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 784; X64-SSE2-NEXT: movd %xmm1, %eax 785; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 786; X64-SSE2-NEXT: retq 787; 788; X64-SSE42-LABEL: test_reduce_v16i16: 789; X64-SSE42: ## %bb.0: 790; X64-SSE42-NEXT: pminsw %xmm1, %xmm0 791; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 792; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 793; X64-SSE42-NEXT: movd %xmm0, %eax 794; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 795; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 796; X64-SSE42-NEXT: retq 797; 798; X64-AVX1-LABEL: test_reduce_v16i16: 799; X64-AVX1: ## %bb.0: 800; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 801; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 802; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 803; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 804; X64-AVX1-NEXT: vmovd %xmm0, %eax 805; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 806; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 807; X64-AVX1-NEXT: vzeroupper 808; X64-AVX1-NEXT: retq 809; 810; X64-AVX2-LABEL: test_reduce_v16i16: 811; X64-AVX2: ## %bb.0: 812; X64-AVX2-NEXT: 
vextracti128 $1, %ymm0, %xmm1 813; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 814; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 815; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 816; X64-AVX2-NEXT: vmovd %xmm0, %eax 817; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 818; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 819; X64-AVX2-NEXT: vzeroupper 820; X64-AVX2-NEXT: retq 821; 822; X64-AVX512-LABEL: test_reduce_v16i16: 823; X64-AVX512: ## %bb.0: 824; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 825; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 826; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 827; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 828; X64-AVX512-NEXT: vmovd %xmm0, %eax 829; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 830; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 831; X64-AVX512-NEXT: vzeroupper 832; X64-AVX512-NEXT: retq 833 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 834 %2 = icmp slt <16 x i16> %a0, %1 835 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 836 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 837 %5 = icmp slt <16 x i16> %3, %4 838 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 839 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 840 %8 = icmp slt <16 x i16> %6, %7 841 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 842 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 843 %11 = icmp slt <16 x i16> %9, %10 844 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 845 %13 = extractelement <16 x i16> %12, i32 0 846 ret i16 %13 847} 848 849define i8 @test_reduce_v32i8(<32 x i8> %a0) { 850; X86-SSE2-LABEL: test_reduce_v32i8: 851; X86-SSE2: ## %bb.0: 852; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 853; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 854; X86-SSE2-NEXT: pand %xmm2, %xmm0 855; X86-SSE2-NEXT: pandn %xmm1, %xmm2 856; X86-SSE2-NEXT: por %xmm0, %xmm2 857; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 858; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 859; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 860; X86-SSE2-NEXT: pand %xmm1, %xmm2 861; X86-SSE2-NEXT: pandn %xmm0, %xmm1 862; X86-SSE2-NEXT: por %xmm2, %xmm1 863; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 864; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 865; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 866; X86-SSE2-NEXT: pand %xmm0, %xmm1 867; X86-SSE2-NEXT: pandn %xmm2, %xmm0 868; X86-SSE2-NEXT: por %xmm1, %xmm0 869; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 870; X86-SSE2-NEXT: psrld $16, %xmm2 871; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 872; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 873; X86-SSE2-NEXT: pand %xmm1, %xmm0 874; X86-SSE2-NEXT: pandn %xmm2, %xmm1 875; X86-SSE2-NEXT: por %xmm0, %xmm1 876; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 877; X86-SSE2-NEXT: psrlw $8, %xmm0 878; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 879; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 880; X86-SSE2-NEXT: pand %xmm2, %xmm1 881; X86-SSE2-NEXT: pandn %xmm0, %xmm2 882; X86-SSE2-NEXT: por %xmm1, %xmm2 883; X86-SSE2-NEXT: movd %xmm2, %eax 884; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 885; X86-SSE2-NEXT: retl 886; 887; X86-SSE42-LABEL: test_reduce_v32i8: 888; X86-SSE42: ## %bb.0: 889; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 890; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 891; X86-SSE42-NEXT: movdqa %xmm0, 
%xmm1 892; X86-SSE42-NEXT: psrlw $8, %xmm1 893; X86-SSE42-NEXT: pminub %xmm0, %xmm1 894; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 895; X86-SSE42-NEXT: movd %xmm0, %eax 896; X86-SSE42-NEXT: addb $-128, %al 897; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 898; X86-SSE42-NEXT: retl 899; 900; X86-AVX1-LABEL: test_reduce_v32i8: 901; X86-AVX1: ## %bb.0: 902; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 903; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 904; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 905; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 906; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 907; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 908; X86-AVX1-NEXT: vmovd %xmm0, %eax 909; X86-AVX1-NEXT: addb $-128, %al 910; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 911; X86-AVX1-NEXT: vzeroupper 912; X86-AVX1-NEXT: retl 913; 914; X86-AVX2-LABEL: test_reduce_v32i8: 915; X86-AVX2: ## %bb.0: 916; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 917; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 918; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 919; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 920; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 921; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 922; X86-AVX2-NEXT: vmovd %xmm0, %eax 923; X86-AVX2-NEXT: addb $-128, %al 924; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 925; X86-AVX2-NEXT: vzeroupper 926; X86-AVX2-NEXT: retl 927; 928; X64-SSE2-LABEL: test_reduce_v32i8: 929; X64-SSE2: ## %bb.0: 930; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 931; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 932; X64-SSE2-NEXT: pand %xmm2, %xmm0 933; X64-SSE2-NEXT: pandn %xmm1, %xmm2 934; X64-SSE2-NEXT: por %xmm0, %xmm2 935; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 936; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 937; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 938; X64-SSE2-NEXT: pand %xmm1, %xmm2 939; X64-SSE2-NEXT: pandn %xmm0, %xmm1 940; X64-SSE2-NEXT: por %xmm2, %xmm1 941; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 942; 
X64-SSE2-NEXT: movdqa %xmm0, %xmm2 943; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 944; X64-SSE2-NEXT: pand %xmm2, %xmm1 945; X64-SSE2-NEXT: pandn %xmm0, %xmm2 946; X64-SSE2-NEXT: por %xmm1, %xmm2 947; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 948; X64-SSE2-NEXT: psrld $16, %xmm0 949; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 950; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 951; X64-SSE2-NEXT: pand %xmm1, %xmm2 952; X64-SSE2-NEXT: pandn %xmm0, %xmm1 953; X64-SSE2-NEXT: por %xmm2, %xmm1 954; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 955; X64-SSE2-NEXT: psrlw $8, %xmm0 956; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 957; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 958; X64-SSE2-NEXT: pand %xmm2, %xmm1 959; X64-SSE2-NEXT: pandn %xmm0, %xmm2 960; X64-SSE2-NEXT: por %xmm1, %xmm2 961; X64-SSE2-NEXT: movd %xmm2, %eax 962; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 963; X64-SSE2-NEXT: retq 964; 965; X64-SSE42-LABEL: test_reduce_v32i8: 966; X64-SSE42: ## %bb.0: 967; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 968; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 969; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 970; X64-SSE42-NEXT: psrlw $8, %xmm1 971; X64-SSE42-NEXT: pminub %xmm0, %xmm1 972; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 973; X64-SSE42-NEXT: movd %xmm0, %eax 974; X64-SSE42-NEXT: addb $-128, %al 975; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 976; X64-SSE42-NEXT: retq 977; 978; X64-AVX1-LABEL: test_reduce_v32i8: 979; X64-AVX1: ## %bb.0: 980; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 981; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 982; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 983; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 984; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 985; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 986; X64-AVX1-NEXT: vmovd %xmm0, %eax 987; X64-AVX1-NEXT: addb $-128, %al 988; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 989; X64-AVX1-NEXT: vzeroupper 990; X64-AVX1-NEXT: retq 991; 992; X64-AVX2-LABEL: test_reduce_v32i8: 993; X64-AVX2: ## %bb.0: 994; 
X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 995; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 996; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 997; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 998; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 999; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1000; X64-AVX2-NEXT: vmovd %xmm0, %eax 1001; X64-AVX2-NEXT: addb $-128, %al 1002; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1003; X64-AVX2-NEXT: vzeroupper 1004; X64-AVX2-NEXT: retq 1005; 1006; X64-AVX512-LABEL: test_reduce_v32i8: 1007; X64-AVX512: ## %bb.0: 1008; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1009; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1010; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1011; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1012; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1013; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1014; X64-AVX512-NEXT: vmovd %xmm0, %eax 1015; X64-AVX512-NEXT: addb $-128, %al 1016; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1017; X64-AVX512-NEXT: vzeroupper 1018; X64-AVX512-NEXT: retq 1019 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1020 %2 = icmp slt <32 x i8> %a0, %1 1021 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1022 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1023 %5 = icmp slt <32 x i8> 
%3, %4 1024 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1025 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1026 %8 = icmp slt <32 x i8> %6, %7 1027 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1028 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1029 %11 = icmp slt <32 x i8> %9, %10 1030 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1031 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1032 %14 = icmp slt <32 x i8> %12, %13 1033 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1034 %16 = extractelement <32 x i8> %15, i32 0 1035 ret i8 %16 1036} 1037 1038; 1039; 512-bit Vectors 1040; 1041 1042define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1043; X86-SSE2-LABEL: test_reduce_v8i64: 1044; X86-SSE2: ## %bb.0: 1045; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1046; X86-SSE2-NEXT: movdqa %xmm1, %xmm5 1047; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1048; X86-SSE2-NEXT: 
movdqa %xmm3, %xmm6 1049; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1050; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1051; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1052; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1053; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1054; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1055; X86-SSE2-NEXT: pand %xmm5, %xmm6 1056; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1057; X86-SSE2-NEXT: por %xmm6, %xmm5 1058; X86-SSE2-NEXT: pand %xmm5, %xmm1 1059; X86-SSE2-NEXT: pandn %xmm3, %xmm5 1060; X86-SSE2-NEXT: por %xmm1, %xmm5 1061; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1062; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1063; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1064; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1065; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1066; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1067; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1068; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2] 1069; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1070; X86-SSE2-NEXT: pand %xmm1, %xmm3 1071; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] 1072; X86-SSE2-NEXT: por %xmm3, %xmm1 1073; X86-SSE2-NEXT: pand %xmm1, %xmm0 1074; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1075; X86-SSE2-NEXT: por %xmm0, %xmm1 1076; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1077; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1078; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1079; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1080; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1081; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1082; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1083; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1084; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1085; X86-SSE2-NEXT: pand %xmm0, %xmm2 1086; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 1087; X86-SSE2-NEXT: por %xmm2, %xmm0 1088; X86-SSE2-NEXT: pand %xmm0, %xmm1 1089; X86-SSE2-NEXT: pandn %xmm5, %xmm0 1090; X86-SSE2-NEXT: por %xmm1, %xmm0 1091; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1092; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1093; X86-SSE2-NEXT: pxor %xmm4, %xmm2 
1094; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1095; X86-SSE2-NEXT: movdqa %xmm4, %xmm3 1096; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1097; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1098; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1099; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1100; X86-SSE2-NEXT: pand %xmm2, %xmm4 1101; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1102; X86-SSE2-NEXT: por %xmm4, %xmm2 1103; X86-SSE2-NEXT: pand %xmm2, %xmm0 1104; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1105; X86-SSE2-NEXT: por %xmm0, %xmm2 1106; X86-SSE2-NEXT: movd %xmm2, %eax 1107; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1108; X86-SSE2-NEXT: movd %xmm0, %edx 1109; X86-SSE2-NEXT: retl 1110; 1111; X86-SSE42-LABEL: test_reduce_v8i64: 1112; X86-SSE42: ## %bb.0: 1113; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1114; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 1115; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1116; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1117; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1118; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1119; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1120; X86-SSE42-NEXT: movapd %xmm3, %xmm0 1121; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1122; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1123; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1124; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1125; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1126; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1127; X86-SSE42-NEXT: movd %xmm1, %eax 1128; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1129; X86-SSE42-NEXT: retl 1130; 1131; X86-AVX1-LABEL: test_reduce_v8i64: 1132; X86-AVX1: ## %bb.0: 1133; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1134; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 1135; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1136; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1137; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1138; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1139; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1 1140; X86-AVX1-NEXT: vblendvpd 
%xmm1, %xmm2, %xmm0, %xmm0 1141; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1142; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1143; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1144; X86-AVX1-NEXT: vmovd %xmm0, %eax 1145; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1146; X86-AVX1-NEXT: vzeroupper 1147; X86-AVX1-NEXT: retl 1148; 1149; X86-AVX2-LABEL: test_reduce_v8i64: 1150; X86-AVX2: ## %bb.0: 1151; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1152; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1153; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1154; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1155; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1156; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1157; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1158; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1159; X86-AVX2-NEXT: vmovd %xmm0, %eax 1160; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1161; X86-AVX2-NEXT: vzeroupper 1162; X86-AVX2-NEXT: retl 1163; 1164; X64-SSE2-LABEL: test_reduce_v8i64: 1165; X64-SSE2: ## %bb.0: 1166; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1167; X64-SSE2-NEXT: movdqa %xmm1, %xmm5 1168; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1169; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1170; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1171; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1172; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1173; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1174; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1175; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1176; X64-SSE2-NEXT: pand %xmm8, %xmm6 1177; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1178; X64-SSE2-NEXT: por %xmm6, %xmm5 1179; X64-SSE2-NEXT: pand %xmm5, %xmm1 1180; X64-SSE2-NEXT: pandn %xmm3, %xmm5 1181; X64-SSE2-NEXT: por %xmm1, %xmm5 1182; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1183; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1184; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1185; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1186; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1187; 
X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1188; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1189; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1190; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1191; X64-SSE2-NEXT: pand %xmm7, %xmm1 1192; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1193; X64-SSE2-NEXT: por %xmm1, %xmm3 1194; X64-SSE2-NEXT: pand %xmm3, %xmm0 1195; X64-SSE2-NEXT: pandn %xmm2, %xmm3 1196; X64-SSE2-NEXT: por %xmm0, %xmm3 1197; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1198; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1199; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1200; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1201; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1202; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1203; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2] 1204; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1205; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1206; X64-SSE2-NEXT: pand %xmm6, %xmm0 1207; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1208; X64-SSE2-NEXT: por %xmm0, %xmm1 1209; X64-SSE2-NEXT: pand %xmm1, %xmm3 1210; X64-SSE2-NEXT: pandn %xmm5, %xmm1 1211; X64-SSE2-NEXT: por %xmm3, %xmm1 1212; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1213; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1214; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1215; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1216; X64-SSE2-NEXT: movdqa %xmm4, %xmm3 1217; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1218; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1219; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1220; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1221; X64-SSE2-NEXT: pand %xmm5, %xmm2 1222; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1223; X64-SSE2-NEXT: por %xmm2, %xmm3 1224; X64-SSE2-NEXT: pand %xmm3, %xmm1 1225; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1226; X64-SSE2-NEXT: por %xmm1, %xmm3 1227; X64-SSE2-NEXT: movq %xmm3, %rax 1228; X64-SSE2-NEXT: retq 1229; 1230; X64-SSE42-LABEL: test_reduce_v8i64: 1231; X64-SSE42: ## %bb.0: 1232; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1233; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 1234; 
X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1235; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1236; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1237; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1238; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1239; X64-SSE42-NEXT: movapd %xmm3, %xmm0 1240; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1241; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1242; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1243; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1244; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1245; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1246; X64-SSE42-NEXT: movq %xmm1, %rax 1247; X64-SSE42-NEXT: retq 1248; 1249; X64-AVX1-LABEL: test_reduce_v8i64: 1250; X64-AVX1: ## %bb.0: 1251; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1252; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 1253; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1254; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1255; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1256; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1257; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1 1258; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0 1259; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1260; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1261; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1262; X64-AVX1-NEXT: vmovq %xmm0, %rax 1263; X64-AVX1-NEXT: vzeroupper 1264; X64-AVX1-NEXT: retq 1265; 1266; X64-AVX2-LABEL: test_reduce_v8i64: 1267; X64-AVX2: ## %bb.0: 1268; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1269; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1270; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1271; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1272; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1273; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1274; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1275; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1276; X64-AVX2-NEXT: vmovq %xmm0, %rax 1277; X64-AVX2-NEXT: vzeroupper 1278; X64-AVX2-NEXT: retq 1279; 1280; 
X64-AVX512-LABEL: test_reduce_v8i64: 1281; X64-AVX512: ## %bb.0: 1282; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1283; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 1284; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1285; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 1286; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1287; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 1288; X64-AVX512-NEXT: vmovq %xmm0, %rax 1289; X64-AVX512-NEXT: vzeroupper 1290; X64-AVX512-NEXT: retq 1291 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1292 %2 = icmp slt <8 x i64> %a0, %1 1293 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1294 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1295 %5 = icmp slt <8 x i64> %3, %4 1296 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1297 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1298 %8 = icmp slt <8 x i64> %6, %7 1299 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1300 %10 = extractelement <8 x i64> %9, i32 0 1301 ret i64 %10 1302} 1303 1304define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1305; X86-SSE2-LABEL: test_reduce_v16i32: 1306; X86-SSE2: ## %bb.0: 1307; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 1308; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1309; X86-SSE2-NEXT: pand %xmm4, %xmm0 1310; X86-SSE2-NEXT: pandn %xmm2, %xmm4 1311; X86-SSE2-NEXT: por %xmm0, %xmm4 1312; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1313; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1314; X86-SSE2-NEXT: pand %xmm0, %xmm1 1315; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1316; X86-SSE2-NEXT: por %xmm1, %xmm0 1317; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1318; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1319; X86-SSE2-NEXT: pand %xmm1, %xmm4 1320; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1321; X86-SSE2-NEXT: por %xmm4, %xmm1 
1322; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1323; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1324; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1325; X86-SSE2-NEXT: pand %xmm2, %xmm1 1326; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1327; X86-SSE2-NEXT: por %xmm1, %xmm2 1328; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1329; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1330; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1331; X86-SSE2-NEXT: pand %xmm1, %xmm2 1332; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1333; X86-SSE2-NEXT: por %xmm2, %xmm1 1334; X86-SSE2-NEXT: movd %xmm1, %eax 1335; X86-SSE2-NEXT: retl 1336; 1337; X86-SSE42-LABEL: test_reduce_v16i32: 1338; X86-SSE42: ## %bb.0: 1339; X86-SSE42-NEXT: pminsd %xmm3, %xmm1 1340; X86-SSE42-NEXT: pminsd %xmm2, %xmm0 1341; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 1342; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1343; X86-SSE42-NEXT: pminsd %xmm0, %xmm1 1344; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1345; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 1346; X86-SSE42-NEXT: movd %xmm0, %eax 1347; X86-SSE42-NEXT: retl 1348; 1349; X86-AVX1-LABEL: test_reduce_v16i32: 1350; X86-AVX1: ## %bb.0: 1351; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1352; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1353; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1354; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1355; X86-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0 1356; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1357; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1358; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1359; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1360; X86-AVX1-NEXT: vmovd %xmm0, %eax 1361; X86-AVX1-NEXT: vzeroupper 1362; X86-AVX1-NEXT: retl 1363; 1364; X86-AVX2-LABEL: test_reduce_v16i32: 1365; X86-AVX2: ## %bb.0: 1366; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1367; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1368; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1369; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1370; X86-AVX2-NEXT: vpminsd 
%xmm1, %xmm0, %xmm0 1371; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1372; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1373; X86-AVX2-NEXT: vmovd %xmm0, %eax 1374; X86-AVX2-NEXT: vzeroupper 1375; X86-AVX2-NEXT: retl 1376; 1377; X64-SSE2-LABEL: test_reduce_v16i32: 1378; X64-SSE2: ## %bb.0: 1379; X64-SSE2-NEXT: movdqa %xmm2, %xmm4 1380; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1381; X64-SSE2-NEXT: pand %xmm4, %xmm0 1382; X64-SSE2-NEXT: pandn %xmm2, %xmm4 1383; X64-SSE2-NEXT: por %xmm0, %xmm4 1384; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1385; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1386; X64-SSE2-NEXT: pand %xmm0, %xmm1 1387; X64-SSE2-NEXT: pandn %xmm3, %xmm0 1388; X64-SSE2-NEXT: por %xmm1, %xmm0 1389; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1390; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1391; X64-SSE2-NEXT: pand %xmm1, %xmm4 1392; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1393; X64-SSE2-NEXT: por %xmm4, %xmm1 1394; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1395; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1396; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1397; X64-SSE2-NEXT: pand %xmm2, %xmm1 1398; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1399; X64-SSE2-NEXT: por %xmm1, %xmm2 1400; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1401; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1402; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1403; X64-SSE2-NEXT: pand %xmm1, %xmm2 1404; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1405; X64-SSE2-NEXT: por %xmm2, %xmm1 1406; X64-SSE2-NEXT: movd %xmm1, %eax 1407; X64-SSE2-NEXT: retq 1408; 1409; X64-SSE42-LABEL: test_reduce_v16i32: 1410; X64-SSE42: ## %bb.0: 1411; X64-SSE42-NEXT: pminsd %xmm3, %xmm1 1412; X64-SSE42-NEXT: pminsd %xmm2, %xmm0 1413; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 1414; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1415; X64-SSE42-NEXT: pminsd %xmm0, %xmm1 1416; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1417; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 1418; X64-SSE42-NEXT: movd %xmm0, %eax 1419; X64-SSE42-NEXT: retq 1420; 1421; X64-AVX1-LABEL: test_reduce_v16i32: 
1422; X64-AVX1: ## %bb.0: 1423; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1424; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1425; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1426; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1427; X64-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0 1428; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1429; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1430; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1431; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1432; X64-AVX1-NEXT: vmovd %xmm0, %eax 1433; X64-AVX1-NEXT: vzeroupper 1434; X64-AVX1-NEXT: retq 1435; 1436; X64-AVX2-LABEL: test_reduce_v16i32: 1437; X64-AVX2: ## %bb.0: 1438; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1439; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1440; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1441; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1442; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1443; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1444; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1445; X64-AVX2-NEXT: vmovd %xmm0, %eax 1446; X64-AVX2-NEXT: vzeroupper 1447; X64-AVX2-NEXT: retq 1448; 1449; X64-AVX512-LABEL: test_reduce_v16i32: 1450; X64-AVX512: ## %bb.0: 1451; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1452; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 1453; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1454; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1455; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1456; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1457; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1458; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1459; X64-AVX512-NEXT: vmovd %xmm0, %eax 1460; X64-AVX512-NEXT: vzeroupper 1461; X64-AVX512-NEXT: retq 1462 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1463 %2 = icmp slt <16 x i32> %a0, %1 
1464 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1465 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1466 %5 = icmp slt <16 x i32> %3, %4 1467 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1468 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1469 %8 = icmp slt <16 x i32> %6, %7 1470 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1471 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1472 %11 = icmp slt <16 x i32> %9, %10 1473 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1474 %13 = extractelement <16 x i32> %12, i32 0 1475 ret i32 %13 1476} 1477 1478define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1479; X86-SSE2-LABEL: test_reduce_v32i16: 1480; X86-SSE2: ## %bb.0: 1481; X86-SSE2-NEXT: pminsw %xmm3, %xmm1 1482; X86-SSE2-NEXT: pminsw %xmm2, %xmm0 1483; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1484; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1485; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1486; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1487; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1488; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1489; X86-SSE2-NEXT: psrld $16, %xmm1 1490; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1491; X86-SSE2-NEXT: movd %xmm1, %eax 1492; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1493; X86-SSE2-NEXT: retl 1494; 1495; X86-SSE42-LABEL: test_reduce_v32i16: 1496; X86-SSE42: ## %bb.0: 1497; X86-SSE42-NEXT: pminsw %xmm3, %xmm1 1498; X86-SSE42-NEXT: pminsw %xmm2, %xmm0 1499; 
X86-SSE42-NEXT: pminsw %xmm1, %xmm0 1500; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1501; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1502; X86-SSE42-NEXT: movd %xmm0, %eax 1503; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1504; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1505; X86-SSE42-NEXT: retl 1506; 1507; X86-AVX1-LABEL: test_reduce_v32i16: 1508; X86-AVX1: ## %bb.0: 1509; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1510; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1511; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 1512; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1513; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 1514; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1515; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1516; X86-AVX1-NEXT: vmovd %xmm0, %eax 1517; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 1518; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1519; X86-AVX1-NEXT: vzeroupper 1520; X86-AVX1-NEXT: retl 1521; 1522; X86-AVX2-LABEL: test_reduce_v32i16: 1523; X86-AVX2: ## %bb.0: 1524; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1525; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1526; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1527; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1528; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1529; X86-AVX2-NEXT: vmovd %xmm0, %eax 1530; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 1531; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1532; X86-AVX2-NEXT: vzeroupper 1533; X86-AVX2-NEXT: retl 1534; 1535; X64-SSE2-LABEL: test_reduce_v32i16: 1536; X64-SSE2: ## %bb.0: 1537; X64-SSE2-NEXT: pminsw %xmm3, %xmm1 1538; X64-SSE2-NEXT: pminsw %xmm2, %xmm0 1539; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1540; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1541; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1542; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1543; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1544; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1545; X64-SSE2-NEXT: psrld $16, %xmm1 
1546; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1547; X64-SSE2-NEXT: movd %xmm1, %eax 1548; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1549; X64-SSE2-NEXT: retq 1550; 1551; X64-SSE42-LABEL: test_reduce_v32i16: 1552; X64-SSE42: ## %bb.0: 1553; X64-SSE42-NEXT: pminsw %xmm3, %xmm1 1554; X64-SSE42-NEXT: pminsw %xmm2, %xmm0 1555; X64-SSE42-NEXT: pminsw %xmm1, %xmm0 1556; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1557; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1558; X64-SSE42-NEXT: movd %xmm0, %eax 1559; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1560; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1561; X64-SSE42-NEXT: retq 1562; 1563; X64-AVX1-LABEL: test_reduce_v32i16: 1564; X64-AVX1: ## %bb.0: 1565; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1566; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1567; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 1568; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1569; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 1570; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1571; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1572; X64-AVX1-NEXT: vmovd %xmm0, %eax 1573; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 1574; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1575; X64-AVX1-NEXT: vzeroupper 1576; X64-AVX1-NEXT: retq 1577; 1578; X64-AVX2-LABEL: test_reduce_v32i16: 1579; X64-AVX2: ## %bb.0: 1580; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1581; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1582; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1583; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1584; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1585; X64-AVX2-NEXT: vmovd %xmm0, %eax 1586; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 1587; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1588; X64-AVX2-NEXT: vzeroupper 1589; X64-AVX2-NEXT: retq 1590; 1591; X64-AVX512-LABEL: test_reduce_v32i16: 1592; X64-AVX512: ## %bb.0: 1593; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, 
%ymm1 1594; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1595; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1596; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1597; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1598; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1599; X64-AVX512-NEXT: vmovd %xmm0, %eax 1600; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 1601; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1602; X64-AVX512-NEXT: vzeroupper 1603; X64-AVX512-NEXT: retq 1604 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1605 %2 = icmp slt <32 x i16> %a0, %1 1606 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1607 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1608 %5 = icmp slt <32 x i16> %3, %4 1609 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1610 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1611 %8 = icmp slt <32 x i16> %6, %7 1612 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1613 %10 = 
shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1614 %11 = icmp slt <32 x i16> %9, %10 1615 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1616 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1617 %14 = icmp slt <32 x i16> %12, %13 1618 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1619 %16 = extractelement <32 x i16> %15, i32 0 1620 ret i16 %16 1621} 1622 1623define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1624; X86-SSE2-LABEL: test_reduce_v64i8: 1625; X86-SSE2: ## %bb.0: 1626; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 1627; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm4 1628; X86-SSE2-NEXT: pand %xmm4, %xmm0 1629; X86-SSE2-NEXT: pandn %xmm2, %xmm4 1630; X86-SSE2-NEXT: por %xmm0, %xmm4 1631; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1632; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1633; X86-SSE2-NEXT: pand %xmm0, %xmm1 1634; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1635; X86-SSE2-NEXT: por %xmm1, %xmm0 1636; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1637; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm1 1638; X86-SSE2-NEXT: pand %xmm1, %xmm4 1639; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1640; X86-SSE2-NEXT: por %xmm4, %xmm1 1641; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1642; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1643; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1644; X86-SSE2-NEXT: pand %xmm2, 
%xmm1 1645; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1646; X86-SSE2-NEXT: por %xmm1, %xmm2 1647; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] 1648; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1649; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm0 1650; X86-SSE2-NEXT: pand %xmm0, %xmm2 1651; X86-SSE2-NEXT: pandn %xmm1, %xmm0 1652; X86-SSE2-NEXT: por %xmm2, %xmm0 1653; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1654; X86-SSE2-NEXT: psrld $16, %xmm2 1655; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1656; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1657; X86-SSE2-NEXT: pand %xmm1, %xmm0 1658; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1659; X86-SSE2-NEXT: por %xmm0, %xmm1 1660; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1661; X86-SSE2-NEXT: psrlw $8, %xmm0 1662; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1663; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1664; X86-SSE2-NEXT: pand %xmm2, %xmm1 1665; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1666; X86-SSE2-NEXT: por %xmm1, %xmm2 1667; X86-SSE2-NEXT: movd %xmm2, %eax 1668; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1669; X86-SSE2-NEXT: retl 1670; 1671; X86-SSE42-LABEL: test_reduce_v64i8: 1672; X86-SSE42: ## %bb.0: 1673; X86-SSE42-NEXT: pminsb %xmm3, %xmm1 1674; X86-SSE42-NEXT: pminsb %xmm2, %xmm0 1675; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 1676; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1677; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 1678; X86-SSE42-NEXT: psrlw $8, %xmm1 1679; X86-SSE42-NEXT: pminub %xmm0, %xmm1 1680; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1681; X86-SSE42-NEXT: movd %xmm0, %eax 1682; X86-SSE42-NEXT: addb $-128, %al 1683; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1684; X86-SSE42-NEXT: retl 1685; 1686; X86-AVX1-LABEL: test_reduce_v64i8: 1687; X86-AVX1: ## %bb.0: 1688; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1689; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1690; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 1691; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1692; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 1693; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, 
%xmm0 1694; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1695; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1696; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1697; X86-AVX1-NEXT: vmovd %xmm0, %eax 1698; X86-AVX1-NEXT: addb $-128, %al 1699; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1700; X86-AVX1-NEXT: vzeroupper 1701; X86-AVX1-NEXT: retl 1702; 1703; X86-AVX2-LABEL: test_reduce_v64i8: 1704; X86-AVX2: ## %bb.0: 1705; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1706; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1707; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1708; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1709; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1710; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1711; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1712; X86-AVX2-NEXT: vmovd %xmm0, %eax 1713; X86-AVX2-NEXT: addb $-128, %al 1714; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1715; X86-AVX2-NEXT: vzeroupper 1716; X86-AVX2-NEXT: retl 1717; 1718; X64-SSE2-LABEL: test_reduce_v64i8: 1719; X64-SSE2: ## %bb.0: 1720; X64-SSE2-NEXT: movdqa %xmm2, %xmm4 1721; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm4 1722; X64-SSE2-NEXT: pand %xmm4, %xmm0 1723; X64-SSE2-NEXT: pandn %xmm2, %xmm4 1724; X64-SSE2-NEXT: por %xmm0, %xmm4 1725; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1726; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1727; X64-SSE2-NEXT: pand %xmm0, %xmm1 1728; X64-SSE2-NEXT: pandn %xmm3, %xmm0 1729; X64-SSE2-NEXT: por %xmm1, %xmm0 1730; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1731; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm1 1732; X64-SSE2-NEXT: pand %xmm1, %xmm4 1733; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1734; X64-SSE2-NEXT: por %xmm4, %xmm1 1735; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1736; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1737; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1738; X64-SSE2-NEXT: pand %xmm2, %xmm1 1739; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1740; X64-SSE2-NEXT: por %xmm1, %xmm2 1741; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1742; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1743; 
X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1744; X64-SSE2-NEXT: pand %xmm1, %xmm2 1745; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1746; X64-SSE2-NEXT: por %xmm2, %xmm1 1747; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1748; X64-SSE2-NEXT: psrld $16, %xmm0 1749; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1750; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1751; X64-SSE2-NEXT: pand %xmm2, %xmm1 1752; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1753; X64-SSE2-NEXT: por %xmm1, %xmm2 1754; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1755; X64-SSE2-NEXT: psrlw $8, %xmm0 1756; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1757; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1758; X64-SSE2-NEXT: pand %xmm1, %xmm2 1759; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1760; X64-SSE2-NEXT: por %xmm2, %xmm1 1761; X64-SSE2-NEXT: movd %xmm1, %eax 1762; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1763; X64-SSE2-NEXT: retq 1764; 1765; X64-SSE42-LABEL: test_reduce_v64i8: 1766; X64-SSE42: ## %bb.0: 1767; X64-SSE42-NEXT: pminsb %xmm3, %xmm1 1768; X64-SSE42-NEXT: pminsb %xmm2, %xmm0 1769; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 1770; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1771; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 1772; X64-SSE42-NEXT: psrlw $8, %xmm1 1773; X64-SSE42-NEXT: pminub %xmm0, %xmm1 1774; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1775; X64-SSE42-NEXT: movd %xmm0, %eax 1776; X64-SSE42-NEXT: addb $-128, %al 1777; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1778; X64-SSE42-NEXT: retq 1779; 1780; X64-AVX1-LABEL: test_reduce_v64i8: 1781; X64-AVX1: ## %bb.0: 1782; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1783; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1784; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 1785; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1786; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 1787; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1788; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1789; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1790; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1791; X64-AVX1-NEXT: vmovd 
%xmm0, %eax 1792; X64-AVX1-NEXT: addb $-128, %al 1793; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1794; X64-AVX1-NEXT: vzeroupper 1795; X64-AVX1-NEXT: retq 1796; 1797; X64-AVX2-LABEL: test_reduce_v64i8: 1798; X64-AVX2: ## %bb.0: 1799; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1800; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1801; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1802; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1803; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1804; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1805; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1806; X64-AVX2-NEXT: vmovd %xmm0, %eax 1807; X64-AVX2-NEXT: addb $-128, %al 1808; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1809; X64-AVX2-NEXT: vzeroupper 1810; X64-AVX2-NEXT: retq 1811; 1812; X64-AVX512-LABEL: test_reduce_v64i8: 1813; X64-AVX512: ## %bb.0: 1814; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1815; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1816; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1817; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1818; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1819; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1820; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1821; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1822; X64-AVX512-NEXT: vmovd %xmm0, %eax 1823; X64-AVX512-NEXT: addb $-128, %al 1824; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1825; X64-AVX512-NEXT: vzeroupper 1826; X64-AVX512-NEXT: retq 1827 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1828 %2 = icmp slt <64 x i8> %a0, %1 1829 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1830 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1831 %5 = icmp slt <64 x i8> %3, %4 1832 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1833 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1834 %8 = icmp slt <64 x i8> %6, %7 1835 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1836 %10 = shufflevector <64 x i8> %9, <64 x i8> 
undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1837 %11 = icmp slt <64 x i8> %9, %10 1838 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1839 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1840 %14 = icmp slt <64 x i8> %12, %13 1841 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1842 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1843 %17 = icmp slt <64 x i8> %15, %16 1844 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1845 %19 = extractelement <64 x i8> %18, i32 0 1846 ret i8 %19 1847} 1848 1849; 1850; Partial Vector Reductions 1851; 1852 1853define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1854; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1855; X86-SSE2: ## %bb.0: 1856; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1857; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1858; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1859; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1860; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1861; X86-SSE2-NEXT: psrld $16, %xmm1 1862; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1863; X86-SSE2-NEXT: movd %xmm1, %eax 1864; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1865; X86-SSE2-NEXT: retl 1866; 1867; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 1868; X86-SSE42: ## %bb.0: 1869; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1870; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1871; X86-SSE42-NEXT: movd %xmm0, %eax 1872; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1873; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1874; X86-SSE42-NEXT: retl 1875; 1876; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 1877; X86-AVX: ## %bb.0: 1878; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1879; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1880; X86-AVX-NEXT: vmovd %xmm0, %eax 1881; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 1882; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1883; X86-AVX-NEXT: vzeroupper 1884; 
X86-AVX-NEXT: retl 1885; 1886; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: 1887; X64-SSE2: ## %bb.0: 1888; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1889; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1890; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1891; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1892; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1893; X64-SSE2-NEXT: psrld $16, %xmm1 1894; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1895; X64-SSE2-NEXT: movd %xmm1, %eax 1896; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1897; X64-SSE2-NEXT: retq 1898; 1899; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 1900; X64-SSE42: ## %bb.0: 1901; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1902; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1903; X64-SSE42-NEXT: movd %xmm0, %eax 1904; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1905; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1906; X64-SSE42-NEXT: retq 1907; 1908; X64-AVX1OR2-LABEL: test_reduce_v16i16_v8i16: 1909; X64-AVX1OR2: ## %bb.0: 1910; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1911; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 1912; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 1913; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000 1914; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax 1915; X64-AVX1OR2-NEXT: vzeroupper 1916; X64-AVX1OR2-NEXT: retq 1917; 1918; X64-AVX512-LABEL: test_reduce_v16i16_v8i16: 1919; X64-AVX512: ## %bb.0: 1920; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1921; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1922; X64-AVX512-NEXT: vmovd %xmm0, %eax 1923; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 1924; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1925; X64-AVX512-NEXT: vzeroupper 1926; X64-AVX512-NEXT: retq 1927 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef> 1928 %2 = icmp slt <16 x i16> %a0, %1 1929 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 1930 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1931 %5 = icmp slt <16 x i16> %3, %4 1932 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 1933 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1934 %8 = icmp slt <16 x i16> %6, %7 1935 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 1936 %10 = extractelement <16 x i16> %9, i32 0 1937 ret i16 %10 1938} 1939 1940define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 1941; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 1942; X86-SSE2: ## %bb.0: 1943; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1944; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1945; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1946; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1947; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1948; X86-SSE2-NEXT: psrld $16, %xmm1 1949; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1950; X86-SSE2-NEXT: movd %xmm1, %eax 1951; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1952; X86-SSE2-NEXT: retl 1953; 1954; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: 1955; X86-SSE42: ## %bb.0: 1956; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1957; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1958; X86-SSE42-NEXT: movd %xmm0, %eax 1959; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1960; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1961; X86-SSE42-NEXT: retl 1962; 1963; X86-AVX-LABEL: test_reduce_v32i16_v8i16: 1964; X86-AVX: ## %bb.0: 1965; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1966; X86-AVX-NEXT: vphminposuw 
%xmm0, %xmm0 1967; X86-AVX-NEXT: vmovd %xmm0, %eax 1968; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 1969; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1970; X86-AVX-NEXT: vzeroupper 1971; X86-AVX-NEXT: retl 1972; 1973; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: 1974; X64-SSE2: ## %bb.0: 1975; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1976; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1977; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1978; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1979; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1980; X64-SSE2-NEXT: psrld $16, %xmm1 1981; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1982; X64-SSE2-NEXT: movd %xmm1, %eax 1983; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1984; X64-SSE2-NEXT: retq 1985; 1986; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: 1987; X64-SSE42: ## %bb.0: 1988; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1989; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1990; X64-SSE42-NEXT: movd %xmm0, %eax 1991; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1992; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1993; X64-SSE42-NEXT: retq 1994; 1995; X64-AVX1OR2-LABEL: test_reduce_v32i16_v8i16: 1996; X64-AVX1OR2: ## %bb.0: 1997; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1998; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 1999; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 2000; X64-AVX1OR2-NEXT: xorl $32768, %eax ## imm = 0x8000 2001; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax 2002; X64-AVX1OR2-NEXT: vzeroupper 2003; X64-AVX1OR2-NEXT: retq 2004; 2005; X64-AVX512-LABEL: test_reduce_v32i16_v8i16: 2006; X64-AVX512: ## %bb.0: 2007; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 2008; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 2009; X64-AVX512-NEXT: vmovd %xmm0, %eax 2010; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 2011; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 2012; X64-AVX512-NEXT: vzeroupper 2013; X64-AVX512-NEXT: retq 2014 
%1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2015 %2 = icmp slt <32 x i16> %a0, %1 2016 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 2017 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2018 %5 = icmp slt <32 x i16> %3, %4 2019 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 2020 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2021 %8 = icmp slt <32 x i16> %6, %7 2022 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 2023 %10 = extractelement <32 x i16> %9, i32 0 2024 ret i16 %10 2025} 2026 2027define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { 2028; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: 2029; X86-SSE2: ## %bb.0: 2030; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2031; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 2032; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2033; X86-SSE2-NEXT: pand %xmm2, %xmm0 2034; X86-SSE2-NEXT: pandn %xmm1, %xmm2 2035; X86-SSE2-NEXT: por %xmm0, %xmm2 2036; 
X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] 2037; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2038; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm0 2039; X86-SSE2-NEXT: pand %xmm0, %xmm2 2040; X86-SSE2-NEXT: pandn %xmm1, %xmm0 2041; X86-SSE2-NEXT: por %xmm2, %xmm0 2042; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 2043; X86-SSE2-NEXT: psrld $16, %xmm2 2044; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 2045; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2046; X86-SSE2-NEXT: pand %xmm1, %xmm0 2047; X86-SSE2-NEXT: pandn %xmm2, %xmm1 2048; X86-SSE2-NEXT: por %xmm0, %xmm1 2049; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2050; X86-SSE2-NEXT: psrlw $8, %xmm0 2051; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 2052; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 2053; X86-SSE2-NEXT: pand %xmm2, %xmm1 2054; X86-SSE2-NEXT: pandn %xmm0, %xmm2 2055; X86-SSE2-NEXT: por %xmm1, %xmm2 2056; X86-SSE2-NEXT: movd %xmm2, %eax 2057; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2058; X86-SSE2-NEXT: retl 2059; 2060; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: 2061; X86-SSE42: ## %bb.0: 2062; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2063; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 2064; X86-SSE42-NEXT: psrlw $8, %xmm1 2065; X86-SSE42-NEXT: pminub %xmm0, %xmm1 2066; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2067; X86-SSE42-NEXT: movd %xmm0, %eax 2068; X86-SSE42-NEXT: addb $-128, %al 2069; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2070; X86-SSE42-NEXT: retl 2071; 2072; X86-AVX-LABEL: test_reduce_v32i8_v16i8: 2073; X86-AVX: ## %bb.0: 2074; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2075; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2076; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2077; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2078; X86-AVX-NEXT: vmovd %xmm0, %eax 2079; X86-AVX-NEXT: addb $-128, %al 2080; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 2081; X86-AVX-NEXT: vzeroupper 2082; X86-AVX-NEXT: retl 2083; 2084; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: 2085; X64-SSE2: ## %bb.0: 2086; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[2,3,2,3] 2087; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 2088; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2089; X64-SSE2-NEXT: pand %xmm2, %xmm0 2090; X64-SSE2-NEXT: pandn %xmm1, %xmm2 2091; X64-SSE2-NEXT: por %xmm0, %xmm2 2092; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 2093; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2094; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2095; X64-SSE2-NEXT: pand %xmm1, %xmm2 2096; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2097; X64-SSE2-NEXT: por %xmm2, %xmm1 2098; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 2099; X64-SSE2-NEXT: psrld $16, %xmm0 2100; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2101; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 2102; X64-SSE2-NEXT: pand %xmm2, %xmm1 2103; X64-SSE2-NEXT: pandn %xmm0, %xmm2 2104; X64-SSE2-NEXT: por %xmm1, %xmm2 2105; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 2106; X64-SSE2-NEXT: psrlw $8, %xmm0 2107; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2108; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2109; X64-SSE2-NEXT: pand %xmm1, %xmm2 2110; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2111; X64-SSE2-NEXT: por %xmm2, %xmm1 2112; X64-SSE2-NEXT: movd %xmm1, %eax 2113; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2114; X64-SSE2-NEXT: retq 2115; 2116; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: 2117; X64-SSE42: ## %bb.0: 2118; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2119; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 2120; X64-SSE42-NEXT: psrlw $8, %xmm1 2121; X64-SSE42-NEXT: pminub %xmm0, %xmm1 2122; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2123; X64-SSE42-NEXT: movd %xmm0, %eax 2124; X64-SSE42-NEXT: addb $-128, %al 2125; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2126; X64-SSE42-NEXT: retq 2127; 2128; X64-AVX1OR2-LABEL: test_reduce_v32i8_v16i8: 2129; X64-AVX1OR2: ## %bb.0: 2130; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2131; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1 2132; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0 2133; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 2134; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 2135; 
X64-AVX1OR2-NEXT: addb $-128, %al 2136; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax 2137; X64-AVX1OR2-NEXT: vzeroupper 2138; X64-AVX1OR2-NEXT: retq 2139; 2140; X64-AVX512-LABEL: test_reduce_v32i8_v16i8: 2141; X64-AVX512: ## %bb.0: 2142; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 2143; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 2144; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 2145; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 2146; X64-AVX512-NEXT: vmovd %xmm0, %eax 2147; X64-AVX512-NEXT: addb $-128, %al 2148; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 2149; X64-AVX512-NEXT: vzeroupper 2150; X64-AVX512-NEXT: retq 2151 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2152 %2 = icmp slt <32 x i8> %a0, %1 2153 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 2154 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2155 %5 = icmp slt <32 x i8> %3, %4 2156 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 2157 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

; NOTE(review): Partial reduction test: the source IR below only ever reads
; elements 0..15 of the <64 x i8> argument (every shuffle mask index past the
; defined prefix is undef), so the reduction is really a <16 x i8> signed-min
; and codegen is expected to use xmm-only sequences — as the autogenerated
; CHECK lines above/below confirm for each RUN configuration (SSE2 compare/
; select chains; SSE4.2/AVX/AVX512 via the pxor-bias + pminub + phminposuw
; unsigned-min trick, un-biased afterwards with addb $-128).
; Assertions are autogenerated by utils/update_llc_test_checks.py — do not
; hand-edit the *-LABEL/*-NEXT lines; regenerate instead.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm0, %xmm2
; X86-SSE2-NEXT: pandn %xmm1, %xmm0
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psrld $16, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pandn %xmm2, %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: addb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: addb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrld $16, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: addb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1OR2-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX1OR2: ## %bb.0:
; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax
; X64-AVX1OR2-NEXT: addb $-128, %al
; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1OR2-NEXT: vzeroupper
; X64-AVX1OR2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: addb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  ; Step 1: signed-min of bytes [8..15] against [0..7] (icmp slt + select);
  ; all mask indices >= 8 are undef, so only the low 16 input bytes matter.
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  ; Step 2: fold bytes [4..7] into [0..3].
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  ; Step 3: fold bytes [2..3] into [0..1].
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  ; Step 4: fold byte [1] into [0]; element 0 then holds the 16-byte smin.
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}
