1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2 3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42 4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1 5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2 6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2 7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42 8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX1 9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1OR2,X64-AVX2 10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512 11 12; 13; 128-bit Vectors 14; 15 16define i64 @test_reduce_v2i64(<2 x i64> %a0) { 17; X86-SSE2-LABEL: test_reduce_v2i64: 18; X86-SSE2: ## %bb.0: 19; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 20; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 21; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 22; X86-SSE2-NEXT: pxor %xmm2, %xmm3 23; X86-SSE2-NEXT: pxor %xmm1, %xmm2 24; X86-SSE2-NEXT: movdqa %xmm3, %xmm4 25; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 26; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 27; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 28; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 29; X86-SSE2-NEXT: pand %xmm5, %xmm2 30; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 31; X86-SSE2-NEXT: por %xmm2, %xmm3 32; X86-SSE2-NEXT: pand %xmm3, %xmm0 33; X86-SSE2-NEXT: pandn %xmm1, %xmm3 34; X86-SSE2-NEXT: por %xmm0, %xmm3 35; X86-SSE2-NEXT: movd %xmm3, %eax 36; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 
= xmm3[1,1,1,1] 37; X86-SSE2-NEXT: movd %xmm0, %edx 38; X86-SSE2-NEXT: retl 39; 40; X86-SSE42-LABEL: test_reduce_v2i64: 41; X86-SSE42: ## %bb.0: 42; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 43; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 44; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 45; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 46; X86-SSE42-NEXT: movd %xmm2, %eax 47; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 48; X86-SSE42-NEXT: retl 49; 50; X86-AVX-LABEL: test_reduce_v2i64: 51; X86-AVX: ## %bb.0: 52; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 53; X86-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 54; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 55; X86-AVX-NEXT: vmovd %xmm0, %eax 56; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx 57; X86-AVX-NEXT: retl 58; 59; X64-SSE2-LABEL: test_reduce_v2i64: 60; X64-SSE2: ## %bb.0: 61; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 62; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 63; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 64; X64-SSE2-NEXT: pxor %xmm2, %xmm3 65; X64-SSE2-NEXT: pxor %xmm1, %xmm2 66; X64-SSE2-NEXT: movdqa %xmm3, %xmm4 67; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 68; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 69; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 70; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 71; X64-SSE2-NEXT: pand %xmm5, %xmm2 72; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 73; X64-SSE2-NEXT: por %xmm2, %xmm3 74; X64-SSE2-NEXT: pand %xmm3, %xmm0 75; X64-SSE2-NEXT: pandn %xmm1, %xmm3 76; X64-SSE2-NEXT: por %xmm0, %xmm3 77; X64-SSE2-NEXT: movq %xmm3, %rax 78; X64-SSE2-NEXT: retq 79; 80; X64-SSE42-LABEL: test_reduce_v2i64: 81; X64-SSE42: ## %bb.0: 82; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 83; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 84; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 85; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 86; X64-SSE42-NEXT: movq %xmm2, %rax 87; X64-SSE42-NEXT: retq 88; 89; X64-AVX1OR2-LABEL: test_reduce_v2i64: 90; X64-AVX1OR2: ## %bb.0: 91; 
X64-AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 92; X64-AVX1OR2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 93; X64-AVX1OR2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 94; X64-AVX1OR2-NEXT: vmovq %xmm0, %rax 95; X64-AVX1OR2-NEXT: retq 96; 97; X64-AVX512-LABEL: test_reduce_v2i64: 98; X64-AVX512: ## %bb.0: 99; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 100; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 101; X64-AVX512-NEXT: vmovq %xmm0, %rax 102; X64-AVX512-NEXT: retq 103 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 104 %2 = icmp sgt <2 x i64> %a0, %1 105 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1 106 %4 = extractelement <2 x i64> %3, i32 0 107 ret i64 %4 108} 109 110define i32 @test_reduce_v4i32(<4 x i32> %a0) { 111; X86-SSE2-LABEL: test_reduce_v4i32: 112; X86-SSE2: ## %bb.0: 113; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 114; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 115; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 116; X86-SSE2-NEXT: pand %xmm2, %xmm0 117; X86-SSE2-NEXT: pandn %xmm1, %xmm2 118; X86-SSE2-NEXT: por %xmm0, %xmm2 119; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 120; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 121; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 122; X86-SSE2-NEXT: pand %xmm1, %xmm2 123; X86-SSE2-NEXT: pandn %xmm0, %xmm1 124; X86-SSE2-NEXT: por %xmm2, %xmm1 125; X86-SSE2-NEXT: movd %xmm1, %eax 126; X86-SSE2-NEXT: retl 127; 128; X86-SSE42-LABEL: test_reduce_v4i32: 129; X86-SSE42: ## %bb.0: 130; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 131; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 132; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 133; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 134; X86-SSE42-NEXT: movd %xmm0, %eax 135; X86-SSE42-NEXT: retl 136; 137; X86-AVX-LABEL: test_reduce_v4i32: 138; X86-AVX: ## %bb.0: 139; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 140; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 141; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 142; X86-AVX-NEXT: vpmaxsd 
%xmm1, %xmm0, %xmm0 143; X86-AVX-NEXT: vmovd %xmm0, %eax 144; X86-AVX-NEXT: retl 145; 146; X64-SSE2-LABEL: test_reduce_v4i32: 147; X64-SSE2: ## %bb.0: 148; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 149; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 150; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 151; X64-SSE2-NEXT: pand %xmm2, %xmm0 152; X64-SSE2-NEXT: pandn %xmm1, %xmm2 153; X64-SSE2-NEXT: por %xmm0, %xmm2 154; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 155; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 156; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 157; X64-SSE2-NEXT: pand %xmm1, %xmm2 158; X64-SSE2-NEXT: pandn %xmm0, %xmm1 159; X64-SSE2-NEXT: por %xmm2, %xmm1 160; X64-SSE2-NEXT: movd %xmm1, %eax 161; X64-SSE2-NEXT: retq 162; 163; X64-SSE42-LABEL: test_reduce_v4i32: 164; X64-SSE42: ## %bb.0: 165; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 166; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 167; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 168; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 169; X64-SSE42-NEXT: movd %xmm0, %eax 170; X64-SSE42-NEXT: retq 171; 172; X64-AVX-LABEL: test_reduce_v4i32: 173; X64-AVX: ## %bb.0: 174; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 175; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 176; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 177; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 178; X64-AVX-NEXT: vmovd %xmm0, %eax 179; X64-AVX-NEXT: retq 180 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 181 %2 = icmp sgt <4 x i32> %a0, %1 182 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1 183 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 184 %5 = icmp sgt <4 x i32> %3, %4 185 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4 186 %7 = extractelement <4 x i32> %6, i32 0 187 ret i32 %7 188} 189 190define i16 @test_reduce_v8i16(<8 x i16> %a0) { 191; X86-SSE2-LABEL: test_reduce_v8i16: 192; X86-SSE2: ## %bb.0: 193; X86-SSE2-NEXT: pshufd {{.*#+}} 
xmm1 = xmm0[2,3,2,3] 194; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 195; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 196; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 197; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 198; X86-SSE2-NEXT: psrld $16, %xmm1 199; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 200; X86-SSE2-NEXT: movd %xmm1, %eax 201; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 202; X86-SSE2-NEXT: retl 203; 204; X86-SSE42-LABEL: test_reduce_v8i16: 205; X86-SSE42: ## %bb.0: 206; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 207; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 208; X86-SSE42-NEXT: movd %xmm0, %eax 209; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 210; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 211; X86-SSE42-NEXT: retl 212; 213; X86-AVX-LABEL: test_reduce_v8i16: 214; X86-AVX: ## %bb.0: 215; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 216; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 217; X86-AVX-NEXT: vmovd %xmm0, %eax 218; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 219; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 220; X86-AVX-NEXT: retl 221; 222; X64-SSE2-LABEL: test_reduce_v8i16: 223; X64-SSE2: ## %bb.0: 224; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 225; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 226; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 227; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 228; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 229; X64-SSE2-NEXT: psrld $16, %xmm1 230; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 231; X64-SSE2-NEXT: movd %xmm1, %eax 232; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 233; X64-SSE2-NEXT: retq 234; 235; X64-SSE42-LABEL: test_reduce_v8i16: 236; X64-SSE42: ## %bb.0: 237; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 238; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 239; X64-SSE42-NEXT: movd %xmm0, %eax 240; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 241; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 242; X64-SSE42-NEXT: retq 243; 244; X64-AVX1OR2-LABEL: 
test_reduce_v8i16: 245; X64-AVX1OR2: ## %bb.0: 246; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 247; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 248; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 249; X64-AVX1OR2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 250; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax 251; X64-AVX1OR2-NEXT: retq 252; 253; X64-AVX512-LABEL: test_reduce_v8i16: 254; X64-AVX512: ## %bb.0: 255; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 256; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 257; X64-AVX512-NEXT: vmovd %xmm0, %eax 258; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 259; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 260; X64-AVX512-NEXT: retq 261 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 262 %2 = icmp sgt <8 x i16> %a0, %1 263 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1 264 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 265 %5 = icmp sgt <8 x i16> %3, %4 266 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 267 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 268 %8 = icmp sgt <8 x i16> %6, %7 269 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7 270 %10 = extractelement <8 x i16> %9, i32 0 271 ret i16 %10 272} 273 274define i8 @test_reduce_v16i8(<16 x i8> %a0) { 275; X86-SSE2-LABEL: test_reduce_v16i8: 276; X86-SSE2: ## %bb.0: 277; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 278; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 279; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 280; X86-SSE2-NEXT: pand %xmm2, %xmm0 281; X86-SSE2-NEXT: pandn %xmm1, %xmm2 282; X86-SSE2-NEXT: por %xmm0, %xmm2 283; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] 284; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 285; 
X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 286; X86-SSE2-NEXT: pand %xmm0, %xmm2 287; X86-SSE2-NEXT: pandn %xmm1, %xmm0 288; X86-SSE2-NEXT: por %xmm2, %xmm0 289; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 290; X86-SSE2-NEXT: psrld $16, %xmm2 291; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 292; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 293; X86-SSE2-NEXT: pand %xmm1, %xmm0 294; X86-SSE2-NEXT: pandn %xmm2, %xmm1 295; X86-SSE2-NEXT: por %xmm0, %xmm1 296; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 297; X86-SSE2-NEXT: psrlw $8, %xmm0 298; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 299; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 300; X86-SSE2-NEXT: pand %xmm2, %xmm1 301; X86-SSE2-NEXT: pandn %xmm0, %xmm2 302; X86-SSE2-NEXT: por %xmm1, %xmm2 303; X86-SSE2-NEXT: movd %xmm2, %eax 304; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 305; X86-SSE2-NEXT: retl 306; 307; X86-SSE42-LABEL: test_reduce_v16i8: 308; X86-SSE42: ## %bb.0: 309; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 310; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 311; X86-SSE42-NEXT: psrlw $8, %xmm1 312; X86-SSE42-NEXT: pminub %xmm0, %xmm1 313; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 314; X86-SSE42-NEXT: movd %xmm0, %eax 315; X86-SSE42-NEXT: xorb $127, %al 316; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 317; X86-SSE42-NEXT: retl 318; 319; X86-AVX-LABEL: test_reduce_v16i8: 320; X86-AVX: ## %bb.0: 321; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 322; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 323; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 324; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 325; X86-AVX-NEXT: vmovd %xmm0, %eax 326; X86-AVX-NEXT: xorb $127, %al 327; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 328; X86-AVX-NEXT: retl 329; 330; X64-SSE2-LABEL: test_reduce_v16i8: 331; X64-SSE2: ## %bb.0: 332; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 333; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 334; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 335; X64-SSE2-NEXT: pand %xmm2, %xmm0 336; X64-SSE2-NEXT: pandn %xmm1, %xmm2 337; X64-SSE2-NEXT: por 
%xmm0, %xmm2 338; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 339; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 340; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 341; X64-SSE2-NEXT: pand %xmm1, %xmm2 342; X64-SSE2-NEXT: pandn %xmm0, %xmm1 343; X64-SSE2-NEXT: por %xmm2, %xmm1 344; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 345; X64-SSE2-NEXT: psrld $16, %xmm0 346; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 347; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 348; X64-SSE2-NEXT: pand %xmm2, %xmm1 349; X64-SSE2-NEXT: pandn %xmm0, %xmm2 350; X64-SSE2-NEXT: por %xmm1, %xmm2 351; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 352; X64-SSE2-NEXT: psrlw $8, %xmm0 353; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 354; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 355; X64-SSE2-NEXT: pand %xmm1, %xmm2 356; X64-SSE2-NEXT: pandn %xmm0, %xmm1 357; X64-SSE2-NEXT: por %xmm2, %xmm1 358; X64-SSE2-NEXT: movd %xmm1, %eax 359; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 360; X64-SSE2-NEXT: retq 361; 362; X64-SSE42-LABEL: test_reduce_v16i8: 363; X64-SSE42: ## %bb.0: 364; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 365; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 366; X64-SSE42-NEXT: psrlw $8, %xmm1 367; X64-SSE42-NEXT: pminub %xmm0, %xmm1 368; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 369; X64-SSE42-NEXT: movd %xmm0, %eax 370; X64-SSE42-NEXT: xorb $127, %al 371; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 372; X64-SSE42-NEXT: retq 373; 374; X64-AVX1OR2-LABEL: test_reduce_v16i8: 375; X64-AVX1OR2: ## %bb.0: 376; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 377; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1 378; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0 379; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 380; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 381; X64-AVX1OR2-NEXT: xorb $127, %al 382; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax 383; X64-AVX1OR2-NEXT: retq 384; 385; X64-AVX512-LABEL: test_reduce_v16i8: 386; X64-AVX512: ## %bb.0: 387; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, 
%xmm0, %xmm0 388; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 389; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 390; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 391; X64-AVX512-NEXT: vmovd %xmm0, %eax 392; X64-AVX512-NEXT: xorb $127, %al 393; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 394; X64-AVX512-NEXT: retq 395 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 396 %2 = icmp sgt <16 x i8> %a0, %1 397 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1 398 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 399 %5 = icmp sgt <16 x i8> %3, %4 400 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 401 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 402 %8 = icmp sgt <16 x i8> %6, %7 403 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7 404 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 405 %11 = icmp sgt <16 x i8> %9, %10 406 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10 407 %13 = extractelement <16 x i8> %12, i32 0 408 ret i8 %13 409} 410 411; 412; 256-bit Vectors 413; 414 415define i64 @test_reduce_v4i64(<4 x i64> %a0) { 416; X86-SSE2-LABEL: test_reduce_v4i64: 417; X86-SSE2: ## %bb.0: 418; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 419; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 420; X86-SSE2-NEXT: pxor %xmm2, %xmm3 421; 
X86-SSE2-NEXT: movdqa %xmm0, %xmm4 422; X86-SSE2-NEXT: pxor %xmm2, %xmm4 423; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 424; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 425; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 426; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 427; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 428; X86-SSE2-NEXT: pand %xmm6, %xmm4 429; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 430; X86-SSE2-NEXT: por %xmm4, %xmm3 431; X86-SSE2-NEXT: pand %xmm3, %xmm0 432; X86-SSE2-NEXT: pandn %xmm1, %xmm3 433; X86-SSE2-NEXT: por %xmm0, %xmm3 434; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 435; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 436; X86-SSE2-NEXT: pxor %xmm2, %xmm1 437; X86-SSE2-NEXT: pxor %xmm0, %xmm2 438; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 439; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 440; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 441; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 442; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 443; X86-SSE2-NEXT: pand %xmm5, %xmm1 444; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 445; X86-SSE2-NEXT: por %xmm1, %xmm2 446; X86-SSE2-NEXT: pand %xmm2, %xmm3 447; X86-SSE2-NEXT: pandn %xmm0, %xmm2 448; X86-SSE2-NEXT: por %xmm3, %xmm2 449; X86-SSE2-NEXT: movd %xmm2, %eax 450; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 451; X86-SSE2-NEXT: movd %xmm0, %edx 452; X86-SSE2-NEXT: retl 453; 454; X86-SSE42-LABEL: test_reduce_v4i64: 455; X86-SSE42: ## %bb.0: 456; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 457; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 458; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 459; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 460; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 461; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 462; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 463; X86-SSE42-NEXT: movd %xmm2, %eax 464; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 465; X86-SSE42-NEXT: retl 466; 467; X86-AVX1-LABEL: test_reduce_v4i64: 468; X86-AVX1: ## %bb.0: 469; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 470; 
X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 471; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 472; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 473; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 474; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 475; X86-AVX1-NEXT: vmovd %xmm0, %eax 476; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 477; X86-AVX1-NEXT: vzeroupper 478; X86-AVX1-NEXT: retl 479; 480; X86-AVX2-LABEL: test_reduce_v4i64: 481; X86-AVX2: ## %bb.0: 482; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 483; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 484; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 485; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 486; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 487; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 488; X86-AVX2-NEXT: vmovd %xmm0, %eax 489; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 490; X86-AVX2-NEXT: vzeroupper 491; X86-AVX2-NEXT: retl 492; 493; X64-SSE2-LABEL: test_reduce_v4i64: 494; X64-SSE2: ## %bb.0: 495; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 496; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 497; X64-SSE2-NEXT: pxor %xmm2, %xmm3 498; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 499; X64-SSE2-NEXT: pxor %xmm2, %xmm4 500; X64-SSE2-NEXT: movdqa %xmm4, %xmm5 501; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 502; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 503; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 504; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 505; X64-SSE2-NEXT: pand %xmm6, %xmm3 506; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 507; X64-SSE2-NEXT: por %xmm3, %xmm4 508; X64-SSE2-NEXT: pand %xmm4, %xmm0 509; X64-SSE2-NEXT: pandn %xmm1, %xmm4 510; X64-SSE2-NEXT: por %xmm0, %xmm4 511; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] 512; X64-SSE2-NEXT: movdqa %xmm4, %xmm1 513; X64-SSE2-NEXT: pxor %xmm2, %xmm1 514; X64-SSE2-NEXT: pxor %xmm0, %xmm2 515; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 516; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 517; X64-SSE2-NEXT: pshufd {{.*#+}} 
xmm5 = xmm3[0,0,2,2] 518; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 519; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 520; X64-SSE2-NEXT: pand %xmm5, %xmm1 521; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 522; X64-SSE2-NEXT: por %xmm1, %xmm2 523; X64-SSE2-NEXT: pand %xmm2, %xmm4 524; X64-SSE2-NEXT: pandn %xmm0, %xmm2 525; X64-SSE2-NEXT: por %xmm4, %xmm2 526; X64-SSE2-NEXT: movq %xmm2, %rax 527; X64-SSE2-NEXT: retq 528; 529; X64-SSE42-LABEL: test_reduce_v4i64: 530; X64-SSE42: ## %bb.0: 531; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 532; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 533; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 534; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 535; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 536; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 537; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 538; X64-SSE42-NEXT: movq %xmm2, %rax 539; X64-SSE42-NEXT: retq 540; 541; X64-AVX1-LABEL: test_reduce_v4i64: 542; X64-AVX1: ## %bb.0: 543; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 544; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 545; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 546; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 547; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 548; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 549; X64-AVX1-NEXT: vmovq %xmm0, %rax 550; X64-AVX1-NEXT: vzeroupper 551; X64-AVX1-NEXT: retq 552; 553; X64-AVX2-LABEL: test_reduce_v4i64: 554; X64-AVX2: ## %bb.0: 555; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 556; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 557; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 558; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 559; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 560; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 561; X64-AVX2-NEXT: vmovq %xmm0, %rax 562; X64-AVX2-NEXT: vzeroupper 563; X64-AVX2-NEXT: retq 564; 565; X64-AVX512-LABEL: test_reduce_v4i64: 566; X64-AVX512: ## %bb.0: 567; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 568; 
X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 569; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 570; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 571; X64-AVX512-NEXT: vmovq %xmm0, %rax 572; X64-AVX512-NEXT: vzeroupper 573; X64-AVX512-NEXT: retq 574 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 575 %2 = icmp sgt <4 x i64> %a0, %1 576 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1 577 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 578 %5 = icmp sgt <4 x i64> %3, %4 579 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4 580 %7 = extractelement <4 x i64> %6, i32 0 581 ret i64 %7 582} 583 584define i32 @test_reduce_v8i32(<8 x i32> %a0) { 585; X86-SSE2-LABEL: test_reduce_v8i32: 586; X86-SSE2: ## %bb.0: 587; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 588; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 589; X86-SSE2-NEXT: pand %xmm2, %xmm0 590; X86-SSE2-NEXT: pandn %xmm1, %xmm2 591; X86-SSE2-NEXT: por %xmm0, %xmm2 592; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 593; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 594; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 595; X86-SSE2-NEXT: pand %xmm1, %xmm2 596; X86-SSE2-NEXT: pandn %xmm0, %xmm1 597; X86-SSE2-NEXT: por %xmm2, %xmm1 598; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 599; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 600; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 601; X86-SSE2-NEXT: pand %xmm2, %xmm1 602; X86-SSE2-NEXT: pandn %xmm0, %xmm2 603; X86-SSE2-NEXT: por %xmm1, %xmm2 604; X86-SSE2-NEXT: movd %xmm2, %eax 605; X86-SSE2-NEXT: retl 606; 607; X86-SSE42-LABEL: test_reduce_v8i32: 608; X86-SSE42: ## %bb.0: 609; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 610; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 611; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 612; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 613; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 614; X86-SSE42-NEXT: movd %xmm0, %eax 615; X86-SSE42-NEXT: retl 616; 617; X86-AVX1-LABEL: 
test_reduce_v8i32: 618; X86-AVX1: ## %bb.0: 619; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 620; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 621; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 622; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 623; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 624; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 625; X86-AVX1-NEXT: vmovd %xmm0, %eax 626; X86-AVX1-NEXT: vzeroupper 627; X86-AVX1-NEXT: retl 628; 629; X86-AVX2-LABEL: test_reduce_v8i32: 630; X86-AVX2: ## %bb.0: 631; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 632; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 633; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 634; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 635; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 636; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 637; X86-AVX2-NEXT: vmovd %xmm0, %eax 638; X86-AVX2-NEXT: vzeroupper 639; X86-AVX2-NEXT: retl 640; 641; X64-SSE2-LABEL: test_reduce_v8i32: 642; X64-SSE2: ## %bb.0: 643; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 644; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 645; X64-SSE2-NEXT: pand %xmm2, %xmm0 646; X64-SSE2-NEXT: pandn %xmm1, %xmm2 647; X64-SSE2-NEXT: por %xmm0, %xmm2 648; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 649; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 650; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 651; X64-SSE2-NEXT: pand %xmm1, %xmm2 652; X64-SSE2-NEXT: pandn %xmm0, %xmm1 653; X64-SSE2-NEXT: por %xmm2, %xmm1 654; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 655; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 656; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 657; X64-SSE2-NEXT: pand %xmm2, %xmm1 658; X64-SSE2-NEXT: pandn %xmm0, %xmm2 659; X64-SSE2-NEXT: por %xmm1, %xmm2 660; X64-SSE2-NEXT: movd %xmm2, %eax 661; X64-SSE2-NEXT: retq 662; 663; X64-SSE42-LABEL: test_reduce_v8i32: 664; X64-SSE42: ## %bb.0: 665; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 666; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 667; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 668; X64-SSE42-NEXT: pshufd 
{{.*#+}} xmm0 = xmm1[1,1,1,1] 669; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 670; X64-SSE42-NEXT: movd %xmm0, %eax 671; X64-SSE42-NEXT: retq 672; 673; X64-AVX1-LABEL: test_reduce_v8i32: 674; X64-AVX1: ## %bb.0: 675; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 676; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 677; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 678; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 679; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 680; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 681; X64-AVX1-NEXT: vmovd %xmm0, %eax 682; X64-AVX1-NEXT: vzeroupper 683; X64-AVX1-NEXT: retq 684; 685; X64-AVX2-LABEL: test_reduce_v8i32: 686; X64-AVX2: ## %bb.0: 687; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 688; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 689; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 690; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 691; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 692; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 693; X64-AVX2-NEXT: vmovd %xmm0, %eax 694; X64-AVX2-NEXT: vzeroupper 695; X64-AVX2-NEXT: retq 696; 697; X64-AVX512-LABEL: test_reduce_v8i32: 698; X64-AVX512: ## %bb.0: 699; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 700; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 701; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 702; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 703; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 704; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 705; X64-AVX512-NEXT: vmovd %xmm0, %eax 706; X64-AVX512-NEXT: vzeroupper 707; X64-AVX512-NEXT: retq 708 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 709 %2 = icmp sgt <8 x i32> %a0, %1 710 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1 711 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 712 %5 = icmp sgt <8 x i32> %3, %4 713 %6 = 
select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 714 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 715 %8 = icmp sgt <8 x i32> %6, %7 716 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7 717 %10 = extractelement <8 x i32> %9, i32 0 718 ret i32 %10 719} 720 721define i16 @test_reduce_v16i16(<16 x i16> %a0) { 722; X86-SSE2-LABEL: test_reduce_v16i16: 723; X86-SSE2: ## %bb.0: 724; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 725; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 726; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 727; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 728; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 729; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 730; X86-SSE2-NEXT: psrld $16, %xmm1 731; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 732; X86-SSE2-NEXT: movd %xmm1, %eax 733; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 734; X86-SSE2-NEXT: retl 735; 736; X86-SSE42-LABEL: test_reduce_v16i16: 737; X86-SSE42: ## %bb.0: 738; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 739; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 740; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 741; X86-SSE42-NEXT: movd %xmm0, %eax 742; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 743; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 744; X86-SSE42-NEXT: retl 745; 746; X86-AVX1-LABEL: test_reduce_v16i16: 747; X86-AVX1: ## %bb.0: 748; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 749; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 750; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 751; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 752; X86-AVX1-NEXT: vmovd %xmm0, %eax 753; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 754; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 755; X86-AVX1-NEXT: vzeroupper 756; X86-AVX1-NEXT: retl 757; 758; X86-AVX2-LABEL: test_reduce_v16i16: 759; X86-AVX2: ## %bb.0: 760; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 761; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, 
%xmm0 762; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 763; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 764; X86-AVX2-NEXT: vmovd %xmm0, %eax 765; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 766; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 767; X86-AVX2-NEXT: vzeroupper 768; X86-AVX2-NEXT: retl 769; 770; X64-SSE2-LABEL: test_reduce_v16i16: 771; X64-SSE2: ## %bb.0: 772; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 773; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 774; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 775; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 776; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 777; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 778; X64-SSE2-NEXT: psrld $16, %xmm1 779; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 780; X64-SSE2-NEXT: movd %xmm1, %eax 781; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 782; X64-SSE2-NEXT: retq 783; 784; X64-SSE42-LABEL: test_reduce_v16i16: 785; X64-SSE42: ## %bb.0: 786; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 787; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 788; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 789; X64-SSE42-NEXT: movd %xmm0, %eax 790; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 791; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 792; X64-SSE42-NEXT: retq 793; 794; X64-AVX1-LABEL: test_reduce_v16i16: 795; X64-AVX1: ## %bb.0: 796; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 797; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 798; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 799; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 800; X64-AVX1-NEXT: vmovd %xmm0, %eax 801; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 802; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 803; X64-AVX1-NEXT: vzeroupper 804; X64-AVX1-NEXT: retq 805; 806; X64-AVX2-LABEL: test_reduce_v16i16: 807; X64-AVX2: ## %bb.0: 808; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 809; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 810; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, 
%xmm0 811; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 812; X64-AVX2-NEXT: vmovd %xmm0, %eax 813; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 814; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 815; X64-AVX2-NEXT: vzeroupper 816; X64-AVX2-NEXT: retq 817; 818; X64-AVX512-LABEL: test_reduce_v16i16: 819; X64-AVX512: ## %bb.0: 820; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 821; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 822; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 823; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 824; X64-AVX512-NEXT: vmovd %xmm0, %eax 825; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 826; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 827; X64-AVX512-NEXT: vzeroupper 828; X64-AVX512-NEXT: retq 829 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 830 %2 = icmp sgt <16 x i16> %a0, %1 831 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 832 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 833 %5 = icmp sgt <16 x i16> %3, %4 834 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 835 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 836 %8 = icmp sgt <16 x i16> %6, %7 837 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 838 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 839 %11 
= icmp sgt <16 x i16> %9, %10 840 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 841 %13 = extractelement <16 x i16> %12, i32 0 842 ret i16 %13 843} 844 845define i8 @test_reduce_v32i8(<32 x i8> %a0) { 846; X86-SSE2-LABEL: test_reduce_v32i8: 847; X86-SSE2: ## %bb.0: 848; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 849; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 850; X86-SSE2-NEXT: pand %xmm2, %xmm0 851; X86-SSE2-NEXT: pandn %xmm1, %xmm2 852; X86-SSE2-NEXT: por %xmm0, %xmm2 853; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 854; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 855; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 856; X86-SSE2-NEXT: pand %xmm1, %xmm2 857; X86-SSE2-NEXT: pandn %xmm0, %xmm1 858; X86-SSE2-NEXT: por %xmm2, %xmm1 859; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 860; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 861; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm0 862; X86-SSE2-NEXT: pand %xmm0, %xmm1 863; X86-SSE2-NEXT: pandn %xmm2, %xmm0 864; X86-SSE2-NEXT: por %xmm1, %xmm0 865; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 866; X86-SSE2-NEXT: psrld $16, %xmm2 867; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 868; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 869; X86-SSE2-NEXT: pand %xmm1, %xmm0 870; X86-SSE2-NEXT: pandn %xmm2, %xmm1 871; X86-SSE2-NEXT: por %xmm0, %xmm1 872; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 873; X86-SSE2-NEXT: psrlw $8, %xmm0 874; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 875; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 876; X86-SSE2-NEXT: pand %xmm2, %xmm1 877; X86-SSE2-NEXT: pandn %xmm0, %xmm2 878; X86-SSE2-NEXT: por %xmm1, %xmm2 879; X86-SSE2-NEXT: movd %xmm2, %eax 880; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 881; X86-SSE2-NEXT: retl 882; 883; X86-SSE42-LABEL: test_reduce_v32i8: 884; X86-SSE42: ## %bb.0: 885; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 886; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 887; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 888; X86-SSE42-NEXT: psrlw $8, %xmm1 889; X86-SSE42-NEXT: pminub %xmm0, %xmm1 890; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 891; X86-SSE42-NEXT: 
movd %xmm0, %eax 892; X86-SSE42-NEXT: xorb $127, %al 893; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 894; X86-SSE42-NEXT: retl 895; 896; X86-AVX1-LABEL: test_reduce_v32i8: 897; X86-AVX1: ## %bb.0: 898; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 899; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 900; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 901; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 902; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 903; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 904; X86-AVX1-NEXT: vmovd %xmm0, %eax 905; X86-AVX1-NEXT: xorb $127, %al 906; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 907; X86-AVX1-NEXT: vzeroupper 908; X86-AVX1-NEXT: retl 909; 910; X86-AVX2-LABEL: test_reduce_v32i8: 911; X86-AVX2: ## %bb.0: 912; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 913; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 914; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 915; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 916; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 917; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 918; X86-AVX2-NEXT: vmovd %xmm0, %eax 919; X86-AVX2-NEXT: xorb $127, %al 920; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 921; X86-AVX2-NEXT: vzeroupper 922; X86-AVX2-NEXT: retl 923; 924; X64-SSE2-LABEL: test_reduce_v32i8: 925; X64-SSE2: ## %bb.0: 926; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 927; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 928; X64-SSE2-NEXT: pand %xmm2, %xmm0 929; X64-SSE2-NEXT: pandn %xmm1, %xmm2 930; X64-SSE2-NEXT: por %xmm0, %xmm2 931; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 932; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 933; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 934; X64-SSE2-NEXT: pand %xmm1, %xmm2 935; X64-SSE2-NEXT: pandn %xmm0, %xmm1 936; X64-SSE2-NEXT: por %xmm2, %xmm1 937; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 938; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 939; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 940; X64-SSE2-NEXT: pand %xmm2, %xmm1 941; X64-SSE2-NEXT: pandn %xmm0, %xmm2 942; 
X64-SSE2-NEXT: por %xmm1, %xmm2 943; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 944; X64-SSE2-NEXT: psrld $16, %xmm0 945; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 946; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 947; X64-SSE2-NEXT: pand %xmm1, %xmm2 948; X64-SSE2-NEXT: pandn %xmm0, %xmm1 949; X64-SSE2-NEXT: por %xmm2, %xmm1 950; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 951; X64-SSE2-NEXT: psrlw $8, %xmm0 952; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 953; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 954; X64-SSE2-NEXT: pand %xmm2, %xmm1 955; X64-SSE2-NEXT: pandn %xmm0, %xmm2 956; X64-SSE2-NEXT: por %xmm1, %xmm2 957; X64-SSE2-NEXT: movd %xmm2, %eax 958; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 959; X64-SSE2-NEXT: retq 960; 961; X64-SSE42-LABEL: test_reduce_v32i8: 962; X64-SSE42: ## %bb.0: 963; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 964; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 965; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 966; X64-SSE42-NEXT: psrlw $8, %xmm1 967; X64-SSE42-NEXT: pminub %xmm0, %xmm1 968; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 969; X64-SSE42-NEXT: movd %xmm0, %eax 970; X64-SSE42-NEXT: xorb $127, %al 971; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 972; X64-SSE42-NEXT: retq 973; 974; X64-AVX1-LABEL: test_reduce_v32i8: 975; X64-AVX1: ## %bb.0: 976; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 977; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 978; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 979; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 980; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 981; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 982; X64-AVX1-NEXT: vmovd %xmm0, %eax 983; X64-AVX1-NEXT: xorb $127, %al 984; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 985; X64-AVX1-NEXT: vzeroupper 986; X64-AVX1-NEXT: retq 987; 988; X64-AVX2-LABEL: test_reduce_v32i8: 989; X64-AVX2: ## %bb.0: 990; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 991; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 992; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, 
%xmm0 993; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 994; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 995; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 996; X64-AVX2-NEXT: vmovd %xmm0, %eax 997; X64-AVX2-NEXT: xorb $127, %al 998; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 999; X64-AVX2-NEXT: vzeroupper 1000; X64-AVX2-NEXT: retq 1001; 1002; X64-AVX512-LABEL: test_reduce_v32i8: 1003; X64-AVX512: ## %bb.0: 1004; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1005; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1006; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1007; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1008; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1009; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1010; X64-AVX512-NEXT: vmovd %xmm0, %eax 1011; X64-AVX512-NEXT: xorb $127, %al 1012; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1013; X64-AVX512-NEXT: vzeroupper 1014; X64-AVX512-NEXT: retq 1015 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1016 %2 = icmp sgt <32 x i8> %a0, %1 1017 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1018 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1019 %5 = icmp sgt <32 x i8> %3, %4 1020 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1021 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1022 %8 = icmp sgt <32 x i8> %6, %7 1023 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1024 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1025 %11 = icmp sgt <32 x i8> %9, %10 1026 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1027 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1028 %14 = icmp sgt <32 x i8> %12, %13 1029 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1030 %16 = extractelement <32 x i8> %15, i32 0 1031 ret i8 %16 1032} 1033 1034; 1035; 512-bit Vectors 1036; 1037 1038define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1039; X86-SSE2-LABEL: test_reduce_v8i64: 1040; X86-SSE2: ## %bb.0: 1041; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1042; X86-SSE2-NEXT: movdqa %xmm2, %xmm5 1043; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1044; X86-SSE2-NEXT: movdqa %xmm0, %xmm6 1045; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1046; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1047; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1048; X86-SSE2-NEXT: 
pcmpeqd %xmm5, %xmm6 1049; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1050; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1051; X86-SSE2-NEXT: pand %xmm5, %xmm6 1052; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1053; X86-SSE2-NEXT: por %xmm6, %xmm5 1054; X86-SSE2-NEXT: pand %xmm5, %xmm0 1055; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1056; X86-SSE2-NEXT: por %xmm0, %xmm5 1057; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1058; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1059; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1060; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1061; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1062; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1063; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1064; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1065; X86-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] 1066; X86-SSE2-NEXT: pand %xmm0, %xmm7 1067; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1068; X86-SSE2-NEXT: por %xmm7, %xmm2 1069; X86-SSE2-NEXT: pand %xmm2, %xmm1 1070; X86-SSE2-NEXT: pandn %xmm3, %xmm2 1071; X86-SSE2-NEXT: por %xmm1, %xmm2 1072; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 1073; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1074; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 1075; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1076; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 1077; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1078; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1079; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1080; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1081; X86-SSE2-NEXT: pand %xmm0, %xmm1 1082; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 1083; X86-SSE2-NEXT: por %xmm1, %xmm0 1084; X86-SSE2-NEXT: pand %xmm0, %xmm5 1085; X86-SSE2-NEXT: pandn %xmm2, %xmm0 1086; X86-SSE2-NEXT: por %xmm5, %xmm0 1087; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1088; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1089; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1090; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1091; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1092; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1093; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 
1094; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1095; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1096; X86-SSE2-NEXT: pand %xmm2, %xmm4 1097; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1098; X86-SSE2-NEXT: por %xmm4, %xmm2 1099; X86-SSE2-NEXT: pand %xmm2, %xmm0 1100; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1101; X86-SSE2-NEXT: por %xmm0, %xmm2 1102; X86-SSE2-NEXT: movd %xmm2, %eax 1103; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1104; X86-SSE2-NEXT: movd %xmm0, %edx 1105; X86-SSE2-NEXT: retl 1106; 1107; X86-SSE42-LABEL: test_reduce_v8i64: 1108; X86-SSE42: ## %bb.0: 1109; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1110; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1111; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1112; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1113; X86-SSE42-NEXT: movdqa %xmm4, %xmm0 1114; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1115; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1116; X86-SSE42-NEXT: movapd %xmm2, %xmm0 1117; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1118; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1119; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1120; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1121; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1122; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1123; X86-SSE42-NEXT: movd %xmm1, %eax 1124; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1125; X86-SSE42-NEXT: retl 1126; 1127; X86-AVX1-LABEL: test_reduce_v8i64: 1128; X86-AVX1: ## %bb.0: 1129; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1130; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1131; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 1132; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm2, %xmm2 1133; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1134; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1135; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1 1136; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 1137; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1138; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1139; X86-AVX1-NEXT: vblendvpd 
%xmm2, %xmm0, %xmm1, %xmm0 1140; X86-AVX1-NEXT: vmovd %xmm0, %eax 1141; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1142; X86-AVX1-NEXT: vzeroupper 1143; X86-AVX1-NEXT: retl 1144; 1145; X86-AVX2-LABEL: test_reduce_v8i64: 1146; X86-AVX2: ## %bb.0: 1147; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1148; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1149; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1150; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1151; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1152; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1153; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1154; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1155; X86-AVX2-NEXT: vmovd %xmm0, %eax 1156; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1157; X86-AVX2-NEXT: vzeroupper 1158; X86-AVX2-NEXT: retl 1159; 1160; X64-SSE2-LABEL: test_reduce_v8i64: 1161; X64-SSE2: ## %bb.0: 1162; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1163; X64-SSE2-NEXT: movdqa %xmm2, %xmm5 1164; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1165; X64-SSE2-NEXT: movdqa %xmm0, %xmm6 1166; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1167; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1168; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1169; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1170; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1171; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1172; X64-SSE2-NEXT: pand %xmm8, %xmm6 1173; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1174; X64-SSE2-NEXT: por %xmm6, %xmm5 1175; X64-SSE2-NEXT: pand %xmm5, %xmm0 1176; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1177; X64-SSE2-NEXT: por %xmm0, %xmm5 1178; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1179; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1180; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1181; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1182; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1183; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1184; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1185; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1186; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm2[1,1,3,3] 1187; X64-SSE2-NEXT: pand %xmm7, %xmm0 1188; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1189; X64-SSE2-NEXT: por %xmm0, %xmm2 1190; X64-SSE2-NEXT: pand %xmm2, %xmm1 1191; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1192; X64-SSE2-NEXT: por %xmm1, %xmm2 1193; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1194; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1195; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1196; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1197; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 1198; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1199; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 1200; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1201; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1202; X64-SSE2-NEXT: pand %xmm6, %xmm0 1203; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1204; X64-SSE2-NEXT: por %xmm0, %xmm1 1205; X64-SSE2-NEXT: pand %xmm1, %xmm5 1206; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1207; X64-SSE2-NEXT: por %xmm5, %xmm1 1208; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1209; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1210; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1211; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1212; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1213; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1214; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1215; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1216; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1217; X64-SSE2-NEXT: pand %xmm5, %xmm2 1218; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1219; X64-SSE2-NEXT: por %xmm2, %xmm3 1220; X64-SSE2-NEXT: pand %xmm3, %xmm1 1221; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1222; X64-SSE2-NEXT: por %xmm1, %xmm3 1223; X64-SSE2-NEXT: movq %xmm3, %rax 1224; X64-SSE2-NEXT: retq 1225; 1226; X64-SSE42-LABEL: test_reduce_v8i64: 1227; X64-SSE42: ## %bb.0: 1228; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1229; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1230; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1231; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1232; X64-SSE42-NEXT: movdqa %xmm4, %xmm0 1233; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1234; 
X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1235; X64-SSE42-NEXT: movapd %xmm2, %xmm0 1236; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1237; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1238; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1239; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1240; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1241; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1242; X64-SSE42-NEXT: movq %xmm1, %rax 1243; X64-SSE42-NEXT: retq 1244; 1245; X64-AVX1-LABEL: test_reduce_v8i64: 1246; X64-AVX1: ## %bb.0: 1247; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1248; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1249; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 1250; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm2, %xmm2 1251; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1252; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1253; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1 1254; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 1255; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1256; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1257; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1258; X64-AVX1-NEXT: vmovq %xmm0, %rax 1259; X64-AVX1-NEXT: vzeroupper 1260; X64-AVX1-NEXT: retq 1261; 1262; X64-AVX2-LABEL: test_reduce_v8i64: 1263; X64-AVX2: ## %bb.0: 1264; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1265; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1266; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1267; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1268; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1269; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1270; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1271; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1272; X64-AVX2-NEXT: vmovq %xmm0, %rax 1273; X64-AVX2-NEXT: vzeroupper 1274; X64-AVX2-NEXT: retq 1275; 1276; X64-AVX512-LABEL: test_reduce_v8i64: 1277; X64-AVX512: ## %bb.0: 1278; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1279; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 1280; 
X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1281; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 1282; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1283; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 1284; X64-AVX512-NEXT: vmovq %xmm0, %rax 1285; X64-AVX512-NEXT: vzeroupper 1286; X64-AVX512-NEXT: retq 1287 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1288 %2 = icmp sgt <8 x i64> %a0, %1 1289 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1290 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1291 %5 = icmp sgt <8 x i64> %3, %4 1292 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1293 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1294 %8 = icmp sgt <8 x i64> %6, %7 1295 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1296 %10 = extractelement <8 x i64> %9, i32 0 1297 ret i64 %10 1298} 1299 1300define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1301; X86-SSE2-LABEL: test_reduce_v16i32: 1302; X86-SSE2: ## %bb.0: 1303; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1304; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1305; X86-SSE2-NEXT: pand %xmm4, %xmm1 1306; X86-SSE2-NEXT: pandn %xmm3, %xmm4 1307; X86-SSE2-NEXT: por %xmm1, %xmm4 1308; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1309; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1310; X86-SSE2-NEXT: pand %xmm1, %xmm0 1311; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1312; X86-SSE2-NEXT: por %xmm0, %xmm1 1313; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1314; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1315; X86-SSE2-NEXT: pand %xmm0, %xmm1 1316; X86-SSE2-NEXT: pandn %xmm4, %xmm0 1317; X86-SSE2-NEXT: por %xmm1, %xmm0 1318; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1319; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1320; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1321; X86-SSE2-NEXT: pand %xmm2, 
%xmm0 1322; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1323; X86-SSE2-NEXT: por %xmm0, %xmm2 1324; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1325; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1326; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1327; X86-SSE2-NEXT: pand %xmm1, %xmm2 1328; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1329; X86-SSE2-NEXT: por %xmm2, %xmm1 1330; X86-SSE2-NEXT: movd %xmm1, %eax 1331; X86-SSE2-NEXT: retl 1332; 1333; X86-SSE42-LABEL: test_reduce_v16i32: 1334; X86-SSE42: ## %bb.0: 1335; X86-SSE42-NEXT: pmaxsd %xmm3, %xmm1 1336; X86-SSE42-NEXT: pmaxsd %xmm2, %xmm0 1337; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1338; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1339; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1340; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1341; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1342; X86-SSE42-NEXT: movd %xmm0, %eax 1343; X86-SSE42-NEXT: retl 1344; 1345; X86-AVX1-LABEL: test_reduce_v16i32: 1346; X86-AVX1: ## %bb.0: 1347; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1348; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1349; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 1350; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1351; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0 1352; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1353; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1354; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1355; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1356; X86-AVX1-NEXT: vmovd %xmm0, %eax 1357; X86-AVX1-NEXT: vzeroupper 1358; X86-AVX1-NEXT: retl 1359; 1360; X86-AVX2-LABEL: test_reduce_v16i32: 1361; X86-AVX2: ## %bb.0: 1362; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1363; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1364; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1365; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1366; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1367; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1368; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1369; X86-AVX2-NEXT: vmovd %xmm0, %eax 1370; 
X86-AVX2-NEXT: vzeroupper 1371; X86-AVX2-NEXT: retl 1372; 1373; X64-SSE2-LABEL: test_reduce_v16i32: 1374; X64-SSE2: ## %bb.0: 1375; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1376; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1377; X64-SSE2-NEXT: pand %xmm4, %xmm1 1378; X64-SSE2-NEXT: pandn %xmm3, %xmm4 1379; X64-SSE2-NEXT: por %xmm1, %xmm4 1380; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1381; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1382; X64-SSE2-NEXT: pand %xmm1, %xmm0 1383; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1384; X64-SSE2-NEXT: por %xmm0, %xmm1 1385; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1386; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1387; X64-SSE2-NEXT: pand %xmm0, %xmm1 1388; X64-SSE2-NEXT: pandn %xmm4, %xmm0 1389; X64-SSE2-NEXT: por %xmm1, %xmm0 1390; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1391; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1392; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1393; X64-SSE2-NEXT: pand %xmm2, %xmm0 1394; X64-SSE2-NEXT: pandn %xmm1, %xmm2 1395; X64-SSE2-NEXT: por %xmm0, %xmm2 1396; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1397; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1398; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1399; X64-SSE2-NEXT: pand %xmm1, %xmm2 1400; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1401; X64-SSE2-NEXT: por %xmm2, %xmm1 1402; X64-SSE2-NEXT: movd %xmm1, %eax 1403; X64-SSE2-NEXT: retq 1404; 1405; X64-SSE42-LABEL: test_reduce_v16i32: 1406; X64-SSE42: ## %bb.0: 1407; X64-SSE42-NEXT: pmaxsd %xmm3, %xmm1 1408; X64-SSE42-NEXT: pmaxsd %xmm2, %xmm0 1409; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1410; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1411; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1412; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1413; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1414; X64-SSE42-NEXT: movd %xmm0, %eax 1415; X64-SSE42-NEXT: retq 1416; 1417; X64-AVX1-LABEL: test_reduce_v16i32: 1418; X64-AVX1: ## %bb.0: 1419; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1420; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1421; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, 
%xmm2 1422; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1423; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0 1424; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1425; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1426; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1427; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1428; X64-AVX1-NEXT: vmovd %xmm0, %eax 1429; X64-AVX1-NEXT: vzeroupper 1430; X64-AVX1-NEXT: retq 1431; 1432; X64-AVX2-LABEL: test_reduce_v16i32: 1433; X64-AVX2: ## %bb.0: 1434; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1435; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1436; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1437; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1438; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1439; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1440; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1441; X64-AVX2-NEXT: vmovd %xmm0, %eax 1442; X64-AVX2-NEXT: vzeroupper 1443; X64-AVX2-NEXT: retq 1444; 1445; X64-AVX512-LABEL: test_reduce_v16i32: 1446; X64-AVX512: ## %bb.0: 1447; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1448; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1449; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1450; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1451; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1452; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1453; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1454; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1455; X64-AVX512-NEXT: vmovd %xmm0, %eax 1456; X64-AVX512-NEXT: vzeroupper 1457; X64-AVX512-NEXT: retq 1458 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1459 %2 = icmp sgt <16 x i32> %a0, %1 1460 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1461 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1462 %5 = icmp sgt <16 x i32> %3, %4 1463 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1464 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1465 %8 = icmp sgt <16 x i32> %6, %7 1466 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1467 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1468 %11 = icmp sgt <16 x i32> %9, %10 1469 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1470 %13 = extractelement <16 x i32> %12, i32 0 1471 ret i32 %13 1472} 1473 1474define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1475; X86-SSE2-LABEL: test_reduce_v32i16: 1476; X86-SSE2: ## %bb.0: 1477; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1 1478; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0 1479; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1480; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1481; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1482; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1483; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1484; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1485; X86-SSE2-NEXT: psrld $16, %xmm1 1486; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1487; X86-SSE2-NEXT: movd %xmm1, %eax 1488; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1489; X86-SSE2-NEXT: retl 1490; 1491; X86-SSE42-LABEL: test_reduce_v32i16: 1492; X86-SSE42: ## %bb.0: 1493; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1 1494; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0 1495; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 1496; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1497; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1498; X86-SSE42-NEXT: movd %xmm0, 
%eax 1499; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1500; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1501; X86-SSE42-NEXT: retl 1502; 1503; X86-AVX1-LABEL: test_reduce_v32i16: 1504; X86-AVX1: ## %bb.0: 1505; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1506; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1507; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1508; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1509; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 1510; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1511; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1512; X86-AVX1-NEXT: vmovd %xmm0, %eax 1513; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1514; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1515; X86-AVX1-NEXT: vzeroupper 1516; X86-AVX1-NEXT: retl 1517; 1518; X86-AVX2-LABEL: test_reduce_v32i16: 1519; X86-AVX2: ## %bb.0: 1520; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1521; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1522; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1523; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1524; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1525; X86-AVX2-NEXT: vmovd %xmm0, %eax 1526; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1527; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1528; X86-AVX2-NEXT: vzeroupper 1529; X86-AVX2-NEXT: retl 1530; 1531; X64-SSE2-LABEL: test_reduce_v32i16: 1532; X64-SSE2: ## %bb.0: 1533; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1 1534; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0 1535; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1536; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1537; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1538; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1539; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1540; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1541; X64-SSE2-NEXT: psrld $16, %xmm1 1542; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1543; X64-SSE2-NEXT: movd %xmm1, %eax 1544; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1545; X64-SSE2-NEXT: retq 1546; 1547; 
X64-SSE42-LABEL: test_reduce_v32i16: 1548; X64-SSE42: ## %bb.0: 1549; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1 1550; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0 1551; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 1552; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1553; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1554; X64-SSE42-NEXT: movd %xmm0, %eax 1555; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1556; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1557; X64-SSE42-NEXT: retq 1558; 1559; X64-AVX1-LABEL: test_reduce_v32i16: 1560; X64-AVX1: ## %bb.0: 1561; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1562; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1563; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1564; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1565; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 1566; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1567; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1568; X64-AVX1-NEXT: vmovd %xmm0, %eax 1569; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1570; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1571; X64-AVX1-NEXT: vzeroupper 1572; X64-AVX1-NEXT: retq 1573; 1574; X64-AVX2-LABEL: test_reduce_v32i16: 1575; X64-AVX2: ## %bb.0: 1576; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1577; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1578; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1579; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1580; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1581; X64-AVX2-NEXT: vmovd %xmm0, %eax 1582; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1583; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1584; X64-AVX2-NEXT: vzeroupper 1585; X64-AVX2-NEXT: retq 1586; 1587; X64-AVX512-LABEL: test_reduce_v32i16: 1588; X64-AVX512: ## %bb.0: 1589; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1590; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1591; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1592; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1593; 
X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1594; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1595; X64-AVX512-NEXT: vmovd %xmm0, %eax 1596; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1597; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1598; X64-AVX512-NEXT: vzeroupper 1599; X64-AVX512-NEXT: retq 1600 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1601 %2 = icmp sgt <32 x i16> %a0, %1 1602 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1603 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1604 %5 = icmp sgt <32 x i16> %3, %4 1605 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1606 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1607 %8 = icmp sgt <32 x i16> %6, %7 1608 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1609 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1610 %11 = icmp sgt <32 x i16> %9, %10 1611 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1612 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1613 %14 = icmp sgt <32 x i16> %12, %13 1614 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1615 %16 = extractelement <32 x i16> %15, i32 0 1616 ret i16 %16 1617} 1618 1619define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1620; X86-SSE2-LABEL: test_reduce_v64i8: 1621; X86-SSE2: ## %bb.0: 1622; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1623; X86-SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1624; X86-SSE2-NEXT: pand %xmm4, %xmm1 1625; X86-SSE2-NEXT: pandn %xmm3, %xmm4 1626; X86-SSE2-NEXT: por %xmm1, %xmm4 1627; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1628; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1629; X86-SSE2-NEXT: pand %xmm1, %xmm0 1630; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1631; X86-SSE2-NEXT: por %xmm0, %xmm1 1632; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1633; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1634; X86-SSE2-NEXT: pand %xmm0, %xmm1 1635; X86-SSE2-NEXT: pandn %xmm4, %xmm0 1636; X86-SSE2-NEXT: por %xmm1, %xmm0 1637; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1638; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1639; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1640; X86-SSE2-NEXT: pand %xmm2, %xmm0 1641; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1642; X86-SSE2-NEXT: por %xmm0, %xmm2 1643; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] 1644; X86-SSE2-NEXT: movdqa %xmm2, 
%xmm0 1645; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1646; X86-SSE2-NEXT: pand %xmm0, %xmm2 1647; X86-SSE2-NEXT: pandn %xmm1, %xmm0 1648; X86-SSE2-NEXT: por %xmm2, %xmm0 1649; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1650; X86-SSE2-NEXT: psrld $16, %xmm2 1651; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1652; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1653; X86-SSE2-NEXT: pand %xmm1, %xmm0 1654; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1655; X86-SSE2-NEXT: por %xmm0, %xmm1 1656; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1657; X86-SSE2-NEXT: psrlw $8, %xmm0 1658; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1659; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1660; X86-SSE2-NEXT: pand %xmm2, %xmm1 1661; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1662; X86-SSE2-NEXT: por %xmm1, %xmm2 1663; X86-SSE2-NEXT: movd %xmm2, %eax 1664; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1665; X86-SSE2-NEXT: retl 1666; 1667; X86-SSE42-LABEL: test_reduce_v64i8: 1668; X86-SSE42: ## %bb.0: 1669; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1 1670; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0 1671; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 1672; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1673; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 1674; X86-SSE42-NEXT: psrlw $8, %xmm1 1675; X86-SSE42-NEXT: pminub %xmm0, %xmm1 1676; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1677; X86-SSE42-NEXT: movd %xmm0, %eax 1678; X86-SSE42-NEXT: xorb $127, %al 1679; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1680; X86-SSE42-NEXT: retl 1681; 1682; X86-AVX1-LABEL: test_reduce_v64i8: 1683; X86-AVX1: ## %bb.0: 1684; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1685; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1686; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1687; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1688; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 1689; X86-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1690; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1691; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1692; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1693; X86-AVX1-NEXT: vmovd 
%xmm0, %eax 1694; X86-AVX1-NEXT: xorb $127, %al 1695; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1696; X86-AVX1-NEXT: vzeroupper 1697; X86-AVX1-NEXT: retl 1698; 1699; X86-AVX2-LABEL: test_reduce_v64i8: 1700; X86-AVX2: ## %bb.0: 1701; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1702; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1703; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1704; X86-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1705; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1706; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1707; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1708; X86-AVX2-NEXT: vmovd %xmm0, %eax 1709; X86-AVX2-NEXT: xorb $127, %al 1710; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1711; X86-AVX2-NEXT: vzeroupper 1712; X86-AVX2-NEXT: retl 1713; 1714; X64-SSE2-LABEL: test_reduce_v64i8: 1715; X64-SSE2: ## %bb.0: 1716; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1717; X64-SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1718; X64-SSE2-NEXT: pand %xmm4, %xmm1 1719; X64-SSE2-NEXT: pandn %xmm3, %xmm4 1720; X64-SSE2-NEXT: por %xmm1, %xmm4 1721; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1722; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1723; X64-SSE2-NEXT: pand %xmm1, %xmm0 1724; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1725; X64-SSE2-NEXT: por %xmm0, %xmm1 1726; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1727; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1728; X64-SSE2-NEXT: pand %xmm0, %xmm1 1729; X64-SSE2-NEXT: pandn %xmm4, %xmm0 1730; X64-SSE2-NEXT: por %xmm1, %xmm0 1731; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1732; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1733; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1734; X64-SSE2-NEXT: pand %xmm2, %xmm0 1735; X64-SSE2-NEXT: pandn %xmm1, %xmm2 1736; X64-SSE2-NEXT: por %xmm0, %xmm2 1737; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1738; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1739; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1740; X64-SSE2-NEXT: pand %xmm1, %xmm2 1741; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1742; X64-SSE2-NEXT: por %xmm2, %xmm1 1743; X64-SSE2-NEXT: 
movdqa %xmm1, %xmm0 1744; X64-SSE2-NEXT: psrld $16, %xmm0 1745; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1746; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1747; X64-SSE2-NEXT: pand %xmm2, %xmm1 1748; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1749; X64-SSE2-NEXT: por %xmm1, %xmm2 1750; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1751; X64-SSE2-NEXT: psrlw $8, %xmm0 1752; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1753; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1754; X64-SSE2-NEXT: pand %xmm1, %xmm2 1755; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1756; X64-SSE2-NEXT: por %xmm2, %xmm1 1757; X64-SSE2-NEXT: movd %xmm1, %eax 1758; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1759; X64-SSE2-NEXT: retq 1760; 1761; X64-SSE42-LABEL: test_reduce_v64i8: 1762; X64-SSE42: ## %bb.0: 1763; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1 1764; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0 1765; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 1766; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1767; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 1768; X64-SSE42-NEXT: psrlw $8, %xmm1 1769; X64-SSE42-NEXT: pminub %xmm0, %xmm1 1770; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1771; X64-SSE42-NEXT: movd %xmm0, %eax 1772; X64-SSE42-NEXT: xorb $127, %al 1773; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1774; X64-SSE42-NEXT: retq 1775; 1776; X64-AVX1-LABEL: test_reduce_v64i8: 1777; X64-AVX1: ## %bb.0: 1778; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1779; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1780; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1781; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1782; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 1783; X64-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1784; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1785; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1786; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1787; X64-AVX1-NEXT: vmovd %xmm0, %eax 1788; X64-AVX1-NEXT: xorb $127, %al 1789; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1790; X64-AVX1-NEXT: vzeroupper 1791; X64-AVX1-NEXT: retq 1792; 1793; 
X64-AVX2-LABEL: test_reduce_v64i8: 1794; X64-AVX2: ## %bb.0: 1795; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1796; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1797; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1798; X64-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1799; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1800; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1801; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1802; X64-AVX2-NEXT: vmovd %xmm0, %eax 1803; X64-AVX2-NEXT: xorb $127, %al 1804; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1805; X64-AVX2-NEXT: vzeroupper 1806; X64-AVX2-NEXT: retq 1807; 1808; X64-AVX512-LABEL: test_reduce_v64i8: 1809; X64-AVX512: ## %bb.0: 1810; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1811; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1812; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1813; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1814; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1815; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1816; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1817; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1818; X64-AVX512-NEXT: vmovd %xmm0, %eax 1819; X64-AVX512-NEXT: xorb $127, %al 1820; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1821; X64-AVX512-NEXT: vzeroupper 1822; X64-AVX512-NEXT: retq 1823 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef> 1824 %2 = icmp sgt <64 x i8> %a0, %1 1825 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1826 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1827 %5 = icmp sgt <64 x i8> %3, %4 1828 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1829 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1830 %8 = icmp sgt <64 x i8> %6, %7 1831 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1832 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1833 %11 = icmp sgt <64 x i8> %9, %10 1834 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1835 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1836 %14 = icmp sgt <64 x i8> %12, %13 1837 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1838 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1839 %17 = icmp sgt <64 x i8> %15, %16 1840 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1841 %19 = extractelement <64 x i8> %18, i32 0 1842 ret i8 %19 1843} 1844 1845; 1846; Partial Vector Reductions 1847; 1848 1849define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1850; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1851; X86-SSE2: ## %bb.0: 1852; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1853; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1854; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1855; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1856; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1857; X86-SSE2-NEXT: psrld $16, %xmm1 1858; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1859; X86-SSE2-NEXT: movd %xmm1, %eax 1860; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1861; X86-SSE2-NEXT: retl 1862; 1863; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 1864; X86-SSE42: ## %bb.0: 1865; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1866; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1867; X86-SSE42-NEXT: movd %xmm0, %eax 1868; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1869; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1870; X86-SSE42-NEXT: retl 1871; 1872; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 1873; X86-AVX: ## %bb.0: 1874; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1875; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1876; X86-AVX-NEXT: vmovd %xmm0, %eax 1877; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1878; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1879; X86-AVX-NEXT: vzeroupper 1880; X86-AVX-NEXT: retl 1881; 1882; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: 1883; X64-SSE2: ## %bb.0: 1884; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1885; X64-SSE2-NEXT: pmaxsw %xmm0, 
%xmm1 1886; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1887; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1888; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1889; X64-SSE2-NEXT: psrld $16, %xmm1 1890; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1891; X64-SSE2-NEXT: movd %xmm1, %eax 1892; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1893; X64-SSE2-NEXT: retq 1894; 1895; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 1896; X64-SSE42: ## %bb.0: 1897; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1898; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1899; X64-SSE42-NEXT: movd %xmm0, %eax 1900; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1901; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1902; X64-SSE42-NEXT: retq 1903; 1904; X64-AVX1OR2-LABEL: test_reduce_v16i16_v8i16: 1905; X64-AVX1OR2: ## %bb.0: 1906; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1907; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 1908; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 1909; X64-AVX1OR2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1910; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax 1911; X64-AVX1OR2-NEXT: vzeroupper 1912; X64-AVX1OR2-NEXT: retq 1913; 1914; X64-AVX512-LABEL: test_reduce_v16i16_v8i16: 1915; X64-AVX512: ## %bb.0: 1916; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1917; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1918; X64-AVX512-NEXT: vmovd %xmm0, %eax 1919; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1920; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1921; X64-AVX512-NEXT: vzeroupper 1922; X64-AVX512-NEXT: retq 1923 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1924 %2 = icmp sgt <16 x i16> %a0, %1 1925 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 1926 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> 
<i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1927 %5 = icmp sgt <16 x i16> %3, %4 1928 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 1929 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1930 %8 = icmp sgt <16 x i16> %6, %7 1931 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 1932 %10 = extractelement <16 x i16> %9, i32 0 1933 ret i16 %10 1934} 1935 1936define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 1937; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 1938; X86-SSE2: ## %bb.0: 1939; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1940; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1941; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1942; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1943; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1944; X86-SSE2-NEXT: psrld $16, %xmm1 1945; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1946; X86-SSE2-NEXT: movd %xmm1, %eax 1947; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1948; X86-SSE2-NEXT: retl 1949; 1950; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: 1951; X86-SSE42: ## %bb.0: 1952; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 1953; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1954; X86-SSE42-NEXT: movd %xmm0, %eax 1955; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1956; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1957; X86-SSE42-NEXT: retl 1958; 1959; X86-AVX-LABEL: test_reduce_v32i16_v8i16: 1960; X86-AVX: ## %bb.0: 1961; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1962; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1963; X86-AVX-NEXT: vmovd %xmm0, %eax 1964; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1965; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1966; X86-AVX-NEXT: vzeroupper 
1967; X86-AVX-NEXT: retl 1968; 1969; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: 1970; X64-SSE2: ## %bb.0: 1971; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1972; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1973; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1974; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1975; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1976; X64-SSE2-NEXT: psrld $16, %xmm1 1977; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1978; X64-SSE2-NEXT: movd %xmm1, %eax 1979; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1980; X64-SSE2-NEXT: retq 1981; 1982; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: 1983; X64-SSE42: ## %bb.0: 1984; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1985; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1986; X64-SSE42-NEXT: movd %xmm0, %eax 1987; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1988; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1989; X64-SSE42-NEXT: retq 1990; 1991; X64-AVX1OR2-LABEL: test_reduce_v32i16_v8i16: 1992; X64-AVX1OR2: ## %bb.0: 1993; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1994; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 1995; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 1996; X64-AVX1OR2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1997; X64-AVX1OR2-NEXT: ## kill: def $ax killed $ax killed $eax 1998; X64-AVX1OR2-NEXT: vzeroupper 1999; X64-AVX1OR2-NEXT: retq 2000; 2001; X64-AVX512-LABEL: test_reduce_v32i16_v8i16: 2002; X64-AVX512: ## %bb.0: 2003; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 2004; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 2005; X64-AVX512-NEXT: vmovd %xmm0, %eax 2006; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 2007; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 2008; X64-AVX512-NEXT: vzeroupper 2009; X64-AVX512-NEXT: retq 2010 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2011 %2 = icmp sgt <32 x i16> %a0, %1 2012 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 2013 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2014 %5 = icmp sgt <32 x i16> %3, %4 2015 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 2016 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2017 %8 = icmp sgt <32 x i16> %6, %7 2018 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 2019 %10 = extractelement <32 x i16> %9, i32 0 2020 ret i16 %10 2021} 2022 2023define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { 2024; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: 2025; X86-SSE2: ## %bb.0: 2026; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2027; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 2028; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 2029; X86-SSE2-NEXT: pand %xmm2, %xmm0 2030; X86-SSE2-NEXT: pandn %xmm1, %xmm2 2031; X86-SSE2-NEXT: por %xmm0, %xmm2 2032; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] 2033; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 2034; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 2035; X86-SSE2-NEXT: pand %xmm0, %xmm2 2036; X86-SSE2-NEXT: pandn 
%xmm1, %xmm0 2037; X86-SSE2-NEXT: por %xmm2, %xmm0 2038; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 2039; X86-SSE2-NEXT: psrld $16, %xmm2 2040; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 2041; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2042; X86-SSE2-NEXT: pand %xmm1, %xmm0 2043; X86-SSE2-NEXT: pandn %xmm2, %xmm1 2044; X86-SSE2-NEXT: por %xmm0, %xmm1 2045; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2046; X86-SSE2-NEXT: psrlw $8, %xmm0 2047; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 2048; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2049; X86-SSE2-NEXT: pand %xmm2, %xmm1 2050; X86-SSE2-NEXT: pandn %xmm0, %xmm2 2051; X86-SSE2-NEXT: por %xmm1, %xmm2 2052; X86-SSE2-NEXT: movd %xmm2, %eax 2053; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2054; X86-SSE2-NEXT: retl 2055; 2056; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: 2057; X86-SSE42: ## %bb.0: 2058; X86-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 2059; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 2060; X86-SSE42-NEXT: psrlw $8, %xmm1 2061; X86-SSE42-NEXT: pminub %xmm0, %xmm1 2062; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2063; X86-SSE42-NEXT: movd %xmm0, %eax 2064; X86-SSE42-NEXT: xorb $127, %al 2065; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2066; X86-SSE42-NEXT: retl 2067; 2068; X86-AVX-LABEL: test_reduce_v32i8_v16i8: 2069; X86-AVX: ## %bb.0: 2070; X86-AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 2071; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2072; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2073; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2074; X86-AVX-NEXT: vmovd %xmm0, %eax 2075; X86-AVX-NEXT: xorb $127, %al 2076; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 2077; X86-AVX-NEXT: vzeroupper 2078; X86-AVX-NEXT: retl 2079; 2080; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: 2081; X64-SSE2: ## %bb.0: 2082; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2083; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2084; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 2085; X64-SSE2-NEXT: pand %xmm2, %xmm0 2086; X64-SSE2-NEXT: pandn %xmm1, %xmm2 2087; X64-SSE2-NEXT: por 
%xmm0, %xmm2 2088; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 2089; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 2090; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2091; X64-SSE2-NEXT: pand %xmm1, %xmm2 2092; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2093; X64-SSE2-NEXT: por %xmm2, %xmm1 2094; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 2095; X64-SSE2-NEXT: psrld $16, %xmm0 2096; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 2097; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2098; X64-SSE2-NEXT: pand %xmm2, %xmm1 2099; X64-SSE2-NEXT: pandn %xmm0, %xmm2 2100; X64-SSE2-NEXT: por %xmm1, %xmm2 2101; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 2102; X64-SSE2-NEXT: psrlw $8, %xmm0 2103; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 2104; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2105; X64-SSE2-NEXT: pand %xmm1, %xmm2 2106; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2107; X64-SSE2-NEXT: por %xmm2, %xmm1 2108; X64-SSE2-NEXT: movd %xmm1, %eax 2109; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2110; X64-SSE2-NEXT: retq 2111; 2112; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: 2113; X64-SSE42: ## %bb.0: 2114; X64-SSE42-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2115; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 2116; X64-SSE42-NEXT: psrlw $8, %xmm1 2117; X64-SSE42-NEXT: pminub %xmm0, %xmm1 2118; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2119; X64-SSE42-NEXT: movd %xmm0, %eax 2120; X64-SSE42-NEXT: xorb $127, %al 2121; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2122; X64-SSE42-NEXT: retq 2123; 2124; X64-AVX1OR2-LABEL: test_reduce_v32i8_v16i8: 2125; X64-AVX1OR2: ## %bb.0: 2126; X64-AVX1OR2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2127; X64-AVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm1 2128; X64-AVX1OR2-NEXT: vpminub %xmm1, %xmm0, %xmm0 2129; X64-AVX1OR2-NEXT: vphminposuw %xmm0, %xmm0 2130; X64-AVX1OR2-NEXT: vmovd %xmm0, %eax 2131; X64-AVX1OR2-NEXT: xorb $127, %al 2132; X64-AVX1OR2-NEXT: ## kill: def $al killed $al killed $eax 2133; X64-AVX1OR2-NEXT: vzeroupper 2134; X64-AVX1OR2-NEXT: retq 2135; 2136; X64-AVX512-LABEL: 
test_reduce_v32i8_v16i8: 2137; X64-AVX512: ## %bb.0: 2138; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 2139; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 2140; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 2141; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 2142; X64-AVX512-NEXT: vmovd %xmm0, %eax 2143; X64-AVX512-NEXT: xorb $127, %al 2144; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 2145; X64-AVX512-NEXT: vzeroupper 2146; X64-AVX512-NEXT: retq 2147 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2148 %2 = icmp sgt <32 x i8> %a0, %1 2149 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 2150 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2151 %5 = icmp sgt <32 x i8> %3, %4 2152 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 2153 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2154 %8 = icmp sgt <32 x i8> %6, %7 2155 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 2156 %10 = 
shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

; Signed-max reduction that only ever touches the low 16 bytes of a <64 x i8>
; input (every shuffle mask below indexes lanes 0..15; the rest are undef), so
; codegen should fold it to a single-XMM lowering. SSE2 targets emit the
; pcmpgtb/pand/pandn/por compare-and-blend chain; SSE4.2 and later emit the
; trick of byte-inverting with pxor, taking an unsigned min (pminub +
; phminposuw), and recovering the signed max with a final "xorb $127".
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py —
; regenerate rather than hand-edit them.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    pandn %xmm1, %xmm0
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $16, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
; X86-SSE2-NEXT:    por %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorb $127, %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1OR2-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX1OR2:       ## %bb.0:
; X64-AVX1OR2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1OR2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1OR2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1OR2-NEXT:    vmovd %xmm0, %eax
; X64-AVX1OR2-NEXT:    xorb $127, %al
; X64-AVX1OR2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1OR2-NEXT:    vzeroupper
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    xorb $127, %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  ; Step 1: smax of bytes [0..7] against bytes [8..15].
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp sgt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  ; Step 2: smax of bytes [0..3] against bytes [4..7].
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp sgt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  ; Step 3: smax of bytes [0..1] against bytes [2..3].
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp sgt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  ; Step 4: smax of byte 0 against byte 1; the result lands in lane 0.
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}