; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX

define void @main(<16 x i32> %0, i32 %1) {
; SSE-LABEL: main:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movd %edi, %xmm4
; SSE-NEXT:    movss {{.*#+}} xmm0 = [1,0,0,0]
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm4[1,0]
; SSE-NEXT:    paddd %xmm0, %xmm0
; SSE-NEXT:    paddd %xmm1, %xmm1
; SSE-NEXT:    paddd %xmm3, %xmm3
; SSE-NEXT:    paddd %xmm2, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,1,3]
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[1,0]
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[1,3]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm3[1,0]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[1,3]
; SSE-NEXT:    xorps %xmm2, %xmm0
; SSE-NEXT:    xorps %xmm4, %xmm1
; SSE-NEXT:    xorps %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT:    pxor %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, 0
; SSE-NEXT:    retq
;
; AVX-LABEL: main:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
; AVX-NEXT:    movl $1, %eax
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpinsrd $3, %edi, %xmm2, %xmm2
; AVX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vpaddd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [0,1,1,3,3,5,5,7]
; AVX-NEXT:    vpermd %ymm0, %ymm2, %ymm2
; AVX-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7]
; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,1,3,4,5,5,7]
; AVX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    vpxor %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, 0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
entry:
  %2 = insertelement <16 x i32> %0, i32 1, i64 1
  %3 = insertelement <16 x i32> %2, i32 %1, i64 3
  %4 = insertelement <16 x i32> %3, i32 0, i64 0
  %5 = shl <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %6 = shufflevector <16 x i32> %5, <16 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15>
  %7 = tail call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %6)
  store i32 %7, ptr null, align 4
  ret void
}
declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)