; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

;
; vXi64
;

define i64 @test_v2i64(<2 x i64> %a0) nounwind {
; X86-SSE-LABEL: test_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE-NEXT: movd %xmm0, %edx
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v2i64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movq %xmm1, %rax
; X64-SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
  %1 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a0)
  ret i64 %1
}

define i64 @test_v4i64(<4 x i64> %a0) nounwind {
; X86-SSE-LABEL: test_v4i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE-NEXT: movd %xmm0, %edx
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v4i64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movq %xmm1, %rax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %a0)
  ret i64 %1
}

define i64 @test_v8i64(<8 x i64> %a0) nounwind {
; X86-SSE-LABEL: test_v8i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 8(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: movd %xmm0, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE-NEXT: movd %xmm0, %edx
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v8i64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movq %xmm1, %rax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %a0)
  ret i64 %1
}

define i64 @test_v16i64(<16 x i64> %a0) nounwind {
; X86-SSE-LABEL: test_v16i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE-NEXT: pxor 56(%ebp), %xmm2
; X86-SSE-NEXT: pxor 24(%ebp), %xmm0
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 72(%ebp), %xmm3
; X86-SSE-NEXT: pxor 40(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm3, %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: movd %xmm0, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE-NEXT: movd %xmm0, %edx
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v16i64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm6, %xmm2
; X64-SSE-NEXT: pxor %xmm4, %xmm0
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm7, %xmm3
; X64-SSE-NEXT: pxor %xmm5, %xmm1
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movq %xmm0, %rax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v16i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;

define i32 @test_v2i32(<2 x i32> %a0) nounwind {
; SSE-LABEL: test_v2i32:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
  %1 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a0)
  ret i32 %1
}

define i32 @test_v4i32(<4 x i32> %a0) nounwind {
; SSE-LABEL: test_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
  %1 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a0)
  ret i32 %1
}

define i32 @test_v8i32(<8 x i32> %a0) nounwind {
; SSE-LABEL: test_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a0)
  ret i32 %1
}

define i32 @test_v16i32(<16 x i32> %a0) nounwind {
; X86-SSE-LABEL: test_v16i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 8(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v16i32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %a0)
  ret i32 %1
}

define i32 @test_v32i32(<32 x i32> %a0) nounwind {
; X86-SSE-LABEL: test_v32i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE-NEXT: pxor 56(%ebp), %xmm2
; X86-SSE-NEXT: pxor 24(%ebp), %xmm0
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 72(%ebp), %xmm3
; X86-SSE-NEXT: pxor 40(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm3, %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v32i32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm6, %xmm2
; X64-SSE-NEXT: pxor %xmm4, %xmm0
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm7, %xmm3
; X64-SSE-NEXT: pxor %xmm5, %xmm1
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %a0)
  ret i32 %1
}

;
; vXi16
;

define i16 @test_v2i16(<2 x i16> %a0) nounwind {
; SSE-LABEL: test_v2i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
  %1 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a0)
  ret i16 %1
}

define i16 @test_v4i16(<4 x i16> %a0) nounwind {
; SSE-LABEL: test_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
  %1 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a0)
  ret i16 %1
}

define i16 @test_v8i16(<8 x i16> %a0) nounwind {
; SSE-LABEL: test_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
  %1 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a0)
  ret i16 %1
}

define i16 @test_v16i16(<16 x i16> %a0) nounwind {
; SSE-LABEL: test_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %a0)
  ret i16 %1
}

define i16 @test_v32i16(<32 x i16> %a0) nounwind {
; X86-SSE-LABEL: test_v32i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 8(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movdqa %xmm1, %xmm0
; X86-SSE-NEXT: psrld $16, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: movd %xmm0, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v32i16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movdqa %xmm0, %xmm1
; X64-SSE-NEXT: psrld $16, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vxorps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %a0)
  ret i16 %1
}

define i16 @test_v64i16(<64 x i16> %a0) nounwind {
; X86-SSE-LABEL: test_v64i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE-NEXT: pxor 56(%ebp), %xmm2
; X86-SSE-NEXT: pxor 24(%ebp), %xmm0
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 72(%ebp), %xmm3
; X86-SSE-NEXT: pxor 40(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm3, %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movdqa %xmm1, %xmm0
; X86-SSE-NEXT: psrld $16, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: movd %xmm0, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v64i16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm6, %xmm2
; X64-SSE-NEXT: pxor %xmm4, %xmm0
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm7, %xmm3
; X64-SSE-NEXT: pxor %xmm5, %xmm1
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
; X64-SSE-NEXT: psrld $16, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v64i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vxorps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v64i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v64i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %a0)
  ret i16 %1
}

;
; vXi8
;

define i8 @test_v2i8(<2 x i8> %a0) nounwind {
; SSE-LABEL: test_v2i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> %a0)
  ret i8 %1
}

define i8 @test_v4i8(<4 x i8> %a0) nounwind {
; SSE-LABEL: test_v4i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a0)
  ret i8 %1
}

define i8 @test_v8i8(<8 x i8> %a0) nounwind {
; SSE-LABEL: test_v8i8:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $8, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a0)
  ret i8 %1
}

define i8 @test_v16i8(<16 x i8> %a0) nounwind {
; SSE-LABEL: test_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a0)
  ret i8 %1
}

define i8 @test_v32i8(<32 x i8> %a0) nounwind {
; SSE-LABEL: test_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a0)
  ret i8 %1
}

define i8 @test_v64i8(<64 x i8> %a0) nounwind {
; X86-SSE-LABEL: test_v64i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 8(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movdqa %xmm1, %xmm0
; X86-SSE-NEXT: psrld $16, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
; X86-SSE-NEXT: psrlw $8, %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v64i8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movdqa %xmm0, %xmm1
; X64-SSE-NEXT: psrld $16, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
; X64-SSE-NEXT: psrlw $8, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %a0)
  ret i8 %1
}

define i8 @test_v128i8(<128 x i8> %a0) nounwind {
; X86-SSE-LABEL: test_v128i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE-NEXT: pxor 56(%ebp), %xmm2
; X86-SSE-NEXT: pxor 24(%ebp), %xmm0
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: pxor 72(%ebp), %xmm3
; X86-SSE-NEXT: pxor 40(%ebp), %xmm1
; X86-SSE-NEXT: pxor %xmm3, %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movdqa %xmm1, %xmm0
; X86-SSE-NEXT: psrld $16, %xmm0
; X86-SSE-NEXT: pxor %xmm1, %xmm0
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
; X86-SSE-NEXT: psrlw $8, %xmm1
; X86-SSE-NEXT: pxor %xmm0, %xmm1
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_v128i8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pxor %xmm6, %xmm2
; X64-SSE-NEXT: pxor %xmm4, %xmm0
; X64-SSE-NEXT: pxor %xmm2, %xmm0
; X64-SSE-NEXT: pxor %xmm7, %xmm3
; X64-SSE-NEXT: pxor %xmm5, %xmm1
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
; X64-SSE-NEXT: psrld $16, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movdqa %xmm0, %xmm1
; X64-SSE-NEXT: psrlw $8, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: # kill: def $al killed $al killed $eax
; X64-SSE-NEXT: retq
;
; AVX1-LABEL: test_v128i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v128i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v128i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %a0)
  ret i8 %1
}

declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)

declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)

declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)

declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)
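
; For reference only: a minimal, hand-expanded sketch of the shuffle-halving
; pattern that the v4i32 reduction above lowers to (split the vector in half,
; xor the halves, repeat, then extract lane 0). The function name is made up
; for illustration and the IR is kept as a comment so the autogenerated
; assertions in this test are unaffected.
;
; define i32 @reduce_xor_v4i32_manual(<4 x i32> %a0) {
;   %hi  = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;   %x1  = xor <4 x i32> %a0, %hi
;   %hi2 = shufflevector <4 x i32> %x1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;   %x2  = xor <4 x i32> %x1, %hi2
;   %r   = extractelement <4 x i32> %x2, i32 0
;   ret i32 %r
; }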