xref: /llvm-project/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll (revision a45a903a2133c6b03882377f0632b79f24154d5c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
4
; Shuffle of two packusdw results that keeps only elements 0-3 of each
; (mask indices 0-3 pick from %p0, indices 8-11 pick elements 0-3 of %p1).
; The assertions below expect this to become a single (v)packusdw of
; %xmm2 into %xmm0 for both the SSE and AVX run lines.
5define <8 x i16> @blend_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
6; SSE-LABEL: blend_packusdw:
7; SSE:       # %bb.0:
8; SSE-NEXT:    packusdw %xmm2, %xmm0
9; SSE-NEXT:    retq
10;
11; AVX-LABEL: blend_packusdw:
12; AVX:       # %bb.0:
13; AVX-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
14; AVX-NEXT:    retq
15  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
16  %p1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
17  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %p1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
18  ret <8 x i16> %s0
19}
20
; Shuffle of two packuswb.128 results that keeps only elements 0-7 of each
; (mask indices 0-7 pick from %p0, indices 16-23 pick elements 0-7 of %p1).
; The assertions below expect this to become a single (v)packuswb of
; %xmm2 into %xmm0 for both the SSE and AVX run lines.
21define <16 x i8> @blend_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
22; SSE-LABEL: blend_packuswb:
23; SSE:       # %bb.0:
24; SSE-NEXT:    packuswb %xmm2, %xmm0
25; SSE-NEXT:    retq
26;
27; AVX-LABEL: blend_packuswb:
28; AVX:       # %bb.0:
29; AVX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
30; AVX-NEXT:    retq
31  %p0 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
32  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
33  %s0 = shufflevector <16 x i8> %p0, <16 x i8> %p1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
34  ret <16 x i8> %s0
35}
36
; Mixed case: the shuffle combines elements 0-3 of a packusdw result with
; elements 0-3 of a packuswb.128 result that has been bitcast to <8 x i16>
; (mask indices 8-11 pick elements 0-3 of %b1).
; Because the two sources are different pack intrinsics, the assertions below
; do not expect a single pack: each pack is applied to one register with
; itself, and (v)punpcklqdq then joins element 0 of each 64-bit result.
37define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
38; SSE-LABEL: blend_packusdw_packuswb:
39; SSE:       # %bb.0:
40; SSE-NEXT:    packusdw %xmm0, %xmm0
41; SSE-NEXT:    packuswb %xmm2, %xmm2
42; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
43; SSE-NEXT:    retq
44;
45; AVX-LABEL: blend_packusdw_packuswb:
46; AVX:       # %bb.0:
47; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
48; AVX-NEXT:    vpackuswb %xmm2, %xmm2, %xmm1
49; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
50; AVX-NEXT:    retq
51  %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
52  %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
53  %b1 = bitcast <16 x i8> %p1 to <8 x i16>
54  %s0 = shufflevector <8 x i16> %p0, <8 x i16> %b1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
55  ret <8 x i16> %s0
56}
57
; Declarations of the x86 pack intrinsics called by the test functions in this file.
58declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
59declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)
60