; xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-uzp.ll (revision f7018ba0eeaad8dc3e1917cfb986fc9689d72e85)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s

; Unzip of two <8 x i8> vectors: the even-index mask (0,2,...,14) is expected
; to select uzp1.8b and the odd-index mask (1,3,...,15) uzp2.8b. The two
; shuffle results are combined with an integer add.
define <8 x i8> @vuzpi8(<8 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: vuzpi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8b v2, v0, v1
; CHECK-NEXT:    uzp2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Unzip of two <4 x i16> vectors: the even-index mask (0,2,4,6) is expected to
; select uzp1.4h and the odd-index mask (1,3,5,7) uzp2.4h. The two shuffle
; results are combined with an integer add.
define <4 x i16> @vuzpi16(<4 x i16> %A, <4 x i16> %B) nounwind {
; CHECK-LABEL: vuzpi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.4h v2, v0, v1
; CHECK-NEXT:    uzp2.4h v0, v0, v1
; CHECK-NEXT:    add.4h v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <4 x i16> %A, <4 x i16> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i16> %A, <4 x i16> %B, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; 128-bit unzip of two <16 x i8> vectors: the even-index mask (0,2,...,30) is
; expected to select uzp1.16b and the odd-index mask (1,3,...,31) uzp2.16b.
; The two shuffle results are combined with xor (eor.16b).
define <16 x i8> @vuzpQi8(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: vuzpQi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.16b v2, v0, v1
; CHECK-NEXT:    uzp2.16b v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %tmp4 = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %tmp5 = xor <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; 128-bit unzip of two <8 x i16> vectors: the even-index mask (0,2,...,14) is
; expected to select uzp1.8h and the odd-index mask (1,3,...,15) uzp2.8h.
; The two shuffle results are combined with xor (eor.16b).
define <8 x i16> @vuzpQi16(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8h v2, v0, v1
; CHECK-NEXT:    uzp2.8h v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = xor <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; 128-bit unzip of two <4 x i32> vectors: the even-index mask (0,2,4,6) is
; expected to select uzp1.4s and the odd-index mask (1,3,5,7) uzp2.4s.
; The two shuffle results are combined with xor (eor.16b).
define <4 x i32> @vuzpQi32(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: vuzpQi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.4s v2, v0, v1
; CHECK-NEXT:    uzp2.4s v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = xor <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; Floating-point variant: unzip of two <4 x float> vectors. The even-index mask
; (0,2,4,6) is expected to select uzp1.4s and the odd-index mask (1,3,5,7)
; uzp2.4s. The two shuffle results are combined with fsub.
define <4 x float> @vuzpQf(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: vuzpQf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.4s v2, v0, v1
; CHECK-NEXT:    uzp2.4s v0, v0, v1
; CHECK-NEXT:    fsub.4s v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = fsub <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Undef shuffle-mask indices should not prevent matching to VUZP
; (i.e. the uzp1/uzp2 instructions expected by the CHECK lines below):

; <8 x i8> unzip with undef mask elements: the even-index mask has lanes 2-3
; undef and the odd-index mask has lanes 4-5 undef. uzp1.8b / uzp2.8b are
; still expected.
define <8 x i8> @vuzpi8_undef(<8 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: vuzpi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8b v2, v0, v1
; CHECK-NEXT:    uzp2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; <8 x i16> unzip with scattered undef mask elements: the even-index mask has
; lanes 1 and 3 undef, the odd-index mask has lanes 3 and 4 undef.
; uzp1.8h / uzp2.8h are still expected.
define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8h v2, v0, v1
; CHECK-NEXT:    uzp2.8h v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
  %tmp5 = xor <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; <8 x i16> unzip where lane 0 of both the even-index and odd-index masks is
; undef. uzp1.8h / uzp2.8h are still expected.
define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8h v2, v0, v1
; CHECK-NEXT:    uzp2.8h v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = xor <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; <8 x i16> unzip where lanes 0-1 of both the even-index and odd-index masks
; are undef. uzp1.8h / uzp2.8h are still expected.
define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef01:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8h v2, v0, v1
; CHECK-NEXT:    uzp2.8h v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = xor <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; <8 x i16> unzip where lanes 0-2 of both the even-index and odd-index masks
; are undef. uzp1.8h / uzp2.8h are still expected.
define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef012:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1.8h v2, v0, v1
; CHECK-NEXT:    uzp2.8h v0, v0, v1
; CHECK-NEXT:    eor.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = xor <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}
