xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll (revision 643c38333fc1b1e7e705e6e1035c595bbd95bc74)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s
3
; Deinterleaves a <vscale x 4 x half> input into two <vscale x 2 x half>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uzp1/uzp2 on .s lanes with z0 as both source operands,
; then a uunpklo of each result into .d lanes.
4define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
5; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
8; CHECK-NEXT:    uzp2 z2.s, z0.s, z0.s
9; CHECK-NEXT:    uunpklo z0.d, z1.s
10; CHECK-NEXT:    uunpklo z1.d, z2.s
11; CHECK-NEXT:    ret
12  %retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
13  ret {<vscale x 2 x half>, <vscale x 2 x half>}   %retval
14}
15
; Deinterleaves a <vscale x 8 x half> input into two <vscale x 4 x half>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uzp1/uzp2 on .h lanes with z0 as both source operands,
; then a uunpklo of each result into .s lanes.
16define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
17; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
18; CHECK:       // %bb.0:
19; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
20; CHECK-NEXT:    uzp2 z2.h, z0.h, z0.h
21; CHECK-NEXT:    uunpklo z0.s, z1.h
22; CHECK-NEXT:    uunpklo z1.s, z2.h
23; CHECK-NEXT:    ret
24  %retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
25  ret {<vscale x 4 x half>, <vscale x 4 x half>}   %retval
26}
27
; Deinterleaves a <vscale x 16 x half> input into two <vscale x 8 x half>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .h lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
28define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
29; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
30; CHECK:       // %bb.0:
31; CHECK-NEXT:    uzp1 z2.h, z0.h, z1.h
32; CHECK-NEXT:    uzp2 z1.h, z0.h, z1.h
33; CHECK-NEXT:    mov z0.d, z2.d
34; CHECK-NEXT:    ret
35  %retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
36  ret {<vscale x 8 x half>, <vscale x 8 x half>}   %retval
37}
38
; Deinterleaves a <vscale x 4 x float> input into two <vscale x 2 x float>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uzp1/uzp2 on .s lanes with z0 as both source operands,
; then a uunpklo of each result into .d lanes.
39define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
40; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
41; CHECK:       // %bb.0:
42; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
43; CHECK-NEXT:    uzp2 z2.s, z0.s, z0.s
44; CHECK-NEXT:    uunpklo z0.d, z1.s
45; CHECK-NEXT:    uunpklo z1.d, z2.s
46; CHECK-NEXT:    ret
47  %retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
48  ret {<vscale x 2 x float>, <vscale x 2 x float>}   %retval
49}
50
; Deinterleaves a <vscale x 8 x float> input into two <vscale x 4 x float>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .s lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
51define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
52; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
53; CHECK:       // %bb.0:
54; CHECK-NEXT:    uzp1 z2.s, z0.s, z1.s
55; CHECK-NEXT:    uzp2 z1.s, z0.s, z1.s
56; CHECK-NEXT:    mov z0.d, z2.d
57; CHECK-NEXT:    ret
58  %retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
59ret  {<vscale x 4 x float>, <vscale x 4 x float>}   %retval
60}
61
; Deinterleaves a <vscale x 4 x double> input into two <vscale x 2 x double>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .d lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
62define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
63; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    uzp1 z2.d, z0.d, z1.d
66; CHECK-NEXT:    uzp2 z1.d, z0.d, z1.d
67; CHECK-NEXT:    mov z0.d, z2.d
68; CHECK-NEXT:    ret
69  %retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
70  ret {<vscale x 2 x double>, <vscale x 2 x double>}   %retval
71}
72
73; Integer element types
74
; Deinterleaves a <vscale x 32 x i8> input into two <vscale x 16 x i8>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .b lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
75define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
76; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
77; CHECK:       // %bb.0:
78; CHECK-NEXT:    uzp1 z2.b, z0.b, z1.b
79; CHECK-NEXT:    uzp2 z1.b, z0.b, z1.b
80; CHECK-NEXT:    mov z0.d, z2.d
81; CHECK-NEXT:    ret
82  %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
83  ret {<vscale x 16 x i8>, <vscale x 16 x i8>}   %retval
84}
85
; Deinterleaves a <vscale x 16 x i16> input into two <vscale x 8 x i16>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .h lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
86define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
87; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
88; CHECK:       // %bb.0:
89; CHECK-NEXT:    uzp1 z2.h, z0.h, z1.h
90; CHECK-NEXT:    uzp2 z1.h, z0.h, z1.h
91; CHECK-NEXT:    mov z0.d, z2.d
92; CHECK-NEXT:    ret
93  %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
94  ret {<vscale x 8 x i16>, <vscale x 8 x i16>}   %retval
95}
96
; Deinterleaves a <vscale x 8 x i32> input into two <vscale x 4 x i32>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .s lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
; NOTE(review): the "nxvv8i32" in the function name looks like a typo for
; "nxv8i32"; renaming would also require updating the label assertion below.
97define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxvv8i32(<vscale x 8 x i32> %vec) {
98; CHECK-LABEL: vector_deinterleave_nxv4i32_nxvv8i32:
99; CHECK:       // %bb.0:
100; CHECK-NEXT:    uzp1 z2.s, z0.s, z1.s
101; CHECK-NEXT:    uzp2 z1.s, z0.s, z1.s
102; CHECK-NEXT:    mov z0.d, z2.d
103; CHECK-NEXT:    ret
104  %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
105  ret {<vscale x 4 x i32>, <vscale x 4 x i32>}   %retval
106}
107
; Deinterleaves a <vscale x 4 x i64> input into two <vscale x 2 x i64>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: one uzp1 and one uzp2 on .d lanes over z0 and z1, plus a
; mov that copies the uzp1 result from z2 into z0.
108define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
109; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
110; CHECK:       // %bb.0:
111; CHECK-NEXT:    uzp1 z2.d, z0.d, z1.d
112; CHECK-NEXT:    uzp2 z1.d, z0.d, z1.d
113; CHECK-NEXT:    mov z0.d, z2.d
114; CHECK-NEXT:    ret
115  %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
116  ret {<vscale x 2 x i64>, <vscale x 2 x i64>}   %retval
117}
118
119; Predicate (i1 element) vectors
; Deinterleaves a <vscale x 32 x i1> input into two <vscale x 16 x i1>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen works on predicate registers: one uzp1 and one uzp2 on .b
; lanes over p0 and p1, plus a mov that copies the uzp1 result from p2 into p0.
120define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
121; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
122; CHECK:       // %bb.0:
123; CHECK-NEXT:    uzp1 p2.b, p0.b, p1.b
124; CHECK-NEXT:    uzp2 p1.b, p0.b, p1.b
125; CHECK-NEXT:    mov p0.b, p2.b
126; CHECK-NEXT:    ret
127  %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
128  ret {<vscale x 16 x i1>, <vscale x 16 x i1>}   %retval
129}
130
; Deinterleaves a <vscale x 16 x i1> input into two <vscale x 8 x i1>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uzp1/uzp2 on .b predicate lanes with p0 as both source
; operands, then a punpklo of each result into p0 and p1.
131define {<vscale x 8 x i1>, <vscale x 8 x i1>} @vector_deinterleave_nxv8i1_nxv16i1(<vscale x 16 x i1> %vec) {
132; CHECK-LABEL: vector_deinterleave_nxv8i1_nxv16i1:
133; CHECK:       // %bb.0:
134; CHECK-NEXT:    uzp1 p1.b, p0.b, p0.b
135; CHECK-NEXT:    uzp2 p2.b, p0.b, p0.b
136; CHECK-NEXT:    punpklo p0.h, p1.b
137; CHECK-NEXT:    punpklo p1.h, p2.b
138; CHECK-NEXT:    ret
139  %retval = call {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.vector.deinterleave2.nxv16i1(<vscale x 16 x i1> %vec)
140  ret {<vscale x 8 x i1>, <vscale x 8 x i1>}   %retval
141}
142
; Deinterleaves a <vscale x 8 x i1> input into two <vscale x 4 x i1>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uzp1/uzp2 on .h predicate lanes with p0 as both source
; operands, then a punpklo of each result into p0 and p1.
143define {<vscale x 4 x i1>, <vscale x 4 x i1>} @vector_deinterleave_nxv4i1_nxv8i1(<vscale x 8 x i1> %vec) {
144; CHECK-LABEL: vector_deinterleave_nxv4i1_nxv8i1:
145; CHECK:       // %bb.0:
146; CHECK-NEXT:    uzp1 p1.h, p0.h, p0.h
147; CHECK-NEXT:    uzp2 p2.h, p0.h, p0.h
148; CHECK-NEXT:    punpklo p0.h, p1.b
149; CHECK-NEXT:    punpklo p1.h, p2.b
150; CHECK-NEXT:    ret
151  %retval = call {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.vector.deinterleave2.nxv8i1(<vscale x 8 x i1> %vec)
152  ret {<vscale x 4 x i1>, <vscale x 4 x i1>}   %retval
153}
154
; Deinterleaves a <vscale x 4 x i1> input into two <vscale x 2 x i1>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uzp1/uzp2 on .s predicate lanes with p0 as both source
; operands, then a punpklo of each result into p0 and p1.
155define {<vscale x 2 x i1>, <vscale x 2 x i1>} @vector_deinterleave_nxv2i1_nxv4i1(<vscale x 4 x i1> %vec) {
156; CHECK-LABEL: vector_deinterleave_nxv2i1_nxv4i1:
157; CHECK:       // %bb.0:
158; CHECK-NEXT:    uzp1 p1.s, p0.s, p0.s
159; CHECK-NEXT:    uzp2 p2.s, p0.s, p0.s
160; CHECK-NEXT:    punpklo p0.h, p1.b
161; CHECK-NEXT:    punpklo p1.h, p2.b
162; CHECK-NEXT:    ret
163  %retval = call {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.vector.deinterleave2.nxv4i1(<vscale x 4 x i1> %vec)
164  ret {<vscale x 2 x i1>, <vscale x 2 x i1>}   %retval
165}
166
167
168; Illegal types that must be split
169
; Deinterleaves a <vscale x 8 x i64> input into two <vscale x 4 x i64>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: two uzp1 and two uzp2 on .d lanes over the register pairs
; z0/z1 and z2/z3, then three movs that place the results in z0-z2 (the
; fourth result is written directly to z3).
170define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
171; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
172; CHECK:       // %bb.0:
173; CHECK-NEXT:    uzp1 z4.d, z2.d, z3.d
174; CHECK-NEXT:    uzp1 z5.d, z0.d, z1.d
175; CHECK-NEXT:    uzp2 z6.d, z0.d, z1.d
176; CHECK-NEXT:    uzp2 z3.d, z2.d, z3.d
177; CHECK-NEXT:    mov z0.d, z5.d
178; CHECK-NEXT:    mov z1.d, z4.d
179; CHECK-NEXT:    mov z2.d, z6.d
180; CHECK-NEXT:    ret
181%retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
182ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
183}
184
; Deinterleaves a <vscale x 16 x i64> input into two <vscale x 8 x i64>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: four uzp1 and four uzp2 on .d lanes over the register
; pairs z0/z1, z2/z3, z4/z5 and z6/z7, then seven movs that place the
; results in z0-z6 (the eighth result is written directly to z7).
185define {<vscale x 8 x i64>, <vscale x 8 x i64>}  @vector_deinterleave_nxv8i64_nxv16i64(<vscale x 16 x i64> %vec) {
186; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
187; CHECK:       // %bb.0:
188; CHECK-NEXT:    uzp1 z24.d, z2.d, z3.d
189; CHECK-NEXT:    uzp1 z25.d, z0.d, z1.d
190; CHECK-NEXT:    uzp1 z26.d, z4.d, z5.d
191; CHECK-NEXT:    uzp1 z27.d, z6.d, z7.d
192; CHECK-NEXT:    uzp2 z28.d, z0.d, z1.d
193; CHECK-NEXT:    uzp2 z29.d, z2.d, z3.d
194; CHECK-NEXT:    uzp2 z30.d, z4.d, z5.d
195; CHECK-NEXT:    uzp2 z7.d, z6.d, z7.d
196; CHECK-NEXT:    mov z0.d, z25.d
197; CHECK-NEXT:    mov z1.d, z24.d
198; CHECK-NEXT:    mov z2.d, z26.d
199; CHECK-NEXT:    mov z3.d, z27.d
200; CHECK-NEXT:    mov z4.d, z28.d
201; CHECK-NEXT:    mov z5.d, z29.d
202; CHECK-NEXT:    mov z6.d, z30.d
203; CHECK-NEXT:    ret
204%retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
205ret {<vscale x 8 x i64>, <vscale x 8 x i64>}  %retval
206}
207
208
209; Illegal types whose element size must be promoted
210
; Deinterleaves a <vscale x 16 x i8> input into two <vscale x 8 x i8>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uunpkhi/uunpklo first widen z0 from .b to .h lanes, then
; uzp1/uzp2 on .h lanes produce the two results in z0 and z1.
211define {<vscale x 8 x i8>, <vscale x 8 x i8>} @vector_deinterleave_nxv8i8_nxv16i8(<vscale x 16 x i8> %vec) {
212; CHECK-LABEL: vector_deinterleave_nxv8i8_nxv16i8:
213; CHECK:       // %bb.0:
214; CHECK-NEXT:    uunpkhi z1.h, z0.b
215; CHECK-NEXT:    uunpklo z2.h, z0.b
216; CHECK-NEXT:    uzp1 z0.h, z2.h, z1.h
217; CHECK-NEXT:    uzp2 z1.h, z2.h, z1.h
218; CHECK-NEXT:    ret
219%retval = call {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %vec)
220ret {<vscale x 8 x i8>, <vscale x 8 x i8>} %retval
221}
222
; Deinterleaves a <vscale x 8 x i16> input into two <vscale x 4 x i16>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uunpkhi/uunpklo first widen z0 from .h to .s lanes, then
; uzp1/uzp2 on .s lanes produce the two results in z0 and z1.
223define {<vscale x 4 x i16>, <vscale x 4 x i16>} @vector_deinterleave_nxv4i16_nxv8i16(<vscale x 8 x i16> %vec) {
224; CHECK-LABEL: vector_deinterleave_nxv4i16_nxv8i16:
225; CHECK:       // %bb.0:
226; CHECK-NEXT:    uunpkhi z1.s, z0.h
227; CHECK-NEXT:    uunpklo z2.s, z0.h
228; CHECK-NEXT:    uzp1 z0.s, z2.s, z1.s
229; CHECK-NEXT:    uzp2 z1.s, z2.s, z1.s
230; CHECK-NEXT:    ret
231%retval = call {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %vec)
232ret {<vscale x 4 x i16>, <vscale x 4 x i16>} %retval
233}
234
; Deinterleaves a <vscale x 4 x i32> input into two <vscale x 2 x i32>
; results via llvm.vector.deinterleave2 and returns the result struct as-is.
; Expected codegen: uunpkhi/uunpklo first widen z0 from .s to .d lanes, then
; uzp1/uzp2 on .d lanes produce the two results in z0 and z1.
235define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv4i32(<vscale x 4 x i32> %vec) {
236; CHECK-LABEL: vector_deinterleave_nxv2i32_nxv4i32:
237; CHECK:       // %bb.0:
238; CHECK-NEXT:    uunpkhi z1.d, z0.s
239; CHECK-NEXT:    uunpklo z2.d, z0.s
240; CHECK-NEXT:    uzp1 z0.d, z2.d, z1.d
241; CHECK-NEXT:    uzp2 z1.d, z2.d, z1.d
242; CHECK-NEXT:    ret
243%retval = call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %vec)
244ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
245}
246
247
248; Floating-point declarations: llvm.vector.deinterleave2 overloads for the
; half, float and double element-type tests in this file.
249declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
250declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
251declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
252declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
253declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
254declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
255
256; Integer declarations: llvm.vector.deinterleave2 overloads for the i8, i16,
; i32 and i64 element-type tests in this file.
257declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
258declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
259declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
260declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
261
262; Predicate declarations: llvm.vector.deinterleave2 overloads for the i1
; element-type tests in this file.
263declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
264declare {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.vector.deinterleave2.nxv16i1(<vscale x 16 x i1>)
265declare {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.vector.deinterleave2.nxv8i1(<vscale x 8 x i1>)
266declare {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.vector.deinterleave2.nxv4i1(<vscale x 4 x i1>)
267
268; Illegal-type declarations. The first group is used by the tests in the
; split-types section of this file.
269declare {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64>)
270declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64>)
271
; The second group is used by the tests in the promoted-element-size section.
272declare {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave2.nxv16i8(<vscale x 16 x i8>)
273declare {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16>)
274declare {<vscale x 2 x i32>, <vscale x 2 x i32>} @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)
275