xref: /llvm-project/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll (revision f6947e479e14e7904aa0b2539a95f5dfdc8f9295)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
3
; Builds an <8 x i8> from two <4 x i16> inputs: lanes 0-3 of %a followed by
; lanes 0-3 of %b are each extracted, truncated from i16 to i8, and inserted
; in that order into result lanes 0-7.
; The assertions below expect the whole sequence to lower to a single uzp1
; on the 8b arrangement.
4define <8 x i8> @extract_2_v4i16(<4 x i16> %a, <4 x i16> %b) {
5; CHECK-LABEL: extract_2_v4i16:
6; CHECK:       // %bb.0: // %entry
7; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
8; CHECK-NEXT:    ret
9entry:
10  %a0 = extractelement <4 x i16> %a, i32 0
11  %a1 = extractelement <4 x i16> %a, i32 1
12  %a2 = extractelement <4 x i16> %a, i32 2
13  %a3 = extractelement <4 x i16> %a, i32 3
14  %b0 = extractelement <4 x i16> %b, i32 0
15  %b1 = extractelement <4 x i16> %b, i32 1
16  %b2 = extractelement <4 x i16> %b, i32 2
17  %b3 = extractelement <4 x i16> %b, i32 3
18  %t0 = trunc i16 %a0 to i8
19  %t1 = trunc i16 %a1 to i8
20  %t2 = trunc i16 %a2 to i8
21  %t3 = trunc i16 %a3 to i8
22  %t4 = trunc i16 %b0 to i8
23  %t5 = trunc i16 %b1 to i8
24  %t6 = trunc i16 %b2 to i8
25  %t7 = trunc i16 %b3 to i8
26  %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
27  %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
28  %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
29  %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
30  %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
31  %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
32  %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
33  %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
34  ret <8 x i8> %i7
35}
36
; Same shape as the two-input i16 case, but with <4 x i32> sources: lanes 0-3
; of %a then lanes 0-3 of %b are extracted, truncated from i32 to i8, and
; inserted in order into lanes 0-7 of an <8 x i8>.
; The assertions below expect a lane-by-lane sequence of mov instructions
; (no uzp1 appears in the expected output for this case).
37define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 x i32> %b) {
38; CHECK-LABEL: extract_2_v4i32:
39; CHECK:       // %bb.0: // %entry
40; CHECK-NEXT:    mov w8, v0.s[1]
41; CHECK-NEXT:    mov w9, v0.s[2]
42; CHECK-NEXT:    mov w10, v0.s[3]
43; CHECK-NEXT:    mov v0.b[1], w8
44; CHECK-NEXT:    mov w8, v1.s[1]
45; CHECK-NEXT:    mov v0.b[2], w9
46; CHECK-NEXT:    mov w9, v1.s[2]
47; CHECK-NEXT:    mov v0.b[3], w10
48; CHECK-NEXT:    mov v0.b[4], v1.b[0]
49; CHECK-NEXT:    mov v0.b[5], w8
50; CHECK-NEXT:    mov w8, v1.s[3]
51; CHECK-NEXT:    mov v0.b[6], w9
52; CHECK-NEXT:    mov v0.b[7], w8
53; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
54; CHECK-NEXT:    ret
55entry:
56  %a0 = extractelement <4 x i32> %a, i32 0
57  %a1 = extractelement <4 x i32> %a, i32 1
58  %a2 = extractelement <4 x i32> %a, i32 2
59  %a3 = extractelement <4 x i32> %a, i32 3
60  %b0 = extractelement <4 x i32> %b, i32 0
61  %b1 = extractelement <4 x i32> %b, i32 1
62  %b2 = extractelement <4 x i32> %b, i32 2
63  %b3 = extractelement <4 x i32> %b, i32 3
64  %t0 = trunc i32 %a0 to i8
65  %t1 = trunc i32 %a1 to i8
66  %t2 = trunc i32 %a2 to i8
67  %t3 = trunc i32 %a3 to i8
68  %t4 = trunc i32 %b0 to i8
69  %t5 = trunc i32 %b1 to i8
70  %t6 = trunc i32 %b2 to i8
71  %t7 = trunc i32 %b3 to i8
72  %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
73  %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
74  %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
75  %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
76  %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
77  %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
78  %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
79  %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
80  ret <8 x i8> %i7
81}
82
; Builds a <16 x i8> from four <4 x i16> inputs: lanes 0-3 of %a, %b, %c and
; %d (in that order) are extracted, truncated from i16 to i8, and inserted
; sequentially into result lanes 0-15.
; The assertions below expect two d[1] <- d[0] element moves (pairing v0 with
; v1 and v2 with v3) followed by a single uzp1 on the 16b arrangement.
83define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
84; CHECK-LABEL: extract_4_v4i16:
85; CHECK:       // %bb.0: // %entry
86; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
87; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
88; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
89; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
90; CHECK-NEXT:    mov v2.d[1], v3.d[0]
91; CHECK-NEXT:    mov v0.d[1], v1.d[0]
92; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
93; CHECK-NEXT:    ret
94entry:
95  %a0 = extractelement <4 x i16> %a, i32 0
96  %a1 = extractelement <4 x i16> %a, i32 1
97  %a2 = extractelement <4 x i16> %a, i32 2
98  %a3 = extractelement <4 x i16> %a, i32 3
99  %b0 = extractelement <4 x i16> %b, i32 0
100  %b1 = extractelement <4 x i16> %b, i32 1
101  %b2 = extractelement <4 x i16> %b, i32 2
102  %b3 = extractelement <4 x i16> %b, i32 3
103  %c0 = extractelement <4 x i16> %c, i32 0
104  %c1 = extractelement <4 x i16> %c, i32 1
105  %c2 = extractelement <4 x i16> %c, i32 2
106  %c3 = extractelement <4 x i16> %c, i32 3
107  %d0 = extractelement <4 x i16> %d, i32 0
108  %d1 = extractelement <4 x i16> %d, i32 1
109  %d2 = extractelement <4 x i16> %d, i32 2
110  %d3 = extractelement <4 x i16> %d, i32 3
111  %t0 = trunc i16 %a0 to i8
112  %t1 = trunc i16 %a1 to i8
113  %t2 = trunc i16 %a2 to i8
114  %t3 = trunc i16 %a3 to i8
115  %t4 = trunc i16 %b0 to i8
116  %t5 = trunc i16 %b1 to i8
117  %t6 = trunc i16 %b2 to i8
118  %t7 = trunc i16 %b3 to i8
119  %t8 = trunc i16 %c0 to i8
120  %t9 = trunc i16 %c1 to i8
121  %t10 = trunc i16 %c2 to i8
122  %t11 = trunc i16 %c3 to i8
123  %t12 = trunc i16 %d0 to i8
124  %t13 = trunc i16 %d1 to i8
125  %t14 = trunc i16 %d2 to i8
126  %t15 = trunc i16 %d3 to i8
127  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
128  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
129  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
130  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
131  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
132  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
133  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
134  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
135  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
136  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
137  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
138  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
139  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
140  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
141  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
142  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
143  ret <16 x i8> %i15
144}
145
; Builds a <16 x i8> from four <4 x i32> inputs: lanes 0-3 of %a, %b, %c and
; %d (in that order) are extracted, truncated from i32 to i8, and inserted
; sequentially into result lanes 0-15.
; The assertions below expect three uzp1 instructions: two on the 8h
; arrangement (v0 with v1, v2 with v3) and a final one on the 16b arrangement.
146define <16 x i8> @extract_4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
147; CHECK-LABEL: extract_4_v4i32:
148; CHECK:       // %bb.0: // %entry
149; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
150; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
151; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
152; CHECK-NEXT:    ret
153entry:
154  %a0 = extractelement <4 x i32> %a, i32 0
155  %a1 = extractelement <4 x i32> %a, i32 1
156  %a2 = extractelement <4 x i32> %a, i32 2
157  %a3 = extractelement <4 x i32> %a, i32 3
158  %b0 = extractelement <4 x i32> %b, i32 0
159  %b1 = extractelement <4 x i32> %b, i32 1
160  %b2 = extractelement <4 x i32> %b, i32 2
161  %b3 = extractelement <4 x i32> %b, i32 3
162  %c0 = extractelement <4 x i32> %c, i32 0
163  %c1 = extractelement <4 x i32> %c, i32 1
164  %c2 = extractelement <4 x i32> %c, i32 2
165  %c3 = extractelement <4 x i32> %c, i32 3
166  %d0 = extractelement <4 x i32> %d, i32 0
167  %d1 = extractelement <4 x i32> %d, i32 1
168  %d2 = extractelement <4 x i32> %d, i32 2
169  %d3 = extractelement <4 x i32> %d, i32 3
170  %t0 = trunc i32 %a0 to i8
171  %t1 = trunc i32 %a1 to i8
172  %t2 = trunc i32 %a2 to i8
173  %t3 = trunc i32 %a3 to i8
174  %t4 = trunc i32 %b0 to i8
175  %t5 = trunc i32 %b1 to i8
176  %t6 = trunc i32 %b2 to i8
177  %t7 = trunc i32 %b3 to i8
178  %t8 = trunc i32 %c0 to i8
179  %t9 = trunc i32 %c1 to i8
180  %t10 = trunc i32 %c2 to i8
181  %t11 = trunc i32 %c3 to i8
182  %t12 = trunc i32 %d0 to i8
183  %t13 = trunc i32 %d1 to i8
184  %t14 = trunc i32 %d2 to i8
185  %t15 = trunc i32 %d3 to i8
186  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
187  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
188  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
189  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
190  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
191  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
192  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
193  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
194  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
195  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
196  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
197  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
198  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
199  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
200  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
201  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
202  ret <16 x i8> %i15
203}
204
; Mixed-width variant: %a and %d are <4 x i16> while %b and %c are <4 x i32>.
; Lanes 0-3 of %a, %b, %c and %d (in that order) are extracted, truncated to
; i8 from their respective element widths, and inserted sequentially into
; result lanes 0-15 of a <16 x i8>.
; The assertions below expect xtn/xtn2 narrowing of the two i32 inputs, one
; d[1] <- d[0] element move, and a final uzp1 on the 16b arrangement.
205define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x i16> %d) {
206; CHECK-LABEL: extract_4_mixed:
207; CHECK:       // %bb.0: // %entry
208; CHECK-NEXT:    xtn v2.4h, v2.4s
209; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
210; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
211; CHECK-NEXT:    xtn2 v0.8h, v1.4s
212; CHECK-NEXT:    mov v2.d[1], v3.d[0]
213; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
214; CHECK-NEXT:    ret
215entry:
216  %a0 = extractelement <4 x i16> %a, i32 0
217  %a1 = extractelement <4 x i16> %a, i32 1
218  %a2 = extractelement <4 x i16> %a, i32 2
219  %a3 = extractelement <4 x i16> %a, i32 3
220  %b0 = extractelement <4 x i32> %b, i32 0
221  %b1 = extractelement <4 x i32> %b, i32 1
222  %b2 = extractelement <4 x i32> %b, i32 2
223  %b3 = extractelement <4 x i32> %b, i32 3
224  %c0 = extractelement <4 x i32> %c, i32 0
225  %c1 = extractelement <4 x i32> %c, i32 1
226  %c2 = extractelement <4 x i32> %c, i32 2
227  %c3 = extractelement <4 x i32> %c, i32 3
228  %d0 = extractelement <4 x i16> %d, i32 0
229  %d1 = extractelement <4 x i16> %d, i32 1
230  %d2 = extractelement <4 x i16> %d, i32 2
231  %d3 = extractelement <4 x i16> %d, i32 3
232  %t0 = trunc i16 %a0 to i8
233  %t1 = trunc i16 %a1 to i8
234  %t2 = trunc i16 %a2 to i8
235  %t3 = trunc i16 %a3 to i8
236  %t4 = trunc i32 %b0 to i8
237  %t5 = trunc i32 %b1 to i8
238  %t6 = trunc i32 %b2 to i8
239  %t7 = trunc i32 %b3 to i8
240  %t8 = trunc i32 %c0 to i8
241  %t9 = trunc i32 %c1 to i8
242  %t10 = trunc i32 %c2 to i8
243  %t11 = trunc i32 %c3 to i8
244  %t12 = trunc i16 %d0 to i8
245  %t13 = trunc i16 %d1 to i8
246  %t14 = trunc i16 %d2 to i8
247  %t15 = trunc i16 %d3 to i8
248  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
249  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
250  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
251  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
252  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
253  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
254  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
255  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
256  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
257  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
258  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
259  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
260  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
261  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
262  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
263  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
264  ret <16 x i8> %i15
265}
266
; Variant of the four-input <4 x i32> case in which the source lanes are not
; taken in sequential order: %b1 reads lane 2 of %b and %b2 reads lane 1, so
; result lanes 5 and 6 receive %b's lanes 2 and 1 respectively. All other
; extracts, truncations to i8, and inserts follow the in-order pattern.
; The assertions below expect a tbl over the four-register list v0-v3 with an
; index vector loaded from the constant pool entry .LCPI5_0 (no uzp1).
267define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
268; CHECK-LABEL: extract_4_v4i32_badindex:
269; CHECK:       // %bb.0: // %entry
270; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
271; CHECK-NEXT:    adrp x8, .LCPI5_0
272; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
273; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI5_0]
274; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
275; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
276; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
277; CHECK-NEXT:    ret
278entry:
279  %a0 = extractelement <4 x i32> %a, i32 0
280  %a1 = extractelement <4 x i32> %a, i32 1
281  %a2 = extractelement <4 x i32> %a, i32 2
282  %a3 = extractelement <4 x i32> %a, i32 3
283  %b0 = extractelement <4 x i32> %b, i32 0
284  %b1 = extractelement <4 x i32> %b, i32 2 ; lane 2 here, where the in-order tests use lane 1
285  %b2 = extractelement <4 x i32> %b, i32 1 ; lane 1 here, where the in-order tests use lane 2
286  %b3 = extractelement <4 x i32> %b, i32 3
287  %c0 = extractelement <4 x i32> %c, i32 0
288  %c1 = extractelement <4 x i32> %c, i32 1
289  %c2 = extractelement <4 x i32> %c, i32 2
290  %c3 = extractelement <4 x i32> %c, i32 3
291  %d0 = extractelement <4 x i32> %d, i32 0
292  %d1 = extractelement <4 x i32> %d, i32 1
293  %d2 = extractelement <4 x i32> %d, i32 2
294  %d3 = extractelement <4 x i32> %d, i32 3
295  %t0 = trunc i32 %a0 to i8
296  %t1 = trunc i32 %a1 to i8
297  %t2 = trunc i32 %a2 to i8
298  %t3 = trunc i32 %a3 to i8
299  %t4 = trunc i32 %b0 to i8
300  %t5 = trunc i32 %b1 to i8
301  %t6 = trunc i32 %b2 to i8
302  %t7 = trunc i32 %b3 to i8
303  %t8 = trunc i32 %c0 to i8
304  %t9 = trunc i32 %c1 to i8
305  %t10 = trunc i32 %c2 to i8
306  %t11 = trunc i32 %c3 to i8
307  %t12 = trunc i32 %d0 to i8
308  %t13 = trunc i32 %d1 to i8
309  %t14 = trunc i32 %d2 to i8
310  %t15 = trunc i32 %d3 to i8
311  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
312  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
313  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
314  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
315  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
316  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
317  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
318  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
319  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
320  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
321  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
322  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
323  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
324  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
325  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
326  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
327  ret <16 x i8> %i15
328}
329
; Single-input variant: lanes 0-3 of %a are extracted and truncated from i32
; to i8 once, then the same four truncated values (%t0-%t3) are inserted four
; times over, filling result lanes 0-3, 4-7, 8-11 and 12-15 of a <16 x i8>.
; The assertions below expect two uzp1 instructions (8h then 16b), each using
; v0 for both source operands.
330define <16 x i8> @extract_4_v4i32_one(<4 x i32> %a) {
331; CHECK-LABEL: extract_4_v4i32_one:
332; CHECK:       // %bb.0: // %entry
333; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
334; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v0.16b
335; CHECK-NEXT:    ret
336entry:
337  %a0 = extractelement <4 x i32> %a, i32 0
338  %a1 = extractelement <4 x i32> %a, i32 1
339  %a2 = extractelement <4 x i32> %a, i32 2
340  %a3 = extractelement <4 x i32> %a, i32 3
341  %t0 = trunc i32 %a0 to i8
342  %t1 = trunc i32 %a1 to i8
343  %t2 = trunc i32 %a2 to i8
344  %t3 = trunc i32 %a3 to i8
345  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
346  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
347  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
348  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
349  %i4 = insertelement <16 x i8> %i3, i8 %t0, i32 4
350  %i5 = insertelement <16 x i8> %i4, i8 %t1, i32 5
351  %i6 = insertelement <16 x i8> %i5, i8 %t2, i32 6
352  %i7 = insertelement <16 x i8> %i6, i8 %t3, i32 7
353  %i8 = insertelement <16 x i8> %i7, i8 %t0, i32 8
354  %i9 = insertelement <16 x i8> %i8, i8 %t1, i32 9
355  %i10 = insertelement <16 x i8> %i9, i8 %t2, i32 10
356  %i11 = insertelement <16 x i8> %i10, i8 %t3, i32 11
357  %i12 = insertelement <16 x i8> %i11, i8 %t0, i32 12
358  %i13 = insertelement <16 x i8> %i12, i8 %t1, i32 13
359  %i14 = insertelement <16 x i8> %i13, i8 %t2, i32 14
360  %i15 = insertelement <16 x i8> %i14, i8 %t3, i32 15
361  ret <16 x i8> %i15
362}
363
364