; xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-sext.ll (revision ca78151001d80d0fd1a2a6db4742f5f673572f7c)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; In-register sign extension from i1 inside <4 x i32>: shifting every lane
; left and then arithmetically right by 31 replicates bit 0 across the lane.
; The expected output keeps this as a vshl.i32 / vshr.s32 pair.
define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i1(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i32 q0, q0, #31
; CHECK-NEXT:    vshr.s32 q0, q0, #31
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %m, <i32 31, i32 31, i32 31, i32 31>
  %shr = ashr exact <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %shr
}

; In-register sign extension from i8 inside <4 x i32> (shl/ashr by 24).
; The expected output uses two widening moves: vmovlb.s8 followed by
; vmovlb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i8(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
  %shr = ashr exact <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %shr
}

; In-register sign extension from i16 inside <4 x i32> (shl/ashr by 16).
; The expected output is a single vmovlb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i16(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
  %shr = ashr exact <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %shr
}

; In-register sign extension from i8 inside <8 x i16> (shl/ashr by 8).
; The expected output is a single vmovlb.s8.
define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i8(<8 x i16> %m) {
; CHECK-LABEL: sext_v8i16_v8i16_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %shr = ashr exact <8 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %shr
}

; In-register sign extension from i1 inside <8 x i16>: shl/ashr by 15
; replicates bit 0 across each 16-bit lane. The expected output keeps this
; as a vshl.i16 / vshr.s16 pair.
define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i1(<8 x i16> %m) {
; CHECK-LABEL: sext_v8i16_v8i16_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i16 q0, q0, #15
; CHECK-NEXT:    vshr.s16 q0, q0, #15
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <8 x i16> %m, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %shr = ashr exact <8 x i16> %shl, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %shr
}

; In-register sign extension from i32 inside <2 x i64> (shl/ashr by 32).
; The expected output goes through core registers: the low words (s0, s2)
; are moved to r1/r0 and written back to lanes 0 and 2, then asrs #31
; produces the sign words that are written to lanes 1 and 3.
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i32(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <2 x i64> %m, <i64 32, i64 32>
  %shr = ashr exact <2 x i64> %shl, <i64 32, i64 32>
  ret <2 x i64> %shr
}

; In-register sign extension from i35 inside <2 x i64>: shl/ashr by 29
; keeps the low 64 - 29 = 35 bits. In the expected output the low words are
; copied unchanged and each high word is sign-extended from its low 3 bits
; with sbfx (35 - 32 = 3).
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    sbfx r0, r1, #0, #3
; CHECK-NEXT:    sbfx r1, r3, #0, #3
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <2 x i64> %m, <i64 29, i64 29>
  %shr = ashr exact <2 x i64> %shl, <i64 29, i64 29>
  ret <2 x i64> %shr
}

; sext <8 x i8> -> <8 x i16>. The expected output is a single vmovlb.s8
; on q0.
define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: sext_v8i8_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <8 x i8> %src to <8 x i16>
  ret <8 x i16> %0
}

; sext <4 x i16> -> <4 x i32>. The expected output is a single vmovlb.s16
; on q0.
define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) {
; CHECK-LABEL: sext_v4i16_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <4 x i16> %src to <4 x i32>
  ret <4 x i32> %0
}

; sext <4 x i8> -> <4 x i32>. The expected output widens in two steps:
; vmovlb.s8 followed by vmovlb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) {
; CHECK-LABEL: sext_v4i8_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <4 x i8> %src to <4 x i32>
  ret <4 x i32> %0
}

; sext <16 x i8> -> <16 x i16>; the result occupies q0 and q1. The expected
; output stores q0 to a 16-byte stack slot and reloads each 8-byte half with
; a sign-extending vldrb.s16 (offsets 0 and 8).
define arm_aapcs_vfpcc <16 x i16> @sext_v16i8_v16i16(<16 x i8> %src) {
; CHECK-LABEL: sext_v16i8_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    vldrb.s16 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <16 x i8> %src to <16 x i16>
  ret <16 x i16> %0
}

; sext <8 x i16> -> <8 x i32>; the result occupies q0 and q1. The expected
; output stores q0 to a 16-byte stack slot and reloads each 8-byte half with
; a sign-extending vldrh.s32 (offsets 0 and 8).
define arm_aapcs_vfpcc <8 x i32> @sext_v8i16_v8i32(<8 x i16> %src) {
; CHECK-LABEL: sext_v8i16_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <8 x i16> %src to <8 x i32>
  ret <8 x i32> %0
}

; sext <16 x i8> -> <16 x i32>; the result occupies q0-q3. The expected
; output uses 48 bytes of stack and widens in two rounds: the source is
; stored and reloaded as two vldrb.s16 halves, each half is stored again
; (at sp+32 and sp+16), and the four results come from vldrh.s32 loads.
define arm_aapcs_vfpcc <16 x i32> @sext_v16i8_v16i32(<16 x i8> %src) {
; CHECK-LABEL: sext_v16i8_v16i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #48
; CHECK-NEXT:    sub sp, #48
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    add r1, sp, #32
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vldrb.s16 q0, [r0, #8]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vldrh.s32 q2, [r0]
; CHECK-NEXT:    vldrh.s32 q3, [r0, #8]
; CHECK-NEXT:    add sp, #48
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <16 x i8> %src to <16 x i32>
  ret <16 x i32> %0
}

; sext <2 x i32> -> <2 x i64>. The expected output matches the in-register
; i32 case in this file: s0/s2 are moved to core registers and written to
; lanes 0 and 2, then asrs #31 produces the sign words for lanes 1 and 3.
define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: sext_v2i32_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <2 x i32> %src to <2 x i64>
  ret <2 x i64> %0
}


; zext <8 x i8> -> <8 x i16>. The expected output is a single vmovlb.u8
; on q0.
define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: zext_v8i8_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <8 x i8> %src to <8 x i16>
  ret <8 x i16> %0
}

; zext <4 x i16> -> <4 x i32>. The expected output is a single vmovlb.u16
; on q0.
define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) {
; CHECK-LABEL: zext_v4i16_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <4 x i16> %src to <4 x i32>
  ret <4 x i32> %0
}

; zext <4 x i8> -> <4 x i32>. Unlike the signed case in this file, the
; expected output uses a mask instead of two widening moves: a 0xff splat
; is materialised in q1 and ANDed into q0.
define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) {
; CHECK-LABEL: zext_v4i8_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q1, #0xff
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <4 x i8> %src to <4 x i32>
  ret <4 x i32> %0
}

; zext <16 x i8> -> <16 x i16>; the result occupies q0 and q1. The expected
; output stores q0 to a 16-byte stack slot and reloads each 8-byte half with
; a zero-extending vldrb.u16 (offsets 0 and 8).
define arm_aapcs_vfpcc <16 x i16> @zext_v16i8_v16i16(<16 x i8> %src) {
; CHECK-LABEL: zext_v16i8_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vldrb.u16 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <16 x i8> %src to <16 x i16>
  ret <16 x i16> %0
}

; zext <8 x i16> -> <8 x i32>; the result occupies q0 and q1. The expected
; output stores q0 to a 16-byte stack slot and reloads each 8-byte half with
; a zero-extending vldrh.u32 (offsets 0 and 8).
define arm_aapcs_vfpcc <8 x i32> @zext_v8i16_v8i32(<8 x i16> %src) {
; CHECK-LABEL: zext_v8i16_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <8 x i16> %src to <8 x i32>
  ret <8 x i32> %0
}

; zext <16 x i8> -> <16 x i32>; the result occupies q0-q3. The expected
; output uses 48 bytes of stack and widens in two rounds: the source is
; stored and reloaded as two vldrb.u16 halves, each half is stored again
; (at sp+32 and sp+16), and the four results come from vldrh.u32 loads.
define arm_aapcs_vfpcc <16 x i32> @zext_v16i8_v16i32(<16 x i8> %src) {
; CHECK-LABEL: zext_v16i8_v16i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #48
; CHECK-NEXT:    sub sp, #48
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    add r1, sp, #32
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vldrb.u16 q0, [r0, #8]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vldrh.u32 q1, [r1, #8]
; CHECK-NEXT:    vldrh.u32 q2, [r0]
; CHECK-NEXT:    vldrh.u32 q3, [r0, #8]
; CHECK-NEXT:    add sp, #48
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <16 x i8> %src to <16 x i32>
  ret <16 x i32> %0
}

; zext <2 x i32> -> <2 x i64>. The expected output clears the high word of
; each 64-bit lane by ANDing q0 with a 0xffffffff per-lane mask held in q1.
define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: zext_v2i32_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i64 q1, #0xffffffff
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <2 x i32> %src to <2 x i64>
  ret <2 x i64> %0
}


; trunc <8 x i16> -> <8 x i8>. The expected output contains no vector
; instruction, only the return.
define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
; CHECK-LABEL: trunc_v8i16_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %src to <8 x i8>
  ret <8 x i8> %0
}

; trunc <4 x i32> -> <4 x i16>. The expected output contains no vector
; instruction, only the return.
define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) {
; CHECK-LABEL: trunc_v4i32_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %src to <4 x i16>
  ret <4 x i16> %0
}

; trunc <4 x i32> -> <4 x i8>. The expected output contains no vector
; instruction, only the return.
define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) {
; CHECK-LABEL: trunc_v4i32_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %src to <4 x i8>
  ret <4 x i8> %0
}

; trunc <16 x i16> -> <16 x i8>; the source occupies q0 and q1. The expected
; output writes both halves into one 16-byte stack slot with narrowing
; vstrb.16 stores (q1 at offset 8, q0 at offset 0) and reloads the packed
; result into q0.
define arm_aapcs_vfpcc <16 x i8> @trunc_v16i16_v16i8(<16 x i16> %src) {
; CHECK-LABEL: trunc_v16i16_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrb.16 q1, [r0, #8]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <16 x i16> %src to <16 x i8>
  ret <16 x i8> %0
}

; trunc <8 x i32> -> <8 x i16>; the source occupies q0 and q1. The expected
; output writes both halves into one 16-byte stack slot with narrowing
; vstrh.32 stores (q1 at offset 8, q0 at offset 0) and reloads the packed
; result into q0.
define arm_aapcs_vfpcc <8 x i16> @trunc_v8i32_v8i16(<8 x i32> %src) {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrh.32 q1, [r0, #8]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i32> %src to <8 x i16>
  ret <8 x i16> %0
}

; trunc <16 x i32> -> <16 x i8>; the source occupies q0-q3. The expected
; output packs all four registers into one 16-byte stack slot with narrowing
; vstrb.32 stores at offsets 12, 8, 4 and 0, then reloads the result into q0.
define arm_aapcs_vfpcc <16 x i8> @trunc_v16i32_v16i8(<16 x i32> %src) {
; CHECK-LABEL: trunc_v16i32_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrb.32 q3, [r0, #12]
; CHECK-NEXT:    vstrb.32 q2, [r0, #8]
; CHECK-NEXT:    vstrb.32 q1, [r0, #4]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <16 x i32> %src to <16 x i8>
  ret <16 x i8> %0
}

; trunc <2 x i64> -> <2 x i32>. The expected output contains no vector
; instruction, only the return.
define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
; CHECK-LABEL: trunc_v2i64_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <2 x i64> %src to <2 x i32>
  ret <2 x i32> %0
}

