xref: /llvm-project/llvm/test/CodeGen/ARM/funnel-shift-rot.ll (revision e0ed0333f0fed2e73f805afd58b61176a87aa3ad)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR
; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxtb r1, r0
; CHECK-NEXT:    lsl r0, r0, #3
; CHECK-NEXT:    orr r0, r0, r1, lsr #5
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK-LABEL: rotl_i64_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    lsl r2, r0, #3
; CHECK-NEXT:    orr r2, r2, r1, lsr #29
; CHECK-NEXT:    lsl r1, r1, #3
; CHECK-NEXT:    orr r1, r1, r0, lsr #29
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    and r2, r1, #15
; CHECK-NEXT:    rsb r1, r1, #0
; CHECK-NEXT:    and r1, r1, #15
; CHECK-NEXT:    lsl r2, r0, r2
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    orr r0, r2, r0, lsr r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    rsb r1, r1, #0
; CHECK-NEXT:    ror r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotl_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    tst r2, #32
; CHECK-NEXT:    mov r3, r0
; CHECK-NEXT:    and r12, r2, #31
; CHECK-NEXT:    movne r3, r1
; CHECK-NEXT:    movne r1, r0
; CHECK-NEXT:    mov r4, #31
; CHECK-NEXT:    bic r2, r4, r2
; CHECK-NEXT:    lsl lr, r3, r12
; CHECK-NEXT:    lsr r0, r1, #1
; CHECK-NEXT:    lsl r1, r1, r12
; CHECK-NEXT:    lsr r3, r3, #1
; CHECK-NEXT:    orr r0, lr, r0, lsr r2
; CHECK-NEXT:    orr r1, r1, r3, lsr r2
; CHECK-NEXT:    pop {r4, pc}
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; SCALAR-LABEL: rotl_v4i32:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldr r12, [sp]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r0, r0, r12
; SCALAR-NEXT:    ldr r12, [sp, #4]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r1, r1, r12
; SCALAR-NEXT:    ldr r12, [sp, #8]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r2, r2, r12
; SCALAR-NEXT:    ldr r12, [sp, #12]
; SCALAR-NEXT:    rsb r12, r12, #0
; SCALAR-NEXT:    ror r3, r3, r12
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotl_v4i32:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r12, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r12]
; NEON-NEXT:    vmov.i32 q10, #0x1f
; NEON-NEXT:    vneg.s32 q9, q8
; NEON-NEXT:    vmov d23, r2, r3
; NEON-NEXT:    vand q9, q9, q10
; NEON-NEXT:    vand q8, q8, q10
; NEON-NEXT:    vmov d22, r0, r1
; NEON-NEXT:    vneg.s32 q9, q9
; NEON-NEXT:    vshl.u32 q8, q11, q8
; NEON-NEXT:    vshl.u32 q9, q11, q9
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
; SCALAR-LABEL: rotl_v4i32_rotl_const_shift:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ror r0, r0, #29
; SCALAR-NEXT:    ror r1, r1, #29
; SCALAR-NEXT:    ror r2, r2, #29
; SCALAR-NEXT:    ror r3, r3, #29
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotl_v4i32_rotl_const_shift:
; NEON:       @ %bb.0:
; NEON-NEXT:    vmov d17, r2, r3
; NEON-NEXT:    vmov d16, r0, r1
; NEON-NEXT:    vshr.u32 q9, q8, #29
; NEON-NEXT:    vshl.i32 q8, q8, #3
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

; When first 2 operands match, it's a rotate.

define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxtb r1, r0
; CHECK-NEXT:    lsr r1, r1, #3
; CHECK-NEXT:    orr r0, r1, r0, lsl #5
; CHECK-NEXT:    bx lr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ror r0, r0, #3
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    and r2, r1, #15
; CHECK-NEXT:    rsb r1, r1, #0
; CHECK-NEXT:    and r1, r1, #15
; CHECK-NEXT:    uxth r3, r0
; CHECK-NEXT:    lsr r2, r3, r2
; CHECK-NEXT:    orr r0, r2, r0, lsl r1
; CHECK-NEXT:    bx lr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ror r0, r0, r1
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    tst r2, #32
; CHECK-NEXT:    mov r3, r1
; CHECK-NEXT:    moveq r3, r0
; CHECK-NEXT:    moveq r0, r1
; CHECK-NEXT:    mov r1, #31
; CHECK-NEXT:    lsl r12, r0, #1
; CHECK-NEXT:    bic r1, r1, r2
; CHECK-NEXT:    and r2, r2, #31
; CHECK-NEXT:    lsl r12, r12, r1
; CHECK-NEXT:    orr r12, r12, r3, lsr r2
; CHECK-NEXT:    lsl r3, r3, #1
; CHECK-NEXT:    lsl r1, r3, r1
; CHECK-NEXT:    orr r1, r1, r0, lsr r2
; CHECK-NEXT:    mov r0, r12
; CHECK-NEXT:    bx lr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; SCALAR-LABEL: rotr_v4i32:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ldr r12, [sp]
; SCALAR-NEXT:    ror r0, r0, r12
; SCALAR-NEXT:    ldr r12, [sp, #4]
; SCALAR-NEXT:    ror r1, r1, r12
; SCALAR-NEXT:    ldr r12, [sp, #8]
; SCALAR-NEXT:    ror r2, r2, r12
; SCALAR-NEXT:    ldr r12, [sp, #12]
; SCALAR-NEXT:    ror r3, r3, r12
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotr_v4i32:
; NEON:       @ %bb.0:
; NEON-NEXT:    mov r12, sp
; NEON-NEXT:    vld1.64 {d16, d17}, [r12]
; NEON-NEXT:    vmov.i32 q9, #0x1f
; NEON-NEXT:    vneg.s32 q10, q8
; NEON-NEXT:    vand q8, q8, q9
; NEON-NEXT:    vmov d23, r2, r3
; NEON-NEXT:    vand q9, q10, q9
; NEON-NEXT:    vneg.s32 q8, q8
; NEON-NEXT:    vmov d22, r0, r1
; NEON-NEXT:    vshl.u32 q9, q11, q9
; NEON-NEXT:    vshl.u32 q8, q11, q8
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; SCALAR-LABEL: rotr_v4i32_const_shift:
; SCALAR:       @ %bb.0:
; SCALAR-NEXT:    ror r0, r0, #3
; SCALAR-NEXT:    ror r1, r1, #3
; SCALAR-NEXT:    ror r2, r2, #3
; SCALAR-NEXT:    ror r3, r3, #3
; SCALAR-NEXT:    bx lr
;
; NEON-LABEL: rotr_v4i32_const_shift:
; NEON:       @ %bb.0:
; NEON-NEXT:    vmov d17, r2, r3
; NEON-NEXT:    vmov d16, r0, r1
; NEON-NEXT:    vshl.i32 q9, q8, #29
; NEON-NEXT:    vshr.u32 q8, q8, #3
; NEON-NEXT:    vorr q8, q8, q9
; NEON-NEXT:    vmov r0, r1, d16
; NEON-NEXT:    vmov r2, r3, d17
; NEON-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    bx lr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
