; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

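; Background for the tests below: fshl(a, b, c) conceptually concatenates a
; (high half) and b (low half), shifts the double-width value left by c modulo
; the bitwidth, and returns the high half; fshr shifts right and returns the
; low half. With equal data operands the funnel shift degenerates to a rotate,
; e.g. for i8: fshl(0b10110001, 0b10110001, 3) = 0b10001101 = rotl(0b10110001, 3).
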
; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ubfx w8, w0, #5, #3
; CHECK-NEXT:    orr w0, w8, w0, lsl #3
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}
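
; rotl_i8_const_shift: AArch64 has no rotate instruction narrower than 32
; bits, so the i8 rotate is built directly: ubfx extracts the top 3 bits of
; the i8 (bits [7:5]) and orr combines them with x << 3; bits above bit 7 of
; w0 are don't-care for the i8 result.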

define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK-LABEL: rotl_i64_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror x0, x0, #61
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}
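
; rotl_i64_const_shift: i64 rotates map onto ror; a rotate left by 3 is
; rewritten as a rotate right by 64 - 3 = 61.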

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    and w9, w0, #0xffff
; CHECK-NEXT:    and w10, w1, #0xf
; CHECK-NEXT:    and w8, w8, #0xf
; CHECK-NEXT:    lsl w10, w0, w10
; CHECK-NEXT:    lsr w8, w9, w8
; CHECK-NEXT:    orr w0, w10, w8
; CHECK-NEXT:    ret
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}
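
; rotl_i16: no 16-bit rotate exists, so the rotate is expanded as two shifts:
; the amount and its negation are masked with 0xf (mod 16), the value is
; zero-extended for the logical right shift, and the halves are ORed together.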

define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    ror w0, w0, w8
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    ror x0, x0, x8
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}
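
; rotl_i32 / rotl_i64: ror only rotates right, so a variable rotate left is
; lowered as a rotate right by the negated amount; ror takes the amount modulo
; the register width, so no explicit masking is needed.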

; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotl_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #31
; CHECK-NEXT:    neg v3.4s, v1.4s
; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    neg v2.4s, v3.4s
; CHECK-NEXT:    ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    ushl v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
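
; rotl_v4i32: NEON has no vector rotate, so the expansion uses ushl, which
; shifts each lane left for a positive amount and right for a negative one:
; the masked amount feeds the left shift, the negated masked amount feeds the
; right shift, and the two results are ORed.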

; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_rotl_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.4s, v0.4s, #3
; CHECK-NEXT:    usra v1.4s, v0.4s, #29
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
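
; rotl_v4i32_rotl_const_shift: with a constant splat amount the OR of the two
; halves folds into usra (shift right and accumulate); the add is equivalent
; to an OR here because the shifted halves have no overlapping bits.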

; Repeat everything for funnel shift right.

; When first 2 operands match, it's a rotate.

define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl w8, w0, #5
; CHECK-NEXT:    bfxil w8, w0, #3, #5
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}
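
; rotr_i8_const_shift: again no sub-32-bit rotate; lsl forms x << (8 - 3) and
; bfxil (bitfield extract and insert low) copies bits [7:3] of x into the low
; 5 bits, producing the 8-bit rotate right by 3 in the low byte.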

define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror w0, w0, #3
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}
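
; rotr_i32_const_shift: a constant i32 rotate right is a single ror.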

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg w8, w1
; CHECK-NEXT:    and w9, w0, #0xffff
; CHECK-NEXT:    and w10, w1, #0xf
; CHECK-NEXT:    and w8, w8, #0xf
; CHECK-NEXT:    lsr w9, w9, w10
; CHECK-NEXT:    lsl w8, w0, w8
; CHECK-NEXT:    orr w0, w9, w8
; CHECK-NEXT:    ret
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}
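
; rotr_i16: same two-shift expansion as rotl_i16 with the shifts swapped: the
; zero-extended value is shifted right by z & 0xf and the original value is
; shifted left by (-z) & 0xf.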

define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror w0, w0, w1
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ror x0, x0, x1
; CHECK-NEXT:    ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}
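
; rotr_i32 / rotr_i64: a variable rotate right maps directly onto ror; no
; negation or masking is required since the instruction takes the amount
; modulo the register width.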

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotr_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #31
; CHECK-NEXT:    neg v3.4s, v1.4s
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    and v2.16b, v3.16b, v2.16b
; CHECK-NEXT:    neg v1.4s, v1.4s
; CHECK-NEXT:    ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
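
; rotr_v4i32: mirror of the rotl_v4i32 expansion: the right shift by z & 31 is
; done with ushl on a negated amount and ORed with the left shift by (-z) & 31.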

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.4s, v0.4s, #29
; CHECK-NEXT:    usra v1.4s, v0.4s, #3
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
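
; rotr_v4i32_const_shift: the constant splat case again pairs shl with usra,
; here shifting left by 29 and accumulating the logical right shift by 3.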
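; Funnel-shift amounts are taken modulo the bitwidth, so a rotate by exactly
; the bitwidth is the identity and folds away to a bare ret.
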
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}