xref: /llvm-project/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll (revision d1924f0474b65fe3189ffd658a12f452e4696c28)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
3; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
4; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64
5
; Funnel-shift intrinsic declarations used by the tests below:
; fshl (funnel shift left) and fshr (funnel shift right), each at
; i8, i16, i32, i64 and <4 x i32>.
6declare i8 @llvm.fshl.i8(i8, i8, i8)
7declare i16 @llvm.fshl.i16(i16, i16, i16)
8declare i32 @llvm.fshl.i32(i32, i32, i32)
9declare i64 @llvm.fshl.i64(i64, i64, i64)
10declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
11
12declare i8 @llvm.fshr.i8(i8, i8, i8)
13declare i16 @llvm.fshr.i16(i16, i16, i16)
14declare i32 @llvm.fshr.i32(i32, i32, i32)
15declare i64 @llvm.fshr.i64(i64, i64, i64)
16declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
17
18; When the first 2 operands match, a funnel shift is a rotate (here by a constant amount).
19
; i8 rotate left by the constant 3, written as fshl with %x as both data
; operands. All three run configurations share the same expected output:
; a rotlwi/rlwimi pair followed by a register move, since the checks show no
; single-instruction 8-bit rotate being selected.
20define i8 @rotl_i8_const_shift(i8 %x) {
21; CHECK-LABEL: rotl_i8_const_shift:
22; CHECK:       # %bb.0:
23; CHECK-NEXT:    rotlwi 4, 3, 27
24; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
25; CHECK-NEXT:    mr 3, 4
26; CHECK-NEXT:    blr
27  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
28  ret i8 %f
29}
30
; i64 rotate left by the constant 3 (fshl with identical data operands).
; Both ppc32 configurations expect the value to be handled as two 32-bit
; registers (r3/r4) combined with rotlwi + rlwimi; the powerpc64le
; configuration expects a single rotldi.
31define i64 @rotl_i64_const_shift(i64 %x) {
32; CHECK32-LABEL: rotl_i64_const_shift:
33; CHECK32:       # %bb.0:
34; CHECK32-NEXT:    rotlwi 5, 4, 3
35; CHECK32-NEXT:    rotlwi 6, 3, 3
36; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
37; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
38; CHECK32-NEXT:    mr 3, 5
39; CHECK32-NEXT:    mr 4, 6
40; CHECK32-NEXT:    blr
41;
42; CHECK64-LABEL: rotl_i64_const_shift:
43; CHECK64:       # %bb.0:
44; CHECK64-NEXT:    rotldi 3, 3, 3
45; CHECK64-NEXT:    blr
46  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
47  ret i64 %f
48}
49
50; When first 2 operands match, it's a rotate (by variable amount).
51
; i16 rotate left by a variable amount %z (fshl with identical data operands).
; All run configurations expect an expanded sequence: the amount and its
; negation are each masked with clrlwi, %x is masked with clrlwi ..., 16,
; and the result is formed as (slw | srw) rather than a single rotate.
52define i16 @rotl_i16(i16 %x, i16 %z) {
53; CHECK-LABEL: rotl_i16:
54; CHECK:       # %bb.0:
55; CHECK-NEXT:    clrlwi 6, 4, 28
56; CHECK-NEXT:    neg 4, 4
57; CHECK-NEXT:    clrlwi 5, 3, 16
58; CHECK-NEXT:    clrlwi 4, 4, 28
59; CHECK-NEXT:    slw 3, 3, 6
60; CHECK-NEXT:    srw 4, 5, 4
61; CHECK-NEXT:    or 3, 3, 4
62; CHECK-NEXT:    blr
63  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
64  ret i16 %f
65}
66
; i32 rotate left by a variable amount %z (fshl with identical data operands).
; Every run configuration expects this to become a single rotlw.
67define i32 @rotl_i32(i32 %x, i32 %z) {
68; CHECK-LABEL: rotl_i32:
69; CHECK:       # %bb.0:
70; CHECK-NEXT:    rotlw 3, 3, 4
71; CHECK-NEXT:    blr
72  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
73  ret i32 %f
74}
75
; i64 rotate left by a variable amount %z (fshl with identical data operands).
; The ppc32 configurations expect a multi-instruction expansion: bit 32 of the
; amount is tested (andi. ..., 32) and used by two conditional branches that
; choose which register feeds each shift, then the low 5 bits of the amount
; drive slw/srw/or. The powerpc64le configuration expects a single rotld.
76define i64 @rotl_i64(i64 %x, i64 %z) {
77; CHECK32-LABEL: rotl_i64:
78; CHECK32:       # %bb.0:
79; CHECK32-NEXT:    andi. 5, 6, 32
80; CHECK32-NEXT:    mr 5, 3
81; CHECK32-NEXT:    bne 0, .LBB4_2
82; CHECK32-NEXT:  # %bb.1:
83; CHECK32-NEXT:    mr 5, 4
84; CHECK32-NEXT:  .LBB4_2:
85; CHECK32-NEXT:    clrlwi 6, 6, 27
86; CHECK32-NEXT:    subfic 8, 6, 32
87; CHECK32-NEXT:    srw 7, 5, 8
88; CHECK32-NEXT:    bne 0, .LBB4_4
89; CHECK32-NEXT:  # %bb.3:
90; CHECK32-NEXT:    mr 4, 3
91; CHECK32-NEXT:  .LBB4_4:
92; CHECK32-NEXT:    slw 3, 4, 6
93; CHECK32-NEXT:    srw 4, 4, 8
94; CHECK32-NEXT:    slw 5, 5, 6
95; CHECK32-NEXT:    or 3, 3, 7
96; CHECK32-NEXT:    or 4, 5, 4
97; CHECK32-NEXT:    blr
98;
99; CHECK64-LABEL: rotl_i64:
100; CHECK64:       # %bb.0:
101; CHECK64-NEXT:    rotld 3, 3, 4
102; CHECK64-NEXT:    blr
103  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
104  ret i64 %f
105}
106
107; Vector rotate.
108
; <4 x i32> rotate left by a per-lane variable amount %z (fshl with identical
; data operands). The plain ppc32 configuration expects four scalar rotlw
; instructions, one per element. The ppc32 -mcpu=ppc64 and powerpc64le
; configurations each expect a single vector vrlw.
109define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
110; CHECK32_32-LABEL: rotl_v4i32:
111; CHECK32_32:       # %bb.0:
112; CHECK32_32-NEXT:    rotlw 3, 3, 7
113; CHECK32_32-NEXT:    rotlw 4, 4, 8
114; CHECK32_32-NEXT:    rotlw 5, 5, 9
115; CHECK32_32-NEXT:    rotlw 6, 6, 10
116; CHECK32_32-NEXT:    blr
117;
118; CHECK32_64-LABEL: rotl_v4i32:
119; CHECK32_64:       # %bb.0:
120; CHECK32_64-NEXT:    vrlw 2, 2, 3
121; CHECK32_64-NEXT:    blr
122;
123; CHECK64-LABEL: rotl_v4i32:
124; CHECK64:       # %bb.0:
125; CHECK64-NEXT:    vrlw 2, 2, 3
126; CHECK64-NEXT:    blr
127  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
128  ret <4 x i32> %f
129}
130
131; Vector rotate by constant splat amount.
132
; <4 x i32> rotate left by a splat constant 3 (fshl with identical data
; operands). The plain ppc32 configuration expects four scalar rotlwi by 3.
; The ppc32 -mcpu=ppc64 and powerpc64le configurations expect the amount to be
; splatted with vspltisw and then applied with one vrlw.
133define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
134; CHECK32_32-LABEL: rotl_v4i32_const_shift:
135; CHECK32_32:       # %bb.0:
136; CHECK32_32-NEXT:    rotlwi 3, 3, 3
137; CHECK32_32-NEXT:    rotlwi 4, 4, 3
138; CHECK32_32-NEXT:    rotlwi 5, 5, 3
139; CHECK32_32-NEXT:    rotlwi 6, 6, 3
140; CHECK32_32-NEXT:    blr
141;
142; CHECK32_64-LABEL: rotl_v4i32_const_shift:
143; CHECK32_64:       # %bb.0:
144; CHECK32_64-NEXT:    vspltisw 3, 3
145; CHECK32_64-NEXT:    vrlw 2, 2, 3
146; CHECK32_64-NEXT:    blr
147;
148; CHECK64-LABEL: rotl_v4i32_const_shift:
149; CHECK64:       # %bb.0:
150; CHECK64-NEXT:    vspltisw 3, 3
151; CHECK64-NEXT:    vrlw 2, 2, 3
152; CHECK64-NEXT:    blr
153  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
154  ret <4 x i32> %f
155}
156
157; Repeat everything for funnel shift right.
158
; i8 rotate right by the constant 3, written as fshr with %x as both data
; operands. All run configurations share the same expected output: a
; rotlwi/rlwimi pair followed by a register move.
159define i8 @rotr_i8_const_shift(i8 %x) {
160; CHECK-LABEL: rotr_i8_const_shift:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    rotlwi 4, 3, 29
163; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
164; CHECK-NEXT:    mr 3, 4
165; CHECK-NEXT:    blr
166  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
167  ret i8 %f
168}
169
; i32 rotate right by the constant 3 (fshr with identical data operands).
; Every run configuration expects a single left rotate by 29 (= 32 - 3),
; i.e. the right rotate is expressed as the complementary left rotate.
170define i32 @rotr_i32_const_shift(i32 %x) {
171; CHECK-LABEL: rotr_i32_const_shift:
172; CHECK:       # %bb.0:
173; CHECK-NEXT:    rotlwi 3, 3, 29
174; CHECK-NEXT:    blr
175  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
176  ret i32 %f
177}
178
179; When first 2 operands match, it's a rotate (by variable amount).
180
; i16 rotate right by a variable amount %z (fshr with identical data operands).
; All run configurations expect an expanded sequence that mirrors rotl_i16
; with the shift directions swapped: the masked amount feeds srw, the masked
; negated amount feeds slw, and the two halves are combined with or.
181define i16 @rotr_i16(i16 %x, i16 %z) {
182; CHECK-LABEL: rotr_i16:
183; CHECK:       # %bb.0:
184; CHECK-NEXT:    clrlwi 6, 4, 28
185; CHECK-NEXT:    neg 4, 4
186; CHECK-NEXT:    clrlwi 5, 3, 16
187; CHECK-NEXT:    clrlwi 4, 4, 28
188; CHECK-NEXT:    srw 5, 5, 6
189; CHECK-NEXT:    slw 3, 3, 4
190; CHECK-NEXT:    or 3, 5, 3
191; CHECK-NEXT:    blr
192  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
193  ret i16 %f
194}
195
; i32 rotate right by a variable amount %z (fshr with identical data operands).
; Every run configuration expects the amount to be negated and then used in a
; left rotate (neg + rotlw).
196define i32 @rotr_i32(i32 %x, i32 %z) {
197; CHECK-LABEL: rotr_i32:
198; CHECK:       # %bb.0:
199; CHECK-NEXT:    neg 4, 4
200; CHECK-NEXT:    rotlw 3, 3, 4
201; CHECK-NEXT:    blr
202  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
203  ret i32 %f
204}
205
; i64 rotate right by a variable amount %z (fshr with identical data operands).
; The ppc32 configurations expect a multi-instruction expansion: bit 32 of the
; amount is tested (andi. ..., 32) and used by two conditional branches (beq
; here, where rotl_i64 uses bne) to choose the source registers, followed by
; srw/slw/or on the low 5 bits of the amount. The powerpc64le configuration
; expects neg + rotld.
206define i64 @rotr_i64(i64 %x, i64 %z) {
207; CHECK32-LABEL: rotr_i64:
208; CHECK32:       # %bb.0:
209; CHECK32-NEXT:    andi. 5, 6, 32
210; CHECK32-NEXT:    mr 5, 3
211; CHECK32-NEXT:    beq 0, .LBB11_2
212; CHECK32-NEXT:  # %bb.1:
213; CHECK32-NEXT:    mr 5, 4
214; CHECK32-NEXT:  .LBB11_2:
215; CHECK32-NEXT:    clrlwi 7, 6, 27
216; CHECK32-NEXT:    srw 6, 5, 7
217; CHECK32-NEXT:    beq 0, .LBB11_4
218; CHECK32-NEXT:  # %bb.3:
219; CHECK32-NEXT:    mr 4, 3
220; CHECK32-NEXT:  .LBB11_4:
221; CHECK32-NEXT:    subfic 3, 7, 32
222; CHECK32-NEXT:    srw 7, 4, 7
223; CHECK32-NEXT:    slw 4, 4, 3
224; CHECK32-NEXT:    slw 5, 5, 3
225; CHECK32-NEXT:    or 3, 4, 6
226; CHECK32-NEXT:    or 4, 5, 7
227; CHECK32-NEXT:    blr
228;
229; CHECK64-LABEL: rotr_i64:
230; CHECK64:       # %bb.0:
231; CHECK64-NEXT:    neg 4, 4
232; CHECK64-NEXT:    rotld 3, 3, 4
233; CHECK64-NEXT:    blr
234  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
235  ret i64 %f
236}
237
238; Vector rotate.
239
; <4 x i32> rotate right by a per-lane variable amount %z (fshr with identical
; data operands). The plain ppc32 configuration expects four scalar neg +
; rotlw pairs. The two vector-capable configurations expect a zero vector
; (vxor on ppc32 -mcpu=ppc64, xxlxor on powerpc64le), a vsubuwm that subtracts
; the amount from it, and then one vrlw.
240define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
241; CHECK32_32-LABEL: rotr_v4i32:
242; CHECK32_32:       # %bb.0:
243; CHECK32_32-NEXT:    neg 7, 7
244; CHECK32_32-NEXT:    neg 8, 8
245; CHECK32_32-NEXT:    neg 9, 9
246; CHECK32_32-NEXT:    neg 10, 10
247; CHECK32_32-NEXT:    rotlw 3, 3, 7
248; CHECK32_32-NEXT:    rotlw 4, 4, 8
249; CHECK32_32-NEXT:    rotlw 5, 5, 9
250; CHECK32_32-NEXT:    rotlw 6, 6, 10
251; CHECK32_32-NEXT:    blr
252;
253; CHECK32_64-LABEL: rotr_v4i32:
254; CHECK32_64:       # %bb.0:
255; CHECK32_64-NEXT:    vxor 4, 4, 4
256; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
257; CHECK32_64-NEXT:    vrlw 2, 2, 3
258; CHECK32_64-NEXT:    blr
259;
260; CHECK64-LABEL: rotr_v4i32:
261; CHECK64:       # %bb.0:
262; CHECK64-NEXT:    xxlxor 36, 36, 36
263; CHECK64-NEXT:    vsubuwm 3, 4, 3
264; CHECK64-NEXT:    vrlw 2, 2, 3
265; CHECK64-NEXT:    blr
266  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
267  ret <4 x i32> %f
268}
269
270; Vector rotate by constant splat amount.
271
; <4 x i32> rotate right by a splat constant 3 (fshr with identical data
; operands). The plain ppc32 configuration expects four scalar rotlwi by 29
; (= 32 - 3). The two vector-capable configurations expect two vspltisw
; splats (-16 and 13) combined by vsubuwm before the vrlw; 13 - (-16) = 29,
; so this presumably materializes the left-rotate amount 29 per lane.
272define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
273; CHECK32_32-LABEL: rotr_v4i32_const_shift:
274; CHECK32_32:       # %bb.0:
275; CHECK32_32-NEXT:    rotlwi 3, 3, 29
276; CHECK32_32-NEXT:    rotlwi 4, 4, 29
277; CHECK32_32-NEXT:    rotlwi 5, 5, 29
278; CHECK32_32-NEXT:    rotlwi 6, 6, 29
279; CHECK32_32-NEXT:    blr
280;
281; CHECK32_64-LABEL: rotr_v4i32_const_shift:
282; CHECK32_64:       # %bb.0:
283; CHECK32_64-NEXT:    vspltisw 3, -16
284; CHECK32_64-NEXT:    vspltisw 4, 13
285; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
286; CHECK32_64-NEXT:    vrlw 2, 2, 3
287; CHECK32_64-NEXT:    blr
288;
289; CHECK64-LABEL: rotr_v4i32_const_shift:
290; CHECK64:       # %bb.0:
291; CHECK64-NEXT:    vspltisw 3, -16
292; CHECK64-NEXT:    vspltisw 4, 13
293; CHECK64-NEXT:    vsubuwm 3, 4, 3
294; CHECK64-NEXT:    vrlw 2, 2, 3
295; CHECK64-NEXT:    blr
296  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
297  ret <4 x i32> %f
298}
299
; i32 rotate left by 32, i.e. by exactly the bit width. Every run
; configuration expects the function body to be just blr: no rotate
; instruction is emitted and the argument is returned unchanged.
300define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
301; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
302; CHECK:       # %bb.0:
303; CHECK-NEXT:    blr
304  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
305  ret i32 %f
306}
307
; i32 rotate right by 32, i.e. by exactly the bit width. Every run
; configuration expects the function body to be just blr: no rotate
; instruction is emitted and the argument is returned unchanged.
308define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
309; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
310; CHECK:       # %bb.0:
311; CHECK-NEXT:    blr
312  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
313  ret i32 %f
314}
315
; <4 x i32> rotate left by a splat of 32, i.e. each lane rotated by its full
; bit width. Every run configuration expects the function body to be just
; blr: no rotate instructions are emitted.
316define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
317; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
318; CHECK:       # %bb.0:
319; CHECK-NEXT:    blr
320  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
321  ret <4 x i32> %f
322}
323
; <4 x i32> rotate right by a splat of 32, i.e. each lane rotated by its full
; bit width. Every run configuration expects the function body to be just
; blr: no rotate instructions are emitted.
324define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
325; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
326; CHECK:       # %bb.0:
327; CHECK-NEXT:    blr
328  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
329  ret <4 x i32> %f
330}
331
332