xref: /llvm-project/llvm/test/Transforms/InstCombine/rotate.ll (revision 59ced72bc211f150518cf31606b58b11cb6ff310)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; RUN: opt < %s -passes=instcombine -use-constant-int-for-fixed-length-splat -S | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

; Canonicalize rotate by constant to funnel shift intrinsics.
; This should help cost modeling for vectorization, inlining, etc.
; If a target does not have a rotate instruction, the expansion will
; be exactly these same 3 basic ops (shl/lshr/or).

define i32 @rotl_i32_constant(i32 %x) {
; CHECK-LABEL: @rotl_i32_constant(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 11)
; CHECK-NEXT:    ret i32 [[R]]
;
  %shl = shl i32 %x, 11
  %shr = lshr i32 %x, 21
  %r = or i32 %shr, %shl
  ret i32 %r
}

define i42 @rotr_i42_constant(i42 %x) {
; CHECK-LABEL: @rotr_i42_constant(
; CHECK-NEXT:    [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[X:%.*]], i42 [[X]], i42 31)
; CHECK-NEXT:    ret i42 [[R]]
;
  %shl = shl i42 %x, 31
  %shr = lshr i42 %x, 11
  %r = or i42 %shr, %shl
  ret i42 %r
}

define i8 @rotr_i8_constant_commute(i8 %x) {
; CHECK-LABEL: @rotr_i8_constant_commute(
; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 5)
; CHECK-NEXT:    ret i8 [[R]]
;
  %shl = shl i8 %x, 5
  %shr = lshr i8 %x, 3
  %r = or i8 %shl, %shr
  ret i8 %r
}

define i88 @rotl_i88_constant_commute(i88 %x) {
; CHECK-LABEL: @rotl_i88_constant_commute(
; CHECK-NEXT:    [[R:%.*]] = call i88 @llvm.fshl.i88(i88 [[X:%.*]], i88 [[X]], i88 44)
; CHECK-NEXT:    ret i88 [[R]]
;
  %shl = shl i88 %x, 44
  %shr = lshr i88 %x, 44
  %r = or i88 %shl, %shr
  ret i88 %r
}

; Vector types are allowed.

define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
; CHECK-LABEL: @rotl_v2i16_constant_splat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
; CHECK-NEXT:    ret <2 x i16> [[R]]
;
  %shl = shl <2 x i16> %x, <i16 1, i16 1>
  %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  %r = or <2 x i16> %shl, %shr
  ret <2 x i16> %r
}

define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) {
; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
; CHECK-NEXT:    ret <2 x i16> [[R]]
;
  %shl = shl <2 x i16> %x, <i16 poison, i16 1>
  %shr = lshr <2 x i16> %x, <i16 15, i16 15>
  %r = or <2 x i16> %shl, %shr
  ret <2 x i16> %r
}

define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) {
; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
; CHECK-NEXT:    ret <2 x i16> [[R]]
;
  %shl = shl <2 x i16> %x, <i16 1, i16 1>
  %shr = lshr <2 x i16> %x, <i16 15, i16 poison>
  %r = or <2 x i16> %shl, %shr
  ret <2 x i16> %r
}

; Non-power-of-2 vector types are allowed.

define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
; CHECK-LABEL: @rotr_v2i17_constant_splat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
; CHECK-NEXT:    ret <2 x i17> [[R]]
;
  %shl = shl <2 x i17> %x, <i17 12, i17 12>
  %shr = lshr <2 x i17> %x, <i17 5, i17 5>
  %r = or <2 x i17> %shr, %shl
  ret <2 x i17> %r
}

define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) {
; CHECK-LABEL: @rotr_v2i17_constant_splat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
; CHECK-NEXT:    ret <2 x i17> [[R]]
;
  %shl = shl <2 x i17> %x, <i17 12, i17 poison>
  %shr = lshr <2 x i17> %x, <i17 poison, i17 5>
  %r = or <2 x i17> %shr, %shl
  ret <2 x i17> %r
}

define <2 x i17> @rotr_v2i17_constant_splat_poison1(<2 x i17> %x) {
; CHECK-LABEL: @rotr_v2i17_constant_splat_poison1(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12))
; CHECK-NEXT:    ret <2 x i17> [[R]]
;
  %shl = shl <2 x i17> %x, <i17 12, i17 poison>
  %shr = lshr <2 x i17> %x, <i17 5, i17 poison>
  %r = or <2 x i17> %shr, %shl
  ret <2 x i17> %r
}

; Allow arbitrary shift constants.
; Support poison elements.

define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 19>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shl = shl <2 x i32> %x, <i32 17, i32 19>
  %shr = lshr <2 x i32> %x, <i32 15, i32 13>
  %r = or <2 x i32> %shl, %shr
  ret <2 x i32> %r
}

define <2 x i32> @rotr_v2i32_constant_nonsplat_poison0(<2 x i32> %x) {
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 poison, i32 19>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shl = shl <2 x i32> %x, <i32 poison, i32 19>
  %shr = lshr <2 x i32> %x, <i32 15, i32 13>
  %r = or <2 x i32> %shl, %shr
  ret <2 x i32> %r
}

define <2 x i32> @rotr_v2i32_constant_nonsplat_poison1(<2 x i32> %x) {
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison1(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shl = shl <2 x i32> %x, <i32 17, i32 19>
  %shr = lshr <2 x i32> %x, <i32 15, i32 poison>
  %r = or <2 x i32> %shl, %shr
  ret <2 x i32> %r
}

define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
; CHECK-LABEL: @rotl_v2i36_constant_nonsplat(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i36> @llvm.fshl.v2i36(<2 x i36> [[X:%.*]], <2 x i36> [[X]], <2 x i36> <i36 21, i36 11>)
; CHECK-NEXT:    ret <2 x i36> [[R]]
;
  %shl = shl <2 x i36> %x, <i36 21, i36 11>
  %shr = lshr <2 x i36> %x, <i36 15, i36 25>
  %r = or <2 x i36> %shl, %shr
  ret <2 x i36> %r
}

define <3 x i36> @rotl_v3i36_constant_nonsplat_poison0(<3 x i36> %x) {
; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_poison0(
; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 poison>)
; CHECK-NEXT:    ret <3 x i36> [[R]]
;
  %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 poison>
  %shr = lshr <3 x i36> %x, <i36 15, i36 25, i36 poison>
  %r = or <3 x i36> %shl, %shr
  ret <3 x i36> %r
}

; The most basic rotate by variable - no guards for UB due to oversized shifts.
; This cannot be canonicalized to funnel shift target-independently. The safe
; expansion includes masking for the shift amount that is not included here,
; so it could be more expensive.

define i32 @rotl_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_i32(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = or i32 %shr, %shl
  ret i32 %r
}

; Non-power-of-2 types should follow the same reasoning. Left/right is determined by subtract.

define i37 @rotr_i37(i37 %x, i37 %y) {
; CHECK-LABEL: @rotr_i37(
; CHECK-NEXT:    [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i37 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i37 [[R]]
;
  %sub = sub i37 37, %y
  %shl = shl i37 %x, %sub
  %shr = lshr i37 %x, %y
  %r = or i37 %shr, %shl
  ret i37 %r
}

; Commute 'or' operands.

define i8 @rotr_i8_commute(i8 %x, i8 %y) {
; CHECK-LABEL: @rotr_i8_commute(
; CHECK-NEXT:    [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[SHL]], [[SHR]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %sub = sub i8 8, %y
  %shl = shl i8 %x, %sub
  %shr = lshr i8 %x, %y
  %r = or i8 %shl, %shr
  ret i8 %r
}

; Vector types should follow the same rules.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @rotl_v4i32(
; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint <4 x i32> [[SHL]], [[SHR]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
  %shl = shl <4 x i32> %x, %y
  %shr = lshr <4 x i32> %x, %sub
  %r = or <4 x i32> %shl, %shr
  ret <4 x i32> %r
}

; Non-power-of-2 vector types should follow the same rules.

define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
; CHECK-LABEL: @rotr_v3i42(
; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint <3 x i42> [[SHR]], [[SHL]]
; CHECK-NEXT:    ret <3 x i42> [[R]]
;
  %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
  %shl = shl <3 x i42> %x, %sub
  %shr = lshr <3 x i42> %x, %y
  %r = or <3 x i42> %shr, %shl
  ret <3 x i42> %r
}

; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
; The backend expansion of funnel shift for targets that don't have a rotate instruction should
; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.

define i32 @rotl_safe_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_safe_i32(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %negy = sub i32 0, %y
  %ymask = and i32 %y, 31
  %negymask = and i32 %negy, 31
  %shl = shl i32 %x, %ymask
  %shr = lshr i32 %x, %negymask
  %r = or i32 %shr, %shl
  ret i32 %r
}

; Extra uses don't change anything.

define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, ptr %p) {
; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
; CHECK-NEXT:    store i16 [[NEGYMASK]], ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
; CHECK-NEXT:    ret i16 [[R]]
;
  %negy = sub i16 0, %y
  %ymask = and i16 %y, 15
  %negymask = and i16 %negy, 15
  store i16 %negymask, ptr %p
  %shl = shl i16 %x, %ymask
  %shr = lshr i16 %x, %negymask
  %r = or i16 %shl, %shr
  ret i16 %r
}

; Left/right is determined by the negation.

define i64 @rotr_safe_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @rotr_safe_i64(
; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
; CHECK-NEXT:    ret i64 [[R]]
;
  %negy = sub i64 0, %y
  %ymask = and i64 %y, 63
  %negymask = and i64 %negy, 63
  %shl = shl i64 %x, %negymask
  %shr = lshr i64 %x, %ymask
  %r = or i64 %shr, %shl
  ret i64 %r
}

; Extra uses don't change anything.

define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, ptr %p) {
; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
; CHECK-NEXT:    [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
; CHECK-NEXT:    [[YMASK:%.*]] = and i8 [[Y]], 7
; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[NEGYMASK]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[YMASK]]
; CHECK-NEXT:    store i8 [[SHR]], ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %negy = sub i8 0, %y
  %ymask = and i8 %y, 7
  %negymask = and i8 %negy, 7
  %shl = shl i8 %x, %negymask
  %shr = lshr i8 %x, %ymask
  store i8 %shr, ptr %p
  %r = or i8 %shl, %shr
  ret i8 %r
}

; Vectors follow the same rules.

define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @rotl_safe_v2i32(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %negy = sub <2 x i32> zeroinitializer, %y
  %ymask = and <2 x i32> %y, <i32 31, i32 31>
  %negymask = and <2 x i32> %negy, <i32 31, i32 31>
  %shl = shl <2 x i32> %x, %ymask
  %shr = lshr <2 x i32> %x, %negymask
  %r = or <2 x i32> %shr, %shl
  ret <2 x i32> %r
}

; Vectors follow the same rules.

define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
; CHECK-LABEL: @rotr_safe_v3i16(
; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
; CHECK-NEXT:    ret <3 x i16> [[R]]
;
  %negy = sub <3 x i16> zeroinitializer, %y
  %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
  %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
  %shl = shl <3 x i16> %x, %negymask
  %shr = lshr <3 x i16> %x, %ymask
  %r = or <3 x i16> %shr, %shl
  ret <3 x i16> %r
}

; These are optionally UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
; See PR34046, PR16726, and PR39624 for motivating examples:
; https://bugs.llvm.org/show_bug.cgi?id=34046
; https://bugs.llvm.org/show_bug.cgi?id=16726
; https://bugs.llvm.org/show_bug.cgi?id=39624

define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
; CHECK-LABEL: @rotate_left_16bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
; CHECK-NEXT:    [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[CONV2]]
;
  %and = and i32 %shift, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %and
  %sub = sub i32 16, %and
  %shr = lshr i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i16
  ret i16 %conv2
}

; Commute the 'or' operands and try a vector type.

define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
; CHECK-LABEL: @rotate_left_commute_16bit_vec(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
; CHECK-NEXT:    [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[V:%.*]], <2 x i16> [[V]], <2 x i16> [[TMP1]])
; CHECK-NEXT:    ret <2 x i16> [[CONV2]]
;
  %and = and <2 x i32> %shift, <i32 15, i32 15>
  %conv = zext <2 x i16> %v to <2 x i32>
  %shl = shl <2 x i32> %conv, %and
  %sub = sub <2 x i32> <i32 16, i32 16>, %and
  %shr = lshr <2 x i32> %conv, %sub
  %or = or <2 x i32> %shl, %shr
  %conv2 = trunc <2 x i32> %or to <2 x i16>
  ret <2 x i16> %conv2
}

; Change the size, rotation direction (the subtract is on the left-shift), and mask op.

define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
; CHECK-LABEL: @rotate_right_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = zext i3 %shift to i32
  %conv = zext i8 %v to i32
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shl, %shr
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; The right-shifted value does not need to be a zexted value; here it is masked.
; The shift mask could be less than the bitwidth, but this is still ok.

define i8 @rotate_right_commute_8bit_unmasked_shl(i32 %v, i32 %shift) {
; CHECK-LABEL: @rotate_right_commute_8bit_unmasked_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = and i32 %shift, 3
  %conv = and i32 %v, 255
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %conv, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; The left-shifted value does not need to be masked at all.

define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
; CHECK-LABEL: @rotate_right_commute_8bit(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 3
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[V]] to i8
; CHECK-NEXT:    [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP4]], i8 [[TMP2]])
; CHECK-NEXT:    ret i8 [[CONV2]]
;
  %and = and i32 %shift, 3
  %conv = and i32 %v, 255
  %shr = lshr i32 %conv, %and
  %sub = sub i32 8, %and
  %shl = shl i32 %v, %sub
  %or = or i32 %shr, %shl
  %conv2 = trunc i32 %or to i8
  ret i8 %conv2
}

; If the original source does not mask the shift amount,
; we still do the transform by adding masks to make it safe.

define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotate8_not_safe(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[RET]]
;
  %conv = zext i8 %v to i32
  %sub = sub i32 8, %shamt
  %shr = lshr i32 %conv, %sub
  %shl = shl i32 %conv, %shamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

; A non-power-of-2 destination type can't be masked as above.

define i9 @rotate9_not_safe(i9 %v, i32 %shamt) {
; CHECK-LABEL: @rotate9_not_safe(
; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i32
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 9, [[SHAMT:%.*]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[CONV]], [[SUB]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[SHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i9
; CHECK-NEXT:    ret i9 [[RET]]
;
  %conv = zext i9 %v to i32
  %sub = sub i32 9, %shamt
  %shr = lshr i32 %conv, %sub
  %shl = shl i32 %conv, %shamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i9
  ret i9 %ret
}

; We should narrow (v << (s & 15)) | (v >> (-s & 15))
; when both v and s have been promoted.

define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
; CHECK-LABEL: @rotateleft_16_neg_mask(
; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i16 [[OR]]
;
  %neg = sub i16 0, %shamt
  %lshamt = and i16 %shamt, 15
  %lshamtconv = zext i16 %lshamt to i32
  %rshamt = and i16 %neg, 15
  %rshamtconv = zext i16 %rshamt to i32
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
; CHECK-NEXT:    [[OR:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i16 [[OR]]
;
  %neg = sub i16 0, %shamt
  %lshamt = and i16 %shamt, 15
  %lshamtconv = zext i16 %lshamt to i32
  %rshamt = and i16 %neg, 15
  %rshamtconv = zext i16 %rshamt to i32
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
; CHECK-LABEL: @rotateright_8_neg_mask(
; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i8 [[OR]]
;
  %neg = sub i8 0, %shamt
  %rshamt = and i8 %shamt, 7
  %rshamtconv = zext i8 %rshamt to i32
  %lshamt = and i8 %neg, 7
  %lshamtconv = zext i8 %lshamt to i32
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
; CHECK-LABEL: @rotateright_8_neg_mask_commute(
; CHECK-NEXT:    [[OR:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i8 [[OR]]
;
  %neg = sub i8 0, %shamt
  %rshamt = and i8 %shamt, 7
  %rshamtconv = zext i8 %rshamt to i32
  %lshamt = and i8 %neg, 7
  %lshamtconv = zext i8 %lshamt to i32
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamtconv
  %shr = lshr i32 %conv, %rshamtconv
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

; The shift amount may already be in the wide type,
; so we need to truncate it going into the rotate pattern.

define i16 @rotateright_16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[RET]]
;
  %neg = sub i32 0, %shamt
  %rshamt = and i32 %shamt, 15
  %lshamt = and i32 %neg, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i16 @rotateright_16_neg_mask_wide_amount_commute(i16 %v, i32 %shamt) {
; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount_commute(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[RET]]
;
  %neg = sub i32 0, %shamt
  %rshamt = and i32 %shamt, 15
  %lshamt = and i32 %neg, 15
  %conv = zext i16 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

define i64 @rotateright_64_zext_neg_mask_amount(i64 %0, i32 %1) {
; CHECK-LABEL: @rotateright_64_zext_neg_mask_amount(
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %3 = and i32 %1, 63
  %4 = zext i32 %3 to i64
  %5 = lshr i64 %0, %4
  %6 = sub nsw i32 0, %1
  %7 = and i32 %6, 63
  %8 = zext i32 %7 to i64
  %9 = shl i64 %0, %8
  %10 = or i64 %5, %9
  ret i64 %10
}

define i8 @rotateleft_8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[RET]]
;
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %shamt, 7
  %rshamt = and i32 %neg, 7
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shr, %shl
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

define i8 @rotateleft_8_neg_mask_wide_amount_commute(i8 %v, i32 %shamt) {
; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount_commute(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
; CHECK-NEXT:    ret i8 [[RET]]
;
  %neg = sub i32 0, %shamt
  %lshamt = and i32 %shamt, 7
  %rshamt = and i32 %neg, 7
  %conv = zext i8 %v to i32
  %shl = shl i32 %conv, %lshamt
  %shr = lshr i32 %conv, %rshamt
  %or = or i32 %shl, %shr
  %ret = trunc i32 %or to i8
  ret i8 %ret
}

define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
; CHECK-LABEL: @rotateleft_64_zext_neg_mask_amount(
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP1:%.*]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP3]])
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %3 = and i32 %1, 63
  %4 = zext i32 %3 to i64
  %5 = shl i64 %0, %4
  %6 = sub nsw i32 0, %1
  %7 = and i32 %6, 63
  %8 = zext i32 %7 to i64
  %9 = lshr i64 %0, %8
  %10 = or i64 %5, %9
  ret i64 %10
}

; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.

define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
; CHECK-LABEL: @rotateleft_9_neg_mask_wide_amount_commute(
; CHECK-NEXT:    [[NEG:%.*]] = sub i33 0, [[SHAMT:%.*]]
; CHECK-NEXT:    [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8
; CHECK-NEXT:    [[RSHAMT:%.*]] = and i33 [[NEG]], 8
; CHECK-NEXT:    [[CONV:%.*]] = zext i9 [[V:%.*]] to i33
; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i33 [[CONV]], [[LSHAMT]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or i33 [[SHL]], [[SHR]]
; CHECK-NEXT:    [[RET:%.*]] = trunc i33 [[OR]] to i9
; CHECK-NEXT:    ret i9 [[RET]]
;
  %neg = sub i33 0, %shamt
  %lshamt = and i33 %shamt, 8
  %rshamt = and i33 %neg, 8
  %conv = zext i9 %v to i33
  %shl = shl i33 %conv, %lshamt
  %shr = lshr i33 %conv, %rshamt
  %or = or i33 %shl, %shr
  %ret = trunc i33 %or to i9
  ret i9 %ret
}

; Fold or(shl(v,x),lshr(v,bw-x)) iff x < bw

define i64 @rotl_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotl_sub_mask(
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %3 = and i64 %1, 63
  %4 = shl i64 %0, %3
  %5 = sub nuw nsw i64 64, %3
  %6 = lshr i64 %0, %5
  %7 = or i64 %6, %4
  ret i64 %7
}

; Fold or(lshr(v,x),shl(v,bw-x)) iff x < bw

define i64 @rotr_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotr_sub_mask(
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %3 = and i64 %1, 63
  %4 = lshr i64 %0, %3
  %5 = sub nuw nsw i64 64, %3
  %6 = shl i64 %0, %5
  %7 = or i64 %6, %4
  ret i64 %7
}

define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: @rotr_sub_mask_vector(
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %3 = and <2 x i64> %1, <i64 63, i64 63>
  %4 = lshr <2 x i64> %0, %3
  %5 = sub nuw nsw <2 x i64> <i64 64, i64 64>, %3
  %6 = shl <2 x i64> %0, %5
  %7 = or <2 x i64> %6, %4
  ret <2 x i64> %7
}

; Convert select pattern to masked shift that ends in 'or'.

define i32 @rotr_select(i32 %x, i32 %shamt) {
; CHECK-LABEL: @rotr_select(
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %cmp = icmp eq i32 %shamt, 0
  %sub = sub i32 32, %shamt
  %shr = lshr i32 %x, %shamt
  %shl = shl i32 %x, %sub
  %or = or i32 %shr, %shl
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}

; Convert select pattern to masked shift that ends in 'or'.

define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
; CHECK-LABEL: @rotr_select_commute(
; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i8 [[R]]
;
  %cmp = icmp eq i8 %shamt, 0
  %sub = sub i8 8, %shamt
  %shr = lshr i8 %x, %shamt
  %shl = shl i8 %x, %sub
  %or = or i8 %shl, %shr
  %r = select i1 %cmp, i8 %x, i8 %or
  ret i8 %r
}

; Convert select pattern to masked shift that ends in 'or'.

define i16 @rotl_select(i16 %x, i16 %shamt) {
; CHECK-LABEL: @rotl_select(
; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
; CHECK-NEXT:    ret i16 [[R]]
;
  %cmp = icmp eq i16 %shamt, 0
  %sub = sub i16 16, %shamt
  %shr = lshr i16 %x, %sub
  %shl = shl i16 %x, %shamt
  %or = or i16 %shr, %shl
  %r = select i1 %cmp, i16 %x, i16 %or
  ret i16 %r
}

; Convert select pattern to masked shift that ends in 'or'.

define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
; CHECK-LABEL: @rotl_select_commute(
; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
  %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
  %shr = lshr <2 x i64> %x, %sub
  %shl = shl <2 x i64> %x, %shamt
  %or = or <2 x i64> %shl, %shr
  %r = select <2 x i1> %cmp, <2 x i64> %x, <2 x i64> %or
  ret <2 x i64> %r
}

; Negative test - the transform is only valid with power-of-2 types.

define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
; CHECK-LABEL: @rotl_select_weird_type(
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i24 [[SHAMT:%.*]], 0
; CHECK-NEXT:    [[SUB:%.*]] = sub i24 24, [[SHAMT]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
; CHECK-NEXT:    [[OR:%.*]] = or disjoint i24 [[SHL]], [[SHR]]
; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
; CHECK-NEXT:    ret i24 [[R]]
;
  %cmp = icmp eq i24 %shamt, 0
  %sub = sub i24 24, %shamt
  %shr = lshr i24 %x, %sub
  %shl = shl i24 %x, %shamt
  %or = or i24 %shl, %shr
  %r = select i1 %cmp, i24 %x, i24 %or
  ret i24 %r
}

; Select-guarded rotate-left where the i8 amount is masked to [0,31] and
; zero-extended before shifting; folds to llvm.fshl of the zext'd amount
; (the 'and 31' mask is absorbed by fshl's modulo-bitwidth semantics).
define i32 @rotl_select_zext_shamt(i32 %x, i8 %y) {
; CHECK-LABEL: @rotl_select_zext_shamt(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %rem = and i8 %y, 31
  %cmp = icmp eq i8 %rem, 0
  %sh_prom = zext i8 %rem to i32
  %sub = sub nuw nsw i8 32, %rem
  %sh_prom1 = zext i8 %sub to i32
  %shr = lshr i32 %x, %sh_prom1
  %shl = shl i32 %x, %sh_prom
  %or = or i32 %shl, %shr
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}
871
; Rotate-right variant of the zext'd-amount pattern: i32 amount masked to
; [0,63] and widened to i64; folds to llvm.fshr of the zext'd amount.
define i64 @rotr_select_zext_shamt(i64 %x, i32 %y) {
; CHECK-LABEL: @rotr_select_zext_shamt(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[Y:%.*]] to i64
; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[R]]
;
  %rem = and i32 %y, 63
  %cmp = icmp eq i32 %rem, 0
  %sh_prom = zext i32 %rem to i64
  %shr = lshr i64 %x, %sh_prom
  %sub = sub nuw nsw i32 64, %rem
  %sh_prom1 = zext i32 %sub to i64
  %shl = shl i64 %x, %sh_prom1
  %or = or i64 %shl, %shr
  %r = select i1 %cmp, i64 %x, i64 %or
  ret i64 %r
}
889
890; Test that the transform doesn't crash when there's an "or" with a ConstantExpr operand.
891
892@external_global = external global i8
893
; The two shift amounts are unrelated (variable vs. constant 11), so no rotate
; can be formed; the transform must bail out gracefully on the ConstantExpr
; shift operands and leave the IR unchanged.
define i32 @rotl_constant_expr(i32 %shamt) {
; CHECK-LABEL: @rotl_constant_expr(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 ptrtoint (ptr @external_global to i32), [[SHAMT:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 ptrtoint (ptr @external_global to i32), 11
; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %shr = lshr i32 ptrtoint (ptr @external_global to i32), %shamt
  %shl = shl i32 ptrtoint (ptr @external_global to i32), 11
  %r = or i32 %shr, %shl
  ret i32 %r
}
906
907; PR20750 - https://bugs.llvm.org/show_bug.cgi?id=20750
908; This IR corresponds to C source where the shift amount is a smaller type than the rotated value:
909; unsigned int rotate32_doubleand1(unsigned int v, unsigned char r) { r = r & 31; return (v << r) | (v >> (((32 - r)) & 31)); }
910
; Narrow-amount rotate-left: (v << z) | (v >> (-z & 31)) with z = zext(r & 31).
; Both masks are redundant modulo 32, so this folds to fshl of the plain zext.
define i32 @rotateleft32_doubleand1(i32 %v, i8 %r) {
; CHECK-LABEL: @rotateleft32_doubleand1(
; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[R:%.*]] to i32
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
; CHECK-NEXT:    ret i32 [[OR]]
;
  %m = and i8 %r, 31
  %z = zext i8 %m to i32
  %neg = sub nsw i32 0, %z
  %and2 = and i32 %neg, 31
  %shl = shl i32 %v, %z
  %shr = lshr i32 %v, %and2
  %or = or i32 %shr, %shl
  ret i32 %or
}
926
; Rotate-right mirror of the above with an i16 amount: here the masked amount
; feeds the lshr and the negated amount feeds the shl, so it folds to fshr.
define i32 @rotateright32_doubleand1(i32 %v, i16 %r) {
; CHECK-LABEL: @rotateright32_doubleand1(
; CHECK-NEXT:    [[Z:%.*]] = zext i16 [[R:%.*]] to i32
; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[V:%.*]], i32 [[V]], i32 [[Z]])
; CHECK-NEXT:    ret i32 [[OR]]
;
  %m = and i16 %r, 31
  %z = zext i16 %m to i32
  %neg = sub nsw i32 0, %z
  %and2 = and i32 %neg, 31
  %shl = shl i32 %v, %and2
  %shr = lshr i32 %v, %z
  %or = or i32 %shr, %shl
  ret i32 %or
}
942
943; TODO: This should be a rotate (funnel-shift).
944
; An i8 rotate performed in i32 (only the lshr input is masked to 8 bits, the
; shl input is not) followed by a trunc. Currently left unchanged by
; InstCombine; the TODO above notes it could become a funnel shift.
define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %shamt) {
; CHECK-LABEL: @unmasked_shlop_unmasked_shift_amount(
; CHECK-NEXT:    [[MASKX:%.*]] = and i32 [[X:%.*]], 255
; CHECK-NEXT:    [[T4:%.*]] = sub i32 8, [[SHAMT:%.*]]
; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[X]], [[T4]]
; CHECK-NEXT:    [[T6:%.*]] = lshr i32 [[MASKX]], [[SHAMT]]
; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T5]], [[T6]]
; CHECK-NEXT:    [[T8:%.*]] = trunc i32 [[T7]] to i8
; CHECK-NEXT:    ret i8 [[T8]]
;
  %maskx = and i32 %x, 255
  %t4 = sub i32 8, %shamt
  %t5 = shl i32 %x, %t4
  %t6 = lshr i32 %maskx, %shamt
  %t7 = or i32 %t5, %t6
  %t8 = trunc i32 %t7 to i8
  ret i8 %t8
}
963
; A 16-bit rotate-right performed in i32 on a value masked to 1 bit, with both
; shift amounts masked to [0,15] and the result truncated to i16; this narrows
; to an i16 llvm.fshr on the truncated, masked value.
define i16 @check_rotate_masked_16bit(i8 %shamt, i32 %cond) {
; CHECK-LABEL: @check_rotate_masked_16bit(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[SHAMT:%.*]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[COND:%.*]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 1
; CHECK-NEXT:    [[TRUNC:%.*]] = call i16 @llvm.fshr.i16(i16 [[TMP3]], i16 [[TMP3]], i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[TRUNC]]
;
  %maskx = and i32 %cond, 1
  %masky = and i8 %shamt, 15
  %z = zext i8 %masky to i32
  %shr = lshr i32 %maskx, %z
  %sub = sub i8 0, %shamt
  %maskw = and i8 %sub, 15
  %z2 = zext i8 %maskw to i32
  %shl = shl nuw nsw i32 %maskx, %z2
  %or = or i32 %shr, %shl
  %trunc = trunc i32 %or to i16
  ret i16 %trunc
}
984
; Rotate-left built with 'add' instead of 'or'. The shifted values cannot share
; bits, so the add is refined to 'or disjoint'; no fshl is formed because the
; unmasked amount would imply a shift by 32 when %y == 0.
define i32 @rotl_i32_add(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
999
; Rotate-right built with 'add' instead of 'or'; like the rotl case, the add of
; disjoint shifted values becomes 'or disjoint' but no funnel shift is formed.
define i32 @rotr_i32_add(i32 %x, i32 %y) {
; CHECK-LABEL: @rotr_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %y
  %shl = lshr i32 %x, %y
  %shr = shl i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
1014
; Funnel-shift-right shape (two different source values %x and %y) built with
; 'add'; the add is refined to 'or disjoint' but no fshr intrinsic is formed.
define i32 @fshr_i32_add(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @fshr_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Z:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Z]]
; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[Y:%.*]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %z
  %shl = lshr i32 %x, %z
  %shr = shl i32 %y, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
1029
; Funnel-shift-left shape (two different source values) built with 'add';
; only the add -> 'or disjoint' refinement happens.
define i32 @fshl_i32_add(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @fshl_i32_add(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Z:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[Y:%.*]], [[Z]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X:%.*]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 32, %z
  %shl = shl i32 %y, %z
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
1044
; Shift amounts sum to more than the bitwidth (33 - y vs. y): the shifted
; values are still provably disjoint, so add -> 'or disjoint' still applies.
define i32 @rotl_i32_add_greater(i32 %x, i32 %y) {
; CHECK-LABEL: @rotl_i32_add_greater(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 33, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 33, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
1059
; Rotate-right flavor with amounts summing past the bitwidth (34 - y vs. y);
; the shifted values stay disjoint, so the add is refined to 'or disjoint'.
define i32 @rotr_i32_add_greater(i32 %x, i32 %y) {
; CHECK-LABEL: @rotr_i32_add_greater(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 34, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 34, %y
  %shl = lshr i32 %x, %y
  %shr = shl i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
1074
; Negative test: with amounts 31 - y vs. y the two shifted values can overlap,
; so the 'add' must NOT be turned into an 'or' (no disjoint refinement either).
define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
; CHECK-LABEL: @not_rotl_i32_add_less(
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 31, [[Y:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
; CHECK-NEXT:    [[R:%.*]] = add i32 [[SHR]], [[SHL]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %sub = sub i32 31, %y
  %shl = shl i32 %x, %y
  %shr = lshr i32 %x, %sub
  %r = add i32 %shr, %shl
  ret i32 %r
}
1089