xref: /llvm-project/llvm/test/Transforms/InstCombine/fsh.ll (revision 2131115be5b9d8b39af80973d9b64c0adc41d38d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
4declare i16 @llvm.fshl.i16(i16, i16, i16)
5declare i16 @llvm.fshr.i16(i16, i16, i16)
6declare i32 @llvm.fshl.i32(i32, i32, i32)
7declare i33 @llvm.fshr.i33(i33, i33, i33)
8declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
9declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
10declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
11
12declare void @use_v2(<2 x i31>)
13declare void @use_v3(<3 x i16>)
14
15; If the shift mask doesn't include any demanded bits, the funnel shift can be eliminated.
16
17define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) {
18; CHECK-LABEL: @fshl_mask_simplify1(
19; CHECK-NEXT:    ret i32 [[X:%.*]]
20;
21  %maskedsh = and i32 %sh, 32
22  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
23  ret i32 %r
24}
25
26define <2 x i32> @fshr_mask_simplify2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
27; CHECK-LABEL: @fshr_mask_simplify2(
28; CHECK-NEXT:    ret <2 x i32> [[Y:%.*]]
29;
30  %maskedsh = and <2 x i32> %sh, <i32 64, i32 64>
31  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %maskedsh)
32  ret <2 x i32> %r
33}
34
35; Negative test.
36
37define i32 @fshl_mask_simplify3(i32 %x, i32 %y, i32 %sh) {
38; CHECK-LABEL: @fshl_mask_simplify3(
39; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 16
40; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
41; CHECK-NEXT:    ret i32 [[R]]
42;
43  %maskedsh = and i32 %sh, 16
44  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
45  ret i32 %r
46}
47
48; Check again with weird bitwidths - the analysis is invalid for non-power-of-2 bitwidths.
49
50define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) {
51; CHECK-LABEL: @fshr_mask_simplify1(
52; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 64
53; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
54; CHECK-NEXT:    ret i33 [[R]]
55;
56  %maskedsh = and i33 %sh, 64
57  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
58  ret i33 %r
59}
60
61; Check again with weird bitwidths - the analysis is invalid for non-power-of-2 bitwidths.
62
63define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
64; CHECK-LABEL: @fshl_mask_simplify2(
65; CHECK-NEXT:    [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], splat (i31 32)
66; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[MASKEDSH]])
67; CHECK-NEXT:    ret <2 x i31> [[R]]
68;
69  %maskedsh = and <2 x i31> %sh, <i31 32, i31 32>
70  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %maskedsh)
71  ret <2 x i31> %r
72}
73
74; Check again with weird bitwidths - the analysis is invalid for non-power-of-2 bitwidths.
75
76define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) {
77; CHECK-LABEL: @fshr_mask_simplify3(
78; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 32
79; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
80; CHECK-NEXT:    ret i33 [[R]]
81;
82  %maskedsh = and i33 %sh, 32
83  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
84  ret i33 %r
85}
86
87; This mask op is unnecessary.
88
89define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) {
90; CHECK-LABEL: @fshl_mask_not_required(
91; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[SH:%.*]])
92; CHECK-NEXT:    ret i32 [[R]]
93;
94  %maskedsh = and i32 %sh, 31
95  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
96  ret i32 %r
97}
98
99; This mask op can be reduced.
100
101define i32 @fshl_mask_reduce_constant(i32 %x, i32 %y, i32 %sh) {
102; CHECK-LABEL: @fshl_mask_reduce_constant(
103; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 1
104; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
105; CHECK-NEXT:    ret i32 [[R]]
106;
107  %maskedsh = and i32 %sh, 33
108  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
109  ret i32 %r
110}
111
112; But this mask op is required.
113
114define i32 @fshl_mask_negative(i32 %x, i32 %y, i32 %sh) {
115; CHECK-LABEL: @fshl_mask_negative(
116; CHECK-NEXT:    [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 15
117; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
118; CHECK-NEXT:    ret i32 [[R]]
119;
120  %maskedsh = and i32 %sh, 15
121  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
122  ret i32 %r
123}
124
125; The transform is not limited to mask ops.
126
127define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
128; CHECK-LABEL: @fshr_set_but_not_demanded_vec(
129; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[SH:%.*]])
130; CHECK-NEXT:    ret <2 x i32> [[R]]
131;
132  %bogusbits = or <2 x i32> %sh, <i32 32, i32 32>
133  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %bogusbits)
134  ret <2 x i32> %r
135}
136
137; Check again with weird bitwidths - the analysis is invalid for non-power-of-2 bitwidths.
138
139define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
140; CHECK-LABEL: @fshl_set_but_not_demanded_vec(
141; CHECK-NEXT:    [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], splat (i31 32)
142; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[BOGUSBITS]])
143; CHECK-NEXT:    ret <2 x i31> [[R]]
144;
145  %bogusbits = or <2 x i31> %sh, <i31 32, i31 32>
146  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %bogusbits)
147  ret <2 x i31> %r
148}
149
150; Simplify one undef or zero operand and constant shift amount.
151
152define i32 @fshl_op0_undef(i32 %x) {
153; CHECK-LABEL: @fshl_op0_undef(
154; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], 25
155; CHECK-NEXT:    ret i32 [[R]]
156;
157  %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7)
158  ret i32 %r
159}
160
161define i32 @fshl_op0_zero(i32 %x) {
162; CHECK-LABEL: @fshl_op0_zero(
163; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], 25
164; CHECK-NEXT:    ret i32 [[R]]
165;
166  %r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7)
167  ret i32 %r
168}
169
170define i33 @fshr_op0_undef(i33 %x) {
171; CHECK-LABEL: @fshr_op0_undef(
172; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[X:%.*]], 7
173; CHECK-NEXT:    ret i33 [[R]]
174;
175  %r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7)
176  ret i33 %r
177}
178
179define i33 @fshr_op0_zero(i33 %x) {
180; CHECK-LABEL: @fshr_op0_zero(
181; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[X:%.*]], 7
182; CHECK-NEXT:    ret i33 [[R]]
183;
184  %r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7)
185  ret i33 %r
186}
187
188define i32 @fshl_op1_undef(i32 %x) {
189; CHECK-LABEL: @fshl_op1_undef(
190; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], 7
191; CHECK-NEXT:    ret i32 [[R]]
192;
193  %r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7)
194  ret i32 %r
195}
196
197define i32 @fshl_op1_zero(i32 %x) {
198; CHECK-LABEL: @fshl_op1_zero(
199; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], 7
200; CHECK-NEXT:    ret i32 [[R]]
201;
202  %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7)
203  ret i32 %r
204}
205
206define i33 @fshr_op1_undef(i33 %x) {
207; CHECK-LABEL: @fshr_op1_undef(
208; CHECK-NEXT:    [[R:%.*]] = shl i33 [[X:%.*]], 26
209; CHECK-NEXT:    ret i33 [[R]]
210;
211  %r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7)
212  ret i33 %r
213}
214
215define i33 @fshr_op1_zero(i33 %x) {
216; CHECK-LABEL: @fshr_op1_zero(
217; CHECK-NEXT:    [[R:%.*]] = shl i33 [[X:%.*]], 26
218; CHECK-NEXT:    ret i33 [[R]]
219;
220  %r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7)
221  ret i33 %r
222}
223
224define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) {
225; CHECK-LABEL: @fshl_op0_zero_splat_vec(
226; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i31> [[X:%.*]], splat (i31 24)
227; CHECK-NEXT:    ret <2 x i31> [[R]]
228;
229  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 7, i31 7>)
230  ret <2 x i31> %r
231}
232
233define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) {
234; CHECK-LABEL: @fshl_op1_undef_splat_vec(
235; CHECK-NEXT:    [[R:%.*]] = shl <2 x i31> [[X:%.*]], splat (i31 7)
236; CHECK-NEXT:    ret <2 x i31> [[R]]
237;
238  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 7, i31 7>)
239  ret <2 x i31> %r
240}
241
242define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) {
243; CHECK-LABEL: @fshr_op0_undef_splat_vec(
244; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 7)
245; CHECK-NEXT:    ret <2 x i32> [[R]]
246;
247  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>)
248  ret <2 x i32> %r
249}
250
251define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) {
252; CHECK-LABEL: @fshr_op1_zero_splat_vec(
253; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 25)
254; CHECK-NEXT:    ret <2 x i32> [[R]]
255;
256  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>)
257  ret <2 x i32> %r
258}
259
260define <2 x i31> @fshl_op0_zero_vec(<2 x i31> %x) {
261; CHECK-LABEL: @fshl_op0_zero_vec(
262; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 30, i31 29>
263; CHECK-NEXT:    ret <2 x i31> [[R]]
264;
265  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 -1, i31 33>)
266  ret <2 x i31> %r
267}
268
269define <2 x i31> @fshl_op1_undef_vec(<2 x i31> %x) {
270; CHECK-LABEL: @fshl_op1_undef_vec(
271; CHECK-NEXT:    [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 1, i31 2>
272; CHECK-NEXT:    ret <2 x i31> [[R]]
273;
274  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 -1, i31 33>)
275  ret <2 x i31> %r
276}
277
278define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) {
279; CHECK-LABEL: @fshr_op0_undef_vec(
280; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 1>
281; CHECK-NEXT:    ret <2 x i32> [[R]]
282;
283  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 -1, i32 33>)
284  ret <2 x i32> %r
285}
286
287define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
288; CHECK-LABEL: @fshr_op1_zero_vec(
289; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 31>
290; CHECK-NEXT:    ret <2 x i32> [[R]]
291;
292  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 33>)
293  ret <2 x i32> %r
294}
295
296; Only demand bits from one of the operands.
297
298define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
299; CHECK-LABEL: @fshl_only_op0_demanded(
300; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X:%.*]], 7
301; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 128
302; CHECK-NEXT:    ret i32 [[R]]
303;
304  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
305  %r = and i32 %z, 128
306  ret i32 %r
307}
308
309define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
310; CHECK-LABEL: @fshl_only_op1_demanded(
311; CHECK-NEXT:    [[Z:%.*]] = lshr i32 [[Y:%.*]], 25
312; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 63
313; CHECK-NEXT:    ret i32 [[R]]
314;
315  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
316  %r = and i32 %z, 63
317  ret i32 %r
318}
319
320define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
321; CHECK-LABEL: @fshr_only_op1_demanded(
322; CHECK-NEXT:    [[Z:%.*]] = lshr i33 [[Y:%.*]], 7
323; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 12392
324; CHECK-NEXT:    ret i33 [[R]]
325;
326  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
327  %r = and i33 %z, 12392
328  ret i33 %r
329}
330
331define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
332; CHECK-LABEL: @fshr_only_op0_demanded(
333; CHECK-NEXT:    [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4
334; CHECK-NEXT:    [[R:%.*]] = and i33 [[TMP1]], 7
335; CHECK-NEXT:    ret i33 [[R]]
336;
337  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
338  %r = lshr i33 %z, 30
339  ret i33 %r
340}
341
342define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) {
343; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat(
344; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], splat (i31 24)
345; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
346; CHECK-NEXT:    ret <2 x i31> [[R]]
347;
348  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 7>)
349  %r = and <2 x i31> %z, <i31 63, i31 31>
350  ret <2 x i31> %r
351}
352
353define i32 @fshl_constant_shift_amount_modulo_bitwidth(i32 %x, i32 %y) {
354; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth(
355; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 1)
356; CHECK-NEXT:    ret i32 [[R]]
357;
358  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 33)
359  ret i32 %r
360}
361
362define i33 @fshr_constant_shift_amount_modulo_bitwidth(i33 %x, i33 %y) {
363; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth(
364; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 32)
365; CHECK-NEXT:    ret i33 [[R]]
366;
367  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 34)
368  ret i33 %r
369}
370
371define i32 @fshl_undef_shift_amount(i32 %x, i32 %y) {
372; CHECK-LABEL: @fshl_undef_shift_amount(
373; CHECK-NEXT:    ret i32 [[X:%.*]]
374;
375  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 undef)
376  ret i32 %r
377}
378
379define i33 @fshr_undef_shift_amount(i33 %x, i33 %y) {
380; CHECK-LABEL: @fshr_undef_shift_amount(
381; CHECK-NEXT:    ret i33 [[Y:%.*]]
382;
383  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 undef)
384  ret i33 %r
385}
386
387@external_global = external global i8
388
389define i33 @fshr_constant_shift_amount_modulo_bitwidth_constexpr(i33 %x, i33 %y) {
390; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_constexpr(
391; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 ptrtoint (ptr @external_global to i33))
392; CHECK-NEXT:    ret i33 [[R]]
393;
394  %shamt = ptrtoint ptr @external_global to i33
395  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %shamt)
396  ret i33 %r
397}
398
399define <2 x i32> @fshr_constant_shift_amount_modulo_bitwidth_vec(<2 x i32> %x, <2 x i32> %y) {
400; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_vec(
401; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 30, i32 1>)
402; CHECK-NEXT:    ret <2 x i32> [[R]]
403;
404  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 34, i32 -1>)
405  ret <2 x i32> %r
406}
407
408define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec(<2 x i31> %x, <2 x i31> %y) {
409; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec(
410; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 3, i31 1>)
411; CHECK-NEXT:    ret <2 x i31> [[R]]
412;
413  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 -1>)
414  ret <2 x i31> %r
415}
416
417define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(<2 x i31> %x, <2 x i31> %y) {
418; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(
419; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 34, i31 ptrtoint (ptr @external_global to i31)>)
420; CHECK-NEXT:    ret <2 x i31> [[R]]
421;
422  %shamt = ptrtoint ptr @external_global to i31 ; unused: the call below spells the same constant expression inline
423  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 ptrtoint (ptr @external_global to i31)>) ; expected output keeps the shift-amount vector as written (34 is not reduced modulo 31)
424  ret <2 x i31> %r
425}
426
427define <2 x i31> @fshl_undef_shift_amount_vec(<2 x i31> %x, <2 x i31> %y) {
428; CHECK-LABEL: @fshl_undef_shift_amount_vec(
429; CHECK-NEXT:    ret <2 x i31> [[X:%.*]]
430;
431  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> undef)
432  ret <2 x i31> %r
433}
434
435define <2 x i32> @fshr_undef_shift_amount_vec(<2 x i32> %x, <2 x i32> %y) {
436; CHECK-LABEL: @fshr_undef_shift_amount_vec(
437; CHECK-NEXT:    ret <2 x i32> [[Y:%.*]]
438;
439  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> undef)
440  ret <2 x i32> %r
441}
442
443define i32 @rotl_common_demanded(i32 %a0) {
444; CHECK-LABEL: @rotl_common_demanded(
445; CHECK-NEXT:    [[X:%.*]] = xor i32 [[A0:%.*]], 2
446; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 8)
447; CHECK-NEXT:    ret i32 [[R]]
448;
449  %x = xor i32 %a0, 2
450  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
451  ret i32 %r
452}
453
454define i33 @rotr_common_demanded(i33 %a0) {
455; CHECK-LABEL: @rotr_common_demanded(
456; CHECK-NEXT:    [[X:%.*]] = xor i33 [[A0:%.*]], 2
457; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[X]], i33 25)
458; CHECK-NEXT:    ret i33 [[R]]
459;
460  %x = xor i33 %a0, 2
461  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 8)
462  ret i33 %r
463}
464
465; The shift modulo bitwidth is the same for all vector elements.
466
467define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
468; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
469; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], splat (i31 24)
470; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
471; CHECK-NEXT:    ret <2 x i31> [[R]]
472;
473  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 38>)
474  %r = and <2 x i31> %z, <i31 63, i31 31>
475  ret <2 x i31> %r
476}
477
478define i32 @rotl_constant_shift_amount(i32 %x) {
479; CHECK-LABEL: @rotl_constant_shift_amount(
480; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 1)
481; CHECK-NEXT:    ret i32 [[R]]
482;
483  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 33)
484  ret i32 %r
485}
486
487define <2 x i31> @rotl_constant_shift_amount_vec(<2 x i31> %x) {
488; CHECK-LABEL: @rotl_constant_shift_amount_vec(
489; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> splat (i31 1))
490; CHECK-NEXT:    ret <2 x i31> [[R]]
491;
492  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %x, <2 x i31> <i31 32, i31 -1>)
493  ret <2 x i31> %r
494}
495
496define i33 @rotr_constant_shift_amount(i33 %x) {
497; CHECK-LABEL: @rotr_constant_shift_amount(
498; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[X]], i33 32)
499; CHECK-NEXT:    ret i33 [[R]]
500;
501  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 34)
502  ret i33 %r
503}
504
505define <2 x i32> @rotr_constant_shift_amount_vec(<2 x i32> %x) {
506; CHECK-LABEL: @rotr_constant_shift_amount_vec(
507; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 31, i32 1>)
508; CHECK-NEXT:    ret <2 x i32> [[R]]
509;
510  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 33, i32 -1>)
511  ret <2 x i32> %r
512}
513
514; Demand bits from both operands -- cannot simplify.
515
516define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) {
517; CHECK-LABEL: @fshl_both_ops_demanded(
518; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
519; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 192
520; CHECK-NEXT:    ret i32 [[R]]
521;
522  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
523  %r = and i32 %z, 192
524  ret i32 %r
525}
526
527define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
528; CHECK-LABEL: @fshr_both_ops_demanded(
529; CHECK-NEXT:    [[Z:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
530; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 192
531; CHECK-NEXT:    ret i33 [[R]]
532;
533  %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 26)
534  %r = and i33 %z, 192
535  ret i33 %r
536}
537
538; Both operands are demanded, but there are known bits.
539
540define i32 @fshl_known_bits(i32 %x, i32 %y) {
541; CHECK-LABEL: @fshl_known_bits(
542; CHECK-NEXT:    ret i32 128
543;
544  %x2 = or i32 %x, 1   ; lo bit set
545  %y2 = lshr i32 %y, 1 ; hi bit clear
546  %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 7)
547  %r = and i32 %z, 192
548  ret i32 %r
549}
550
551define i33 @fshr_known_bits(i33 %x, i33 %y) {
552; CHECK-LABEL: @fshr_known_bits(
553; CHECK-NEXT:    ret i33 128
554;
555  %x2 = or i33 %x, 1 ; lo bit set
556  %y2 = lshr i33 %y, 1 ; hi bit clear
557  %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 26) ; fshr by 26 on i33 is fshl by 7: bit 7 comes from %x2 bit 0, bit 6 from %y2 bit 32
558  %r = and i33 %z, 192 ; keeps only bits 7 and 6, which are known 1 and 0 respectively
559  ret i33 %r
560}
561
562; This case fails to simplify due to multiple uses.
563
564define i33 @fshr_multi_use(i33 %a) {
565; CHECK-LABEL: @fshr_multi_use(
566; CHECK-NEXT:    [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
567; CHECK-NEXT:    [[C:%.*]] = lshr i33 [[B]], 23
568; CHECK-NEXT:    [[D:%.*]] = xor i33 [[C]], [[B]]
569; CHECK-NEXT:    [[E:%.*]] = and i33 [[D]], 31
570; CHECK-NEXT:    ret i33 [[E]]
571;
572  %b = tail call i33 @llvm.fshr.i33(i33 %a, i33 %a, i33 1)
573  %c = lshr i33 %b, 23
574  %d = xor i33 %c, %b
575  %e = and i33 %d, 31
576  ret i33 %e
577}
578
579; This demonstrates the same simplification working if the fshr intrinsic
580; is expanded into shifts and or.
581
582define i33 @expanded_fshr_multi_use(i33 %a) {
583; CHECK-LABEL: @expanded_fshr_multi_use(
584; CHECK-NEXT:    [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
585; CHECK-NEXT:    [[C:%.*]] = lshr i33 [[B]], 23
586; CHECK-NEXT:    [[D:%.*]] = xor i33 [[C]], [[B]]
587; CHECK-NEXT:    [[E:%.*]] = and i33 [[D]], 31
588; CHECK-NEXT:    ret i33 [[E]]
589;
590  %t = lshr i33 %a, 1
591  %t2 = shl i33 %a, 32
592  %b = or i33 %t, %t2
593  %c = lshr i33 %b, 23
594  %d = xor i33 %c, %b
595  %e = and i33 %d, 31
596  ret i33 %e
597}
598
599; Special case: rotating a 16-bit value left/right by 8 bits is a bswap.
600
601define i16 @fshl_bswap(i16 %x) {
602; CHECK-LABEL: @fshl_bswap(
603; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
604; CHECK-NEXT:    ret i16 [[R]]
605;
606  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8)
607  ret i16 %r
608}
609
610define i16 @fshr_bswap(i16 %x) {
611; CHECK-LABEL: @fshr_bswap(
612; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
613; CHECK-NEXT:    ret i16 [[R]]
614;
615  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 8)
616  ret i16 %r
617}
618
619define <3 x i16> @fshl_bswap_vector(<3 x i16> %x) {
620; CHECK-LABEL: @fshl_bswap_vector(
621; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> [[X:%.*]])
622; CHECK-NEXT:    ret <3 x i16> [[R]]
623;
624  %r = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %x, <3 x i16> %x, <3 x i16> <i16 8, i16 8, i16 8>)
625  ret <3 x i16> %r
626}
627
628; Negative test
629
630define i16 @fshl_bswap_wrong_op(i16 %x, i16 %y) {
631; CHECK-LABEL: @fshl_bswap_wrong_op(
632; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[Y:%.*]], i16 8)
633; CHECK-NEXT:    ret i16 [[R]]
634;
635  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 8)
636  ret i16 %r
637}
638
639; Negative test
640
641define i16 @fshr_bswap_wrong_amount(i16 %x) {
642; CHECK-LABEL: @fshr_bswap_wrong_amount(
643; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 12)
644; CHECK-NEXT:    ret i16 [[R]]
645;
646  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 4)
647  ret i16 %r
648}
649
650; Negative test
651
652define i32 @fshl_bswap_wrong_width(i32 %x) {
653; CHECK-LABEL: @fshl_bswap_wrong_width(
654; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
655; CHECK-NEXT:    ret i32 [[R]]
656;
657  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
658  ret i32 %r
659}
660
661define i32 @fshl_mask_args_same1(i32 %a) {
662; CHECK-LABEL: @fshl_mask_args_same1(
663; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 16
664; CHECK-NEXT:    ret i32 [[T2]]
665;
666  %t1 = and i32 %a, 4294901760 ; 0xffff0000
667  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 16)
668  ret i32 %t2
669}
670
671define i32 @fshl_mask_args_same2(i32 %a) {
672; CHECK-LABEL: @fshl_mask_args_same2(
673; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i16
674; CHECK-NEXT:    [[REV:%.*]] = shl i16 [[TRUNC]], 8
675; CHECK-NEXT:    [[T2:%.*]] = zext i16 [[REV]] to i32
676; CHECK-NEXT:    ret i32 [[T2]]
677;
678  %t1 = and i32 %a, 255
679  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 8)
680  ret i32 %t2
681}
682
683define i32 @fshl_mask_args_same3(i32 %a) {
684; CHECK-LABEL: @fshl_mask_args_same3(
685; CHECK-NEXT:    [[REV:%.*]] = shl i32 [[A:%.*]], 24
686; CHECK-NEXT:    ret i32 [[REV]]
687;
688  %t1 = and i32 %a, 255
689  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 24)
690  ret i32 %t2
691}
692
693define i32 @fshl_mask_args_different(i32 %a) {
694; CHECK-LABEL: @fshl_mask_args_different(
695; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[A:%.*]], 15
696; CHECK-NEXT:    [[T3:%.*]] = and i32 [[T1]], 130560
697; CHECK-NEXT:    ret i32 [[T3]]
698;
699  %t2 = and i32 %a, 4294901760 ; 0xffff0000
700  %t1 = and i32 %a, 4278190080 ; 0xff000000
701  %t3 = call i32 @llvm.fshl.i32(i32 %t2, i32 %t1, i32 17) ; %t2's low 16 bits are zero, so only %t1 >> 15 contributes (0xff000000 >> 15 == 130560)
702  ret i32 %t3
703}
704
705define i32 @fsh_andconst_rotate(i32 %a) {
706; CHECK-LABEL: @fsh_andconst_rotate(
707; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 16
708; CHECK-NEXT:    ret i32 [[T2]]
709;
710  %t1 = and i32 %a, 4294901760 ; 0xffff0000
711  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 16)
712  ret i32 %t2
713}
714
715define i32 @fsh_orconst_rotate(i32 %a) {
716; CHECK-LABEL: @fsh_orconst_rotate(
717; CHECK-NEXT:    [[T2:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 -268435456, i32 4)
718; CHECK-NEXT:    ret i32 [[T2]]
719;
720  %t1 = or i32 %a, 4026531840 ; 0xf0000000
721  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 4)
722  ret i32 %t2
723}
724
725define i32 @fsh_rotate_5(i8 %x, i32 %y) {
726; CHECK-LABEL: @fsh_rotate_5(
727; CHECK-NEXT:    [[T1:%.*]] = zext i8 [[X:%.*]] to i32
728; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[Y:%.*]], [[T1]]
729; CHECK-NEXT:    [[OR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[OR1]], i32 [[OR1]], i32 5)
730; CHECK-NEXT:    ret i32 [[OR2]]
731;
732
733  %t1 = zext i8 %x to i32
734  %or1 = or i32 %t1, %y
735  %shr = lshr i32 %or1, 27
736  %shl = shl i32 %or1, 5
737  %or2 = or i32 %shr, %shl
738  ret i32 %or2
739}
740
741define i32 @fsh_rotate_18(i8 %x, i32 %y) {
742; CHECK-LABEL: @fsh_rotate_18(
743; CHECK-NEXT:    [[T1:%.*]] = zext i8 [[X:%.*]] to i32
744; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[Y:%.*]], [[T1]]
745; CHECK-NEXT:    [[OR2:%.*]] = call i32 @llvm.fshl.i32(i32 [[OR1]], i32 [[OR1]], i32 18)
746; CHECK-NEXT:    ret i32 [[OR2]]
747;
748
749  %t1 = zext i8 %x to i32
750  %or1 = or i32 %t1, %y
751  %shr = lshr i32 %or1, 14
752  %shl = shl i32 %or1, 18
753  %or2 = or i32 %shr, %shl
754  ret i32 %or2
755}
756
757define i32 @fsh_load_rotate_12(ptr %data) {
758; CHECK-LABEL: @fsh_load_rotate_12(
759; CHECK-NEXT:  entry:
760; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DATA:%.*]], align 1
761; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
762; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 [[CONV]], 24
763; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 1
764; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
765; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
766; CHECK-NEXT:    [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 16
767; CHECK-NEXT:    [[OR:%.*]] = or disjoint i32 [[SHL3]], [[SHL]]
768; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2
769; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
770; CHECK-NEXT:    [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
771; CHECK-NEXT:    [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 8
772; CHECK-NEXT:    [[OR7:%.*]] = or disjoint i32 [[OR]], [[SHL6]]
773; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 3
774; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
775; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
776; CHECK-NEXT:    [[OR10:%.*]] = or disjoint i32 [[OR7]], [[CONV9]]
777; CHECK-NEXT:    [[OR15:%.*]] = call i32 @llvm.fshl.i32(i32 [[OR10]], i32 [[OR10]], i32 12)
778; CHECK-NEXT:    ret i32 [[OR15]]
779;
780
781entry:
782  %0 = load i8, ptr %data
783  %conv = zext i8 %0 to i32
784  %shl = shl nuw i32 %conv, 24
785  %arrayidx1 = getelementptr inbounds i8, ptr %data, i64 1
786  %1 = load i8, ptr %arrayidx1
787  %conv2 = zext i8 %1 to i32
788  %shl3 = shl nuw nsw i32 %conv2, 16
789  %or = or i32 %shl3, %shl
790  %arrayidx4 = getelementptr inbounds i8, ptr %data, i64 2
791  %2 = load i8, ptr %arrayidx4
792  %conv5 = zext i8 %2 to i32
793  %shl6 = shl nuw nsw i32 %conv5, 8
794  %or7 = or i32 %or, %shl6
795  %arrayidx8 = getelementptr inbounds i8, ptr %data, i64 3
796  %3 = load i8, ptr %arrayidx8
797  %conv9 = zext i8 %3 to i32
798  %or10 = or i32 %or7, %conv9
799  %shr = lshr i32 %or10, 20
800  %shl7 = shl i32 %or10, 12
801  %or15 = or i32 %shr, %shl7
802  ret i32 %or15
803}
804
805define i32 @fsh_load_rotate_25(ptr %data) {
806; CHECK-LABEL: @fsh_load_rotate_25(
807; CHECK-NEXT:  entry:
808; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DATA:%.*]], align 1
809; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
810; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 [[CONV]], 24
811; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 1
812; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
813; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
814; CHECK-NEXT:    [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 16
815; CHECK-NEXT:    [[OR:%.*]] = or disjoint i32 [[SHL3]], [[SHL]]
816; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2
817; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
818; CHECK-NEXT:    [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
819; CHECK-NEXT:    [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 8
820; CHECK-NEXT:    [[OR7:%.*]] = or disjoint i32 [[OR]], [[SHL6]]
821; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 3
822; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
823; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
824; CHECK-NEXT:    [[OR10:%.*]] = or disjoint i32 [[OR7]], [[CONV9]]
825; CHECK-NEXT:    [[OR15:%.*]] = call i32 @llvm.fshl.i32(i32 [[OR10]], i32 [[OR10]], i32 25)
826; CHECK-NEXT:    ret i32 [[OR15]]
827;
828
829entry:
830  %0 = load i8, ptr %data
831  %conv = zext i8 %0 to i32
832  %shl = shl nuw i32 %conv, 24
833  %arrayidx1 = getelementptr inbounds i8, ptr %data, i64 1
834  %1 = load i8, ptr %arrayidx1
835  %conv2 = zext i8 %1 to i32
836  %shl3 = shl nuw nsw i32 %conv2, 16
837  %or = or i32 %shl3, %shl
838  %arrayidx4 = getelementptr inbounds i8, ptr %data, i64 2
839  %2 = load i8, ptr %arrayidx4
840  %conv5 = zext i8 %2 to i32
841  %shl6 = shl nuw nsw i32 %conv5, 8
842  %or7 = or i32 %or, %shl6
843  %arrayidx8 = getelementptr inbounds i8, ptr %data, i64 3
844  %3 = load i8, ptr %arrayidx8
845  %conv9 = zext i8 %3 to i32
846  %or10 = or i32 %or7, %conv9
847  %shr = lshr i32 %or10, 7
848  %shl7 = shl i32 %or10, 25
849  %or15 = or i32 %shr, %shl7
850  ret i32 %or15
851}
852
853define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) {
854; CHECK-LABEL: @fshr_mask_args_same_vector(
855; CHECK-NEXT:    [[T3:%.*]] = shl <2 x i31> [[A:%.*]], splat (i31 10)
856; CHECK-NEXT:    ret <2 x i31> [[T3]]
857;
858  %t1 = and <2 x i31> %a, <i31 1000, i31 1000> ; 1000 < 1024, so only bits below bit 10 can be set
859  %t2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943> ; NOTE(review): constant is wider than i31; the expected output uses %a directly, so this mask appears to be a no-op
860  %t3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %t2, <2 x i31> %t1, <2 x i31> <i31 10, i31 10>) ; NOTE(review): the function is named fshr_* but this call is fshl; the top 10 bits of %t1 are zero, leaving a plain shl
861  ret <2 x i31> %t3
862}
863
; Rotate right by 3: both data operands of the fshr call are the same value %t1.
; Each lane of the %t1 mask (1000000 and 100000) is a multiple of 8, so the low
; three bits of %t1 are zero and nothing wraps around to the top.
; Expected output: the call becomes `lshr exact` of %t1 by 3.
; %t2 and the %b argument do not feed the returned value.
864define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) {
865; CHECK-LABEL: @fshr_mask_args_same_vector2(
866; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000>
867; CHECK-NEXT:    [[T3:%.*]] = lshr exact <2 x i32> [[T1]], splat (i32 3)
868; CHECK-NEXT:    ret <2 x i32> [[T3]]
869;
870  %t1 = and <2 x i32> %a, <i32 1000000, i32 100000>
871  %t2 = and <2 x i32> %a, <i32 6442450943, i32 6442450943>
872  %t3 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %t1, <2 x i32> %t1, <2 x i32> <i32 3, i32 3>)
873  ret <2 x i32> %t3
874}
875
; fshl of two `and`-masked copies of %a with a different shift amount in each
; lane (<10, 3>).
; Expected output: the %t2 mask is dropped (%a feeds the call directly), while
; the %t1 mask and the fshl call itself are kept.
876define <2 x i31> @fshr_mask_args_same_vector3_different_but_still_prunable(<2 x i31> %a) {
877; CHECK-LABEL: @fshr_mask_args_same_vector3_different_but_still_prunable(
878; CHECK-NEXT:    [[T1:%.*]] = and <2 x i31> [[A:%.*]], splat (i31 1000)
879; CHECK-NEXT:    [[T3:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[T1]], <2 x i31> <i31 10, i31 3>)
880; CHECK-NEXT:    ret <2 x i31> [[T3]]
881;
882  %t1 = and <2 x i31> %a, <i31 1000, i31 1000>
883  %t2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
884  %t3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %t2, <2 x i31> %t1, <2 x i31> <i31 10, i31 3>)
885  ret <2 x i31> %t3
886}
887
; All three fshr operands are single-use unary shuffles with the same mask
; <1, 0>.
; Expected output: the funnel shift is performed on the unshuffled %x, %y, %z
; and one shuffle with that mask is applied to the result.
888define <2 x i32> @fsh_unary_shuffle_ops(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
889; CHECK-LABEL: @fsh_unary_shuffle_ops(
890; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]])
891; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
892; CHECK-NEXT:    ret <2 x i32> [[R]]
893;
894  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
895  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
896  %c = shufflevector <2 x i32> %z, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
897  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
898  ret <2 x i32> %r
899}
900
; The three operands are widened from <2 x i16> to <3 x i16> by shuffles that
; share the mask <1, 0, 1>; the first shuffle (%a) has an extra use in the call
; to @use_v3.
; Expected output: %a is kept for @use_v3, the fshl is performed on the narrow
; <2 x i16> sources, and a single widening shuffle is applied to the result.
901define <3 x i16> @fsh_unary_shuffle_ops_widening(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z) {
902; CHECK-LABEL: @fsh_unary_shuffle_ops_widening(
903; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> poison, <3 x i32> <i32 1, i32 0, i32 1>
904; CHECK-NEXT:    call void @use_v3(<3 x i16> [[A]])
905; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X]], <2 x i16> [[Y:%.*]], <2 x i16> [[Z:%.*]])
906; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <3 x i32> <i32 1, i32 0, i32 1>
907; CHECK-NEXT:    ret <3 x i16> [[R]]
908;
909  %a = shufflevector <2 x i16> %x, <2 x i16> poison, <3 x i32> <i32 1, i32 0, i32 1>
910  call void @use_v3(<3 x i16> %a)
911  %b = shufflevector <2 x i16> %y, <2 x i16> poison, <3 x i32> <i32 1, i32 0, i32 1>
912  %c = shufflevector <2 x i16> %z, <2 x i16> poison, <3 x i32> <i32 1, i32 0, i32 1>
913  %r = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %a, <3 x i16> %b, <3 x i16> %c)
914  ret <3 x i16> %r
915}
916
; The three operands are narrowed from <3 x i31> to <2 x i31> by shuffles that
; share the mask <1, 0>; the second shuffle (%b) has an extra use in the call
; to @use_v2.
; Expected output: %b is kept for @use_v2, the fshl is performed on the wide
; <3 x i31> sources, and a single narrowing shuffle is applied to the result.
917define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3 x i31> %z) {
918; CHECK-LABEL: @fsh_unary_shuffle_ops_narrowing(
919; CHECK-NEXT:    [[B:%.*]] = shufflevector <3 x i31> [[Y:%.*]], <3 x i31> poison, <2 x i32> <i32 1, i32 0>
920; CHECK-NEXT:    call void @use_v2(<2 x i31> [[B]])
921; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i31> @llvm.fshl.v3i31(<3 x i31> [[X:%.*]], <3 x i31> [[Y]], <3 x i31> [[Z:%.*]])
922; CHECK-NEXT:    [[R:%.*]] = shufflevector <3 x i31> [[TMP1]], <3 x i31> poison, <2 x i32> <i32 1, i32 0>
923; CHECK-NEXT:    ret <2 x i31> [[R]]
924;
925  %a = shufflevector <3 x i31> %x, <3 x i31> poison, <2 x i32> <i32 1, i32 0>
926  %b = shufflevector <3 x i31> %y, <3 x i31> poison, <2 x i32> <i32 1, i32 0>
927  call void @use_v2(<2 x i31> %b)
928  %c = shufflevector <3 x i31> %z, <3 x i31> poison, <2 x i32> <i32 1, i32 0>
929  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %a, <2 x i31> %b, <2 x i31> %c)
930  ret <2 x i31> %r
931}
932
933; negative test - must have 3 shuffles
; Only %x and %y go through a shuffle here; %z reaches the call unshuffled.
; Expected output: both shuffles and the fshr call are left as written.
934
935define <2 x i32> @fsh_unary_shuffle_ops_unshuffled(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
936; CHECK-LABEL: @fsh_unary_shuffle_ops_unshuffled(
937; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
938; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
939; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> [[Z:%.*]])
940; CHECK-NEXT:    ret <2 x i32> [[R]]
941;
942  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
943  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
944  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %z)
945  ret <2 x i32> %r
946}
947
948; negative test - must have identical masks
; %b uses the mask <0, 0> (printed as zeroinitializer in the expected output)
; while %a and %c use <1, 0>.
; Expected output: all three shuffles and the fshr call remain in place.
949
950define <2 x i32> @fsh_unary_shuffle_ops_wrong_mask(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
951; CHECK-LABEL: @fsh_unary_shuffle_ops_wrong_mask(
952; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
953; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
954; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x i32> [[Z:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
955; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> [[C]])
956; CHECK-NEXT:    ret <2 x i32> [[R]]
957;
958  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
959  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 0, i32 0>
960  %c = shufflevector <2 x i32> %z, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
961  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
962  ret <2 x i32> %r
963}
964
965; negative test - too many uses
; Every one of the three shuffles is also passed to @use_v2, so none of them
; is single-use.
; Expected output: the shuffles, the @use_v2 calls and the fshl call all remain.
966
967define <2 x i31> @fsh_unary_shuffle_ops_uses(<2 x i31> %x, <2 x i31> %y, <2 x i31> %z) {
968; CHECK-LABEL: @fsh_unary_shuffle_ops_uses(
969; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i31> [[X:%.*]], <2 x i31> poison, <2 x i32> <i32 1, i32 0>
970; CHECK-NEXT:    call void @use_v2(<2 x i31> [[A]])
971; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i31> [[Y:%.*]], <2 x i31> poison, <2 x i32> <i32 1, i32 0>
972; CHECK-NEXT:    call void @use_v2(<2 x i31> [[B]])
973; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x i31> [[Z:%.*]], <2 x i31> poison, <2 x i32> <i32 1, i32 0>
974; CHECK-NEXT:    call void @use_v2(<2 x i31> [[C]])
975; CHECK-NEXT:    [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[B]], <2 x i31> [[C]])
976; CHECK-NEXT:    ret <2 x i31> [[R]]
977;
978  %a = shufflevector <2 x i31> %x, <2 x i31> poison, <2 x i32> <i32 1, i32 0>
979  call void @use_v2(<2 x i31> %a)
980  %b = shufflevector <2 x i31> %y, <2 x i31> poison, <2 x i32> <i32 1, i32 0>
981  call void @use_v2(<2 x i31> %b)
982  %c = shufflevector <2 x i31> %z, <2 x i31> poison, <2 x i32> <i32 1, i32 0>
983  call void @use_v2(<2 x i31> %c)
984  %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %a, <2 x i31> %b, <2 x i31> %c)
985  ret <2 x i31> %r
986}
987
988; negative test - all source ops must have the same type
; %x is <3 x i32> while %y and %z are <2 x i32>, so the shuffle sources differ
; in type even though all three masks are <1, 0>.
; Expected output: all three shuffles and the fshr call remain in place.
989
990define <2 x i32> @fsh_unary_shuffle_ops_partial_widening(<3 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
991; CHECK-LABEL: @fsh_unary_shuffle_ops_partial_widening(
992; CHECK-NEXT:    [[A:%.*]] = shufflevector <3 x i32> [[X:%.*]], <3 x i32> poison, <2 x i32> <i32 1, i32 0>
993; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
994; CHECK-NEXT:    [[C:%.*]] = shufflevector <2 x i32> [[Z:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
995; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> [[C]])
996; CHECK-NEXT:    ret <2 x i32> [[R]]
997;
998  %a = shufflevector <3 x i32> %x, <3 x i32> poison, <2 x i32> <i32 1, i32 0>
999  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
1000  %c = shufflevector <2 x i32> %z, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
1001  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
1002  ret <2 x i32> %r
1003}
1004
; fshr with a constant shift-amount vector whose second lane is zero (<2, 0>).
; Expected output: the call is left exactly as written.
1005define <2 x i32> @fshr_vec_zero_elem(<2 x i32> %x, <2 x i32> %y) {
1006; CHECK-LABEL: @fshr_vec_zero_elem(
1007; CHECK-NEXT:    [[FSH:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 2, i32 0>)
1008; CHECK-NEXT:    ret <2 x i32> [[FSH]]
1009;
1010  %fsh = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 2, i32 0>)
1011  ret <2 x i32> %fsh
1012}
1013
; fshl on i16 with a constant-zero second operand and a variable shift amount.
; Expected output: a plain `shl` of %x by the shift amount masked with 15.
1014define i16 @fshl_i16_shl(i16 %x, i16 %y) {
1015; CHECK-LABEL: @fshl_i16_shl(
1016; CHECK-NEXT:  entry:
1017; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
1018; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
1019; CHECK-NEXT:    ret i16 [[RES]]
1020;
1021entry:
1022  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
1023  ret i16 %res
1024}
1025
; fshl on i32 with a constant-zero second operand and a variable shift amount.
; Expected output: a plain `shl` of %x by the shift amount masked with 31.
1026define i32 @fshl_i32_shl(i32 %x, i32 %y) {
1027; CHECK-LABEL: @fshl_i32_shl(
1028; CHECK-NEXT:  entry:
1029; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
1030; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
1031; CHECK-NEXT:    ret i32 [[RES]]
1032;
1033entry:
1034  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
1035  ret i32 %res
1036}
1037
; Vector form: fshl on <2 x i16> with a zeroinitializer second operand and a
; variable shift amount.
; Expected output: a `shl` of %x by the shift amount masked with splat 15.
1038define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {
1039; CHECK-LABEL: @fshl_vi16_shl(
1040; CHECK-NEXT:  entry:
1041; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i16> [[Y:%.*]], splat (i16 15)
1042; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i16> [[X:%.*]], [[TMP0]]
1043; CHECK-NEXT:    ret <2 x i16> [[RES]]
1044;
1045entry:
1046  %res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> zeroinitializer, <2 x i16> %y)
1047  ret <2 x i16> %res
1048}
1049
; Negative test: fshr (not fshl) with a constant-zero second operand and a
; variable shift amount.
; Expected output: the fshr call is left as written; no `shl` is produced.
1050define i32 @fshr_i32_shl_negative_test(i32 %x, i32 %y) {
1051; CHECK-LABEL: @fshr_i32_shl_negative_test(
1052; CHECK-NEXT:  entry:
1053; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 0, i32 [[Y:%.*]])
1054; CHECK-NEXT:    ret i32 [[RES]]
1055;
1056entry:
1057  %res = call i32 @llvm.fshr.i32(i32 %x, i32 0, i32 %y)
1058  ret i32 %res
1059}
1060
; Negative test: fshl on <2 x i31> with a zeroinitializer second operand and a
; variable shift amount.
; Expected output: the fshl call is left as written; no `shl` is produced.
; NOTE(review): presumably this is because 31 is not a power of two, so the
; shift amount cannot be reduced with a simple `and` mask - confirm against the
; fold's implementation.
1061define <2 x i31> @fshl_vi31_shl_negative_test(<2 x i31> %x, <2 x i31> %y) {
1062; CHECK-LABEL: @fshl_vi31_shl_negative_test(
1063; CHECK-NEXT:  entry:
1064; CHECK-NEXT:    [[RES:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> zeroinitializer, <2 x i31> [[Y:%.*]])
1065; CHECK-NEXT:    ret <2 x i31> [[RES]]
1066;
1067entry:
1068  %res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> zeroinitializer, <2 x i31>  %y)
1069  ret <2 x i31>  %res
1070}
1071
1072;; Issue #124387 Range attribute no longer holds after operands changed.
;; The input call carries a `range(i32 -4, 2)` return attribute and uses %or for
;; both data operands; only the low 8 bits of the result are used (trunc to i8).
;; Expected output: the `or` constant becomes 126, the second fshl operand
;; becomes the constant -2, the call no longer carries the range attribute, and
;; the trunc is flagged `nuw` instead of `nsw`.
1073define i8 @fshl_range_trunc(i1 %x) {
1074; CHECK-LABEL: @fshl_range_trunc(
1075; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[X:%.*]] to i32
1076; CHECK-NEXT:    [[OR:%.*]] = or disjoint i32 [[ZEXT]], 126
1077; CHECK-NEXT:    [[FSHL:%.*]] = call i32 @llvm.fshl.i32(i32 [[OR]], i32 -2, i32 1)
1078; CHECK-NEXT:    [[TR:%.*]] = trunc nuw i32 [[FSHL]] to i8
1079; CHECK-NEXT:    ret i8 [[TR]]
1080;
1081  %zext = zext i1 %x to i32
1082  %or = or disjoint i32 %zext, -2
1083  %fshl = call range(i32 -4, 2) i32 @llvm.fshl.i32(i32 %or, i32 %or, i32 1)
1084  %tr = trunc nsw i32 %fshl to i8
1085  ret i8 %tr
1086}
1087