; xref: /llvm-project/llvm/test/Transforms/InstCombine/shift-add.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test makes sure that these instructions are properly eliminated.
;
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Helper with unknown side effects; used to give intermediate values extra uses.
declare void @use(i8)

; The zext proves the add cannot wrap, so the +5 folds into the shifted
; constant: shl (6 << 5) == shl 192.
define i32 @shl_C1_add_A_C2_i32(i16 %A) {
; CHECK-LABEL: @shl_C1_add_A_C2_i32(
; CHECK-NEXT:    [[B:%.*]] = zext nneg i16 [[A:%.*]] to i32
; CHECK-NEXT:    [[D:%.*]] = shl i32 192, [[B]]
; CHECK-NEXT:    ret i32 [[D]]
;
  %B = zext i16 %A to i32
  %C = add i32 %B, 5
  %D = shl i32 6, %C
  ret i32 %D
}

; The shift amount is known to be >= 5, so ashr of the small positive
; constant 6 is always 0.
define i32 @ashr_C1_add_A_C2_i32(i32 %A) {
; CHECK-LABEL: @ashr_C1_add_A_C2_i32(
; CHECK-NEXT:    ret i32 0
;
  %B = and i32 %A, 65535
  %C = add i32 %B, 5
  %D = ashr i32 6, %C
  ret i32 %D
}

; Same pre-shift fold through an 'and' mask (note: despite the name, the
; body shifts left): shl (6 << 5) == shl 192.
define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
; CHECK-LABEL: @lshr_C1_add_A_C2_i32(
; CHECK-NEXT:    [[B:%.*]] = and i32 [[A:%.*]], 65535
; CHECK-NEXT:    [[D:%.*]] = shl i32 192, [[B]]
; CHECK-NEXT:    ret i32 [[D]]
;
  %B = and i32 %A, 65535
  %C = add i32 %B, 5
  %D = shl i32 6, %C
  ret i32 %D
}

; Vector version: each lane's constant is pre-shifted; a lane whose shift
; amount is out of range becomes poison.
define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
; CHECK-NEXT:    [[B:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT:    [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[B]]
; CHECK-NEXT:    ret <4 x i32> [[D]]
;
  %B = zext <4 x i16> %A to <4 x i32>
  %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
  %D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
  ret <4 x i32> %D
}

; Vector ashr version of the pre-shift fold.
define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
; CHECK-NEXT:    [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[B]]
; CHECK-NEXT:    ret <4 x i32> [[D]]
;
  %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
  %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
  %D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
  ret <4 x i32> %D
}

; Vector lshr version of the pre-shift fold.
define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
; CHECK-NEXT:    [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[B]]
; CHECK-NEXT:    ret <4 x i32> [[D]]
;
  %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
  %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
  %D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
  ret <4 x i32> %D
}

; Splatted shift amount: the fold still applies per-lane to the vector add.
define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @shl_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[C]]
; CHECK-NEXT:    ret <4 x i32> [[E]]
;
  %A = zext i16 %I to i32
  %B = insertelement <4 x i32> undef, i32 %A, i32 0
  %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
  %D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
  %E = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
  ret <4 x i32> %E
}

; Splatted shift amount, ashr version.
define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[C]]
; CHECK-NEXT:    ret <4 x i32> [[E]]
;
  %A = zext i16 %I to i32
  %B = insertelement <4 x i32> undef, i32 %A, i32 0
  %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
  %D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
  %E = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
  ret <4 x i32> %E
}

; Splatted shift amount, lshr version.
define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) {
; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32_splat(
; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[C]]
; CHECK-NEXT:    ret <4 x i32> [[E]]
;
  %A = zext i16 %I to i32
  %B = insertelement <4 x i32> undef, i32 %A, i32 0
  %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
  %D = add <4 x i32> %C, <i32 0, i32 1, i32 50, i32 16>
  %E = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %D
  ret <4 x i32> %E
}

; 'nuw' on the add lets the offset fold into the shifted constant: 6 << 5 == 192.
define i32 @shl_add_nuw(i32 %x) {
; CHECK-LABEL: @shl_add_nuw(
; CHECK-NEXT:    [[R:%.*]] = shl i32 192, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add nuw i32 %x, 5
  %r = shl i32 6, %a
  ret i32 %r
}

; vectors with arbitrary constants work too

define <2 x i12> @lshr_add_nuw(<2 x i12> %x) {
; CHECK-LABEL: @lshr_add_nuw(
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i12> <i12 0, i12 21>, [[X:%.*]]
; CHECK-NEXT:    ret <2 x i12> [[R]]
;
  %a = add nuw <2 x i12> %x, <i12 5, i12 1>
  %r = lshr <2 x i12> <i12 6, i12 42>, %a
  ret <2 x i12> %r
}

; extra use is ok and in this case the result can be simplified to a constant

define i32 @ashr_add_nuw(i32 %x, ptr %p) {
; CHECK-LABEL: @ashr_add_nuw(
; CHECK-NEXT:    [[A:%.*]] = add nuw i32 [[X:%.*]], 5
; CHECK-NEXT:    store i32 [[A]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    ret i32 -1
;
  %a = add nuw i32 %x, 5
  store i32 %a, ptr %p
  %r = ashr i32 -6, %a
  ret i32 %r
}

; Preserve nuw and exact flags.

define i32 @shl_nuw_add_nuw(i32 %x) {
; CHECK-LABEL: @shl_nuw_add_nuw(
; CHECK-NEXT:    [[R:%.*]] = shl nuw i32 2, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add nuw i32 %x, 1
  %r = shl nuw i32 1, %a
  ret i32 %r
}

; The nsw flag on the shl survives the constant pre-shift.
define i32 @shl_nsw_add_nuw(i32 %x) {
; CHECK-LABEL: @shl_nsw_add_nuw(
; CHECK-NEXT:    [[R:%.*]] = shl nsw i32 -2, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add nuw i32 %x, 1
  %r = shl nsw i32 -1, %a
  ret i32 %r
}

; The exact flag on the lshr survives the constant pre-shift (4 >> 1 == 2).
define i32 @lshr_exact_add_nuw(i32 %x) {
; CHECK-LABEL: @lshr_exact_add_nuw(
; CHECK-NEXT:    [[R:%.*]] = lshr exact i32 2, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add nuw i32 %x, 1
  %r = lshr exact i32 4, %a
  ret i32 %r
}

; The exact flag on the ashr survives the constant pre-shift (-4 >> 1 == -2).
define i32 @ashr_exact_add_nuw(i32 %x) {
; CHECK-LABEL: @ashr_exact_add_nuw(
; CHECK-NEXT:    [[R:%.*]] = ashr exact i32 -2, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add nuw i32 %x, 1
  %r = ashr exact i32 -4, %a
  ret i32 %r
}

; negative test - must have 'nuw'

define i32 @shl_add_nsw(i32 %x) {
; CHECK-LABEL: @shl_add_nsw(
; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X:%.*]], 5
; CHECK-NEXT:    [[R:%.*]] = shl i32 6, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add nsw i32 %x, 5
  %r = shl i32 6, %a
  ret i32 %r
}

; offset precondition check (must be negative constant) for lshr_exact_add_negative_shift_positive

define i32 @lshr_exact_add_positive_shift_positive(i32 %x) {
; CHECK-LABEL: @lshr_exact_add_positive_shift_positive(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 1
; CHECK-NEXT:    [[R:%.*]] = lshr exact i32 2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, 1
  %r = lshr exact i32 2, %a
  ret i32 %r
}

; negative test - the offset (-33) exceeds the bit width, so no fold.
define i32 @lshr_exact_add_big_negative_offset(i32 %x) {
; CHECK-LABEL: @lshr_exact_add_big_negative_offset(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -33
; CHECK-NEXT:    [[R:%.*]] = lshr exact i32 2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -33
  %r = lshr exact i32 2, %a
  ret i32 %r
}

; leading zeros for shifted constant precondition check for lshr_exact_add_negative_shift_positive

define i32 @lshr_exact_add_negative_shift_negative(i32 %x) {
; CHECK-LABEL: @lshr_exact_add_negative_shift_negative(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -1
; CHECK-NEXT:    [[R:%.*]] = lshr exact i32 -2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -1
  %r = lshr exact i32 -2, %a
  ret i32 %r
}

; exact precondition check for lshr_exact_add_negative_shift_positive

define i32 @lshr_add_negative_shift_no_exact(i32 %x) {
; CHECK-LABEL: @lshr_add_negative_shift_no_exact(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -1
; CHECK-NEXT:    [[R:%.*]] = lshr i32 2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -1
  %r = lshr i32 2, %a
  ret i32 %r
}

; lshr exact C, (x - 1) --> lshr exact (C << 1), x: the add is eliminated.
define i32 @lshr_exact_add_negative_shift_positive(i32 %x) {
; CHECK-LABEL: @lshr_exact_add_negative_shift_positive(
; CHECK-NEXT:    [[R:%.*]] = lshr exact i32 4, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -1
  %r = lshr exact i32 2, %a
  ret i32 %r
}

; Extra use of the add is fine; the new shift uses %x directly (64 << 1 == -128 as i8).
define i8 @lshr_exact_add_negative_shift_positive_extra_use(i8 %x) {
; CHECK-LABEL: @lshr_exact_add_negative_shift_positive_extra_use(
; CHECK-NEXT:    [[A:%.*]] = add i8 [[X:%.*]], -1
; CHECK-NEXT:    call void @use(i8 [[A]])
; CHECK-NEXT:    [[R:%.*]] = lshr exact i8 -128, [[X]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %a = add i8 %x, -1
  call void @use(i8 %a)
  %r = lshr exact i8 64, %a
  ret i8 %r
}

; Vector version of the negative-offset pre-shift (2 << 7 as i9 == -256).
define <2 x i9> @lshr_exact_add_negative_shift_positive_vec(<2 x i9> %x) {
; CHECK-LABEL: @lshr_exact_add_negative_shift_positive_vec(
; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i9> splat (i9 -256), [[X:%.*]]
; CHECK-NEXT:    ret <2 x i9> [[R]]
;
  %a = add <2 x i9> %x, <i9 -7, i9 -7>
  %r = lshr exact <2 x i9> <i9 2, i9 2>, %a
  ret <2 x i9> %r
}

; not enough leading zeros in shift constant

define <2 x i9> @lshr_exact_add_negative_shift_lzcnt(<2 x i9> %x) {
; CHECK-LABEL: @lshr_exact_add_negative_shift_lzcnt(
; CHECK-NEXT:    [[A:%.*]] = add <2 x i9> [[X:%.*]], splat (i9 -7)
; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i9> splat (i9 4), [[A]]
; CHECK-NEXT:    ret <2 x i9> [[R]]
;
  %a = add <2 x i9> %x, <i9 -7, i9 -7>
  %r = lshr exact <2 x i9> <i9 4, i9 4>, %a
  ret <2 x i9> %r
}

; leading ones precondition check for ashr_exact_add_negative_shift_[positive,negative]

define i8 @ashr_exact_add_negative_shift_no_trailing_zeros(i8 %x) {
; CHECK-LABEL: @ashr_exact_add_negative_shift_no_trailing_zeros(
; CHECK-NEXT:    [[A:%.*]] = add i8 [[X:%.*]], -4
; CHECK-NEXT:    [[R:%.*]] = ashr exact i8 -112, [[A]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %a = add i8 %x, -4
  %r = ashr exact i8 -112, %a ; 0b1001_0000
  ret i8 %r
}

; negative test - the offset (-33) exceeds the bit width, so no fold.
define i32 @ashr_exact_add_big_negative_offset(i32 %x) {
; CHECK-LABEL: @ashr_exact_add_big_negative_offset(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -33
; CHECK-NEXT:    [[R:%.*]] = ashr exact i32 -2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -33
  %r = ashr exact i32 -2, %a
  ret i32 %r
}

; exact precondition check for ashr_exact_add_negative_shift_[positive,negative]

define i32 @ashr_add_negative_shift_no_exact(i32 %x) {
; CHECK-LABEL: @ashr_add_negative_shift_no_exact(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -1
; CHECK-NEXT:    [[R:%.*]] = ashr i32 -2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -1
  %r = ashr i32 -2, %a
  ret i32 %r
}

; ashr exact C, (x - 1) --> ashr exact (C << 1), x: the add is eliminated.
define i32 @ashr_exact_add_negative_shift_negative(i32 %x) {
; CHECK-LABEL: @ashr_exact_add_negative_shift_negative(
; CHECK-NEXT:    [[R:%.*]] = ashr exact i32 -4, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -1
  %r = ashr exact i32 -2, %a
  ret i32 %r
}

; Extra use of the add is fine; -32 << 2 == -128 as i8.
define i8 @ashr_exact_add_negative_shift_negative_extra_use(i8 %x) {
; CHECK-LABEL: @ashr_exact_add_negative_shift_negative_extra_use(
; CHECK-NEXT:    [[A:%.*]] = add i8 [[X:%.*]], -2
; CHECK-NEXT:    call void @use(i8 [[A]])
; CHECK-NEXT:    [[R:%.*]] = ashr exact i8 -128, [[X]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %a = add i8 %x, -2
  call void @use(i8 %a)
  %r = ashr exact i8 -32, %a
  ret i8 %r
}

; Vector version of the negative-offset ashr pre-shift (-2 << 5 as i7 == -64).
define <2 x i7> @ashr_exact_add_negative_shift_negative_vec(<2 x i7> %x) {
; CHECK-LABEL: @ashr_exact_add_negative_shift_negative_vec(
; CHECK-NEXT:    [[R:%.*]] = ashr exact <2 x i7> splat (i7 -64), [[X:%.*]]
; CHECK-NEXT:    ret <2 x i7> [[R]]
;
  %a = add <2 x i7> %x, <i7 -5, i7 -5>
  %r = ashr exact <2 x i7> <i7 -2, i7 -2>, %a
  ret <2 x i7> %r
}

; not enough leading ones in shift constant

define <2 x i7> @ashr_exact_add_negative_leading_ones_vec(<2 x i7> %x) {
; CHECK-LABEL: @ashr_exact_add_negative_leading_ones_vec(
; CHECK-NEXT:    [[A:%.*]] = add <2 x i7> [[X:%.*]], splat (i7 -5)
; CHECK-NEXT:    [[R:%.*]] = ashr exact <2 x i7> splat (i7 -4), [[A]]
; CHECK-NEXT:    ret <2 x i7> [[R]]
;
  %a = add <2 x i7> %x, <i7 -5, i7 -5>
  %r = ashr exact <2 x i7> <i7 -4, i7 -4>, %a
  ret <2 x i7> %r
}

; PR54890

define i32 @shl_nsw_add_negative(i32 %x) {
; CHECK-LABEL: @shl_nsw_add_negative(
; CHECK-NEXT:    [[R:%.*]] = shl nuw i32 1, [[X:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -1
  %r = shl nsw i32 2, %a
  ret i32 %r
}

; vectors and extra uses are allowed
; nuw propagates to the new shift

define <2 x i8> @shl_nuw_add_negative_splat_uses(<2 x i8> %x, ptr %p) {
; CHECK-LABEL: @shl_nuw_add_negative_splat_uses(
; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -2)
; CHECK-NEXT:    store <2 x i8> [[A]], ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i8> splat (i8 3), [[X]]
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %a = add <2 x i8> %x, <i8 -2, i8 -2>
  store <2 x i8> %a, ptr %p
  %r = shl nuw <2 x i8> <i8 12, i8 12>, %a
  ret <2 x i8> %r
}

; negative test - shift constant must have enough trailing zeros to allow the pre-shift

define i32 @shl_nsw_add_negative_invalid_constant(i32 %x) {
; CHECK-LABEL: @shl_nsw_add_negative_invalid_constant(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -2
; CHECK-NEXT:    [[R:%.*]] = shl nsw i32 2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -2
  %r = shl nsw i32 2, %a
  ret i32 %r
}

; negative test - the offset constant must be negative

define i32 @shl_nsw_add_positive_invalid_constant(i32 %x) {
; CHECK-LABEL: @shl_nsw_add_positive_invalid_constant(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 2
; CHECK-NEXT:    [[R:%.*]] = shl nsw i32 4, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, 2
  %r = shl nsw i32 4, %a
  ret i32 %r
}

; negative test - a large shift must be detected without crashing

define i32 @shl_nsw_add_negative_invalid_constant2(i32 %x) {
; CHECK-LABEL: @shl_nsw_add_negative_invalid_constant2(
; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -33
; CHECK-NEXT:    [[R:%.*]] = shl nsw i32 2, [[A]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %a = add i32 %x, -33
  %r = shl nsw i32 2, %a
  ret i32 %r
}

; negative test - currently transformed to 'xor' before we see it,
; but INT_MIN should be handled too

define i4 @shl_nsw_add_negative_invalid_constant3(i4 %x) {
; CHECK-LABEL: @shl_nsw_add_negative_invalid_constant3(
; CHECK-NEXT:    [[A:%.*]] = xor i4 [[X:%.*]], -8
; CHECK-NEXT:    [[R:%.*]] = shl nsw i4 2, [[A]]
; CHECK-NEXT:    ret i4 [[R]]
;
  %a = add i4 %x, 8
  %r = shl nsw i4 2, %a
  ret i4 %r
}

; The top bit of (zext i1 + zext i1) is set iff both inputs are true.
define i2 @lshr_2_add_zext_basic(i1 %a, i1 %b) {
; CHECK-LABEL: @lshr_2_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[TMP1]] to i2
; CHECK-NEXT:    ret i2 [[LSHR]]
;
  %zext.a = zext i1 %a to i2
  %zext.b = zext i1 %b to i2
  %add = add i2 %zext.a, %zext.b
  %lshr = lshr i2 %add, 1
  ret i2 %lshr
}

; negative test - arithmetic shift of the carry bit is not the same fold.
define i2 @ashr_2_add_zext_basic(i1 %a, i1 %b) {
; CHECK-LABEL: @ashr_2_add_zext_basic(
; CHECK-NEXT:    [[ZEXT_A:%.*]] = zext i1 [[A:%.*]] to i2
; CHECK-NEXT:    [[ZEXT_B:%.*]] = zext i1 [[B:%.*]] to i2
; CHECK-NEXT:    [[ADD:%.*]] = add nuw i2 [[ZEXT_A]], [[ZEXT_B]]
; CHECK-NEXT:    [[LSHR:%.*]] = ashr i2 [[ADD]], 1
; CHECK-NEXT:    ret i2 [[LSHR]]
;
  %zext.a = zext i1 %a to i2
  %zext.b = zext i1 %b to i2
  %add = add i2 %zext.a, %zext.b
  %lshr = ashr i2 %add, 1
  ret i2 %lshr
}

; The high half of (zext i16 + zext i16) is exactly the narrow add's carry bit.
define i32 @lshr_16_add_zext_basic(i16 %a, i16 %b) {
; CHECK-LABEL: @lshr_16_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[A:%.*]], -1
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ugt i16 [[B:%.*]], [[TMP1]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i32
; CHECK-NEXT:    ret i32 [[LSHR]]
;
  %zext.a = zext i16 %a to i32
  %zext.b = zext i16 %b to i32
  %add = add i32 %zext.a, %zext.b
  %lshr = lshr i32 %add, 16
  ret i32 %lshr
}

; negative test - an extra use of the wide value blocks the carry-bit fold.
define i32 @lshr_16_add_zext_basic_multiuse(i16 %a, i16 %b) {
; CHECK-LABEL: @lshr_16_add_zext_basic_multiuse(
; CHECK-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[ADD]], 16
; CHECK-NEXT:    [[OTHERUSE:%.*]] = or i32 [[LSHR]], [[ZEXT_A]]
; CHECK-NEXT:    ret i32 [[OTHERUSE]]
;
  %zext.a = zext i16 %a to i32
  %zext.b = zext i16 %b to i32
  %add = add i32 %zext.a, %zext.b
  %lshr = lshr i32 %add, 16
  %otheruse = or i32 %lshr, %zext.a
  ret i32 %otheruse
}

; Both inputs masked to 16 bits: the add cannot overflow i32 (nuw/nsw inferred).
define i32 @lshr_16_add_known_16_leading_zeroes(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_16_add_known_16_leading_zeroes(
; CHECK-NEXT:    [[A16:%.*]] = and i32 [[A:%.*]], 65535
; CHECK-NEXT:    [[B16:%.*]] = and i32 [[B:%.*]], 65535
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[A16]], [[B16]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[ADD]], 16
; CHECK-NEXT:    ret i32 [[LSHR]]
;
  %a16 = and i32 %a, 65535 ; 0xFFFF
  %b16 = and i32 %b, 65535 ; 0xFFFF
  %add = add i32 %a16, %b16
  %lshr = lshr i32 %add, 16
  ret i32 %lshr
}

; One input has 17 significant bits, so the high half is not just a carry bit.
define i32 @lshr_16_add_not_known_16_leading_zeroes(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_16_add_not_known_16_leading_zeroes(
; CHECK-NEXT:    [[A16:%.*]] = and i32 [[A:%.*]], 131071
; CHECK-NEXT:    [[B16:%.*]] = and i32 [[B:%.*]], 65535
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[A16]], [[B16]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[ADD]], 16
; CHECK-NEXT:    ret i32 [[LSHR]]
;
  %a16 = and i32 %a, 131071 ; 0x1FFFF
  %b16 = and i32 %b, 65535 ; 0xFFFF
  %add = add i32 %a16, %b16
  %lshr = lshr i32 %add, 16
  ret i32 %lshr
}

; i64 version: the top 32 bits of (zext i32 + zext i32) are the narrow carry.
define i64 @lshr_32_add_zext_basic(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_32_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ugt i32 [[B:%.*]], [[TMP1]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i64
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %zext.a = zext i32 %a to i64
  %zext.b = zext i32 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = lshr i64 %add, 32
  ret i64 %lshr
}

; negative test - an extra use of the wide value blocks the carry-bit fold.
define i64 @lshr_32_add_zext_basic_multiuse(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_32_add_zext_basic_multiuse(
; CHECK-NEXT:    [[ZEXT_A:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[ZEXT_B:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[ZEXT_A]], [[ZEXT_B]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i64 [[ADD]], 32
; CHECK-NEXT:    [[OTHERUSE:%.*]] = or i64 [[LSHR]], [[ZEXT_B]]
; CHECK-NEXT:    ret i64 [[OTHERUSE]]
;
  %zext.a = zext i32 %a to i64
  %zext.b = zext i32 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = lshr i64 %add, 32
  %otheruse = or i64 %lshr, %zext.b
  ret i64 %otheruse
}

; negative test - shift amount (31) does not match the narrow width (32).
define i64 @lshr_31_i32_add_zext_basic(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_31_i32_add_zext_basic(
; CHECK-NEXT:    [[ZEXT_A:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[ZEXT_B:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[ZEXT_A]], [[ZEXT_B]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i64 [[ADD]], 31
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %zext.a = zext i32 %a to i64
  %zext.b = zext i32 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = lshr i64 %add, 31
  ret i64 %lshr
}

; Shifting past the carry bit: the sum fits in 33 bits, so lshr by 33 is 0.
define i64 @lshr_33_i32_add_zext_basic(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_33_i32_add_zext_basic(
; CHECK-NEXT:    ret i64 0
;
  %zext.a = zext i32 %a to i64
  %zext.b = zext i32 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = lshr i64 %add, 33
  ret i64 %lshr
}

; Narrow sources (i16) in a wider destination (i64): carry-bit fold still applies.
define i64 @lshr_16_to_64_add_zext_basic(i16 %a, i16 %b) {
; CHECK-LABEL: @lshr_16_to_64_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[A:%.*]], -1
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ugt i16 [[B:%.*]], [[TMP1]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i64
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %zext.a = zext i16 %a to i64
  %zext.b = zext i16 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = lshr i64 %add, 16
  ret i64 %lshr
}

; Both inputs masked to 32 bits: the add cannot overflow i64 (nuw/nsw inferred).
define i64 @lshr_32_add_known_32_leading_zeroes(i64 %a, i64 %b) {
; CHECK-LABEL: @lshr_32_add_known_32_leading_zeroes(
; CHECK-NEXT:    [[A32:%.*]] = and i64 [[A:%.*]], 4294967295
; CHECK-NEXT:    [[B32:%.*]] = and i64 [[B:%.*]], 4294967295
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[A32]], [[B32]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i64 [[ADD]], 32
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %a32 = and i64 %a, 4294967295 ; 0xFFFFFFFF
  %b32 = and i64 %b, 4294967295 ; 0xFFFFFFFF
  %add = add i64 %a32, %b32
  %lshr = lshr i64 %add, 32
  ret i64 %lshr
}

; One input has 33 significant bits, so the high half is not just a carry bit.
define i64 @lshr_32_add_not_known_32_leading_zeroes(i64 %a, i64 %b) {
;
; CHECK-LABEL: @lshr_32_add_not_known_32_leading_zeroes(
; CHECK-NEXT:    [[A32:%.*]] = and i64 [[A:%.*]], 8589934591
; CHECK-NEXT:    [[B32:%.*]] = and i64 [[B:%.*]], 4294967295
; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[A32]], [[B32]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i64 [[ADD]], 32
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %a32 = and i64 %a, 8589934591 ; 0x1FFFFFFFF
  %b32 = and i64 %b, 4294967295 ; 0xFFFFFFFF
  %add = add i64 %a32, %b32
  %lshr = lshr i64 %add, 32
  ret i64 %lshr
}

; Despite the name, the body uses lshr; folds to the narrow carry bit.
define i32 @ashr_16_add_zext_basic(i16 %a, i16 %b) {
; CHECK-LABEL: @ashr_16_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[A:%.*]], -1
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ugt i16 [[B:%.*]], [[TMP1]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i32
; CHECK-NEXT:    ret i32 [[LSHR]]
;
  %zext.a = zext i16 %a to i32
  %zext.b = zext i16 %b to i32
  %add = add i32 %zext.a, %zext.b
  %lshr = lshr i32 %add, 16
  ret i32 %lshr
}

; ashr of a known-non-negative sum behaves like lshr, so the same fold applies.
define i64 @ashr_32_add_zext_basic(i32 %a, i32 %b) {
; CHECK-LABEL: @ashr_32_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ugt i32 [[B:%.*]], [[TMP1]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i64
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %zext.a = zext i32 %a to i64
  %zext.b = zext i32 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = ashr i64 %add, 32
  ret i64 %lshr
}

; ashr version with narrow i16 sources in an i64 destination.
define i64 @ashr_16_to_64_add_zext_basic(i16 %a, i16 %b) {
; CHECK-LABEL: @ashr_16_to_64_add_zext_basic(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[A:%.*]], -1
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ugt i16 [[B:%.*]], [[TMP1]]
; CHECK-NEXT:    [[LSHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i64
; CHECK-NEXT:    ret i64 [[LSHR]]
;
  %zext.a = zext i16 %a to i64
  %zext.b = zext i16 %b to i64
  %add = add i64 %zext.a, %zext.b
  %lshr = ashr i64 %add, 16
  ret i64 %lshr
}

; Wide add split into narrow add + carry: low half via trunc, high half via lshr.
define i32 @lshr_32_add_zext_trunc(i32 %a, i32 %b) {
; CHECK-LABEL: @lshr_32_add_zext_trunc(
; CHECK-NEXT:    [[ADD_NARROWED:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ult i32 [[ADD_NARROWED]], [[A]]
; CHECK-NEXT:    [[TRUNC_SHR:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i32
; CHECK-NEXT:    [[RET:%.*]] = add i32 [[ADD_NARROWED]], [[TRUNC_SHR]]
; CHECK-NEXT:    ret i32 [[RET]]
;
  %zext.a = zext i32 %a to i64
  %zext.b = zext i32 %b to i64
  %add = add i64 %zext.a, %zext.b
  %trunc.add = trunc i64 %add to i32
  %shr = lshr i64 %add, 32
  %trunc.shr = trunc i64 %shr to i32
  %ret = add i32 %trunc.add, %trunc.shr
  ret i32 %ret
}

; 96-bit addition built from 32-bit limbs; the low-limb wide add is narrowed
; to a 32-bit add plus an overflow (carry) bit.
define <3 x i32> @add3_i96(<3 x i32> %0, <3 x i32> %1) {
; CHECK-LABEL: @add3_i96(
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x i32> [[TMP0:%.*]], i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <3 x i32> [[TMP1:%.*]], i64 0
; CHECK-NEXT:    [[ADD_NARROWED:%.*]] = add i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[ADD_NARROWED_OVERFLOW:%.*]] = icmp ult i32 [[ADD_NARROWED]], [[TMP4]]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <3 x i32> [[TMP0]], i64 1
; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <3 x i32> [[TMP1]], i64 1
; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP8]], [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = zext i1 [[ADD_NARROWED_OVERFLOW]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = add nuw nsw i64 [[TMP9]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <3 x i32> [[TMP0]], i64 2
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <3 x i32> [[TMP1]], i64 2
; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP12]]
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP11]], 32
; CHECK-NEXT:    [[TMP16:%.*]] = trunc nuw nsw i64 [[TMP15]] to i32
; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <3 x i32> poison, i32 [[ADD_NARROWED]], i64 0
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP11]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <3 x i32> [[TMP18]], i32 [[TMP19]], i64 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <3 x i32> [[TMP20]], i32 [[TMP17]], i64 2
; CHECK-NEXT:    ret <3 x i32> [[TMP21]]
;
  %3 = extractelement <3 x i32> %0, i64 0
  %4 = zext i32 %3 to i64
  %5 = extractelement <3 x i32> %1, i64 0
  %6 = zext i32 %5 to i64
  %7 = add nuw nsw i64 %6, %4
  %8 = extractelement <3 x i32> %0, i64 1
  %9 = zext i32 %8 to i64
  %10 = extractelement <3 x i32> %1, i64 1
  %11 = zext i32 %10 to i64
  %12 = add nuw nsw i64 %11, %9
  %13 = lshr i64 %7, 32
  %14 = add nuw nsw i64 %12, %13
  %15 = extractelement <3 x i32> %0, i64 2
  %16 = extractelement <3 x i32> %1, i64 2
  %17 = add i32 %16, %15
  %18 = lshr i64 %14, 32
  %19 = trunc i64 %18 to i32
  %20 = add i32 %17, %19
  %21 = trunc i64 %7 to i32
  %22 = insertelement <3 x i32> undef, i32 %21, i32 0
  %23 = trunc i64 %14 to i32
  %24 = insertelement <3 x i32> %22, i32 %23, i32 1
  %25 = insertelement <3 x i32> %24, i32 %20, i32 2
  ret <3 x i32> %25
}

; 'or disjoint' acts like an add, so the constant is pre-shifted: 2 << 3 == 16.
define i8 @shl_fold_or_disjoint_cnt(i8 %x) {
; CHECK-LABEL: @shl_fold_or_disjoint_cnt(
; CHECK-NEXT:    [[R:%.*]] = shl i8 16, [[X:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %a = or disjoint i8 %x, 3
  %r = shl i8 2, %a
  ret i8 %r
}

; Vector 'or disjoint' version; the positive-constant ashr becomes lshr.
define <2 x i8> @ashr_fold_or_disjoint_cnt(<2 x i8> %x) {
; CHECK-LABEL: @ashr_fold_or_disjoint_cnt(
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i8> <i8 0, i8 1>, [[X:%.*]]
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %a = or disjoint <2 x i8> %x, <i8 3, i8 1>
  %r = ashr <2 x i8> <i8 2, i8 3>, %a
  ret <2 x i8> %r
}

; A lane's minimum shift amount (8) reaches the bit width, so the result is 0.
define <2 x i8> @lshr_fold_or_disjoint_cnt_out_of_bounds(<2 x i8> %x) {
; CHECK-LABEL: @lshr_fold_or_disjoint_cnt_out_of_bounds(
; CHECK-NEXT:    ret <2 x i8> zeroinitializer
;
  %a = or disjoint <2 x i8> %x, <i8 3, i8 8>
  %r = lshr <2 x i8> <i8 2, i8 3>, %a
  ret <2 x i8> %r
}