; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX

; fold (sub x, 0) -> x
define <4 x i32> @combine_vec_sub_zero(<4 x i32> %a) {
; CHECK-LABEL: combine_vec_sub_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = sub <4 x i32> %a, zeroinitializer
  ret <4 x i32> %1
}

; fold (sub x, x) -> 0
define <4 x i32> @combine_vec_sub_self(<4 x i32> %a) {
; SSE-LABEL: combine_vec_sub_self:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_self:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %a
  ret <4 x i32> %1
}

; fold (sub x, c) -> (add x, -c)
define <4 x i32> @combine_vec_sub_constant(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sub_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %x, <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %1
}

; Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
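; Illustrative reasoning (not part of the checked output): in two's complement,
; ~x == -1 - x, so the sub below becomes an xor with an all-ones vector
; (materialized by pcmpeqd).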
define <4 x i32> @combine_vec_sub_negone(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sub_negone:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_negone:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %x
  ret <4 x i32> %1
}

; fold A-(A-B) -> B
define <4 x i32> @combine_vec_sub_sub(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_sub_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> %a, %1
  ret <4 x i32> %2
}

; fold (A+B)-A -> B
define <4 x i32> @combine_vec_sub_add0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_sub_add0:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_add0:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %a, %b
  %2 = sub <4 x i32> %1, %a
  ret <4 x i32> %2
}

; fold (A+B)-B -> A
define <4 x i32> @combine_vec_sub_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: combine_vec_sub_add1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = add <4 x i32> %a, %b
  %2 = sub <4 x i32> %1, %b
  ret <4 x i32> %2
}

; fold C2-(A+C1) -> (C2-C1)-A
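; Illustrative arithmetic (not part of the checked output): with the constants
; below, C2 - C1 = <3-0, 2-1, 1-2, 0-3> = <3, 1, -1, -3>, which is the
; [3,1,4294967295,4294967293] vector the checks materialize (the last two lanes
; are the unsigned renderings of -1 and -3).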
define <4 x i32> @combine_vec_sub_constant_add(<4 x i32> %a) {
; SSE-LABEL: combine_vec_sub_constant_add:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbd {{.*#+}} xmm1 = [3,1,4294967295,4294967293]
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_constant_add:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [3,1,4294967295,4294967293]
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %a, <i32 0, i32 1, i32 2, i32 3>
  %2 = sub <4 x i32> <i32 3, i32 2, i32 1, i32 0>, %1
  ret <4 x i32> %2
}

; fold ((A+(B+C))-B) -> A+C
define <4 x i32> @combine_vec_sub_add_add(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_sub_add_add:
; SSE:       # %bb.0:
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_add_add:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %b, %c
  %2 = add <4 x i32> %a, %1
  %3 = sub <4 x i32> %2, %b
  ret <4 x i32> %3
}

; fold ((A+(B-C))-B) -> A-C
define <4 x i32> @combine_vec_sub_add_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_sub_add_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_add_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %c
  %2 = add <4 x i32> %a, %1
  %3 = sub <4 x i32> %2, %b
  ret <4 x i32> %3
}

; fold ((A-(B-C))-C) -> A-B
define <4 x i32> @combine_vec_sub_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_sub_sub_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_sub_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %c
  %2 = sub <4 x i32> %a, %1
  %3 = sub <4 x i32> %2, %c
  ret <4 x i32> %3
}

; fold undef-A -> undef
define <4 x i32> @combine_vec_sub_undef0(<4 x i32> %a) {
; CHECK-LABEL: combine_vec_sub_undef0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = sub <4 x i32> undef, %a
  ret <4 x i32> %1
}

; fold A-undef -> undef
define <4 x i32> @combine_vec_sub_undef1(<4 x i32> %a) {
; CHECK-LABEL: combine_vec_sub_undef1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = sub <4 x i32> %a, undef
  ret <4 x i32> %1
}

; sub X, (sext Y i1) -> add X, (and Y 1)
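; Illustrative reasoning (not part of the checked output): sext of an i1 is
; either 0 or -1, and X - (-1) == X + 1, so subtracting the sign-extension is
; the same as adding the zero-extension (Y & 1).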
define <4 x i32> @combine_vec_add_sext(<4 x i32> %x, <4 x i1> %y) {
; SSE-LABEL: combine_vec_add_sext:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sext:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sext <4 x i1> %y to <4 x i32>
  %2 = sub <4 x i32> %x, %1
  ret <4 x i32> %2
}

; sub X, (sextinreg Y i1) -> add X, (and Y 1)
define <4 x i32> @combine_vec_sub_sextinreg(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_sub_sextinreg:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_sextinreg:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %y, <i32 31, i32 31, i32 31, i32 31>
  %2 = ashr <4 x i32> %1, <i32 31, i32 31, i32 31, i32 31>
  %3 = sub <4 x i32> %x, %2
  ret <4 x i32> %3
}

; sub C2, (xor X, C1) -> add (xor X, ~C1), C2+1
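; Illustrative arithmetic (not part of the checked output): here C1 = 31 and
; C2 = 32, and x ^ -32 == ~(x ^ 31) == -(x ^ 31) - 1, so
; 32 - (x ^ 31) == (x ^ -32) + 33, i.e. the xorl $-32 / leal 33 pair below.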
define i32 @combine_sub_xor_consts(i32 %x) {
; CHECK-LABEL: combine_sub_xor_consts:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    xorl $-32, %edi
; CHECK-NEXT:    leal 33(%rdi), %eax
; CHECK-NEXT:    retq
  %xor = xor i32 %x, 31
  %sub = sub i32 32, %xor
  ret i32 %sub
}

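; Vector version of the fold above. Illustrative reasoning (not part of the
; checked output; the %rip-relative pool constants are not shown): in general
; C2 - (X ^ C1) == (X ^ ~C1) + (C2 + 1), so the pxor/paddd constants would
; presumably be ~<28,29,-1,-31> = <-29,-30,0,30> and <2,3,4,5>.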
define <4 x i32> @combine_vec_sub_xor_consts(<4 x i32> %x) {
; SSE-LABEL: combine_vec_sub_xor_consts:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_sub_xor_consts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %xor = xor <4 x i32> %x, <i32 28, i32 29, i32 -1, i32 -31>
  %sub = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %xor
  ret <4 x i32> %sub
}

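; Negation of a 'not'. Illustrative reasoning (not part of the checked output):
; 0 - (x ^ -1) == x + 1, and the checks compute x + 1 as x - (-1) with an
; all-ones register from pcmpeqd instead of loading a +1 splat constant.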
define <4 x i32> @combine_vec_neg_xor_consts(<4 x i32> %x) {
; SSE-LABEL: combine_vec_neg_xor_consts:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_neg_xor_consts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %xor = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %sub = sub <4 x i32> zeroinitializer, %xor
  ret <4 x i32> %sub
}

; With AVX, this could use broadcast (an extra load) and
; load-folded 'add', but currently we favor the virtually
; free pcmpeq instruction.

define void @PR52032_oneuse_constant(ptr %p) {
; SSE-LABEL: PR52032_oneuse_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    movdqu 16(%rdi), %xmm1
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    psubd %xmm2, %xmm1
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rdi)
; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR52032_oneuse_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %i3 = load <8 x i32>, ptr %p, align 4
  %i4 = add nsw <8 x i32> %i3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  store <8 x i32> %i4, ptr %p, align 4
  ret void
}

; With AVX, we don't transform 'add' to 'sub' because that prevents load folding.
; With SSE, we do it because we can't load fold the other op without overwriting the constant op.

define void @PR52032(ptr %p) {
; SSE-LABEL: PR52032:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    movdqu (%rdi), %xmm1
; SSE-NEXT:    movdqu 16(%rdi), %xmm2
; SSE-NEXT:    movdqu 32(%rdi), %xmm3
; SSE-NEXT:    movdqu 48(%rdi), %xmm4
; SSE-NEXT:    psubd %xmm0, %xmm2
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqu %xmm1, (%rdi)
; SSE-NEXT:    movdqu %xmm2, 16(%rdi)
; SSE-NEXT:    psubd %xmm0, %xmm4
; SSE-NEXT:    psubd %xmm0, %xmm3
; SSE-NEXT:    movdqu %xmm3, 32(%rdi)
; SSE-NEXT:    movdqu %xmm4, 48(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR52032:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
; AVX-NEXT:    vpaddd (%rdi), %ymm0, %ymm1
; AVX-NEXT:    vmovdqu %ymm1, (%rdi)
; AVX-NEXT:    vpaddd 32(%rdi), %ymm0, %ymm0
; AVX-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %i3 = load <8 x i32>, ptr %p, align 4
  %i4 = add nsw <8 x i32> %i3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  store <8 x i32> %i4, ptr %p, align 4
  %p2 = getelementptr inbounds <8 x i32>, ptr %p, i64 1
  %i8 = load <8 x i32>, ptr %p2, align 4
  %i9 = add nsw <8 x i32> %i8, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  store <8 x i32> %i9, ptr %p2, align 4
  ret void
}

; Same as above, but 128-bit ops:
; With AVX, we don't transform 'add' to 'sub' because that prevents load folding.
; With SSE, we do it because we can't load fold the other op without overwriting the constant op.

define void @PR52032_2(ptr %p) {
; SSE-LABEL: PR52032_2:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    movdqu (%rdi), %xmm1
; SSE-NEXT:    movdqu 16(%rdi), %xmm2
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqu %xmm1, (%rdi)
; SSE-NEXT:    psubd %xmm0, %xmm2
; SSE-NEXT:    movdqu %xmm2, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR52032_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1]
; AVX-NEXT:    vpaddd (%rdi), %xmm0, %xmm1
; AVX-NEXT:    vmovdqu %xmm1, (%rdi)
; AVX-NEXT:    vpaddd 16(%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT:    retq
  %i3 = load <4 x i32>, ptr %p, align 4
  %i4 = add nsw <4 x i32> %i3, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %i4, ptr %p, align 4
  %p2 = getelementptr inbounds <4 x i32>, ptr %p, i64 1
  %i8 = load <4 x i32>, ptr %p2, align 4
  %i9 = add nsw <4 x i32> %i8, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %i9, ptr %p2, align 4
  ret void
}

; If we are starting with a 'sub', it is always better to do the transform.

define void @PR52032_3(ptr %p) {
; SSE-LABEL: PR52032_3:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    movdqu (%rdi), %xmm1
; SSE-NEXT:    movdqu 16(%rdi), %xmm2
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    movdqu %xmm1, (%rdi)
; SSE-NEXT:    paddd %xmm0, %xmm2
; SSE-NEXT:    movdqu %xmm2, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR52032_3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpaddd (%rdi), %xmm0, %xmm1
; AVX-NEXT:    vmovdqu %xmm1, (%rdi)
; AVX-NEXT:    vpaddd 16(%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT:    retq
  %i3 = load <4 x i32>, ptr %p, align 4
  %i4 = sub nsw <4 x i32> %i3, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %i4, ptr %p, align 4
  %p2 = getelementptr inbounds <4 x i32>, ptr %p, i64 1
  %i8 = load <4 x i32>, ptr %p2, align 4
  %i9 = sub nsw <4 x i32> %i8, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %i9, ptr %p2, align 4
  ret void
}

; If there's no chance of profitable load folding (because of extra uses), we convert 'add' to 'sub'.

define void @PR52032_4(ptr %p, ptr %q) {
; SSE-LABEL: PR52032_4:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rdi)
; SSE-NEXT:    movdqu 16(%rdi), %xmm0
; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR52032_4:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    vmovdqa %xmm0, (%rsi)
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    vmovdqu 16(%rdi), %xmm0
; AVX-NEXT:    vmovdqa %xmm0, 16(%rsi)
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT:    retq
  %i3 = load <4 x i32>, ptr %p, align 4
  store <4 x i32> %i3, ptr %q
  %i4 = add nsw <4 x i32> %i3, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %i4, ptr %p, align 4
  %p2 = getelementptr inbounds <4 x i32>, ptr %p, i64 1
  %q2 = getelementptr inbounds <4 x i32>, ptr %q, i64 1
  %i8 = load <4 x i32>, ptr %p2, align 4
  store <4 x i32> %i8, ptr %q2
  %i9 = add nsw <4 x i32> %i8, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %i9, ptr %p2, align 4
  ret void
}

; Fold sub(32,xor(bsr(x),31)) -> add(xor(bsr(x),-32),33) -> add(or(bsr(x),-32),33) -> add(bsr(x),1)
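; Illustrative reasoning (not part of the checked output): bsr of a nonzero i32
; lies in [0, 31], so its upper bits are clear, xor(bsr(x),-32) equals
; or(bsr(x),-32) == bsr(x) - 32, and adding 33 leaves bsr(x) + 1 (the incl).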
define i32 @PR74101(i32 %a0) {
; CHECK-LABEL: PR74101:
; CHECK:       # %bb.0:
; CHECK-NEXT:    bsrl %edi, %eax
; CHECK-NEXT:    incl %eax
; CHECK-NEXT:    retq
  %lz = call i32 @llvm.ctlz.i32(i32 %a0, i1 true)
  %add = sub nuw nsw i32 32, %lz
  ret i32 %add
}
