; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64

;; Use cttz to test if we properly prove never-zero. There is a very
;; simple transform from cttz -> cttz_zero_undef if its operand is
;; known never zero.
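;;
;; Note on reading the checks below: when the cttz operand is provably
;; non-zero, the expected lowering is a bare "rep bsfl" with no zero guard;
;; when the operand may be zero, the checks keep the zero fallback, i.e.
;; "bsfl" plus "movl $32"/"cmovnel" on X86, or "rep bsfl" into a register
;; preloaded with 32 on X64.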
declare i32 @llvm.cttz.i32(i32, i1)
declare i32 @llvm.uadd.sat.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.smax.i32(i32, i32)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.bswap.i32(i32)
declare i32 @llvm.bitreverse.i32(i32)
declare i32 @llvm.ctpop.i32(i32)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare i32 @llvm.abs.i32(i32, i1)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i32 @llvm.fshr.i32(i32, i32, i32)

define i32 @or_known_nonzero(i32 %x) {
; X86-LABEL: or_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: or_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %z = or i32 %x, 1
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @or_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: or_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: or_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    orl %esi, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %z = or i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @select_known_nonzero(i1 %c, i32 %x) {
; X86-LABEL: select_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $122, %ecx
; X86-NEXT:    cmovnel %eax, %ecx
; X86-NEXT:    rep bsfl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: select_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %esi
; X64-NEXT:    testb $1, %dil
; X64-NEXT:    movl $122, %eax
; X64-NEXT:    cmovnel %esi, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %y = or i32 %x, 1
  %z = select i1 %c, i32 %y, i32 122
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X86-LABEL: select_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    xorl %ecx, %ecx
; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnel %eax, %ecx
; X86-NEXT:    bsfl %ecx, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: select_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %esi
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    testb $1, %dil
; X64-NEXT:    cmovnel %esi, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %y = or i32 %x, 1
  %z = select i1 %c, i32 %y, i32 0
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_known_nonzero_1s_bit_set(i32 %x) {
; X86-LABEL: shl_known_nonzero_1s_bit_set:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $123, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shl_known_nonzero_1s_bit_set:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $123, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %z = shl i32 123, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: shl_known_nonzero_nsw:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shl_known_nonzero_nsw:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    orl $256, %esi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %y = or i32 %yy, 256
  %z = shl nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-LABEL: shl_known_nonzero_nuw:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shl_known_nonzero_nuw:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    orl $256, %esi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %y = or i32 %yy, 256
  %z = shl nuw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: shl_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: shl_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %esi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %z = shl nuw nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @uaddsat_known_nonzero(i32 %x) {
; X86-LABEL: uaddsat_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    incl %eax
; X86-NEXT:    movl $-1, %ecx
; X86-NEXT:    cmovnel %eax, %ecx
; X86-NEXT:    rep bsfl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: uaddsat_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    incl %edi
; X64-NEXT:    movl $-1, %eax
; X64-NEXT:    cmovnel %edi, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: uaddsat_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $-1, %ecx
; X86-NEXT:    cmovael %eax, %ecx
; X86-NEXT:    bsfl %ecx, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: uaddsat_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    addl %esi, %edi
; X64-NEXT:    movl $-1, %ecx
; X64-NEXT:    cmovael %edi, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umax_known_nonzero(i32 %x, i32 %y) {
; X86-LABEL: umax_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $4, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    cmpl %edx, %eax
; X86-NEXT:    cmoval %eax, %edx
; X86-NEXT:    rep bsfl %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: umax_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    cmpl %eax, %edi
; X64-NEXT:    cmoval %edi, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %yy = shl nuw i32 4, %y
  %z = call i32 @llvm.umax.i32(i32 %x, i32 %yy)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umax_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umax_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmoval %ecx, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: umax_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    cmoval %edi, %esi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: umin_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $4, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    addl $4, %eax
; X86-NEXT:    cmpl %eax, %edx
; X86-NEXT:    cmovbl %edx, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: umin_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    addl $4, %esi
; X64-NEXT:    cmpl %esi, %eax
; X64-NEXT:    cmovbl %eax, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %x = shl nuw i32 4, %xx
  %y = add nuw nsw i32 %yy, 4
  %z = call i32 @llvm.umin.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umin_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl $54, %eax
; X86-NEXT:    movl $54, %ecx
; X86-NEXT:    cmovbl %eax, %ecx
; X86-NEXT:    bsfl %ecx, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: umin_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    cmpl $54, %edi
; X64-NEXT:    movl $54, %ecx
; X64-NEXT:    cmovbl %edi, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.umin.i32(i32 %x, i32 54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smin_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smin_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $4, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    addl $4, %eax
; X86-NEXT:    cmpl %eax, %edx
; X86-NEXT:    cmovll %edx, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: smin_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    addl $4, %esi
; X64-NEXT:    cmpl %esi, %eax
; X64-NEXT:    cmovll %eax, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %x = shl nuw i32 4, %xx
  %y = add nuw nsw i32 %yy, 4
  %z = call i32 @llvm.smin.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smin_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_known_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl $-54, %eax
; X86-NEXT:    movl $-54, %ecx
; X86-NEXT:    cmovll %eax, %ecx
; X86-NEXT:    rep bsfl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: smin_known_zero:
; X64:       # %bb.0:
; X64-NEXT:    cmpl $-54, %edi
; X64-NEXT:    movl $-54, %eax
; X64-NEXT:    cmovll %edi, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-LABEL: smin_known_zero_vec:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [4294967242,4294967273,4294967284,4294967295]
; X86-NEXT:    movdqa %xmm1, %xmm2
; X86-NEXT:    pcmpgtd %xmm0, %xmm2
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    pandn %xmm1, %xmm2
; X86-NEXT:    por %xmm2, %xmm0
; X86-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    pand %xmm1, %xmm0
; X86-NEXT:    pxor %xmm1, %xmm1
; X86-NEXT:    pcmpeqd %xmm1, %xmm0
; X86-NEXT:    psrld $31, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: smin_known_zero_vec:
; X64:       # %bb.0:
; X64-NEXT:    vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsrld $31, %xmm0, %xmm0
; X64-NEXT:    retq
  %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
  %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
  %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
  %ret = zext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %ret
}

define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl $54, %eax
; X86-NEXT:    movl $54, %ecx
; X86-NEXT:    cmovll %eax, %ecx
; X86-NEXT:    bsfl %ecx, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: smin_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    cmpl $54, %edi
; X64-NEXT:    movl $54, %ecx
; X64-NEXT:    cmovll %edi, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.smin.i32(i32 %x, i32 54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smax_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $4, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    addl $4, %eax
; X86-NEXT:    cmpl %eax, %edx
; X86-NEXT:    cmovgl %edx, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: smax_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    addl $4, %esi
; X64-NEXT:    cmpl %esi, %eax
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %x = shl nuw i32 4, %xx
  %y = add nuw nsw i32 %yy, 4
  %z = call i32 @llvm.smax.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smax_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl $55, %eax
; X86-NEXT:    movl $54, %ecx
; X86-NEXT:    cmovgel %eax, %ecx
; X86-NEXT:    rep bsfl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: smax_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    cmpl $55, %edi
; X64-NEXT:    movl $54, %eax
; X64-NEXT:    cmovgel %edi, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.smax.i32(i32 %x, i32 54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-LABEL: smax_known_zero_vec:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [54,23,12,1]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    pandn %xmm1, %xmm2
; X86-NEXT:    por %xmm2, %xmm0
; X86-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    pand %xmm1, %xmm0
; X86-NEXT:    pxor %xmm1, %xmm1
; X86-NEXT:    pcmpeqd %xmm1, %xmm0
; X86-NEXT:    psrld $31, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: smax_known_zero_vec:
; X64:       # %bb.0:
; X64-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsrld $31, %xmm0, %xmm0
; X64-NEXT:    retq
  %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
  %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
  %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
  %ret = zext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %ret
}

define i32 @smax_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_known_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    testl %eax, %eax
; X86-NEXT:    movl $-1, %ecx
; X86-NEXT:    cmovnsl %eax, %ecx
; X86-NEXT:    bsfl %ecx, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: smax_known_zero:
; X64:       # %bb.0:
; X64-NEXT:    testl %edi, %edi
; X64-NEXT:    movl $-1, %ecx
; X64-NEXT:    cmovnsl %edi, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorl %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotr_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    orl $256, %edi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorl %cl, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 256
  %shr = lshr i32 %x, %y
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %sub
  %z = or i32 %shl, %shr
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotr_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorl %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotr_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorl %cl, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %shr = lshr i32 %x, %y
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %sub
  %z = or i32 %shl, %shr
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_with_fshr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_with_fshr_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorl %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotr_with_fshr_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    orl $256, %edi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorl %cl, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 256
  %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotr_with_fshr_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorl %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorl %cl, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotl_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotl_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    orl $256, %edi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 256
  %shl = shl i32 %x, %y
  %sub = sub i32 32, %y
  %shr = lshr i32 %x, %sub
  %z = or i32 %shr, %shl
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotl_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotl_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %shl = shl i32 %x, %y
  %sub = sub i32 32, %y
  %shr = lshr i32 %x, %sub
  %z = or i32 %shr, %shl
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_with_fshl_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotl_with_fshl_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotl_with_fshl_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    orl $256, %edi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 256
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotl_with_fshl_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sra_known_nonzero_sign_bit_set(i32 %x) {
; X86-LABEL: sra_known_nonzero_sign_bit_set:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $-2147360405, %eax # imm = 0x8001E16B
; X86-NEXT:    sarl %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sra_known_nonzero_sign_bit_set:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $-2147360405, %eax # imm = 0x8001E16B
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    sarl %cl, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %z = ashr i32 2147606891, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sra_known_nonzero_exact(i32 %x, i32 %yy) {
; X86-LABEL: sra_known_nonzero_exact:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sra_known_nonzero_exact:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    orl $256, %esi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    sarl %cl, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %y = or i32 %yy, 256
  %z = ashr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sra_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sra_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sra_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    sarl %cl, %esi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %z = ashr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @srl_known_nonzero_sign_bit_set(i32 %x) {
; X86-LABEL: srl_known_nonzero_sign_bit_set:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $-2147360405, %eax # imm = 0x8001E16B
; X86-NEXT:    shrl %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: srl_known_nonzero_sign_bit_set:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $-2147360405, %eax # imm = 0x8001E16B
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrl %cl, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %z = lshr i32 2147606891, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @srl_known_nonzero_exact(i32 %x, i32 %yy) {
; X86-LABEL: srl_known_nonzero_exact:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %cl, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: srl_known_nonzero_exact:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    orl $256, %esi # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrl %cl, %esi
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %y = or i32 %yy, 256
  %z = lshr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @srl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: srl_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %cl, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: srl_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrl %cl, %esi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %z = lshr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: udiv_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $64, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    divl {{[0-9]+}}(%esp)
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: udiv_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    orl $64, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 64
  %z = udiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: udiv_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    divl {{[0-9]+}}(%esp)
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: udiv_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    movl $32, %ecx
; X64-NEXT:    rep bsfl %eax, %ecx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
  %z = udiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: sdiv_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $64, %eax
; X86-NEXT:    cltd
; X86-NEXT:    idivl {{[0-9]+}}(%esp)
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sdiv_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    orl $64, %eax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %esi
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 64
  %z = sdiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sdiv_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cltd
; X86-NEXT:    idivl {{[0-9]+}}(%esp)
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sdiv_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %esi
; X64-NEXT:    movl $32, %ecx
; X64-NEXT:    rep bsfl %eax, %ecx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
  %z = sdiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @add_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: add_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: add_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    addl %esi, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 1
  %z = add nuw i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X86-LABEL: add_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: add_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    addl %esi, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 1
  %z = add nsw i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_known_nonzero_neg_case(i32 %xx) {
; X86-LABEL: sub_known_nonzero_neg_case:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_known_nonzero_neg_case:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $256, %eax # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    negl %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %x = shl nuw nsw i32 256, %xx
  %z = sub i32 0, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
; X86-LABEL: sub_known_nonzero_ne_case:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl $64, %ecx
; X86-NEXT:    andl $-65, %eax
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_known_nonzero_ne_case:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    orl $64, %eax
; X64-NEXT:    andl $-65, %edi
; X64-NEXT:    subl %eax, %edi
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %x = or i32 %xx, 64
  %y = and i32 %xx, -65
  %z = sub i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_maybe_zero(i32 %x) {
; X86-LABEL: sub_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    orl $64, %ecx
; X86-NEXT:    subl %eax, %ecx
; X86-NEXT:    bsfl %ecx, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    orl $64, %ecx
; X64-NEXT:    subl %edi, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %y = or i32 %x, 64
  %z = sub i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_maybe_zero2(i32 %x) {
; X86-LABEL: sub_maybe_zero2:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sub_maybe_zero2:
; X64:       # %bb.0:
; X64-NEXT:    negl %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %z = sub i32 0, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nsw:
; X86:       # %bb.0:
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: mul_known_nonzero_nsw:
; X64:       # %bb.0:
; X64-NEXT:    orl $256, %esi # imm = 0x100
; X64-NEXT:    imull %edi, %esi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %y = or i32 %yy, 256
  %z = mul nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nuw:
; X86:       # %bb.0:
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: mul_known_nonzero_nuw:
; X64:       # %bb.0:
; X64-NEXT:    orl $256, %esi # imm = 0x100
; X64-NEXT:    imull %edi, %esi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %esi, %eax
; X64-NEXT:    retq
  %y = or i32 %yy, 256
  %z = mul nuw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: mul_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: mul_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    imull %esi, %edi
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
  %z = mul nuw nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
; X86-LABEL: bitcast_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    pslld $23, %xmm0
; X86-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    cvttps2dq %xmm0, %xmm0
; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X86-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,256,u,u,u,u,u,u]
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bitcast_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpslld $23, %xmm0, %xmm0
; X64-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; X64-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
; X64-NEXT:    vmovd %xmm0, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
  %z = bitcast <2 x i16> %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X86-LABEL: bitcast_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bitcast_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %xmm0, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = bitcast <2 x i16> %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @bitcast_from_float(float %x) {
; X86-LABEL: bitcast_from_float:
; X86:       # %bb.0:
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bitcast_from_float:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %xmm0, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = bitcast float %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @zext_known_nonzero(i16 %xx) {
; X86-LABEL: zext_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: zext_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $256, %eax # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %x = shl nuw nsw i16 256, %xx
  %z = zext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @zext_maybe_zero(i16 %x) {
; X86-LABEL: zext_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: zext_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = zext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sext_known_nonzero(i16 %xx) {
; X86-LABEL: sext_known_nonzero:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sext_known_nonzero:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    movl $256, %eax # imm = 0x100
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    retq
  %x = shl nuw nsw i16 256, %xx
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sext_maybe_zero(i16 %x) {
; X86-LABEL: sext_maybe_zero:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bsfl %eax, %ecx
; X86-NEXT:    movl $32, %eax
; X86-NEXT:    cmovnel %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: sext_maybe_zero:
; X64:       # %bb.0:
; X64-NEXT:    movswl %di, %ecx
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %ecx, %eax
; X64-NEXT:    retq
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}