; xref: /llvm-project/llvm/test/CodeGen/X86/cttz.ll (revision 90e9895a9373b3d83eefe15b34d2dc83c7bcc88f)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X64-FASTLZCNT
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X86-FASTLZCNT

; Trailing-zero-count intrinsics exercised by the tests below, one per integer
; width. Per the LLVM LangRef the i1 operand is the is_zero_poison flag: true
; makes a zero input produce poison, false defines the result as the bit width.
declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

; cttz on i8 with the is_zero_poison flag set (i1 true). Every configuration is
; expected to emit a single 32-bit bsf/tzcnt and return the low byte of eax.
define i8 @cttz_i8(i8 %x)  {
; X86-LABEL: cttz_i8:
; X86:       # %bb.0:
; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i8:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i8:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
  ret i8 %tmp
}

; cttz on i16 with the is_zero_poison flag set (i1 true). Every configuration is
; expected to emit a single 32-bit bsf/tzcnt and return the low 16 bits of eax.
define i16 @cttz_i16(i16 %x)  {
; X86-LABEL: cttz_i16:
; X86:       # %bb.0:
; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i16:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i16:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i16:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i16:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
  ret i16 %tmp
}

; cttz on i32 with the is_zero_poison flag set (i1 true). The expected output is
; a single instruction before the return: rep bsfl without tzcnt support,
; tzcntl with it.
define i32 @cttz_i32(i32 %x)  {
; X86-LABEL: cttz_i32:
; X86:       # %bb.0:
; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

; cttz on i64 with the is_zero_poison flag set (i1 true).
; The x86_64 configurations expect one 64-bit bsf/tzcnt. The i686 configurations
; operate on 32-bit halves: the cmov variant counts both halves and selects with
; cmovne, the others test one half and branch, adding 32 when the count comes
; from the other half. All i686 variants clear edx for the upper result word.
define i64 @cttz_i64(i64 %x)  {
; X86-NOCMOV-LABEL: cttz_i64:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB3_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB3_1:
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    rep bsfl %ecx, %edx
; X86-CMOV-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    addl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB3_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB3_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i64:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i64:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    testl %eax, %eax
; X86-FASTLZCNT-NEXT:    jne .LBB3_1
; X86-FASTLZCNT-NEXT:  # %bb.2:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    addl $32, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
; X86-FASTLZCNT-NEXT:  .LBB3_1:
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

; Promote i8 cttz to i32 and mask bit8 to prevent (slow) zero-src bsf case.
; The intrinsic is called with is_zero_poison=false, so a zero input must give a
; defined result; the expected code ORs 0x100 into the promoted value before the
; 32-bit bsf/tzcnt.
define i8 @cttz_i8_zero_test(i8 %n) {
; X86-LABEL: cttz_i8_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    orl $256, %edi # imm = 0x100
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl $256, %eax # imm = 0x100
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $256, %edi # imm = 0x100
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i8_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    orl $256, %edi # imm = 0x100
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i8_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl $256, %eax # imm = 0x100
; X86-FASTLZCNT-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Promote i16 cttz to i32 and mask bit16 to prevent (slow) zero-src bsf case.
; The intrinsic is called with is_zero_poison=false, so a zero input must give a
; defined result; the expected code ORs 0x10000 into the promoted value before
; the 32-bit bsf/tzcnt.
define i16 @cttz_i16_zero_test(i16 %n) {
; X86-LABEL: cttz_i16_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl $65536, %eax # imm = 0x10000
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    orl $65536, %edi # imm = 0x10000
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i16_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl $65536, %eax # imm = 0x10000
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $65536, %edi # imm = 0x10000
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i16_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    orl $65536, %edi # imm = 0x10000
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i16_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl $65536, %eax # imm = 0x10000
; X86-FASTLZCNT-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The intrinsic is called with is_zero_poison=false, so zero must yield 32.
; Expected shapes: i686 without cmov branches around the bsf and loads 32;
; i686 with cmov uses bsfl plus cmovne over a preloaded 32; x86_64 without tzcnt
; preloads 32 into eax ahead of rep bsfl; tzcnt-capable configurations need only
; tzcntl.
define i32 @cttz_i32_zero_test(i32 %n) {
; X86-NOCMOV-LABEL: cttz_i32_zero_test:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    je .LBB6_1
; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB6_1:
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i32_zero_test:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %eax
; X86-CMOV-NEXT:    cmovnel %ecx, %eax
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; The intrinsic is called with is_zero_poison=false, so zero must yield 64.
; In the i686 outputs without tzcnt, the bsfl instructions whose flags feed the
; following je/cmov are guarded by NOT checks so that no rep prefix is accepted
; on them. The x86_64 output without tzcnt preloads 64 into eax before rep bsfq.
define i64 @cttz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NOT:     rep
; X86-NOCMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    je .LBB7_2
; X86-NOCMOV-NEXT:  # %bb.1:
; X86-NOCMOV-NEXT:    movl %edx, %eax
; X86-NOCMOV-NEXT:  .LBB7_2:
; X86-NOCMOV-NEXT:    testl %ecx, %ecx
; X86-NOCMOV-NEXT:    jne .LBB7_3
; X86-NOCMOV-NEXT:  # %bb.4:
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB7_3:
; X86-NOCMOV-NEXT:    rep bsfl %ecx, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %edx
; X86-CMOV-NEXT:    cmovnel %ecx, %edx
; X86-CMOV-NEXT:    addl $32, %edx
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl %eax, %eax
; X86-CMOV-NEXT:    cmovel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    rep bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB7_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB7_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i64_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i64_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    testl %eax, %eax
; X86-FASTLZCNT-NEXT:    jne .LBB7_1
; X86-FASTLZCNT-NEXT:  # %bb.2:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    addl $32, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
; X86-FASTLZCNT-NEXT:  .LBB7_1:
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Known-bits test. Bit 1 of the input is forced on, so the trailing-zero count
; can only be 0 or 1 and the final `and ..., 1` is redundant; none of the
; expected outputs contains an and instruction after the bsf/tzcnt.
define i8 @cttz_i8_knownbits(i8 %x)  {
; X86-LABEL: cttz_i8_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orb $2, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $2, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8_knownbits:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    orb $2, %al
; X86-CLZ-NEXT:    movzbl %al, %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $2, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i8_knownbits:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    orb $2, %dil
; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
; X64-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i8_knownbits:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    orb $2, %al
; X86-FASTLZCNT-NEXT:    movzbl %al, %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSF
; This is relevant for 32-bit mode without tzcnt
; The input is OR'ed with the sign bit, so it is never zero even though the
; intrinsic is called with is_zero_poison=false. In the i686 outputs the
; 0x80000000 constant is OR'ed into one 32-bit half before that half is counted,
; and 32 is combined into that count with orl.
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB9_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-NOCMOV-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    orl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB9_1:
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-CMOV-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    rep bsfl %ecx, %edx
; X86-CMOV-NEXT:    rep bsfl %eax, %eax
; X86-CMOV-NEXT:    orl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    rep bsfq %rax, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB9_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    orl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB9_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT:    orq %rdi, %rax
; X64-CLZ-NEXT:    tzcntq %rax, %rax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-FASTLZCNT-NEXT:    orq %rdi, %rax
; X64-FASTLZCNT-NEXT:    tzcntq %rax, %rax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    testl %eax, %eax
; X86-FASTLZCNT-NEXT:    jne .LBB9_1
; X86-FASTLZCNT-NEXT:  # %bb.2:
; X86-FASTLZCNT-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-FASTLZCNT-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    orl $32, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
; X86-FASTLZCNT-NEXT:  .LBB9_1:
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; optsize variant of the i32 zero-is-poison test. For the configurations without
; tzcnt the checks require a plain bsfl and reject a rep prefix; the tzcnt
; configurations still expect tzcntl.
define i32 @cttz_i32_osize(i32 %x) optsize {
; X86-LABEL: cttz_i32_osize:
; X86:       # %bb.0:
; X86-NOT:     rep
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32_osize:
; X64:       # %bb.0:
; X64-NOT:     rep
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_osize:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_osize:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_osize:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_osize:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true)
  ret i32 %tmp
}

; minsize variant of the i32 zero-is-poison test. For the configurations without
; tzcnt the checks require a plain bsfl and reject a rep prefix; the tzcnt
; configurations still expect tzcntl.
define i32 @cttz_i32_msize(i32 %x) minsize {
; X86-LABEL: cttz_i32_msize:
; X86:       # %bb.0:
; X86-NOT:     rep
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32_msize:
; X64:       # %bb.0:
; X64-NOT:     rep
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_msize:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_msize:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_msize:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_msize:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true)
  ret i32 %tmp
}

; i32 cttz (is_zero_poison=false) whose result is sign-extended to i64.
; The expected code contains no explicit sign-extension instruction: the i686
; outputs clear edx with xorl for the upper word, and the x86_64 outputs return
; straight after the 32-bit bsf/tzcnt.
define i64 @cttz_i32_sext(i32 %x) {
; X86-NOCMOV-LABEL: cttz_i32_sext:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    je .LBB12_1
; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB12_1:
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i32_sext:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %eax
; X86-CMOV-NEXT:    cmovnel %ecx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i32_sext:
; X64:       # %bb.0:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_sext:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_sext:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_sext:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_sext:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 false)
  %ext = sext i32 %tmp to i64
  ret i64 %ext
}

; i32 cttz (is_zero_poison=false) whose result is zero-extended to i64.
; The expected code contains no explicit extension instruction: the i686 outputs
; clear edx with xorl for the upper word, and the x86_64 outputs return straight
; after the 32-bit bsf/tzcnt.
define i64 @cttz_i32_zext(i32 %x) {
; X86-NOCMOV-LABEL: cttz_i32_zext:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    je .LBB13_1
; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB13_1:
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i32_zext:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %eax
; X86-CMOV-NEXT:    cmovnel %ecx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i32_zext:
; X64:       # %bb.0:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_zext:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_zext:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_zext:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_zext:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 false)
  %ext = zext i32 %tmp to i64
  ret i64 %ext
}
