xref: /llvm-project/llvm/test/CodeGen/X86/ctlz.ll (revision 90e9895a9373b3d83eefe15b34d2dc83c7bcc88f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
5; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X64-FASTLZCNT
8; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X86-FASTLZCNT
9
; Count-leading-zeros intrinsics exercised by this file, one per integer width
; (i8, i16, i32, i64). The trailing i1 operand is passed as either true or
; false by the individual tests below.
10declare i8 @llvm.ctlz.i8(i8, i1)
11declare i16 @llvm.ctlz.i16(i16, i1)
12declare i32 @llvm.ctlz.i32(i32, i1)
13declare i64 @llvm.ctlz.i64(i64, i1)
14
; i8 ctlz with the second intrinsic operand set to true.
; Expected code without lzcnt: the byte is zero-extended (movzbl), fed to a
; 32-bit bsrl, and xorl $7 turns the bit index into a leading-zero count.
; Expected code with lzcnt: the value is shifted left by 24 so that a 32-bit
; lzcntl can be applied directly.
15define i8 @ctlz_i8(i8 %x) {
16; X86-LABEL: ctlz_i8:
17; X86:       # %bb.0:
18; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
19; X86-NEXT:    bsrl %eax, %eax
20; X86-NEXT:    xorl $7, %eax
21; X86-NEXT:    # kill: def $al killed $al killed $eax
22; X86-NEXT:    retl
23;
24; X64-LABEL: ctlz_i8:
25; X64:       # %bb.0:
26; X64-NEXT:    movzbl %dil, %eax
27; X64-NEXT:    bsrl %eax, %eax
28; X64-NEXT:    xorl $7, %eax
29; X64-NEXT:    # kill: def $al killed $al killed $eax
30; X64-NEXT:    retq
31;
32; X86-CLZ-LABEL: ctlz_i8:
33; X86-CLZ:       # %bb.0:
34; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
35; X86-CLZ-NEXT:    shll $24, %eax
36; X86-CLZ-NEXT:    lzcntl %eax, %eax
37; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
38; X86-CLZ-NEXT:    retl
39;
40; X64-CLZ-LABEL: ctlz_i8:
41; X64-CLZ:       # %bb.0:
42; X64-CLZ-NEXT:    shll $24, %edi
43; X64-CLZ-NEXT:    lzcntl %edi, %eax
44; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
45; X64-CLZ-NEXT:    retq
46;
47; X64-FASTLZCNT-LABEL: ctlz_i8:
48; X64-FASTLZCNT:       # %bb.0:
49; X64-FASTLZCNT-NEXT:    shll $24, %edi
50; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
51; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
52; X64-FASTLZCNT-NEXT:    retq
53;
54; X86-FASTLZCNT-LABEL: ctlz_i8:
55; X86-FASTLZCNT:       # %bb.0:
56; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
57; X86-FASTLZCNT-NEXT:    shll $24, %eax
58; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
59; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
60; X86-FASTLZCNT-NEXT:    retl
61  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
62  ret i8 %tmp2
63}
64
; i16 ctlz with the second intrinsic operand set to true.
; Expected code without lzcnt: a 16-bit bsrw followed by xorl $15.
; Expected code with lzcnt: a single lzcntw, with no fix-up instruction.
65define i16 @ctlz_i16(i16 %x) {
66; X86-LABEL: ctlz_i16:
67; X86:       # %bb.0:
68; X86-NEXT:    bsrw {{[0-9]+}}(%esp), %ax
69; X86-NEXT:    xorl $15, %eax
70; X86-NEXT:    # kill: def $ax killed $ax killed $eax
71; X86-NEXT:    retl
72;
73; X64-LABEL: ctlz_i16:
74; X64:       # %bb.0:
75; X64-NEXT:    bsrw %di, %ax
76; X64-NEXT:    xorl $15, %eax
77; X64-NEXT:    # kill: def $ax killed $ax killed $eax
78; X64-NEXT:    retq
79;
80; X86-CLZ-LABEL: ctlz_i16:
81; X86-CLZ:       # %bb.0:
82; X86-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
83; X86-CLZ-NEXT:    retl
84;
85; X64-CLZ-LABEL: ctlz_i16:
86; X64-CLZ:       # %bb.0:
87; X64-CLZ-NEXT:    lzcntw %di, %ax
88; X64-CLZ-NEXT:    retq
89;
90; X64-FASTLZCNT-LABEL: ctlz_i16:
91; X64-FASTLZCNT:       # %bb.0:
92; X64-FASTLZCNT-NEXT:    lzcntw %di, %ax
93; X64-FASTLZCNT-NEXT:    retq
94;
95; X86-FASTLZCNT-LABEL: ctlz_i16:
96; X86-FASTLZCNT:       # %bb.0:
97; X86-FASTLZCNT-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
98; X86-FASTLZCNT-NEXT:    retl
99  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
100  ret i16 %tmp2
101}
102
; i32 ctlz with the second intrinsic operand set to true.
; Expected code without lzcnt: bsrl followed by xorl $31.
; Expected code with lzcnt: a single lzcntl.
103define i32 @ctlz_i32(i32 %x) {
104; X86-LABEL: ctlz_i32:
105; X86:       # %bb.0:
106; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
107; X86-NEXT:    xorl $31, %eax
108; X86-NEXT:    retl
109;
110; X64-LABEL: ctlz_i32:
111; X64:       # %bb.0:
112; X64-NEXT:    bsrl %edi, %eax
113; X64-NEXT:    xorl $31, %eax
114; X64-NEXT:    retq
115;
116; X86-CLZ-LABEL: ctlz_i32:
117; X86-CLZ:       # %bb.0:
118; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
119; X86-CLZ-NEXT:    retl
120;
121; X64-CLZ-LABEL: ctlz_i32:
122; X64-CLZ:       # %bb.0:
123; X64-CLZ-NEXT:    lzcntl %edi, %eax
124; X64-CLZ-NEXT:    retq
125;
126; X64-FASTLZCNT-LABEL: ctlz_i32:
127; X64-FASTLZCNT:       # %bb.0:
128; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
129; X64-FASTLZCNT-NEXT:    retq
130;
131; X86-FASTLZCNT-LABEL: ctlz_i32:
132; X86-FASTLZCNT:       # %bb.0:
133; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
134; X86-FASTLZCNT-NEXT:    retl
135  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
136  ret i32 %tmp
137}
138
; i64 ctlz with the second intrinsic operand set to true.
; On x86_64 the expected code is a single 64-bit operation (bsrq + xorq $63,
; or lzcntq). On i686 the value is handled as two 32-bit halves: one half is
; tested for zero; if it is non-zero its count is the result, otherwise the
; count of the other half is used with 32 added (orl $32 / addl $32). The
; no-cmov and lzcnt runs branch on that test, while the cmov run computes both
; counts and selects with cmovnel. In every i686 variant %edx (the upper half
; of the i64 return value) is cleared with xorl.
139define i64 @ctlz_i64(i64 %x) {
140; X86-NOCMOV-LABEL: ctlz_i64:
141; X86-NOCMOV:       # %bb.0:
142; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
143; X86-NOCMOV-NEXT:    testl %eax, %eax
144; X86-NOCMOV-NEXT:    jne .LBB3_1
145; X86-NOCMOV-NEXT:  # %bb.2:
146; X86-NOCMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
147; X86-NOCMOV-NEXT:    xorl $31, %eax
148; X86-NOCMOV-NEXT:    orl $32, %eax
149; X86-NOCMOV-NEXT:    xorl %edx, %edx
150; X86-NOCMOV-NEXT:    retl
151; X86-NOCMOV-NEXT:  .LBB3_1:
152; X86-NOCMOV-NEXT:    bsrl %eax, %eax
153; X86-NOCMOV-NEXT:    xorl $31, %eax
154; X86-NOCMOV-NEXT:    xorl %edx, %edx
155; X86-NOCMOV-NEXT:    retl
156;
157; X86-CMOV-LABEL: ctlz_i64:
158; X86-CMOV:       # %bb.0:
159; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
160; X86-CMOV-NEXT:    bsrl %ecx, %edx
161; X86-CMOV-NEXT:    xorl $31, %edx
162; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
163; X86-CMOV-NEXT:    xorl $31, %eax
164; X86-CMOV-NEXT:    orl $32, %eax
165; X86-CMOV-NEXT:    testl %ecx, %ecx
166; X86-CMOV-NEXT:    cmovnel %edx, %eax
167; X86-CMOV-NEXT:    xorl %edx, %edx
168; X86-CMOV-NEXT:    retl
169;
170; X64-LABEL: ctlz_i64:
171; X64:       # %bb.0:
172; X64-NEXT:    bsrq %rdi, %rax
173; X64-NEXT:    xorq $63, %rax
174; X64-NEXT:    retq
175;
176; X86-CLZ-LABEL: ctlz_i64:
177; X86-CLZ:       # %bb.0:
178; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
179; X86-CLZ-NEXT:    testl %eax, %eax
180; X86-CLZ-NEXT:    jne .LBB3_1
181; X86-CLZ-NEXT:  # %bb.2:
182; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
183; X86-CLZ-NEXT:    addl $32, %eax
184; X86-CLZ-NEXT:    xorl %edx, %edx
185; X86-CLZ-NEXT:    retl
186; X86-CLZ-NEXT:  .LBB3_1:
187; X86-CLZ-NEXT:    lzcntl %eax, %eax
188; X86-CLZ-NEXT:    xorl %edx, %edx
189; X86-CLZ-NEXT:    retl
190;
191; X64-CLZ-LABEL: ctlz_i64:
192; X64-CLZ:       # %bb.0:
193; X64-CLZ-NEXT:    lzcntq %rdi, %rax
194; X64-CLZ-NEXT:    retq
195;
196; X64-FASTLZCNT-LABEL: ctlz_i64:
197; X64-FASTLZCNT:       # %bb.0:
198; X64-FASTLZCNT-NEXT:    lzcntq %rdi, %rax
199; X64-FASTLZCNT-NEXT:    retq
200;
201; X86-FASTLZCNT-LABEL: ctlz_i64:
202; X86-FASTLZCNT:       # %bb.0:
203; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
204; X86-FASTLZCNT-NEXT:    testl %eax, %eax
205; X86-FASTLZCNT-NEXT:    jne .LBB3_1
206; X86-FASTLZCNT-NEXT:  # %bb.2:
207; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
208; X86-FASTLZCNT-NEXT:    addl $32, %eax
209; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
210; X86-FASTLZCNT-NEXT:    retl
211; X86-FASTLZCNT-NEXT:  .LBB3_1:
212; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
213; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
214; X86-FASTLZCNT-NEXT:    retl
215  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
216  ret i64 %tmp
217}
218
219; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; i8 ctlz with the second intrinsic operand set to false, so a zero input
; must be handled. In the expected output below:
;  - the i686 run without cmov tests the byte and branches, returning the
;    constant 8 on the zero path;
;  - the i686 run with cmov substitutes 15 for the bsrl result via cmovnel
;    before the xorl $7 (15 ^ 7 == 8);
;  - the x86_64 run loads 15 into the destination register right before bsrl;
;  - the lzcnt runs zero-extend the byte, use a 32-bit lzcntl and add -24.
220define i8 @ctlz_i8_zero_test(i8 %n) {
221; X86-NOCMOV-LABEL: ctlz_i8_zero_test:
222; X86-NOCMOV:       # %bb.0:
223; X86-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
224; X86-NOCMOV-NEXT:    testb %al, %al
225; X86-NOCMOV-NEXT:    je .LBB4_1
226; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
227; X86-NOCMOV-NEXT:    movzbl %al, %eax
228; X86-NOCMOV-NEXT:    bsrl %eax, %eax
229; X86-NOCMOV-NEXT:    xorl $7, %eax
230; X86-NOCMOV-NEXT:    # kill: def $al killed $al killed $eax
231; X86-NOCMOV-NEXT:    retl
232; X86-NOCMOV-NEXT:  .LBB4_1:
233; X86-NOCMOV-NEXT:    movb $8, %al
234; X86-NOCMOV-NEXT:    # kill: def $al killed $al killed $eax
235; X86-NOCMOV-NEXT:    retl
236;
237; X86-CMOV-LABEL: ctlz_i8_zero_test:
238; X86-CMOV:       # %bb.0:
239; X86-CMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
240; X86-CMOV-NEXT:    bsrl %eax, %ecx
241; X86-CMOV-NEXT:    movl $15, %eax
242; X86-CMOV-NEXT:    cmovnel %ecx, %eax
243; X86-CMOV-NEXT:    xorl $7, %eax
244; X86-CMOV-NEXT:    # kill: def $al killed $al killed $eax
245; X86-CMOV-NEXT:    retl
246;
247; X64-LABEL: ctlz_i8_zero_test:
248; X64:       # %bb.0:
249; X64-NEXT:    movzbl %dil, %ecx
250; X64-NEXT:    movl $15, %eax
251; X64-NEXT:    bsrl %ecx, %eax
252; X64-NEXT:    xorl $7, %eax
253; X64-NEXT:    # kill: def $al killed $al killed $eax
254; X64-NEXT:    retq
255;
256; X86-CLZ-LABEL: ctlz_i8_zero_test:
257; X86-CLZ:       # %bb.0:
258; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
259; X86-CLZ-NEXT:    lzcntl %eax, %eax
260; X86-CLZ-NEXT:    addl $-24, %eax
261; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
262; X86-CLZ-NEXT:    retl
263;
264; X64-CLZ-LABEL: ctlz_i8_zero_test:
265; X64-CLZ:       # %bb.0:
266; X64-CLZ-NEXT:    movzbl %dil, %eax
267; X64-CLZ-NEXT:    lzcntl %eax, %eax
268; X64-CLZ-NEXT:    addl $-24, %eax
269; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
270; X64-CLZ-NEXT:    retq
271;
272; X64-FASTLZCNT-LABEL: ctlz_i8_zero_test:
273; X64-FASTLZCNT:       # %bb.0:
274; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
275; X64-FASTLZCNT-NEXT:    lzcntl %eax, %eax
276; X64-FASTLZCNT-NEXT:    addl $-24, %eax
277; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
278; X64-FASTLZCNT-NEXT:    retq
279;
280; X86-FASTLZCNT-LABEL: ctlz_i8_zero_test:
281; X86-FASTLZCNT:       # %bb.0:
282; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
283; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
284; X86-FASTLZCNT-NEXT:    addl $-24, %eax
285; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
286; X86-FASTLZCNT-NEXT:    retl
287  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
288  ret i8 %tmp1
289}
290
291; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; i16 ctlz with the second intrinsic operand set to false, so a zero input
; must be handled. In the expected output below:
;  - the i686 run without cmov tests the value and branches, returning the
;    constant 16 on the zero path;
;  - the i686 run with cmov substitutes 31 for the bsrw result via cmovnew
;    before the xorl $15 (31 ^ 15 == 16);
;  - the x86_64 run loads 31 into the destination register right before bsrw;
;  - the lzcnt runs need only a single lzcntw.
292define i16 @ctlz_i16_zero_test(i16 %n) {
293; X86-NOCMOV-LABEL: ctlz_i16_zero_test:
294; X86-NOCMOV:       # %bb.0:
295; X86-NOCMOV-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
296; X86-NOCMOV-NEXT:    testw %ax, %ax
297; X86-NOCMOV-NEXT:    je .LBB5_1
298; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
299; X86-NOCMOV-NEXT:    bsrw %ax, %ax
300; X86-NOCMOV-NEXT:    xorl $15, %eax
301; X86-NOCMOV-NEXT:    # kill: def $ax killed $ax killed $eax
302; X86-NOCMOV-NEXT:    retl
303; X86-NOCMOV-NEXT:  .LBB5_1:
304; X86-NOCMOV-NEXT:    movw $16, %ax
305; X86-NOCMOV-NEXT:    # kill: def $ax killed $ax killed $eax
306; X86-NOCMOV-NEXT:    retl
307;
308; X86-CMOV-LABEL: ctlz_i16_zero_test:
309; X86-CMOV:       # %bb.0:
310; X86-CMOV-NEXT:    bsrw {{[0-9]+}}(%esp), %cx
311; X86-CMOV-NEXT:    movw $31, %ax
312; X86-CMOV-NEXT:    cmovnew %cx, %ax
313; X86-CMOV-NEXT:    xorl $15, %eax
314; X86-CMOV-NEXT:    # kill: def $ax killed $ax killed $eax
315; X86-CMOV-NEXT:    retl
316;
317; X64-LABEL: ctlz_i16_zero_test:
318; X64:       # %bb.0:
319; X64-NEXT:    movw $31, %ax
320; X64-NEXT:    bsrw %di, %ax
321; X64-NEXT:    xorl $15, %eax
322; X64-NEXT:    # kill: def $ax killed $ax killed $eax
323; X64-NEXT:    retq
324;
325; X86-CLZ-LABEL: ctlz_i16_zero_test:
326; X86-CLZ:       # %bb.0:
327; X86-CLZ-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
328; X86-CLZ-NEXT:    retl
329;
330; X64-CLZ-LABEL: ctlz_i16_zero_test:
331; X64-CLZ:       # %bb.0:
332; X64-CLZ-NEXT:    lzcntw %di, %ax
333; X64-CLZ-NEXT:    retq
334;
335; X64-FASTLZCNT-LABEL: ctlz_i16_zero_test:
336; X64-FASTLZCNT:       # %bb.0:
337; X64-FASTLZCNT-NEXT:    lzcntw %di, %ax
338; X64-FASTLZCNT-NEXT:    retq
339;
340; X86-FASTLZCNT-LABEL: ctlz_i16_zero_test:
341; X86-FASTLZCNT:       # %bb.0:
342; X86-FASTLZCNT-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
343; X86-FASTLZCNT-NEXT:    retl
344  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
345  ret i16 %tmp1
346}
347
348; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; i32 ctlz with the second intrinsic operand set to false, so a zero input
; must be handled. In the expected output below:
;  - the i686 run without cmov tests the value and branches, returning the
;    constant 32 on the zero path;
;  - the i686 run with cmov substitutes 63 for the bsrl result via cmovnel
;    before the xorl $31 (63 ^ 31 == 32);
;  - the x86_64 run loads 63 into the destination register right before bsrl;
;  - the lzcnt runs need only a single lzcntl.
349define i32 @ctlz_i32_zero_test(i32 %n) {
350; X86-NOCMOV-LABEL: ctlz_i32_zero_test:
351; X86-NOCMOV:       # %bb.0:
352; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
353; X86-NOCMOV-NEXT:    testl %eax, %eax
354; X86-NOCMOV-NEXT:    je .LBB6_1
355; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
356; X86-NOCMOV-NEXT:    bsrl %eax, %eax
357; X86-NOCMOV-NEXT:    xorl $31, %eax
358; X86-NOCMOV-NEXT:    retl
359; X86-NOCMOV-NEXT:  .LBB6_1:
360; X86-NOCMOV-NEXT:    movl $32, %eax
361; X86-NOCMOV-NEXT:    retl
362;
363; X86-CMOV-LABEL: ctlz_i32_zero_test:
364; X86-CMOV:       # %bb.0:
365; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
366; X86-CMOV-NEXT:    movl $63, %eax
367; X86-CMOV-NEXT:    cmovnel %ecx, %eax
368; X86-CMOV-NEXT:    xorl $31, %eax
369; X86-CMOV-NEXT:    retl
370;
371; X64-LABEL: ctlz_i32_zero_test:
372; X64:       # %bb.0:
373; X64-NEXT:    movl $63, %eax
374; X64-NEXT:    bsrl %edi, %eax
375; X64-NEXT:    xorl $31, %eax
376; X64-NEXT:    retq
377;
378; X86-CLZ-LABEL: ctlz_i32_zero_test:
379; X86-CLZ:       # %bb.0:
380; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
381; X86-CLZ-NEXT:    retl
382;
383; X64-CLZ-LABEL: ctlz_i32_zero_test:
384; X64-CLZ:       # %bb.0:
385; X64-CLZ-NEXT:    lzcntl %edi, %eax
386; X64-CLZ-NEXT:    retq
387;
388; X64-FASTLZCNT-LABEL: ctlz_i32_zero_test:
389; X64-FASTLZCNT:       # %bb.0:
390; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
391; X64-FASTLZCNT-NEXT:    retq
392;
393; X86-FASTLZCNT-LABEL: ctlz_i32_zero_test:
394; X86-FASTLZCNT:       # %bb.0:
395; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
396; X86-FASTLZCNT-NEXT:    retl
397  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
398  ret i32 %tmp1
399}
400
401; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
; i64 ctlz with the second intrinsic operand set to false, so a zero input
; must be handled. In the expected output below:
;  - the x86_64 run without lzcnt loads 127 into the destination register
;    right before bsrq, then applies xorq $63 (127 ^ 63 == 64);
;  - the i686 runs without lzcnt work on two 32-bit halves: the bsrl result
;    for one half is replaced by 63 when that half is zero (via a branch
;    without cmov, via cmovnel with cmov), then xorl $31 and addl $32 are
;    applied; the other half decides which count is returned;
;  - the i686 lzcnt runs test one half and branch between a plain lzcntl and
;    lzcntl plus addl $32;
;  - the x86_64 lzcnt runs need only a single lzcntq.
; Every i686 variant clears %edx (upper half of the i64 result) with xorl.
402define i64 @ctlz_i64_zero_test(i64 %n) {
403; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
404; X86-NOCMOV:       # %bb.0:
405; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
406; X86-NOCMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %edx
407; X86-NOCMOV-NEXT:    movl $63, %eax
408; X86-NOCMOV-NEXT:    je .LBB7_2
409; X86-NOCMOV-NEXT:  # %bb.1:
410; X86-NOCMOV-NEXT:    movl %edx, %eax
411; X86-NOCMOV-NEXT:  .LBB7_2:
412; X86-NOCMOV-NEXT:    testl %ecx, %ecx
413; X86-NOCMOV-NEXT:    jne .LBB7_3
414; X86-NOCMOV-NEXT:  # %bb.4:
415; X86-NOCMOV-NEXT:    xorl $31, %eax
416; X86-NOCMOV-NEXT:    addl $32, %eax
417; X86-NOCMOV-NEXT:    xorl %edx, %edx
418; X86-NOCMOV-NEXT:    retl
419; X86-NOCMOV-NEXT:  .LBB7_3:
420; X86-NOCMOV-NEXT:    bsrl %ecx, %eax
421; X86-NOCMOV-NEXT:    xorl $31, %eax
422; X86-NOCMOV-NEXT:    xorl %edx, %edx
423; X86-NOCMOV-NEXT:    retl
424;
425; X86-CMOV-LABEL: ctlz_i64_zero_test:
426; X86-CMOV:       # %bb.0:
427; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
428; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
429; X86-CMOV-NEXT:    movl $63, %edx
430; X86-CMOV-NEXT:    cmovnel %eax, %edx
431; X86-CMOV-NEXT:    xorl $31, %edx
432; X86-CMOV-NEXT:    addl $32, %edx
433; X86-CMOV-NEXT:    bsrl %ecx, %eax
434; X86-CMOV-NEXT:    xorl $31, %eax
435; X86-CMOV-NEXT:    testl %ecx, %ecx
436; X86-CMOV-NEXT:    cmovel %edx, %eax
437; X86-CMOV-NEXT:    xorl %edx, %edx
438; X86-CMOV-NEXT:    retl
439;
440; X64-LABEL: ctlz_i64_zero_test:
441; X64:       # %bb.0:
442; X64-NEXT:    movl $127, %eax
443; X64-NEXT:    bsrq %rdi, %rax
444; X64-NEXT:    xorq $63, %rax
445; X64-NEXT:    retq
446;
447; X86-CLZ-LABEL: ctlz_i64_zero_test:
448; X86-CLZ:       # %bb.0:
449; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
450; X86-CLZ-NEXT:    testl %eax, %eax
451; X86-CLZ-NEXT:    jne .LBB7_1
452; X86-CLZ-NEXT:  # %bb.2:
453; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
454; X86-CLZ-NEXT:    addl $32, %eax
455; X86-CLZ-NEXT:    xorl %edx, %edx
456; X86-CLZ-NEXT:    retl
457; X86-CLZ-NEXT:  .LBB7_1:
458; X86-CLZ-NEXT:    lzcntl %eax, %eax
459; X86-CLZ-NEXT:    xorl %edx, %edx
460; X86-CLZ-NEXT:    retl
461;
462; X64-CLZ-LABEL: ctlz_i64_zero_test:
463; X64-CLZ:       # %bb.0:
464; X64-CLZ-NEXT:    lzcntq %rdi, %rax
465; X64-CLZ-NEXT:    retq
466;
467; X64-FASTLZCNT-LABEL: ctlz_i64_zero_test:
468; X64-FASTLZCNT:       # %bb.0:
469; X64-FASTLZCNT-NEXT:    lzcntq %rdi, %rax
470; X64-FASTLZCNT-NEXT:    retq
471;
472; X86-FASTLZCNT-LABEL: ctlz_i64_zero_test:
473; X86-FASTLZCNT:       # %bb.0:
474; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
475; X86-FASTLZCNT-NEXT:    testl %eax, %eax
476; X86-FASTLZCNT-NEXT:    jne .LBB7_1
477; X86-FASTLZCNT-NEXT:  # %bb.2:
478; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
479; X86-FASTLZCNT-NEXT:    addl $32, %eax
480; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
481; X86-FASTLZCNT-NEXT:    retl
482; X86-FASTLZCNT-NEXT:  .LBB7_1:
483; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
484; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
485; X86-FASTLZCNT-NEXT:    retl
486  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
487  ret i64 %tmp1
488}
489
490; Don't generate the cmovne when the source is known non-zero (and bsr would
491; not set ZF).
492; rdar://9490949
; The input is OR-ed with 1 before the ctlz call, so it can never be zero even
; though the second intrinsic operand is false. None of the expected sequences
; contain a cmov, a constant preload or a zero-test branch: they are just
; orl $1 followed by bsrl + xorl $31 (no lzcnt) or by lzcntl (lzcnt).
493define i32 @ctlz_i32_fold_cmov(i32 %n) {
494; X86-LABEL: ctlz_i32_fold_cmov:
495; X86:       # %bb.0:
496; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
497; X86-NEXT:    orl $1, %eax
498; X86-NEXT:    bsrl %eax, %eax
499; X86-NEXT:    xorl $31, %eax
500; X86-NEXT:    retl
501;
502; X64-LABEL: ctlz_i32_fold_cmov:
503; X64:       # %bb.0:
504; X64-NEXT:    orl $1, %edi
505; X64-NEXT:    bsrl %edi, %eax
506; X64-NEXT:    xorl $31, %eax
507; X64-NEXT:    retq
508;
509; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
510; X86-CLZ:       # %bb.0:
511; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
512; X86-CLZ-NEXT:    orl $1, %eax
513; X86-CLZ-NEXT:    lzcntl %eax, %eax
514; X86-CLZ-NEXT:    retl
515;
516; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
517; X64-CLZ:       # %bb.0:
518; X64-CLZ-NEXT:    orl $1, %edi
519; X64-CLZ-NEXT:    lzcntl %edi, %eax
520; X64-CLZ-NEXT:    retq
521;
522; X64-FASTLZCNT-LABEL: ctlz_i32_fold_cmov:
523; X64-FASTLZCNT:       # %bb.0:
524; X64-FASTLZCNT-NEXT:    orl $1, %edi
525; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
526; X64-FASTLZCNT-NEXT:    retq
527;
528; X86-FASTLZCNT-LABEL: ctlz_i32_fold_cmov:
529; X86-FASTLZCNT:       # %bb.0:
530; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
531; X86-FASTLZCNT-NEXT:    orl $1, %eax
532; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
533; X86-FASTLZCNT-NEXT:    retl
534  %or = or i32 %n, 1
535  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
536  ret i32 %tmp1
537}
538
539; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
540; the most significant bit, which is what 'bsr' does natively.
541; NOTE: We intentionally don't select `bsr` when `fast-lzcnt` is
542; available. This is 1) because `bsr` has some drawbacks including a
543; dependency on dst, 2) very poor performance on some of the
544; `fast-lzcnt` processors, and 3) `lzcnt` runs at ALU latency/throughput
545; so `lzcnt` + `xor` has better throughput than even the 1-uop
546; (1c latency, 1c throughput) `bsr`.
; IR pattern under test: xor(ctlz(%n, true), 31).
; Expected code: every run without fast-lzcnt (including the two runs that
; only enable bmi and lzcnt) emits a lone bsrl with no xor; the two fast-lzcnt
; runs emit lzcntl followed by xorl $31 instead.
547define i32 @ctlz_bsr(i32 %n) {
548; X86-LABEL: ctlz_bsr:
549; X86:       # %bb.0:
550; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
551; X86-NEXT:    retl
552;
553; X64-LABEL: ctlz_bsr:
554; X64:       # %bb.0:
555; X64-NEXT:    bsrl %edi, %eax
556; X64-NEXT:    retq
557;
558; X86-CLZ-LABEL: ctlz_bsr:
559; X86-CLZ:       # %bb.0:
560; X86-CLZ-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
561; X86-CLZ-NEXT:    retl
562;
563; X64-CLZ-LABEL: ctlz_bsr:
564; X64-CLZ:       # %bb.0:
565; X64-CLZ-NEXT:    bsrl %edi, %eax
566; X64-CLZ-NEXT:    retq
567;
568; X64-FASTLZCNT-LABEL: ctlz_bsr:
569; X64-FASTLZCNT:       # %bb.0:
570; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
571; X64-FASTLZCNT-NEXT:    xorl $31, %eax
572; X64-FASTLZCNT-NEXT:    retq
573;
574; X86-FASTLZCNT-LABEL: ctlz_bsr:
575; X86-FASTLZCNT:       # %bb.0:
576; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
577; X86-FASTLZCNT-NEXT:    xorl $31, %eax
578; X86-FASTLZCNT-NEXT:    retl
579  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
580  %bsr = xor i32 %ctlz, 31
581  ret i32 %bsr
582}
583
584; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
585; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
586;        codegen doesn't know how to combine the $32 and $31 into $63.
; IR pattern under test: xor(ctlz(%n, false), 31), i.e. the same shape as
; ctlz_bsr but with a zero input that must be handled. In the expected output:
;  - the i686 run without cmov branches on zero and still carries the
;    uncombined constants: two consecutive xorl $31 on the non-zero path and
;    movl $32 followed by xorl $31 on the zero path;
;  - the i686 run with cmov emits bsrl, movl $63 and cmovnel with no xor;
;  - the x86_64 run loads 63 into the destination right before bsrl, no xor;
;  - all lzcnt runs emit lzcntl followed by xorl $31.
587define i32 @ctlz_bsr_zero_test(i32 %n) {
588; X86-NOCMOV-LABEL: ctlz_bsr_zero_test:
589; X86-NOCMOV:       # %bb.0:
590; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
591; X86-NOCMOV-NEXT:    testl %eax, %eax
592; X86-NOCMOV-NEXT:    je .LBB10_1
593; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
594; X86-NOCMOV-NEXT:    bsrl %eax, %eax
595; X86-NOCMOV-NEXT:    xorl $31, %eax
596; X86-NOCMOV-NEXT:    xorl $31, %eax
597; X86-NOCMOV-NEXT:    retl
598; X86-NOCMOV-NEXT:  .LBB10_1:
599; X86-NOCMOV-NEXT:    movl $32, %eax
600; X86-NOCMOV-NEXT:    xorl $31, %eax
601; X86-NOCMOV-NEXT:    retl
602;
603; X86-CMOV-LABEL: ctlz_bsr_zero_test:
604; X86-CMOV:       # %bb.0:
605; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
606; X86-CMOV-NEXT:    movl $63, %eax
607; X86-CMOV-NEXT:    cmovnel %ecx, %eax
608; X86-CMOV-NEXT:    retl
609;
610; X64-LABEL: ctlz_bsr_zero_test:
611; X64:       # %bb.0:
612; X64-NEXT:    movl $63, %eax
613; X64-NEXT:    bsrl %edi, %eax
614; X64-NEXT:    retq
615;
616; X86-CLZ-LABEL: ctlz_bsr_zero_test:
617; X86-CLZ:       # %bb.0:
618; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
619; X86-CLZ-NEXT:    xorl $31, %eax
620; X86-CLZ-NEXT:    retl
621;
622; X64-CLZ-LABEL: ctlz_bsr_zero_test:
623; X64-CLZ:       # %bb.0:
624; X64-CLZ-NEXT:    lzcntl %edi, %eax
625; X64-CLZ-NEXT:    xorl $31, %eax
626; X64-CLZ-NEXT:    retq
627;
628; X64-FASTLZCNT-LABEL: ctlz_bsr_zero_test:
629; X64-FASTLZCNT:       # %bb.0:
630; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
631; X64-FASTLZCNT-NEXT:    xorl $31, %eax
632; X64-FASTLZCNT-NEXT:    retq
633;
634; X86-FASTLZCNT-LABEL: ctlz_bsr_zero_test:
635; X86-FASTLZCNT:       # %bb.0:
636; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
637; X86-FASTLZCNT-NEXT:    xorl $31, %eax
638; X86-FASTLZCNT-NEXT:    retl
639  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
640  %bsr = xor i32 %ctlz, 31
641  ret i32 %bsr
642}
643
; Known-bits test: bit 6 of the i8 input is forced on (or with 64), so the
; leading-zero count can only be 0 or 1 and the trailing `and i8 %tmp, 1` in
; the IR is redundant. None of the expected sequences below contain an and
; instruction; they consist of the orb $64 followed by the usual i8 ctlz
; lowering (bsrl + xorl $7 without lzcnt, shll $24 + lzcntl with lzcnt).
644define i8 @ctlz_i8_knownbits(i8 %x)  {
645; X86-LABEL: ctlz_i8_knownbits:
646; X86:       # %bb.0:
647; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
648; X86-NEXT:    orb $64, %al
649; X86-NEXT:    movzbl %al, %eax
650; X86-NEXT:    bsrl %eax, %eax
651; X86-NEXT:    xorl $7, %eax
652; X86-NEXT:    # kill: def $al killed $al killed $eax
653; X86-NEXT:    retl
654;
655; X64-LABEL: ctlz_i8_knownbits:
656; X64:       # %bb.0:
657; X64-NEXT:    orb $64, %dil
658; X64-NEXT:    movzbl %dil, %eax
659; X64-NEXT:    bsrl %eax, %eax
660; X64-NEXT:    xorl $7, %eax
661; X64-NEXT:    # kill: def $al killed $al killed $eax
662; X64-NEXT:    retq
663;
664; X86-CLZ-LABEL: ctlz_i8_knownbits:
665; X86-CLZ:       # %bb.0:
666; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
667; X86-CLZ-NEXT:    orb $64, %al
668; X86-CLZ-NEXT:    movzbl %al, %eax
669; X86-CLZ-NEXT:    shll $24, %eax
670; X86-CLZ-NEXT:    lzcntl %eax, %eax
671; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
672; X86-CLZ-NEXT:    retl
673;
674; X64-CLZ-LABEL: ctlz_i8_knownbits:
675; X64-CLZ:       # %bb.0:
676; X64-CLZ-NEXT:    orb $64, %dil
677; X64-CLZ-NEXT:    movzbl %dil, %eax
678; X64-CLZ-NEXT:    shll $24, %eax
679; X64-CLZ-NEXT:    lzcntl %eax, %eax
680; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
681; X64-CLZ-NEXT:    retq
682;
683; X64-FASTLZCNT-LABEL: ctlz_i8_knownbits:
684; X64-FASTLZCNT:       # %bb.0:
685; X64-FASTLZCNT-NEXT:    orb $64, %dil
686; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
687; X64-FASTLZCNT-NEXT:    shll $24, %eax
688; X64-FASTLZCNT-NEXT:    lzcntl %eax, %eax
689; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
690; X64-FASTLZCNT-NEXT:    retq
691;
692; X86-FASTLZCNT-LABEL: ctlz_i8_knownbits:
693; X86-FASTLZCNT:       # %bb.0:
694; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
695; X86-FASTLZCNT-NEXT:    orb $64, %al
696; X86-FASTLZCNT-NEXT:    movzbl %al, %eax
697; X86-FASTLZCNT-NEXT:    shll $24, %eax
698; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
699; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
700; X86-FASTLZCNT-NEXT:    retl
701
702  %x2 = or i8 %x, 64
703  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
704  %tmp2 = and i8 %tmp, 1
705  ret i8 %tmp2
706}
707
708; Make sure we can detect that the input is non-zero and avoid cmov after BSR
709; This is relevant for 32-bit mode without lzcnt
; The i64 input is OR-ed with 1 before a ctlz whose second operand is false,
; so the value is never zero. In the expected output:
;  - x86_64 emits orq $1 followed by bsrq + xorq $63 (or lzcntq), with no
;    constant preload before the bsr;
;  - i686 applies orl $1 to only one 32-bit half, and the bsrl/lzcntl on that
;    half is not followed by a zero fix-up; the remaining branch (no cmov,
;    lzcnt) or cmovnel (cmov) only chooses between the two halves, keyed on a
;    testl of the other half.
710define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
711; X86-NOCMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
712; X86-NOCMOV:       # %bb.0:
713; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
714; X86-NOCMOV-NEXT:    testl %eax, %eax
715; X86-NOCMOV-NEXT:    jne .LBB12_1
716; X86-NOCMOV-NEXT:  # %bb.2:
717; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
718; X86-NOCMOV-NEXT:    orl $1, %eax
719; X86-NOCMOV-NEXT:    bsrl %eax, %eax
720; X86-NOCMOV-NEXT:    xorl $31, %eax
721; X86-NOCMOV-NEXT:    orl $32, %eax
722; X86-NOCMOV-NEXT:    xorl %edx, %edx
723; X86-NOCMOV-NEXT:    retl
724; X86-NOCMOV-NEXT:  .LBB12_1:
725; X86-NOCMOV-NEXT:    bsrl %eax, %eax
726; X86-NOCMOV-NEXT:    xorl $31, %eax
727; X86-NOCMOV-NEXT:    xorl %edx, %edx
728; X86-NOCMOV-NEXT:    retl
729;
730; X86-CMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
731; X86-CMOV:       # %bb.0:
732; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
733; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
734; X86-CMOV-NEXT:    orl $1, %eax
735; X86-CMOV-NEXT:    bsrl %ecx, %edx
736; X86-CMOV-NEXT:    xorl $31, %edx
737; X86-CMOV-NEXT:    bsrl %eax, %eax
738; X86-CMOV-NEXT:    xorl $31, %eax
739; X86-CMOV-NEXT:    orl $32, %eax
740; X86-CMOV-NEXT:    testl %ecx, %ecx
741; X86-CMOV-NEXT:    cmovnel %edx, %eax
742; X86-CMOV-NEXT:    xorl %edx, %edx
743; X86-CMOV-NEXT:    retl
744;
745; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
746; X64:       # %bb.0:
747; X64-NEXT:    orq $1, %rdi
748; X64-NEXT:    bsrq %rdi, %rax
749; X64-NEXT:    xorq $63, %rax
750; X64-NEXT:    retq
751;
752; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
753; X86-CLZ:       # %bb.0:
754; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
755; X86-CLZ-NEXT:    testl %eax, %eax
756; X86-CLZ-NEXT:    jne .LBB12_1
757; X86-CLZ-NEXT:  # %bb.2:
758; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
759; X86-CLZ-NEXT:    orl $1, %eax
760; X86-CLZ-NEXT:    lzcntl %eax, %eax
761; X86-CLZ-NEXT:    orl $32, %eax
762; X86-CLZ-NEXT:    xorl %edx, %edx
763; X86-CLZ-NEXT:    retl
764; X86-CLZ-NEXT:  .LBB12_1:
765; X86-CLZ-NEXT:    lzcntl %eax, %eax
766; X86-CLZ-NEXT:    xorl %edx, %edx
767; X86-CLZ-NEXT:    retl
768;
769; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
770; X64-CLZ:       # %bb.0:
771; X64-CLZ-NEXT:    orq $1, %rdi
772; X64-CLZ-NEXT:    lzcntq %rdi, %rax
773; X64-CLZ-NEXT:    retq
774;
775; X64-FASTLZCNT-LABEL: ctlz_i64_zero_test_knownneverzero:
776; X64-FASTLZCNT:       # %bb.0:
777; X64-FASTLZCNT-NEXT:    orq $1, %rdi
778; X64-FASTLZCNT-NEXT:    lzcntq %rdi, %rax
779; X64-FASTLZCNT-NEXT:    retq
780;
781; X86-FASTLZCNT-LABEL: ctlz_i64_zero_test_knownneverzero:
782; X86-FASTLZCNT:       # %bb.0:
783; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
784; X86-FASTLZCNT-NEXT:    testl %eax, %eax
785; X86-FASTLZCNT-NEXT:    jne .LBB12_1
786; X86-FASTLZCNT-NEXT:  # %bb.2:
787; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
788; X86-FASTLZCNT-NEXT:    orl $1, %eax
789; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
790; X86-FASTLZCNT-NEXT:    orl $32, %eax
791; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
792; X86-FASTLZCNT-NEXT:    retl
793; X86-FASTLZCNT-NEXT:  .LBB12_1:
794; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
795; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
796; X86-FASTLZCNT-NEXT:    retl
797  %o = or i64 %n, 1
798  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
799  ret i64 %tmp1
800}
801
802; Ensure we fold away the XOR(TRUNC(XOR(BSR(X),31)),31).
; IR pattern under test: trunc(xor(ctlz(%0, true), 31)) to i8.
; Expected code: the two runs without lzcnt emit a lone bsrl (no xor at all);
; all four lzcnt-enabled runs emit lzcntl followed by a byte-sized xorb $31.
803define i8 @PR47603_trunc(i32 %0) {
804; X86-LABEL: PR47603_trunc:
805; X86:       # %bb.0:
806; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
807; X86-NEXT:    # kill: def $al killed $al killed $eax
808; X86-NEXT:    retl
809;
810; X64-LABEL: PR47603_trunc:
811; X64:       # %bb.0:
812; X64-NEXT:    bsrl %edi, %eax
813; X64-NEXT:    # kill: def $al killed $al killed $eax
814; X64-NEXT:    retq
815;
816; X86-CLZ-LABEL: PR47603_trunc:
817; X86-CLZ:       # %bb.0:
818; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
819; X86-CLZ-NEXT:    xorb $31, %al
820; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
821; X86-CLZ-NEXT:    retl
822;
823; X64-CLZ-LABEL: PR47603_trunc:
824; X64-CLZ:       # %bb.0:
825; X64-CLZ-NEXT:    lzcntl %edi, %eax
826; X64-CLZ-NEXT:    xorb $31, %al
827; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
828; X64-CLZ-NEXT:    retq
829;
830; X64-FASTLZCNT-LABEL: PR47603_trunc:
831; X64-FASTLZCNT:       # %bb.0:
832; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
833; X64-FASTLZCNT-NEXT:    xorb $31, %al
834; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
835; X64-FASTLZCNT-NEXT:    retq
836;
837; X86-FASTLZCNT-LABEL: PR47603_trunc:
838; X86-FASTLZCNT:       # %bb.0:
839; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
840; X86-FASTLZCNT-NEXT:    xorb $31, %al
841; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
842; X86-FASTLZCNT-NEXT:    retl
843  %2 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
844  %3 = xor i32 %2, 31
845  %4 = trunc i32 %3 to i8
846  ret i8 %4
847}
848
849; Ensure we fold away the XOR(ZEXT(XOR(BSR(X),31)),31).
; IR pattern under test: xor(ctlz(%a0, true), 31) zero-extended to i64 and
; used as the index into a [32 x i8] array at %a1; the loaded byte is
; sign-extended and returned.
; Expected code: the runs without lzcnt and the i686 run with plain lzcnt use
; the bsrl result directly as the index register of the movsbl load, with no
; xor. The x86_64 run with plain lzcnt and both fast-lzcnt runs emit lzcntl
; followed by an xor with 31 before the load.
850define i32 @PR47603_zext(i32 %a0, ptr %a1) {
851; X86-LABEL: PR47603_zext:
852; X86:       # %bb.0:
853; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
854; X86-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
855; X86-NEXT:    movsbl (%eax,%ecx), %eax
856; X86-NEXT:    retl
857;
858; X64-LABEL: PR47603_zext:
859; X64:       # %bb.0:
860; X64-NEXT:    bsrl %edi, %eax
861; X64-NEXT:    movsbl (%rsi,%rax), %eax
862; X64-NEXT:    retq
863;
864; X86-CLZ-LABEL: PR47603_zext:
865; X86-CLZ:       # %bb.0:
866; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
867; X86-CLZ-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
868; X86-CLZ-NEXT:    movsbl (%eax,%ecx), %eax
869; X86-CLZ-NEXT:    retl
870;
871; X64-CLZ-LABEL: PR47603_zext:
872; X64-CLZ:       # %bb.0:
873; X64-CLZ-NEXT:    lzcntl %edi, %eax
874; X64-CLZ-NEXT:    xorq $31, %rax
875; X64-CLZ-NEXT:    movsbl (%rsi,%rax), %eax
876; X64-CLZ-NEXT:    retq
877;
878; X64-FASTLZCNT-LABEL: PR47603_zext:
879; X64-FASTLZCNT:       # %bb.0:
880; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
881; X64-FASTLZCNT-NEXT:    xorq $31, %rax
882; X64-FASTLZCNT-NEXT:    movsbl (%rsi,%rax), %eax
883; X64-FASTLZCNT-NEXT:    retq
884;
885; X86-FASTLZCNT-LABEL: PR47603_zext:
886; X86-FASTLZCNT:       # %bb.0:
887; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
888; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %ecx
889; X86-FASTLZCNT-NEXT:    xorl $31, %ecx
890; X86-FASTLZCNT-NEXT:    movsbl (%eax,%ecx), %eax
891; X86-FASTLZCNT-NEXT:    retl
892  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %a0, i1 true)
893  %xor = xor i32 %ctlz, 31
894  %zext = zext i32 %xor to i64
895  %gep = getelementptr inbounds [32 x i8], ptr %a1, i64 0, i64 %zext
896  %load = load i8, ptr %gep, align 1
897  %sext = sext i8 %load to i32
898  ret i32 %sext
899}
900
; IR pattern under test: xor(ctlz.i8(%x, true), 7), the i8 analogue of the
; i32 "xor 31" pattern used elsewhere in this file.
; Expected code: every run without fast-lzcnt (including the two runs that
; only enable bmi and lzcnt) zero-extends the byte and emits a lone bsrl with
; no xor; the two fast-lzcnt runs emit shll $24, lzcntl and xorb $7.
901define i8 @ctlz_xor7_i8_true(i8 %x) {
902; X86-LABEL: ctlz_xor7_i8_true:
903; X86:       # %bb.0:
904; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
905; X86-NEXT:    bsrl %eax, %eax
906; X86-NEXT:    # kill: def $al killed $al killed $eax
907; X86-NEXT:    retl
908;
909; X64-LABEL: ctlz_xor7_i8_true:
910; X64:       # %bb.0:
911; X64-NEXT:    movzbl %dil, %eax
912; X64-NEXT:    bsrl %eax, %eax
913; X64-NEXT:    # kill: def $al killed $al killed $eax
914; X64-NEXT:    retq
915;
916; X86-CLZ-LABEL: ctlz_xor7_i8_true:
917; X86-CLZ:       # %bb.0:
918; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
919; X86-CLZ-NEXT:    bsrl %eax, %eax
920; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
921; X86-CLZ-NEXT:    retl
922;
923; X64-CLZ-LABEL: ctlz_xor7_i8_true:
924; X64-CLZ:       # %bb.0:
925; X64-CLZ-NEXT:    movzbl %dil, %eax
926; X64-CLZ-NEXT:    bsrl %eax, %eax
927; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
928; X64-CLZ-NEXT:    retq
929;
930; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
931; X64-FASTLZCNT:       # %bb.0:
932; X64-FASTLZCNT-NEXT:    shll $24, %edi
933; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
934; X64-FASTLZCNT-NEXT:    xorb $7, %al
935; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
936; X64-FASTLZCNT-NEXT:    retq
937;
938; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
939; X86-FASTLZCNT:       # %bb.0:
940; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
941; X86-FASTLZCNT-NEXT:    shll $24, %eax
942; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
943; X86-FASTLZCNT-NEXT:    xorb $7, %al
944; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
945; X86-FASTLZCNT-NEXT:    retl
946    %clz = call i8 @llvm.ctlz.i8(i8 %x, i1 true)
947    %res = xor i8 %clz, 7
948    ret i8 %res
949}
950
; i8 ctlz (i1 flag = false) XORed with 7. The two plain i686 runs are checked
; under separate prefixes here: X86-NOCMOV expects a testb/je branch (bsrl path
; with xorl $7 + xorb $7, zero path loading $8 then xorb $7), X86-CMOV expects
; bsrl + cmovnel against the constant $15. X64 expects $15 preloaded into %eax
; before bsrl; all LZCNT prefixes expect lzcntl, addl $-24, xorb $7.
951define i8 @ctlz_xor7_i8_false(i8 %x) {
952; X86-NOCMOV-LABEL: ctlz_xor7_i8_false:
953; X86-NOCMOV:       # %bb.0:
954; X86-NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
955; X86-NOCMOV-NEXT:    testb %al, %al
956; X86-NOCMOV-NEXT:    je .LBB16_1
957; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
958; X86-NOCMOV-NEXT:    movzbl %al, %eax
959; X86-NOCMOV-NEXT:    bsrl %eax, %eax
960; X86-NOCMOV-NEXT:    xorl $7, %eax
961; X86-NOCMOV-NEXT:    xorb $7, %al
962; X86-NOCMOV-NEXT:    # kill: def $al killed $al killed $eax
963; X86-NOCMOV-NEXT:    retl
964; X86-NOCMOV-NEXT:  .LBB16_1:
965; X86-NOCMOV-NEXT:    movb $8, %al
966; X86-NOCMOV-NEXT:    xorb $7, %al
967; X86-NOCMOV-NEXT:    # kill: def $al killed $al killed $eax
968; X86-NOCMOV-NEXT:    retl
969;
970; X86-CMOV-LABEL: ctlz_xor7_i8_false:
971; X86-CMOV:       # %bb.0:
972; X86-CMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
973; X86-CMOV-NEXT:    bsrl %eax, %ecx
974; X86-CMOV-NEXT:    movl $15, %eax
975; X86-CMOV-NEXT:    cmovnel %ecx, %eax
976; X86-CMOV-NEXT:    # kill: def $al killed $al killed $eax
977; X86-CMOV-NEXT:    retl
978;
979; X64-LABEL: ctlz_xor7_i8_false:
980; X64:       # %bb.0:
981; X64-NEXT:    movzbl %dil, %ecx
982; X64-NEXT:    movl $15, %eax
983; X64-NEXT:    bsrl %ecx, %eax
984; X64-NEXT:    # kill: def $al killed $al killed $eax
985; X64-NEXT:    retq
986;
987; X86-CLZ-LABEL: ctlz_xor7_i8_false:
988; X86-CLZ:       # %bb.0:
989; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
990; X86-CLZ-NEXT:    lzcntl %eax, %eax
991; X86-CLZ-NEXT:    addl $-24, %eax
992; X86-CLZ-NEXT:    xorb $7, %al
993; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
994; X86-CLZ-NEXT:    retl
995;
996; X64-CLZ-LABEL: ctlz_xor7_i8_false:
997; X64-CLZ:       # %bb.0:
998; X64-CLZ-NEXT:    movzbl %dil, %eax
999; X64-CLZ-NEXT:    lzcntl %eax, %eax
1000; X64-CLZ-NEXT:    addl $-24, %eax
1001; X64-CLZ-NEXT:    xorb $7, %al
1002; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
1003; X64-CLZ-NEXT:    retq
1004;
1005; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_false:
1006; X64-FASTLZCNT:       # %bb.0:
1007; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
1008; X64-FASTLZCNT-NEXT:    lzcntl %eax, %eax
1009; X64-FASTLZCNT-NEXT:    addl $-24, %eax
1010; X64-FASTLZCNT-NEXT:    xorb $7, %al
1011; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
1012; X64-FASTLZCNT-NEXT:    retq
1013;
1014; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_false:
1015; X86-FASTLZCNT:       # %bb.0:
1016; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1017; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
1018; X86-FASTLZCNT-NEXT:    addl $-24, %eax
1019; X86-FASTLZCNT-NEXT:    xorb $7, %al
1020; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
1021; X86-FASTLZCNT-NEXT:    retl
1022    %clz = call i8 @llvm.ctlz.i8(i8 %x, i1 false)
1023    %res = xor i8 %clz, 7
1024    ret i8 %res
1025}
1026
; i16 ctlz (i1 flag = true) XORed with 15. The X86, X64, X86-CLZ and X64-CLZ
; checks expect a single bsrw and no xor; both FASTLZCNT prefixes expect lzcntw
; followed by xorl $15.
1027define i16 @ctlz_xor15_i16_true(i16 %x) {
1028; X86-LABEL: ctlz_xor15_i16_true:
1029; X86:       # %bb.0:
1030; X86-NEXT:    bsrw {{[0-9]+}}(%esp), %ax
1031; X86-NEXT:    retl
1032;
1033; X64-LABEL: ctlz_xor15_i16_true:
1034; X64:       # %bb.0:
1035; X64-NEXT:    bsrw %di, %ax
1036; X64-NEXT:    retq
1037;
1038; X86-CLZ-LABEL: ctlz_xor15_i16_true:
1039; X86-CLZ:       # %bb.0:
1040; X86-CLZ-NEXT:    bsrw {{[0-9]+}}(%esp), %ax
1041; X86-CLZ-NEXT:    retl
1042;
1043; X64-CLZ-LABEL: ctlz_xor15_i16_true:
1044; X64-CLZ:       # %bb.0:
1045; X64-CLZ-NEXT:    bsrw %di, %ax
1046; X64-CLZ-NEXT:    retq
1047;
1048; X64-FASTLZCNT-LABEL: ctlz_xor15_i16_true:
1049; X64-FASTLZCNT:       # %bb.0:
1050; X64-FASTLZCNT-NEXT:    lzcntw %di, %ax
1051; X64-FASTLZCNT-NEXT:    xorl $15, %eax
1052; X64-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
1053; X64-FASTLZCNT-NEXT:    retq
1054;
1055; X86-FASTLZCNT-LABEL: ctlz_xor15_i16_true:
1056; X86-FASTLZCNT:       # %bb.0:
1057; X86-FASTLZCNT-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
1058; X86-FASTLZCNT-NEXT:    xorl $15, %eax
1059; X86-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
1060; X86-FASTLZCNT-NEXT:    retl
1061    %clz = call i16 @llvm.ctlz.i16(i16 %x, i1 true)
1062    %res = xor i16 %clz, 15
1063    ret i16 %res
1064}
1065
; i32 ctlz (i1 flag = false) XORed with 31. X86-NOCMOV expects a testl/je
; branch (bsrl path with two xorl $31, zero path loading $32 then xorl $31);
; X86-CMOV expects bsrl + cmovnel against the constant $63; X64 expects $63
; preloaded into %eax before bsrl; all LZCNT prefixes expect lzcntl + xorl $31.
1066define i32 @ctlz_xor31_i32_false(i32 %x) {
1067; X86-NOCMOV-LABEL: ctlz_xor31_i32_false:
1068; X86-NOCMOV:       # %bb.0:
1069; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
1070; X86-NOCMOV-NEXT:    testl %eax, %eax
1071; X86-NOCMOV-NEXT:    je .LBB18_1
1072; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
1073; X86-NOCMOV-NEXT:    bsrl %eax, %eax
1074; X86-NOCMOV-NEXT:    xorl $31, %eax
1075; X86-NOCMOV-NEXT:    xorl $31, %eax
1076; X86-NOCMOV-NEXT:    retl
1077; X86-NOCMOV-NEXT:  .LBB18_1:
1078; X86-NOCMOV-NEXT:    movl $32, %eax
1079; X86-NOCMOV-NEXT:    xorl $31, %eax
1080; X86-NOCMOV-NEXT:    retl
1081;
1082; X86-CMOV-LABEL: ctlz_xor31_i32_false:
1083; X86-CMOV:       # %bb.0:
1084; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
1085; X86-CMOV-NEXT:    movl $63, %eax
1086; X86-CMOV-NEXT:    cmovnel %ecx, %eax
1087; X86-CMOV-NEXT:    retl
1088;
1089; X64-LABEL: ctlz_xor31_i32_false:
1090; X64:       # %bb.0:
1091; X64-NEXT:    movl $63, %eax
1092; X64-NEXT:    bsrl %edi, %eax
1093; X64-NEXT:    retq
1094;
1095; X86-CLZ-LABEL: ctlz_xor31_i32_false:
1096; X86-CLZ:       # %bb.0:
1097; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1098; X86-CLZ-NEXT:    xorl $31, %eax
1099; X86-CLZ-NEXT:    retl
1100;
1101; X64-CLZ-LABEL: ctlz_xor31_i32_false:
1102; X64-CLZ:       # %bb.0:
1103; X64-CLZ-NEXT:    lzcntl %edi, %eax
1104; X64-CLZ-NEXT:    xorl $31, %eax
1105; X64-CLZ-NEXT:    retq
1106;
1107; X64-FASTLZCNT-LABEL: ctlz_xor31_i32_false:
1108; X64-FASTLZCNT:       # %bb.0:
1109; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
1110; X64-FASTLZCNT-NEXT:    xorl $31, %eax
1111; X64-FASTLZCNT-NEXT:    retq
1112;
1113; X86-FASTLZCNT-LABEL: ctlz_xor31_i32_false:
1114; X86-FASTLZCNT:       # %bb.0:
1115; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1116; X86-FASTLZCNT-NEXT:    xorl $31, %eax
1117; X86-FASTLZCNT-NEXT:    retl
1118    %clz = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
1119    %res = xor i32 %clz, 31
1120    ret i32 %res
1121}
1122
; i64 ctlz (i1 flag = true) XORed with 63. X64 and X64-CLZ expect a single
; bsrq; X64-FASTLZCNT expects lzcntq + xorq $63. The 32-bit prefixes test one
; 32-bit half of the argument (presumably the high half; the stack offsets are
; matched by regex), count within whichever half is selected, add 32 (orl/addl)
; on the other-half path, then apply xorl $63 and clear %edx.
1123define i64 @ctlz_xor63_i64_true(i64 %x) {
1124; X86-NOCMOV-LABEL: ctlz_xor63_i64_true:
1125; X86-NOCMOV:       # %bb.0:
1126; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
1127; X86-NOCMOV-NEXT:    testl %eax, %eax
1128; X86-NOCMOV-NEXT:    jne .LBB19_1
1129; X86-NOCMOV-NEXT:  # %bb.2:
1130; X86-NOCMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
1131; X86-NOCMOV-NEXT:    xorl $31, %eax
1132; X86-NOCMOV-NEXT:    orl $32, %eax
1133; X86-NOCMOV-NEXT:    jmp .LBB19_3
1134; X86-NOCMOV-NEXT:  .LBB19_1:
1135; X86-NOCMOV-NEXT:    bsrl %eax, %eax
1136; X86-NOCMOV-NEXT:    xorl $31, %eax
1137; X86-NOCMOV-NEXT:  .LBB19_3:
1138; X86-NOCMOV-NEXT:    xorl $63, %eax
1139; X86-NOCMOV-NEXT:    xorl %edx, %edx
1140; X86-NOCMOV-NEXT:    retl
1141;
1142; X86-CMOV-LABEL: ctlz_xor63_i64_true:
1143; X86-CMOV:       # %bb.0:
1144; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1145; X86-CMOV-NEXT:    bsrl %ecx, %edx
1146; X86-CMOV-NEXT:    xorl $31, %edx
1147; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %eax
1148; X86-CMOV-NEXT:    xorl $31, %eax
1149; X86-CMOV-NEXT:    orl $32, %eax
1150; X86-CMOV-NEXT:    testl %ecx, %ecx
1151; X86-CMOV-NEXT:    cmovnel %edx, %eax
1152; X86-CMOV-NEXT:    xorl $63, %eax
1153; X86-CMOV-NEXT:    xorl %edx, %edx
1154; X86-CMOV-NEXT:    retl
1155;
1156; X64-LABEL: ctlz_xor63_i64_true:
1157; X64:       # %bb.0:
1158; X64-NEXT:    bsrq %rdi, %rax
1159; X64-NEXT:    retq
1160;
1161; X86-CLZ-LABEL: ctlz_xor63_i64_true:
1162; X86-CLZ:       # %bb.0:
1163; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
1164; X86-CLZ-NEXT:    testl %eax, %eax
1165; X86-CLZ-NEXT:    jne .LBB19_1
1166; X86-CLZ-NEXT:  # %bb.2:
1167; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1168; X86-CLZ-NEXT:    addl $32, %eax
1169; X86-CLZ-NEXT:    jmp .LBB19_3
1170; X86-CLZ-NEXT:  .LBB19_1:
1171; X86-CLZ-NEXT:    lzcntl %eax, %eax
1172; X86-CLZ-NEXT:  .LBB19_3:
1173; X86-CLZ-NEXT:    xorl $63, %eax
1174; X86-CLZ-NEXT:    xorl %edx, %edx
1175; X86-CLZ-NEXT:    retl
1176;
1177; X64-CLZ-LABEL: ctlz_xor63_i64_true:
1178; X64-CLZ:       # %bb.0:
1179; X64-CLZ-NEXT:    bsrq %rdi, %rax
1180; X64-CLZ-NEXT:    retq
1181;
1182; X64-FASTLZCNT-LABEL: ctlz_xor63_i64_true:
1183; X64-FASTLZCNT:       # %bb.0:
1184; X64-FASTLZCNT-NEXT:    lzcntq %rdi, %rax
1185; X64-FASTLZCNT-NEXT:    xorq $63, %rax
1186; X64-FASTLZCNT-NEXT:    retq
1187;
1188; X86-FASTLZCNT-LABEL: ctlz_xor63_i64_true:
1189; X86-FASTLZCNT:       # %bb.0:
1190; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
1191; X86-FASTLZCNT-NEXT:    testl %eax, %eax
1192; X86-FASTLZCNT-NEXT:    jne .LBB19_1
1193; X86-FASTLZCNT-NEXT:  # %bb.2:
1194; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1195; X86-FASTLZCNT-NEXT:    addl $32, %eax
1196; X86-FASTLZCNT-NEXT:    jmp .LBB19_3
1197; X86-FASTLZCNT-NEXT:  .LBB19_1:
1198; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
1199; X86-FASTLZCNT-NEXT:  .LBB19_3:
1200; X86-FASTLZCNT-NEXT:    xorl $63, %eax
1201; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
1202; X86-FASTLZCNT-NEXT:    retl
1203    %clz = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
1204    %res = xor i64 %clz, 63
1205    ret i64 %res
1206}
1207
; i32 ctlz (i1 flag = false) XORed with 31, then sign-extended to i64. None of
; the expected sequences contains a sign-extension instruction: the 64-bit
; prefixes end right after the 32-bit computation, and the 32-bit prefixes
; produce the upper half with xorl %edx, %edx.
1208define i64 @ctlz_i32_sext(i32 %x) {
1209; X86-NOCMOV-LABEL: ctlz_i32_sext:
1210; X86-NOCMOV:       # %bb.0:
1211; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
1212; X86-NOCMOV-NEXT:    testl %eax, %eax
1213; X86-NOCMOV-NEXT:    je .LBB20_1
1214; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
1215; X86-NOCMOV-NEXT:    bsrl %eax, %eax
1216; X86-NOCMOV-NEXT:    xorl $31, %eax
1217; X86-NOCMOV-NEXT:    jmp .LBB20_3
1218; X86-NOCMOV-NEXT:  .LBB20_1:
1219; X86-NOCMOV-NEXT:    movl $32, %eax
1220; X86-NOCMOV-NEXT:  .LBB20_3: # %cond.end
1221; X86-NOCMOV-NEXT:    xorl $31, %eax
1222; X86-NOCMOV-NEXT:    xorl %edx, %edx
1223; X86-NOCMOV-NEXT:    retl
1224;
1225; X86-CMOV-LABEL: ctlz_i32_sext:
1226; X86-CMOV:       # %bb.0:
1227; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
1228; X86-CMOV-NEXT:    movl $63, %eax
1229; X86-CMOV-NEXT:    cmovnel %ecx, %eax
1230; X86-CMOV-NEXT:    xorl %edx, %edx
1231; X86-CMOV-NEXT:    retl
1232;
1233; X64-LABEL: ctlz_i32_sext:
1234; X64:       # %bb.0:
1235; X64-NEXT:    movl $63, %eax
1236; X64-NEXT:    bsrl %edi, %eax
1237; X64-NEXT:    retq
1238;
1239; X86-CLZ-LABEL: ctlz_i32_sext:
1240; X86-CLZ:       # %bb.0:
1241; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1242; X86-CLZ-NEXT:    xorl $31, %eax
1243; X86-CLZ-NEXT:    xorl %edx, %edx
1244; X86-CLZ-NEXT:    retl
1245;
1246; X64-CLZ-LABEL: ctlz_i32_sext:
1247; X64-CLZ:       # %bb.0:
1248; X64-CLZ-NEXT:    lzcntl %edi, %eax
1249; X64-CLZ-NEXT:    xorl $31, %eax
1250; X64-CLZ-NEXT:    retq
1251;
1252; X64-FASTLZCNT-LABEL: ctlz_i32_sext:
1253; X64-FASTLZCNT:       # %bb.0:
1254; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
1255; X64-FASTLZCNT-NEXT:    xorl $31, %eax
1256; X64-FASTLZCNT-NEXT:    retq
1257;
1258; X86-FASTLZCNT-LABEL: ctlz_i32_sext:
1259; X86-FASTLZCNT:       # %bb.0:
1260; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1261; X86-FASTLZCNT-NEXT:    xorl $31, %eax
1262; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
1263; X86-FASTLZCNT-NEXT:    retl
1264  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 false)
1265  %xor = xor i32 %tmp, 31
1266  %ext = sext i32 %xor to i64
1267  ret i64 %ext
1268}
1269
; i32 ctlz (i1 flag = false) XORed with 31, then zero-extended to i64. The
; 64-bit prefixes expect no separate extension instruction after the 32-bit
; computation; the 32-bit prefixes expect the upper half of the result to be
; produced with xorl %edx, %edx.
1270define i64 @ctlz_i32_zext(i32 %x) {
1271; X86-NOCMOV-LABEL: ctlz_i32_zext:
1272; X86-NOCMOV:       # %bb.0:
1273; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
1274; X86-NOCMOV-NEXT:    testl %eax, %eax
1275; X86-NOCMOV-NEXT:    je .LBB21_1
1276; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
1277; X86-NOCMOV-NEXT:    bsrl %eax, %eax
1278; X86-NOCMOV-NEXT:    xorl $31, %eax
1279; X86-NOCMOV-NEXT:    jmp .LBB21_3
1280; X86-NOCMOV-NEXT:  .LBB21_1:
1281; X86-NOCMOV-NEXT:    movl $32, %eax
1282; X86-NOCMOV-NEXT:  .LBB21_3: # %cond.end
1283; X86-NOCMOV-NEXT:    xorl $31, %eax
1284; X86-NOCMOV-NEXT:    xorl %edx, %edx
1285; X86-NOCMOV-NEXT:    retl
1286;
1287; X86-CMOV-LABEL: ctlz_i32_zext:
1288; X86-CMOV:       # %bb.0:
1289; X86-CMOV-NEXT:    bsrl {{[0-9]+}}(%esp), %ecx
1290; X86-CMOV-NEXT:    movl $63, %eax
1291; X86-CMOV-NEXT:    cmovnel %ecx, %eax
1292; X86-CMOV-NEXT:    xorl %edx, %edx
1293; X86-CMOV-NEXT:    retl
1294;
1295; X64-LABEL: ctlz_i32_zext:
1296; X64:       # %bb.0:
1297; X64-NEXT:    movl $63, %eax
1298; X64-NEXT:    bsrl %edi, %eax
1299; X64-NEXT:    retq
1300;
1301; X86-CLZ-LABEL: ctlz_i32_zext:
1302; X86-CLZ:       # %bb.0:
1303; X86-CLZ-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1304; X86-CLZ-NEXT:    xorl $31, %eax
1305; X86-CLZ-NEXT:    xorl %edx, %edx
1306; X86-CLZ-NEXT:    retl
1307;
1308; X64-CLZ-LABEL: ctlz_i32_zext:
1309; X64-CLZ:       # %bb.0:
1310; X64-CLZ-NEXT:    lzcntl %edi, %eax
1311; X64-CLZ-NEXT:    xorl $31, %eax
1312; X64-CLZ-NEXT:    retq
1313;
1314; X64-FASTLZCNT-LABEL: ctlz_i32_zext:
1315; X64-FASTLZCNT:       # %bb.0:
1316; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
1317; X64-FASTLZCNT-NEXT:    xorl $31, %eax
1318; X64-FASTLZCNT-NEXT:    retq
1319;
1320; X86-FASTLZCNT-LABEL: ctlz_i32_zext:
1321; X86-FASTLZCNT:       # %bb.0:
1322; X86-FASTLZCNT-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
1323; X86-FASTLZCNT-NEXT:    xorl $31, %eax
1324; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
1325; X86-FASTLZCNT-NEXT:    retl
1326  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 false)
1327  %xor = xor i32 %tmp, 31
1328  %ext = zext i32 %xor to i64
1329  ret i64 %ext
1330}
1331