; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X64-FASTLZCNT
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X86-FASTLZCNT

; Codegen tests for the llvm.ctlz intrinsic at i8/i16/i32/i64 on i686 and
; x86-64, with and without cmov, lzcnt and fast-lzcnt. The assertion lines are
; tool-generated (see the first line of the file); regenerate them with the
; script rather than editing them by hand.

declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

; i8 ctlz with the second intrinsic argument set to true, so no zero handling
; is expected. Without lzcnt the input is zero-extended and bsr + xor 7 is
; used; with lzcnt the value is shifted left by 24 and a 32-bit lzcnt is used.
define i8 @ctlz_i8(i8 %x) {
; X86-LABEL: ctlz_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $7, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: shll $24, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: shll $24, %edi
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i8:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: shll $24, %edi
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i8:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: shll $24, %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT: retl
  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
  ret i8 %tmp2
}

; i16 ctlz, second argument true: bsrw + xor 15 without lzcnt, a single lzcntw
; with it.
define i16 @ctlz_i16(i16 %x) {
; X86-LABEL: ctlz_i16:
; X86: # %bb.0:
; X86-NEXT: bsrw {{[0-9]+}}(%esp), %ax
; X86-NEXT: xorl $15, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i16:
; X64: # %bb.0:
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i16:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i16:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntw %di, %ax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i16:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntw %di, %ax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i16:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-FASTLZCNT-NEXT: retl
  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
  ret i16 %tmp2
}

; i32 ctlz, second argument true: bsrl + xor 31 without lzcnt, a single lzcntl
; with it.
define i32 @ctlz_i32(i32 %x) {
; X86-LABEL: ctlz_i32:
; X86: # %bb.0:
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i32:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i32:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i32:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: retl
  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
  ret i32 %tmp
}

; i64 ctlz, second argument true. On x86-64 this is one 64-bit bsr/lzcnt. On
; i686 the two 32-bit halves are counted separately and the result is chosen
; by testing one half against zero (branch, or cmov when available); the upper
; result half in %edx is always cleared.
define i64 @ctlz_i64(i64 %x) {
; X86-NOCMOV-LABEL: ctlz_i64:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: jne .LBB3_1
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB3_1:
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i64:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: bsrl %ecx, %edx
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i64:
; X64: # %bb.0:
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB3_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: addl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB3_1:
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i64:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i64:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: testl %eax, %eax
; X86-FASTLZCNT-NEXT: jne .LBB3_1
; X86-FASTLZCNT-NEXT: # %bb.2:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: addl $32, %eax
; X86-FASTLZCNT-NEXT: xorl %edx, %edx
; X86-FASTLZCNT-NEXT: retl
; X86-FASTLZCNT-NEXT: .LBB3_1:
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: xorl %edx, %edx
; X86-FASTLZCNT-NEXT: retl
  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
  ret i64 %tmp
}

; i8 ctlz that must also handle a zero input (second argument false). Only the
; i686 run without cmov expects a test and branch around bsr (the rationale
; recorded here originally: bsr/bsf are very slow). The cmov run expects
; bsr + cmovne with a $15 fallback, the plain x86-64 run expects $15 to be
; loaded into the destination before bsr, and the lzcnt runs expect a 32-bit
; lzcnt followed by a -24 adjustment.
define i8 @ctlz_i8_zero_test(i8 %n) {
; X86-NOCMOV-LABEL: ctlz_i8_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testb %al, %al
; X86-NOCMOV-NEXT: je .LBB4_1
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
; X86-NOCMOV-NEXT: movzbl %al, %eax
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $7, %eax
; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB4_1:
; X86-NOCMOV-NEXT: movb $8, %al
; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i8_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: bsrl %eax, %ecx
; X86-CMOV-NEXT: movl $15, %eax
; X86-CMOV-NEXT: cmovnel %ecx, %eax
; X86-CMOV-NEXT: xorl $7, %eax
; X86-CMOV-NEXT: # kill: def $al killed $al killed $eax
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl $15, %eax
; X64-NEXT: bsrl %ecx, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: addl $-24, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i8_zero_test:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X64-FASTLZCNT-NEXT: addl $-24, %eax
; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i8_zero_test:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: addl $-24, %eax
; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT: retl
  %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; i16 ctlz that must also handle a zero input (second argument false). Only the
; i686 run without cmov expects a test and branch around bsr (original
; rationale: bsr/bsf are very slow). The cmov run expects bsrw + cmovnew with
; a $31 fallback, the plain x86-64 run expects $31 to be loaded into the
; destination before bsrw, and the lzcnt runs expect a bare lzcntw.
define i16 @ctlz_i16_zero_test(i16 %n) {
; X86-NOCMOV-LABEL: ctlz_i16_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testw %ax, %ax
; X86-NOCMOV-NEXT: je .LBB5_1
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
; X86-NOCMOV-NEXT: bsrw %ax, %ax
; X86-NOCMOV-NEXT: xorl $15, %eax
; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB5_1:
; X86-NOCMOV-NEXT: movw $16, %ax
; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i16_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: bsrw {{[0-9]+}}(%esp), %cx
; X86-CMOV-NEXT: movw $31, %ax
; X86-CMOV-NEXT: cmovnew %cx, %ax
; X86-CMOV-NEXT: xorl $15, %eax
; X86-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64: # %bb.0:
; X64-NEXT: movw $31, %ax
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i16_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i16_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntw %di, %ax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i16_zero_test:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntw %di, %ax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i16_zero_test:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-FASTLZCNT-NEXT: retl
  %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; i32 ctlz that must also handle a zero input (second argument false). Only the
; i686 run without cmov expects a test and branch around bsr (original
; rationale: bsr/bsf are very slow). The cmov run expects bsrl + cmovnel with
; a $63 fallback, the plain x86-64 run expects $63 to be loaded into the
; destination before bsrl, and the lzcnt runs expect a bare lzcntl.
define i32 @ctlz_i32_zero_test(i32 %n) {
; X86-NOCMOV-LABEL: ctlz_i32_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: je .LBB6_1
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB6_1:
; X86-NOCMOV-NEXT: movl $32, %eax
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i32_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: movl $63, %eax
; X86-CMOV-NEXT: cmovnel %ecx, %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64: # %bb.0:
; X64-NEXT: movl $63, %eax
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i32_zero_test:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i32_zero_test:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: retl
  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; i64 ctlz that must also handle a zero input (second argument false). On
; x86-64 this is one bsrq with $127 loaded into the destination first, or one
; lzcntq. On i686 the halves are counted separately: the run without cmov
; branches both on the flags left by bsr and on a test of one half, the cmov
; run uses two cmovs with a $63 fallback, and the lzcnt runs branch on one half.
define i64 @ctlz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: ctlz_i64_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT: movl $63, %eax
; X86-NOCMOV-NEXT: je .LBB7_2
; X86-NOCMOV-NEXT: # %bb.1:
; X86-NOCMOV-NEXT: movl %edx, %eax
; X86-NOCMOV-NEXT: .LBB7_2:
; X86-NOCMOV-NEXT: testl %ecx, %ecx
; X86-NOCMOV-NEXT: jne .LBB7_3
; X86-NOCMOV-NEXT: # %bb.4:
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: addl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB7_3:
; X86-NOCMOV-NEXT: bsrl %ecx, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i64_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl $63, %edx
; X86-CMOV-NEXT: cmovnel %eax, %edx
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: addl $32, %edx
; X86-CMOV-NEXT: bsrl %ecx, %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i64_zero_test:
; X64: # %bb.0:
; X64-NEXT: movl $127, %eax
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB7_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: addl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB7_1:
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i64_zero_test:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i64_zero_test:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: testl %eax, %eax
; X86-FASTLZCNT-NEXT: jne .LBB7_1
; X86-FASTLZCNT-NEXT: # %bb.2:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: addl $32, %eax
; X86-FASTLZCNT-NEXT: xorl %edx, %edx
; X86-FASTLZCNT-NEXT: retl
; X86-FASTLZCNT-NEXT: .LBB7_1:
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: xorl %edx, %edx
; X86-FASTLZCNT-NEXT: retl
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; The input is OR'ed with 1 before the count, so even though the intrinsic is
; asked to handle zero, every run expects a bare bsr/lzcnt with no fallback.
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X86-LABEL: ctlz_i32_fold_cmov:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i32_fold_cmov:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orl $1, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orl $1, %edi
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i32_fold_cmov:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: orl $1, %edi
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i32_fold_cmov:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: orl $1, %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: retl
  %nonzero = or i32 %n, 1
  %lz = call i32 @llvm.ctlz.i32(i32 %nonzero, i1 false)
  ret i32 %lz
}

; When 'ctlz' is only used to compute the index of the most significant set
; bit, no xor should be emitted: that index is what 'bsr' produces natively.
; NOTE: `bsr` is intentionally not selected when `fast-lzcnt` is available,
; because 1) `bsr` has drawbacks, including a dependency on its destination,
; 2) it performs very poorly on some of the `fast-lzcnt` processors, and
; 3) `lzcnt` runs at ALU latency/throughput, so `lzcnt` + `xor` has better
; throughput than even the 1-uop (1c latency, 1c throughput) `bsr`.
define i32 @ctlz_bsr(i32 %n) {
; X86-LABEL: ctlz_bsr:
; X86: # %bb.0:
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_bsr:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_bsr:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_bsr:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: bsrl %edi, %eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_bsr:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: xorl $31, %eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_bsr:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: xorl $31, %eax
; X86-FASTLZCNT-NEXT: retl
  %lz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
  %msb = xor i32 %lz, 31
  ret i32 %msb
}

; Same most-significant-bit computation, but zero inputs must be handled.
; The i686 run without cmov expects a test and branch around bsr (original
; rationale: bsr/bsf are very slow); the cmov and plain x86-64 runs expect a
; $63 fallback value instead of a branch.
; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
; in the no-cmov run codegen doesn't know how to combine the $32 and $31 into
; $63 (it also leaves two back-to-back xors with $31 after the bsr).
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X86-NOCMOV-LABEL: ctlz_bsr_zero_test:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: je .LBB10_1
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB10_1:
; X86-NOCMOV-NEXT: movl $32, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_bsr_zero_test:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: movl $63, %eax
; X86-CMOV-NEXT: cmovnel %ecx, %eax
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64: # %bb.0:
; X64-NEXT: movl $63, %eax
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_bsr_zero_test:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: xorl $31, %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_bsr_zero_test:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorl $31, %eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_bsr_zero_test:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: xorl $31, %eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_bsr_zero_test:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: xorl $31, %eax
; X86-FASTLZCNT-NEXT: retl
  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
  %bsr = xor i32 %ctlz, 31
  ret i32 %bsr
}

; The input is OR'ed with 64 (bit 6 of the i8), so the count can only be 0 or
; 1 and the trailing `and ... 1` is redundant: none of the expected sequences
; below contains an and instruction.
define i8 @ctlz_i8_knownbits(i8 %x) {
; X86-LABEL: ctlz_i8_knownbits:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orb $64, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: xorl $7, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_i8_knownbits:
; X64: # %bb.0:
; X64-NEXT: orb $64, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i8_knownbits:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orb $64, %al
; X86-CLZ-NEXT: movzbl %al, %eax
; X86-CLZ-NEXT: shll $24, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i8_knownbits:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orb $64, %dil
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: shll $24, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i8_knownbits:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: orb $64, %dil
; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
; X64-FASTLZCNT-NEXT: shll $24, %eax
; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i8_knownbits:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: orb $64, %al
; X86-FASTLZCNT-NEXT: movzbl %al, %eax
; X86-FASTLZCNT-NEXT: shll $24, %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT: retl

  %x2 = or i8 %x, 64
  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero and avoid cmov after BSR.
; This is relevant for 32-bit mode without lzcnt. The input is OR'ed with 1,
; so the expected i686 bsr sequences count the OR'ed half without any zero
; fallback; the remaining test/branch or cmov only selects between the halves.
define i64 @ctlz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: jne .LBB12_1
; X86-NOCMOV-NEXT: # %bb.2:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: orl $1, %eax
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: orl $32, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB12_1:
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl %edx, %edx
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: orl $1, %eax
; X86-CMOV-NEXT: bsrl %ecx, %edx
; X86-CMOV-NEXT: xorl $31, %edx
; X86-CMOV-NEXT: bsrl %eax, %eax
; X86-CMOV-NEXT: xorl $31, %eax
; X86-CMOV-NEXT: orl $32, %eax
; X86-CMOV-NEXT: testl %ecx, %ecx
; X86-CMOV-NEXT: cmovnel %edx, %eax
; X86-CMOV-NEXT: xorl %edx, %edx
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64: # %bb.0:
; X64-NEXT: orq $1, %rdi
; X64-NEXT: bsrq %rdi, %rax
; X64-NEXT: xorq $63, %rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: testl %eax, %eax
; X86-CLZ-NEXT: jne .LBB12_1
; X86-CLZ-NEXT: # %bb.2:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orl $1, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: orl $32, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
; X86-CLZ-NEXT: .LBB12_1:
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: xorl %edx, %edx
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: orq $1, %rdi
; X64-CLZ-NEXT: lzcntq %rdi, %rax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_i64_zero_test_knownneverzero:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: orq $1, %rdi
; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_i64_zero_test_knownneverzero:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: testl %eax, %eax
; X86-FASTLZCNT-NEXT: jne .LBB12_1
; X86-FASTLZCNT-NEXT: # %bb.2:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: orl $1, %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: orl $32, %eax
; X86-FASTLZCNT-NEXT: xorl %edx, %edx
; X86-FASTLZCNT-NEXT: retl
; X86-FASTLZCNT-NEXT: .LBB12_1:
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: xorl %edx, %edx
; X86-FASTLZCNT-NEXT: retl
  %o = or i64 %n, 1
  %tmp1 = call i64 @llvm.ctlz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; Ensure we fold away the XOR(TRUNC(XOR(BSR(X),31)),31).
; The fold is visible in the two runs without lzcnt (a bare bsrl); the four
; lzcnt runs expect lzcntl followed by a byte xor with 31.
define i8 @PR47603_trunc(i32 %0) {
; X86-LABEL: PR47603_trunc:
; X86: # %bb.0:
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: PR47603_trunc:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: PR47603_trunc:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: xorb $31, %al
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: PR47603_trunc:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorb $31, %al
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: PR47603_trunc:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: xorb $31, %al
; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: PR47603_trunc:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: xorb $31, %al
; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT: retl
  %lz = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
  %msb = xor i32 %lz, 31
  %msb.lo = trunc i32 %msb to i8
  ret i8 %msb.lo
}

; Ensure we fold away the XOR(ZEXT(XOR(BSR(X),31)),31): the xor'ed count is
; zero-extended and used as an array index.
define i32 @PR47603_zext(i32 %a0, ptr %a1) {
; X86-LABEL: PR47603_zext:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movsbl (%eax,%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: PR47603_zext:
; X64: # %bb.0:
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: movsbl (%rsi,%rax), %eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: PR47603_zext:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
; X86-CLZ-NEXT: movsbl (%eax,%ecx), %eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: PR47603_zext:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: lzcntl %edi, %eax
; X64-CLZ-NEXT: xorq $31, %rax
; X64-CLZ-NEXT: movsbl (%rsi,%rax), %eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: PR47603_zext:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: xorq $31, %rax
; X64-FASTLZCNT-NEXT: movsbl (%rsi,%rax), %eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: PR47603_zext:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx
; X86-FASTLZCNT-NEXT: xorl $31, %ecx
; X86-FASTLZCNT-NEXT: movsbl (%eax,%ecx), %eax
; X86-FASTLZCNT-NEXT: retl
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %a0, i1 true)
  %xor = xor i32 %ctlz, 31
  %zext = zext i32 %xor to i64
  %gep = getelementptr inbounds [32 x i8], ptr %a1, i64 0, i64 %zext
  %load = load i8, ptr %gep, align 1
  %sext = sext i8 %load to i32
  ret i32 %sext
}

; i8 ctlz (second argument true) xor'ed with 7. Every run without fast-lzcnt
; expects a bare bsrl on the zero-extended input, including the two lzcnt
; runs; the fast-lzcnt runs expect shift + lzcntl + a byte xor with 7.
define i8 @ctlz_xor7_i8_true(i8 %x) {
; X86-LABEL: ctlz_xor7_i8_true:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsrl %eax, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_xor7_i8_true:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_xor7_i8_true:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: bsrl %eax, %eax
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_xor7_i8_true:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: bsrl %eax, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: shll $24, %edi
; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax
; X64-FASTLZCNT-NEXT: xorb $7, %al
; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: shll $24, %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: xorb $7, %al
; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT: retl
  %clz = call i8 @llvm.ctlz.i8(i8 %x, i1 true)
  %res = xor i8 %clz, 7
  ret i8 %res
}

; Same xor-with-7 pattern, but zero inputs must be handled (second argument
; false). The cmov and plain x86-64 runs expect bsr with a $15 fallback and no
; xor at all. The i686 run without cmov still expects a branch with
; back-to-back xors, and the lzcnt runs expect lzcntl, a -24 adjustment and a
; byte xor with 7.
define i8 @ctlz_xor7_i8_false(i8 %x) {
; X86-NOCMOV-LABEL: ctlz_xor7_i8_false:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testb %al, %al
; X86-NOCMOV-NEXT: je .LBB16_1
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
; X86-NOCMOV-NEXT: movzbl %al, %eax
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $7, %eax
; X86-NOCMOV-NEXT: xorb $7, %al
; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB16_1:
; X86-NOCMOV-NEXT: movb $8, %al
; X86-NOCMOV-NEXT: xorb $7, %al
; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_xor7_i8_false:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: bsrl %eax, %ecx
; X86-CMOV-NEXT: movl $15, %eax
; X86-CMOV-NEXT: cmovnel %ecx, %eax
; X86-CMOV-NEXT: # kill: def $al killed $al killed $eax
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_xor7_i8_false:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: movl $15, %eax
; X64-NEXT: bsrl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_xor7_i8_false:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; X86-CLZ-NEXT: addl $-24, %eax
; X86-CLZ-NEXT: xorb $7, %al
; X86-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_xor7_i8_false:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: movzbl %dil, %eax
; X64-CLZ-NEXT: lzcntl %eax, %eax
; X64-CLZ-NEXT: addl $-24, %eax
; X64-CLZ-NEXT: xorb $7, %al
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_false:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: movzbl %dil, %eax
; X64-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X64-FASTLZCNT-NEXT: addl $-24, %eax
; X64-FASTLZCNT-NEXT: xorb $7, %al
; X64-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_false:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax
; X86-FASTLZCNT-NEXT: addl $-24, %eax
; X86-FASTLZCNT-NEXT: xorb $7, %al
; X86-FASTLZCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT: retl
  %clz = call i8 @llvm.ctlz.i8(i8 %x, i1 false)
  %res = xor i8 %clz, 7
  ret i8 %res
}

; i16 ctlz (second argument true) xor'ed with 15. Every run without fast-lzcnt
; expects a bare bsrw; the fast-lzcnt runs expect lzcntw followed by a xor
; with 15.
define i16 @ctlz_xor15_i16_true(i16 %x) {
; X86-LABEL: ctlz_xor15_i16_true:
; X86: # %bb.0:
; X86-NEXT: bsrw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
;
; X64-LABEL: ctlz_xor15_i16_true:
; X64: # %bb.0:
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: ctlz_xor15_i16_true:
; X86-CLZ: # %bb.0:
; X86-CLZ-NEXT: bsrw {{[0-9]+}}(%esp), %ax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: ctlz_xor15_i16_true:
; X64-CLZ: # %bb.0:
; X64-CLZ-NEXT: bsrw %di, %ax
; X64-CLZ-NEXT: retq
;
; X64-FASTLZCNT-LABEL: ctlz_xor15_i16_true:
; X64-FASTLZCNT: # %bb.0:
; X64-FASTLZCNT-NEXT: lzcntw %di, %ax
; X64-FASTLZCNT-NEXT: xorl $15, %eax
; X64-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-FASTLZCNT-NEXT: retq
;
; X86-FASTLZCNT-LABEL: ctlz_xor15_i16_true:
; X86-FASTLZCNT: # %bb.0:
; X86-FASTLZCNT-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
; X86-FASTLZCNT-NEXT: xorl $15, %eax
; X86-FASTLZCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X86-FASTLZCNT-NEXT: retl
  %clz = call i16 @llvm.ctlz.i16(i16 %x, i1 true)
  %res = xor i16 %clz, 15
  ret i16 %res
}

define i32 @ctlz_xor31_i32_false(i32 %x) {
; X86-NOCMOV-LABEL: ctlz_xor31_i32_false:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: testl %eax, %eax
; X86-NOCMOV-NEXT: je .LBB18_1
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
; X86-NOCMOV-NEXT: bsrl %eax, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV-NEXT: .LBB18_1:
; X86-NOCMOV-NEXT: movl $32, %eax
; X86-NOCMOV-NEXT: xorl $31, %eax
; X86-NOCMOV-NEXT: retl
;
; X86-CMOV-LABEL: ctlz_xor31_i32_false:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT: movl $63, %eax
; X86-CMOV-NEXT: cmovnel %ecx, %eax
; X86-CMOV-NEXT: retl
;
; X64-LABEL: ctlz_xor31_i32_false:
1090; X64: # %bb.0: 1091; X64-NEXT: movl $63, %eax 1092; X64-NEXT: bsrl %edi, %eax 1093; X64-NEXT: retq 1094; 1095; X86-CLZ-LABEL: ctlz_xor31_i32_false: 1096; X86-CLZ: # %bb.0: 1097; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1098; X86-CLZ-NEXT: xorl $31, %eax 1099; X86-CLZ-NEXT: retl 1100; 1101; X64-CLZ-LABEL: ctlz_xor31_i32_false: 1102; X64-CLZ: # %bb.0: 1103; X64-CLZ-NEXT: lzcntl %edi, %eax 1104; X64-CLZ-NEXT: xorl $31, %eax 1105; X64-CLZ-NEXT: retq 1106; 1107; X64-FASTLZCNT-LABEL: ctlz_xor31_i32_false: 1108; X64-FASTLZCNT: # %bb.0: 1109; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax 1110; X64-FASTLZCNT-NEXT: xorl $31, %eax 1111; X64-FASTLZCNT-NEXT: retq 1112; 1113; X86-FASTLZCNT-LABEL: ctlz_xor31_i32_false: 1114; X86-FASTLZCNT: # %bb.0: 1115; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1116; X86-FASTLZCNT-NEXT: xorl $31, %eax 1117; X86-FASTLZCNT-NEXT: retl 1118 %clz = call i32 @llvm.ctlz.i32(i32 %x, i1 false) 1119 %res = xor i32 %clz, 31 1120 ret i32 %res 1121} 1122 1123define i64 @ctlz_xor63_i64_true(i64 %x) { 1124; X86-NOCMOV-LABEL: ctlz_xor63_i64_true: 1125; X86-NOCMOV: # %bb.0: 1126; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax 1127; X86-NOCMOV-NEXT: testl %eax, %eax 1128; X86-NOCMOV-NEXT: jne .LBB19_1 1129; X86-NOCMOV-NEXT: # %bb.2: 1130; X86-NOCMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax 1131; X86-NOCMOV-NEXT: xorl $31, %eax 1132; X86-NOCMOV-NEXT: orl $32, %eax 1133; X86-NOCMOV-NEXT: jmp .LBB19_3 1134; X86-NOCMOV-NEXT: .LBB19_1: 1135; X86-NOCMOV-NEXT: bsrl %eax, %eax 1136; X86-NOCMOV-NEXT: xorl $31, %eax 1137; X86-NOCMOV-NEXT: .LBB19_3: 1138; X86-NOCMOV-NEXT: xorl $63, %eax 1139; X86-NOCMOV-NEXT: xorl %edx, %edx 1140; X86-NOCMOV-NEXT: retl 1141; 1142; X86-CMOV-LABEL: ctlz_xor63_i64_true: 1143; X86-CMOV: # %bb.0: 1144; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx 1145; X86-CMOV-NEXT: bsrl %ecx, %edx 1146; X86-CMOV-NEXT: xorl $31, %edx 1147; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %eax 1148; X86-CMOV-NEXT: xorl $31, %eax 1149; X86-CMOV-NEXT: orl $32, %eax 1150; 
X86-CMOV-NEXT: testl %ecx, %ecx 1151; X86-CMOV-NEXT: cmovnel %edx, %eax 1152; X86-CMOV-NEXT: xorl $63, %eax 1153; X86-CMOV-NEXT: xorl %edx, %edx 1154; X86-CMOV-NEXT: retl 1155; 1156; X64-LABEL: ctlz_xor63_i64_true: 1157; X64: # %bb.0: 1158; X64-NEXT: bsrq %rdi, %rax 1159; X64-NEXT: retq 1160; 1161; X86-CLZ-LABEL: ctlz_xor63_i64_true: 1162; X86-CLZ: # %bb.0: 1163; X86-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax 1164; X86-CLZ-NEXT: testl %eax, %eax 1165; X86-CLZ-NEXT: jne .LBB19_1 1166; X86-CLZ-NEXT: # %bb.2: 1167; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1168; X86-CLZ-NEXT: addl $32, %eax 1169; X86-CLZ-NEXT: jmp .LBB19_3 1170; X86-CLZ-NEXT: .LBB19_1: 1171; X86-CLZ-NEXT: lzcntl %eax, %eax 1172; X86-CLZ-NEXT: .LBB19_3: 1173; X86-CLZ-NEXT: xorl $63, %eax 1174; X86-CLZ-NEXT: xorl %edx, %edx 1175; X86-CLZ-NEXT: retl 1176; 1177; X64-CLZ-LABEL: ctlz_xor63_i64_true: 1178; X64-CLZ: # %bb.0: 1179; X64-CLZ-NEXT: bsrq %rdi, %rax 1180; X64-CLZ-NEXT: retq 1181; 1182; X64-FASTLZCNT-LABEL: ctlz_xor63_i64_true: 1183; X64-FASTLZCNT: # %bb.0: 1184; X64-FASTLZCNT-NEXT: lzcntq %rdi, %rax 1185; X64-FASTLZCNT-NEXT: xorq $63, %rax 1186; X64-FASTLZCNT-NEXT: retq 1187; 1188; X86-FASTLZCNT-LABEL: ctlz_xor63_i64_true: 1189; X86-FASTLZCNT: # %bb.0: 1190; X86-FASTLZCNT-NEXT: movl {{[0-9]+}}(%esp), %eax 1191; X86-FASTLZCNT-NEXT: testl %eax, %eax 1192; X86-FASTLZCNT-NEXT: jne .LBB19_1 1193; X86-FASTLZCNT-NEXT: # %bb.2: 1194; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1195; X86-FASTLZCNT-NEXT: addl $32, %eax 1196; X86-FASTLZCNT-NEXT: jmp .LBB19_3 1197; X86-FASTLZCNT-NEXT: .LBB19_1: 1198; X86-FASTLZCNT-NEXT: lzcntl %eax, %eax 1199; X86-FASTLZCNT-NEXT: .LBB19_3: 1200; X86-FASTLZCNT-NEXT: xorl $63, %eax 1201; X86-FASTLZCNT-NEXT: xorl %edx, %edx 1202; X86-FASTLZCNT-NEXT: retl 1203 %clz = call i64 @llvm.ctlz.i64(i64 %x, i1 true) 1204 %res = xor i64 %clz, 63 1205 ret i64 %res 1206} 1207 1208define i64 @ctlz_i32_sext(i32 %x) { 1209; X86-NOCMOV-LABEL: ctlz_i32_sext: 1210; X86-NOCMOV: # %bb.0: 1211; 
X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax 1212; X86-NOCMOV-NEXT: testl %eax, %eax 1213; X86-NOCMOV-NEXT: je .LBB20_1 1214; X86-NOCMOV-NEXT: # %bb.2: # %cond.false 1215; X86-NOCMOV-NEXT: bsrl %eax, %eax 1216; X86-NOCMOV-NEXT: xorl $31, %eax 1217; X86-NOCMOV-NEXT: jmp .LBB20_3 1218; X86-NOCMOV-NEXT: .LBB20_1: 1219; X86-NOCMOV-NEXT: movl $32, %eax 1220; X86-NOCMOV-NEXT: .LBB20_3: # %cond.end 1221; X86-NOCMOV-NEXT: xorl $31, %eax 1222; X86-NOCMOV-NEXT: xorl %edx, %edx 1223; X86-NOCMOV-NEXT: retl 1224; 1225; X86-CMOV-LABEL: ctlz_i32_sext: 1226; X86-CMOV: # %bb.0: 1227; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx 1228; X86-CMOV-NEXT: movl $63, %eax 1229; X86-CMOV-NEXT: cmovnel %ecx, %eax 1230; X86-CMOV-NEXT: xorl %edx, %edx 1231; X86-CMOV-NEXT: retl 1232; 1233; X64-LABEL: ctlz_i32_sext: 1234; X64: # %bb.0: 1235; X64-NEXT: movl $63, %eax 1236; X64-NEXT: bsrl %edi, %eax 1237; X64-NEXT: retq 1238; 1239; X86-CLZ-LABEL: ctlz_i32_sext: 1240; X86-CLZ: # %bb.0: 1241; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1242; X86-CLZ-NEXT: xorl $31, %eax 1243; X86-CLZ-NEXT: xorl %edx, %edx 1244; X86-CLZ-NEXT: retl 1245; 1246; X64-CLZ-LABEL: ctlz_i32_sext: 1247; X64-CLZ: # %bb.0: 1248; X64-CLZ-NEXT: lzcntl %edi, %eax 1249; X64-CLZ-NEXT: xorl $31, %eax 1250; X64-CLZ-NEXT: retq 1251; 1252; X64-FASTLZCNT-LABEL: ctlz_i32_sext: 1253; X64-FASTLZCNT: # %bb.0: 1254; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax 1255; X64-FASTLZCNT-NEXT: xorl $31, %eax 1256; X64-FASTLZCNT-NEXT: retq 1257; 1258; X86-FASTLZCNT-LABEL: ctlz_i32_sext: 1259; X86-FASTLZCNT: # %bb.0: 1260; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1261; X86-FASTLZCNT-NEXT: xorl $31, %eax 1262; X86-FASTLZCNT-NEXT: xorl %edx, %edx 1263; X86-FASTLZCNT-NEXT: retl 1264 %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 false) 1265 %xor = xor i32 %tmp, 31 1266 %ext = sext i32 %xor to i64 1267 ret i64 %ext 1268} 1269 1270define i64 @ctlz_i32_zext(i32 %x) { 1271; X86-NOCMOV-LABEL: ctlz_i32_zext: 1272; X86-NOCMOV: # %bb.0: 1273; X86-NOCMOV-NEXT: movl 
{{[0-9]+}}(%esp), %eax 1274; X86-NOCMOV-NEXT: testl %eax, %eax 1275; X86-NOCMOV-NEXT: je .LBB21_1 1276; X86-NOCMOV-NEXT: # %bb.2: # %cond.false 1277; X86-NOCMOV-NEXT: bsrl %eax, %eax 1278; X86-NOCMOV-NEXT: xorl $31, %eax 1279; X86-NOCMOV-NEXT: jmp .LBB21_3 1280; X86-NOCMOV-NEXT: .LBB21_1: 1281; X86-NOCMOV-NEXT: movl $32, %eax 1282; X86-NOCMOV-NEXT: .LBB21_3: # %cond.end 1283; X86-NOCMOV-NEXT: xorl $31, %eax 1284; X86-NOCMOV-NEXT: xorl %edx, %edx 1285; X86-NOCMOV-NEXT: retl 1286; 1287; X86-CMOV-LABEL: ctlz_i32_zext: 1288; X86-CMOV: # %bb.0: 1289; X86-CMOV-NEXT: bsrl {{[0-9]+}}(%esp), %ecx 1290; X86-CMOV-NEXT: movl $63, %eax 1291; X86-CMOV-NEXT: cmovnel %ecx, %eax 1292; X86-CMOV-NEXT: xorl %edx, %edx 1293; X86-CMOV-NEXT: retl 1294; 1295; X64-LABEL: ctlz_i32_zext: 1296; X64: # %bb.0: 1297; X64-NEXT: movl $63, %eax 1298; X64-NEXT: bsrl %edi, %eax 1299; X64-NEXT: retq 1300; 1301; X86-CLZ-LABEL: ctlz_i32_zext: 1302; X86-CLZ: # %bb.0: 1303; X86-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1304; X86-CLZ-NEXT: xorl $31, %eax 1305; X86-CLZ-NEXT: xorl %edx, %edx 1306; X86-CLZ-NEXT: retl 1307; 1308; X64-CLZ-LABEL: ctlz_i32_zext: 1309; X64-CLZ: # %bb.0: 1310; X64-CLZ-NEXT: lzcntl %edi, %eax 1311; X64-CLZ-NEXT: xorl $31, %eax 1312; X64-CLZ-NEXT: retq 1313; 1314; X64-FASTLZCNT-LABEL: ctlz_i32_zext: 1315; X64-FASTLZCNT: # %bb.0: 1316; X64-FASTLZCNT-NEXT: lzcntl %edi, %eax 1317; X64-FASTLZCNT-NEXT: xorl $31, %eax 1318; X64-FASTLZCNT-NEXT: retq 1319; 1320; X86-FASTLZCNT-LABEL: ctlz_i32_zext: 1321; X86-FASTLZCNT: # %bb.0: 1322; X86-FASTLZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax 1323; X86-FASTLZCNT-NEXT: xorl $31, %eax 1324; X86-FASTLZCNT-NEXT: xorl %edx, %edx 1325; X86-FASTLZCNT-NEXT: retl 1326 %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 false) 1327 %xor = xor i32 %tmp, 31 1328 %ext = zext i32 %xor to i64 1329 ret i64 %ext 1330} 1331