; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X86-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=X64-CLZ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X64-FASTLZCNT
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt,+fast-lzcnt | FileCheck %s --check-prefix=X86-FASTLZCNT

; Codegen checks for the llvm.cttz.* intrinsics on 32-bit and 64-bit x86, with
; and without cmov, and with and without bmi/lzcnt (tzcnt available).
; The second intrinsic operand selects whether a zero input is poison (i1 true)
; or must produce the bit width (i1 false).

declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)

; i8 cttz, zero input is poison: a single 32-bit bsf/tzcnt is expected, with
; no zero-input handling.
define i8 @cttz_i8(i8 %x) {
; X86-LABEL: cttz_i8:
; X86:       # %bb.0:
; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i8:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i8:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i8 @llvm.cttz.i8(i8 %x, i1 true)
  ret i8 %tmp
}

; i16 cttz, zero input is poison: same shape as the i8 case, returning in ax.
define i16 @cttz_i16(i16 %x) {
; X86-LABEL: cttz_i16:
; X86:       # %bb.0:
; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i16:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i16:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i16:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i16:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i16 @llvm.cttz.i16(i16 %x, i1 true)
  ret i16 %tmp
}

; i32 cttz, zero input is poison: a single bsf/tzcnt and a return.
define i32 @cttz_i32(i32 %x) {
; X86-LABEL: cttz_i32:
; X86:       # %bb.0:
; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  ret i32 %tmp
}

; i64 cttz, zero input is poison. The 32-bit runs split the value into two
; halves: count in the half that is tested first, otherwise count in the other
; half and add 32. The 64-bit runs use one 64-bit bsf/tzcnt.
define i64 @cttz_i64(i64 %x) {
; X86-NOCMOV-LABEL: cttz_i64:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB3_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB3_1:
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    rep bsfl %ecx, %edx
; X86-CMOV-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    addl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64:
; X64:       # %bb.0:
; X64-NEXT:    rep bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB3_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB3_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i64:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i64:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    testl %eax, %eax
; X86-FASTLZCNT-NEXT:    jne .LBB3_1
; X86-FASTLZCNT-NEXT:  # %bb.2:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    addl $32, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
; X86-FASTLZCNT-NEXT:  .LBB3_1:
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i64 @llvm.cttz.i64(i64 %x, i1 true)
  ret i64 %tmp
}

; Promote i8 cttz to i32 and set bit 8 (or with 0x100) so the promoted value is
; never zero; this prevents the (slow) zero-src bsf case and yields 8 for a
; zero i8 input.
define i8 @cttz_i8_zero_test(i8 %n) {
; X86-LABEL: cttz_i8_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl $256, %eax # imm = 0x100
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    orl $256, %edi # imm = 0x100
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl $256, %eax # imm = 0x100
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $256, %edi # imm = 0x100
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i8_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    orl $256, %edi # imm = 0x100
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i8_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl $256, %eax # imm = 0x100
; X86-FASTLZCNT-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
  ret i8 %tmp1
}

; Promote i16 cttz to i32 and set bit 16 (or with 0x10000) so the promoted
; value is never zero; this prevents the (slow) zero-src bsf case and yields 16
; for a zero i16 input.
define i16 @cttz_i16_zero_test(i16 %n) {
; X86-LABEL: cttz_i16_zero_test:
; X86:       # %bb.0:
; X86-NEXT:    movl $65536, %eax # imm = 0x10000
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    orl $65536, %edi # imm = 0x10000
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i16_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl $65536, %eax # imm = 0x10000
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orl $65536, %edi # imm = 0x10000
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i16_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    orl $65536, %edi # imm = 0x10000
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i16_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl $65536, %eax # imm = 0x10000
; X86-FASTLZCNT-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
  ret i16 %tmp1
}

; i32 cttz with a defined result (32) for a zero input. Without tzcnt, the
; no-cmov run tests and branches around bsf, the cmov run selects 32 via cmovne
; on the flags produced by bsf, and the x86-64 run preloads 32 into the
; destination register. With tzcnt a single instruction is expected.
; In the X86-CMOV run the flags of bsf are consumed, so a rep prefix is rejected
; explicitly (a bare bsfl pattern would also match a rep-prefixed line).
define i32 @cttz_i32_zero_test(i32 %n) {
; X86-NOCMOV-LABEL: cttz_i32_zero_test:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    je .LBB6_1
; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB6_1:
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i32_zero_test:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %eax
; X86-CMOV-NEXT:    cmovnel %ecx, %eax
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
  ret i32 %tmp1
}

; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
define i64 @cttz_i64_zero_test(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOCMOV-NOT:     rep
; X86-NOCMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %edx
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    je .LBB7_2
; X86-NOCMOV-NEXT:  # %bb.1:
; X86-NOCMOV-NEXT:    movl %edx, %eax
; X86-NOCMOV-NEXT:  .LBB7_2:
; X86-NOCMOV-NEXT:    testl %ecx, %ecx
; X86-NOCMOV-NEXT:    jne .LBB7_3
; X86-NOCMOV-NEXT:  # %bb.4:
; X86-NOCMOV-NEXT:    addl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB7_3:
; X86-NOCMOV-NEXT:    rep bsfl %ecx, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %edx
; X86-CMOV-NEXT:    cmovnel %ecx, %edx
; X86-CMOV-NEXT:    addl $32, %edx
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl %eax, %eax
; X86-CMOV-NEXT:    cmovel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test:
; X64:       # %bb.0:
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    rep bsfq %rdi, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB7_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    addl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB7_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntq %rdi, %rax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i64_zero_test:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntq %rdi, %rax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i64_zero_test:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    testl %eax, %eax
; X86-FASTLZCNT-NEXT:    jne .LBB7_1
; X86-FASTLZCNT-NEXT:  # %bb.2:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    addl $32, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
; X86-FASTLZCNT-NEXT:  .LBB7_1:
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
  ret i64 %tmp1
}

; Known-bits test: or-ing in 2 guarantees bit 1 is set, so the count is 0 or 1
; and the trailing 'and 1' is redundant; no and instruction is expected in the
; output.
define i8 @cttz_i8_knownbits(i8 %x) {
; X86-LABEL: cttz_i8_knownbits:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orb $2, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    rep bsfl %eax, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i8_knownbits:
; X64:       # %bb.0:
; X64-NEXT:    orb $2, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    rep bsfl %eax, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i8_knownbits:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    orb $2, %al
; X86-CLZ-NEXT:    movzbl %al, %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i8_knownbits:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    orb $2, %dil
; X64-CLZ-NEXT:    movzbl %dil, %eax
; X64-CLZ-NEXT:    tzcntl %eax, %eax
; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i8_knownbits:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    orb $2, %dil
; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
; X64-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i8_knownbits:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    orb $2, %al
; X86-FASTLZCNT-NEXT:    movzbl %al, %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-FASTLZCNT-NEXT:    retl
  %x2 = or i8 %x, 2
  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true)
  %tmp2 = and i8 %tmp, 1
  ret i8 %tmp2
}

; Make sure we can detect that the input is non-zero (the sign bit is or-ed in)
; and avoid a cmov after BSF, even though the intrinsic is called with a defined
; zero result (i1 false).
; This is relevant for 32-bit mode without tzcnt.
define i64 @cttz_i64_zero_test_knownneverzero(i64 %n) {
; X86-NOCMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    jne .LBB9_1
; X86-NOCMOV-NEXT:  # %bb.2:
; X86-NOCMOV-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-NOCMOV-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    orl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB9_1:
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-CMOV-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT:    rep bsfl %ecx, %edx
; X86-CMOV-NEXT:    rep bsfl %eax, %eax
; X86-CMOV-NEXT:    orl $32, %eax
; X86-CMOV-NEXT:    testl %ecx, %ecx
; X86-CMOV-NEXT:    cmovnel %edx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i64_zero_test_knownneverzero:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    rep bsfq %rax, %rax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    testl %eax, %eax
; X86-CLZ-NEXT:    jne .LBB9_1
; X86-CLZ-NEXT:  # %bb.2:
; X86-CLZ-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    orl $32, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
; X86-CLZ-NEXT:  .LBB9_1:
; X86-CLZ-NEXT:    tzcntl %eax, %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-CLZ-NEXT:    orq %rdi, %rax
; X64-CLZ-NEXT:    tzcntq %rax, %rax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i64_zero_test_knownneverzero:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-FASTLZCNT-NEXT:    orq %rdi, %rax
; X64-FASTLZCNT-NEXT:    tzcntq %rax, %rax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i64_zero_test_knownneverzero:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    testl %eax, %eax
; X86-FASTLZCNT-NEXT:    jne .LBB9_1
; X86-FASTLZCNT-NEXT:  # %bb.2:
; X86-FASTLZCNT-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
; X86-FASTLZCNT-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    orl $32, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
; X86-FASTLZCNT-NEXT:  .LBB9_1:
; X86-FASTLZCNT-NEXT:    tzcntl %eax, %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %o = or i64 %n, -9223372036854775808 ; 0x8000000000000000
  %tmp1 = call i64 @llvm.cttz.i64(i64 %o, i1 false)
  ret i64 %tmp1
}

; optsize: without tzcnt, a plain bsf with no rep prefix is expected.
define i32 @cttz_i32_osize(i32 %x) optsize {
; X86-LABEL: cttz_i32_osize:
; X86:       # %bb.0:
; X86-NOT:     rep
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32_osize:
; X64:       # %bb.0:
; X64-NOT:     rep
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_osize:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_osize:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_osize:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_osize:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  ret i32 %tmp
}

; minsize: same expectation as optsize, a plain bsf with no rep prefix.
define i32 @cttz_i32_msize(i32 %x) minsize {
; X86-LABEL: cttz_i32_msize:
; X86:       # %bb.0:
; X86-NOT:     rep
; X86-NEXT:    bsfl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: cttz_i32_msize:
; X64:       # %bb.0:
; X64-NOT:     rep
; X64-NEXT:    bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_msize:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_msize:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_msize:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_msize:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  ret i32 %tmp
}

; i32 cttz (zero input defined) sign-extended to i64: the count is never
; negative, so the 32-bit runs just clear edx and the 64-bit runs need no
; explicit extension.
; In the X86-CMOV run the flags of bsf are consumed by cmovne, so a rep prefix
; is rejected explicitly (a bare bsfl pattern would also match a rep-prefixed
; line).
define i64 @cttz_i32_sext(i32 %x) {
; X86-NOCMOV-LABEL: cttz_i32_sext:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    je .LBB12_1
; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB12_1:
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i32_sext:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %eax
; X86-CMOV-NEXT:    cmovnel %ecx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i32_sext:
; X64:       # %bb.0:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_sext:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_sext:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_sext:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_sext:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32(i32 %x, i1 false)
  %ext = sext i32 %tmp to i64
  ret i64 %ext
}

; i32 cttz (zero input defined) zero-extended to i64: expected output matches
; the sign-extended variant.
; In the X86-CMOV run the flags of bsf are consumed by cmovne, so a rep prefix
; is rejected explicitly (a bare bsfl pattern would also match a rep-prefixed
; line).
define i64 @cttz_i32_zext(i32 %x) {
; X86-NOCMOV-LABEL: cttz_i32_zext:
; X86-NOCMOV:       # %bb.0:
; X86-NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT:    testl %eax, %eax
; X86-NOCMOV-NEXT:    je .LBB13_1
; X86-NOCMOV-NEXT:  # %bb.2: # %cond.false
; X86-NOCMOV-NEXT:    rep bsfl %eax, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
; X86-NOCMOV-NEXT:  .LBB13_1:
; X86-NOCMOV-NEXT:    movl $32, %eax
; X86-NOCMOV-NEXT:    xorl %edx, %edx
; X86-NOCMOV-NEXT:    retl
;
; X86-CMOV-LABEL: cttz_i32_zext:
; X86-CMOV:       # %bb.0:
; X86-CMOV-NOT:     rep
; X86-CMOV-NEXT:    bsfl {{[0-9]+}}(%esp), %ecx
; X86-CMOV-NEXT:    movl $32, %eax
; X86-CMOV-NEXT:    cmovnel %ecx, %eax
; X86-CMOV-NEXT:    xorl %edx, %edx
; X86-CMOV-NEXT:    retl
;
; X64-LABEL: cttz_i32_zext:
; X64:       # %bb.0:
; X64-NEXT:    movl $32, %eax
; X64-NEXT:    rep bsfl %edi, %eax
; X64-NEXT:    retq
;
; X86-CLZ-LABEL: cttz_i32_zext:
; X86-CLZ:       # %bb.0:
; X86-CLZ-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT:    xorl %edx, %edx
; X86-CLZ-NEXT:    retl
;
; X64-CLZ-LABEL: cttz_i32_zext:
; X64-CLZ:       # %bb.0:
; X64-CLZ-NEXT:    tzcntl %edi, %eax
; X64-CLZ-NEXT:    retq
;
; X64-FASTLZCNT-LABEL: cttz_i32_zext:
; X64-FASTLZCNT:       # %bb.0:
; X64-FASTLZCNT-NEXT:    tzcntl %edi, %eax
; X64-FASTLZCNT-NEXT:    retq
;
; X86-FASTLZCNT-LABEL: cttz_i32_zext:
; X86-FASTLZCNT:       # %bb.0:
; X86-FASTLZCNT-NEXT:    tzcntl {{[0-9]+}}(%esp), %eax
; X86-FASTLZCNT-NEXT:    xorl %edx, %edx
; X86-FASTLZCNT-NEXT:    retl
  %tmp = call i32 @llvm.cttz.i32(i32 %x, i1 false)
  %ext = zext i32 %tmp to i64
  ret i64 %ext
}