; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64

;; Use cttz to test if we properly prove never-zero. There is a very
;; simple transform from cttz -> cttz_zero_undef if its operand is
;; known never zero.
declare i32 @llvm.cttz.i32(i32, i1)
declare i32 @llvm.uadd.sat.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.smax.i32(i32, i32)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.bswap.i32(i32)
declare i32 @llvm.bitreverse.i32(i32)
declare i32 @llvm.ctpop.i32(i32)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare i32 @llvm.abs.i32(i32, i1)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i32 @llvm.fshr.i32(i32, i32, i32)
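
;; As an illustrative sketch (not itself checked by this file): once the
;; operand is proven non-zero, e.g. %nz = or i32 %x, 1, a call such as
;;   %r = call i32 @llvm.cttz.i32(i32 %nz, i1 false)
;; may be lowered as if the i1 argument were true (the cttz_zero_undef form).
;; That is why the known-nonzero tests below expect a bare rep bsfl, while the
;; maybe-zero tests keep extra code (movl $32, plus a cmovnel on X86) to
;; produce 32 for a zero input.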

define i32 @or_known_nonzero(i32 %x) {
; X86-LABEL: or_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: or_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %z = or i32 %x, 1
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @or_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: or_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: or_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl %esi, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %z = or i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @select_known_nonzero(i1 %c, i32 %x) {
; X86-LABEL: select_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $122, %ecx
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: select_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %esi
; X64-NEXT: testb $1, %dil
; X64-NEXT: movl $122, %eax
; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %y = or i32 %x, 1
  %z = select i1 %c, i32 %y, i32 122
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X86-LABEL: select_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: select_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %esi
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: testb $1, %dil
; X64-NEXT: cmovnel %esi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %y = or i32 %x, 1
  %z = select i1 %c, i32 %y, i32 0
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_known_nonzero_1s_bit_set(i32 %x) {
; X86-LABEL: shl_known_nonzero_1s_bit_set:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $123, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_1s_bit_set:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $123, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %z = shl i32 123, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: shl_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %y = or i32 %yy, 256
  %z = shl nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-LABEL: shl_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %y = or i32 %yy, 256
  %z = shl nuw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @shl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: shl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: shl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %z = shl nuw nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @uaddsat_known_nonzero(i32 %x) {
; X86-LABEL: uaddsat_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: incl %eax
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: uaddsat_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: incl %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovnel %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: uaddsat_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovael %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: uaddsat_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: cmovael %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umax_known_nonzero(i32 %x, i32 %y) {
; X86-LABEL: umax_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: cmpl %edx, %eax
; X86-NEXT: cmoval %eax, %edx
; X86-NEXT: rep bsfl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umax_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cmpl %eax, %edi
; X64-NEXT: cmoval %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %yy = shl nuw i32 4, %y
  %z = call i32 @llvm.umax.i32(i32 %x, i32 %yy)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umax_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umax_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: cmoval %ecx, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umax_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmoval %edi, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: umin_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: addl $4, %eax
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: cmovbl %edx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: umin_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: addl $4, %esi
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovbl %eax, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %x = shl nuw i32 4, %xx
  %y = add nuw nsw i32 %yy, 4
  %z = call i32 @llvm.umin.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: umin_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $54, %eax
; X86-NEXT: movl $54, %ecx
; X86-NEXT: cmovbl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: umin_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %ecx
; X64-NEXT: cmovbl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.umin.i32(i32 %x, i32 54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smin_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smin_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: addl $4, %eax
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: addl $4, %esi
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovll %eax, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %x = shl nuw i32 4, %xx
  %y = add nuw nsw i32 %yy, 4
  %z = call i32 @llvm.smin.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smin_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_known_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $-54, %eax
; X86-NEXT: movl $-54, %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_known_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $-54, %edi
; X64-NEXT: movl $-54, %eax
; X64-NEXT: cmovll %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-LABEL: smin_known_zero_vec:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [4294967242,4294967273,4294967284,4294967295]
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: pcmpgtd %xmm0, %xmm2
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpeqd %xmm1, %xmm0
; X86-NEXT: psrld $31, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: smin_known_zero_vec:
; X64: # %bb.0:
; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
; X64-NEXT: retq
  %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
  %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
  %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
  %ret = zext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %ret
}

define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smin_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $54, %eax
; X86-NEXT: movl $54, %ecx
; X86-NEXT: cmovll %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smin_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %ecx
; X64-NEXT: cmovll %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.smin.i32(i32 %x, i32 54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
; X86-LABEL: smax_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $4, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: addl $4, %eax
; X86-NEXT: cmpl %eax, %edx
; X86-NEXT: cmovgl %edx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $4, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: addl $4, %esi
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: cmovgl %eax, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %x = shl nuw i32 4, %xx
  %y = add nuw nsw i32 %yy, 4
  %z = call i32 @llvm.smax.i32(i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @smax_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $55, %eax
; X86-NEXT: movl $54, %ecx
; X86-NEXT: cmovgel %eax, %ecx
; X86-NEXT: rep bsfl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: cmpl $55, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovgel %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.smax.i32(i32 %x, i32 54)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-LABEL: smax_known_zero_vec:
; X86: # %bb.0:
; X86-NEXT: movdqa {{.*#+}} xmm1 = [54,23,12,1]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pcmpgtd %xmm1, %xmm2
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pandn %xmm1, %xmm2
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: pcmpeqd %xmm1, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: pcmpeqd %xmm1, %xmm0
; X86-NEXT: psrld $31, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: smax_known_zero_vec:
; X64: # %bb.0:
; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
; X64-NEXT: retq
  %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
  %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
  %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
  %ret = zext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %ret
}

define i32 @smax_known_zero(i32 %x, i32 %y) {
; X86-LABEL: smax_known_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: movl $-1, %ecx
; X86-NEXT: cmovnsl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: smax_known_zero:
; X64: # %bb.0:
; X64-NEXT: testl %edi, %edi
; X64-NEXT: movl $-1, %ecx
; X64-NEXT: cmovnsl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 256
  %shr = lshr i32 %x, %y
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %sub
  %z = or i32 %shl, %shr
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotr_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %shr = lshr i32 %x, %y
  %sub = sub i32 32, %y
  %shl = shl i32 %x, %sub
  %z = or i32 %shl, %shr
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_with_fshr_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotr_with_fshr_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 256
  %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotr_with_fshr_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotl_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 256
  %shl = shl i32 %x, %y
  %sub = sub i32 32, %y
  %shr = lshr i32 %x, %sub
  %z = or i32 %shr, %shl
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %shl = shl i32 %x, %y
  %sub = sub i32 32, %y
  %shr = lshr i32 %x, %sub
  %z = or i32 %shr, %shl
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_with_fshl_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: rotl_with_fshl_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 256
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: rotl_with_fshl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sra_known_nonzero_sign_bit_set(i32 %x) {
; X86-LABEL: sra_known_nonzero_sign_bit_set:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_known_nonzero_sign_bit_set:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %z = ashr i32 2147606891, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sra_known_nonzero_exact(i32 %x, i32 %yy) {
; X86-LABEL: sra_known_nonzero_exact:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_known_nonzero_exact:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %y = or i32 %yy, 256
  %z = ashr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sra_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sra_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sra_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %z = ashr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @srl_known_nonzero_sign_bit_set(i32 %x) {
; X86-LABEL: srl_known_nonzero_sign_bit_set:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_known_nonzero_sign_bit_set:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %z = lshr i32 2147606891, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @srl_known_nonzero_exact(i32 %x, i32 %yy) {
; X86-LABEL: srl_known_nonzero_exact:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_known_nonzero_exact:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %y = or i32 %yy, 256
  %z = lshr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @srl_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: srl_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: srl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %z = lshr exact i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: udiv_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl {{[0-9]+}}(%esp)
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: udiv_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 64
  %z = udiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: udiv_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl {{[0-9]+}}(%esp)
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: udiv_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movl $32, %ecx
; X64-NEXT: rep bsfl %eax, %ecx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
  %z = udiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: sdiv_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $64, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sdiv_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 64
  %z = sdiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: sdiv_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cltd
; X86-NEXT: idivl {{[0-9]+}}(%esp)
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sdiv_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movl $32, %ecx
; X64-NEXT: rep bsfl %eax, %ecx
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: retq
  %z = sdiv exact i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @add_known_nonzero(i32 %xx, i32 %y) {
; X86-LABEL: add_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 1
  %z = add nuw i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; X86-LABEL: add_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $1, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: add_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 1
  %z = add nsw i32 %x, %y
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_known_nonzero_neg_case(i32 %xx) {
; X86-LABEL: sub_known_nonzero_neg_case:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: negl %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_known_nonzero_neg_case:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: negl %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = shl nuw nsw i32 256, %xx
  %z = sub i32 0, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
; X86-LABEL: sub_known_nonzero_ne_case:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $64, %ecx
; X86-NEXT: andl $-65, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_known_nonzero_ne_case:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: andl $-65, %edi
; X64-NEXT: subl %eax, %edi
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %x = or i32 %xx, 64
  %y = and i32 %xx, -65
  %z = sub i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_maybe_zero(i32 %x) {
; X86-LABEL: sub_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $64, %ecx
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: orl $64, %ecx
; X64-NEXT: subl %edi, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %y = or i32 %x, 64
  %z = sub i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sub_maybe_zero2(i32 %x) {
; X86-LABEL: sub_maybe_zero2:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
; X64-NEXT: negl %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %z = sub i32 0, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %y = or i32 %yy, 256
  %z = mul nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; X86-LABEL: mul_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %esi, %eax
; X64-NEXT: retq
  %y = or i32 %yy, 256
  %z = mul nuw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; X86-LABEL: mul_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: mul_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: imull %esi, %edi
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
  %z = mul nuw nsw i32 %y, %x
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
; X86-LABEL: bitcast_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-NEXT: pslld $23, %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: cvttps2dq %xmm0, %xmm0
; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,256,u,u,u,u,u,u]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: vpslld $23, %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
  %z = bitcast <2 x i16> %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X86-LABEL: bitcast_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = bitcast <2 x i16> %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @bitcast_from_float(float %x) {
; X86-LABEL: bitcast_from_float:
; X86: # %bb.0:
; X86-NEXT: bsfl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: bitcast_from_float:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = bitcast float %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @zext_known_nonzero(i16 %xx) {
; X86-LABEL: zext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = shl nuw nsw i16 256, %xx
  %z = zext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @zext_maybe_zero(i16 %x) {
; X86-LABEL: zext_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: zext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = zext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sext_known_nonzero(i16 %xx) {
; X86-LABEL: sext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = shl nuw nsw i16 256, %xx
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}

define i32 @sext_maybe_zero(i16 %x) {
; X86-LABEL: sext_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: movl $32, %eax
; X64-NEXT: rep bsfl %ecx, %eax
; X64-NEXT: retq
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}