; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64

@array = weak dso_local global [4 x i32] zeroinitializer

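; The shift is folded into the address computation: the index is
; ((x >> 2) & 3) scaled by 4 bytes, and ((x >> 2) & 3) * 4 == x & 12,
; so a single andl $12 computes the byte offset.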
define dso_local i32 @test_lshr_and(i32 %x) {
; X86-LABEL: test_lshr_and:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl $12, %eax
; X86-NEXT:    movl array(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_lshr_and:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    andl $12, %edi
; X64-NEXT:    movl array(%rdi), %eax
; X64-NEXT:    retq
  %tmp2 = lshr i32 %x, 2
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr [4 x i32], ptr @array, i32 0, i32 %tmp3
  %tmp5 = load i32, ptr %tmp4, align 4
  ret i32 %tmp5
}

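; An `exact` shift guarantees that no set bits are shifted out, so the
; shift can be merged with the GEP's scale-by-4: on X86 a shift of 3
; leaves a net shift of 1, and a shift of 2 cancels the scale entirely
; (test_exact3/test_exact6).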
define dso_local ptr @test_exact1(i32 %a, i32 %b, ptr %x) {
; X86-LABEL: test_exact1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact1:
; X64:       # %bb.0:
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    sarl $3, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = ashr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact2(i32 %a, i32 %b, ptr %x) {
; X86-LABEL: test_exact2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact2:
; X64:       # %bb.0:
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    sarl $3, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = ashr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact3(i32 %a, i32 %b, ptr %x) {
; X86-LABEL: test_exact3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact3:
; X64:       # %bb.0:
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    sarl $2, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    leaq (%rdx,%rax,4), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = ashr exact i32 %sub, 2
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact4(i32 %a, i32 %b, ptr %x) {
; X86-LABEL: test_exact4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact4:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    leaq (%rsi,%rdx), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = lshr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact5(i32 %a, i32 %b, ptr %x) {
; X86-LABEL: test_exact5:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact5:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    leaq (%rsi,%rdx), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = lshr exact i32 %sub, 3
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

define dso_local ptr @test_exact6(i32 %a, i32 %b, ptr %x) {
; X86-LABEL: test_exact6:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_exact6:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    leaq (%rsi,%rdx), %rax
; X64-NEXT:    retq
  %sub = sub i32 %b, %a
  %shr = lshr exact i32 %sub, 2
  %gep = getelementptr inbounds i32, ptr %x, i32 %shr
  ret ptr %gep
}

; PR42644 - https://bugs.llvm.org/show_bug.cgi?id=42644

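; (ashr (add (shl x, 32), 1 << 32), 32) is a sign-extension of the
; incremented low half: the shl/ashr pair sign-extends from bit 31, and
; adding 2^32 increments that 32-bit field, so this becomes incl + movslq
; on X64.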
define i64 @ashr_add_shl_i32(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    incl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32:
; X64:       # %bb.0:
; X64-NEXT:    incl %edi
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 32
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 32
  ret i64 %conv1
}

define i64 @ashr_add_shl_i8(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addb $2, %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i8:
; X64:       # %bb.0:
; X64-NEXT:    addb $2, %dil
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 56
  %sext = add i64 %conv, 144115188075855872
  %conv1 = ashr i64 %sext, 56
  ret i64 %conv1
}

define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
; X86-LABEL: ashr_add_shl_v4i8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
; X86-NEXT:    incb %dh
; X86-NEXT:    movsbl %dh, %esi
; X86-NEXT:    incb %ch
; X86-NEXT:    movsbl %ch, %edi
; X86-NEXT:    incb %dl
; X86-NEXT:    movsbl %dl, %edx
; X86-NEXT:    incb %cl
; X86-NEXT:    movsbl %cl, %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %edi, 4(%eax)
; X86-NEXT:    movl %esi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: ashr_add_shl_v4i8:
; X64:       # %bb.0:
; X64-NEXT:    pslld $24, %xmm0
; X64-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    psrad $24, %xmm0
; X64-NEXT:    retq
  %conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>
  %sext = add <4 x i32> %conv, <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
  %conv1 = ashr <4 x i32> %sext, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %conv1
}

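; With a shift of 36, the low 36 bits of the shl are zero and the added
; 2^32 sits below bit 36, so no carry reaches the bits that survive the
; ashr and the add is dropped. The mismatch_shifts tests check that the
; fold is skipped when the shl and ashr amounts differ.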
define i64 @ashr_add_shl_i36(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_i36:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $4, %edx
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    sarl $4, %eax
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i36:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $36, %rax
; X64-NEXT:    sarq $36, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 36
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 36
  ret i64 %conv1
}

define i64 @ashr_add_shl_mismatch_shifts1(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_mismatch_shifts1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    incl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_mismatch_shifts1:
; X64:       # %bb.0:
; X64-NEXT:    shlq $8, %rdi
; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    sarq $32, %rax
; X64-NEXT:    retq
  %conv = shl i64 %r, 8
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 32
  ret i64 %conv1
}

define i64 @ashr_add_shl_mismatch_shifts2(i64 %r) nounwind {
; X86-LABEL: ashr_add_shl_mismatch_shifts2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shrl $8, %edx
; X86-NEXT:    incl %edx
; X86-NEXT:    shrdl $8, %edx, %eax
; X86-NEXT:    shrl $8, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_mismatch_shifts2:
; X64:       # %bb.0:
; X64-NEXT:    shrq $8, %rdi
; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    shrq $8, %rax
; X64-NEXT:    retq
  %conv = lshr i64 %r, 8
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 8
  ret i64 %conv1
}

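; The extra-use tests store the intermediate shl/add values, so the
; shl+add must still be materialized and only the final ashr reuses the
; stored value; the sext-in-register shortcut is not taken.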
define dso_local i32 @ashr_add_shl_i32_i8_extra_use1(i32 %r, ptr %p) nounwind {
; X86-LABEL: ashr_add_shl_i32_i8_extra_use1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $24, %eax
; X86-NEXT:    addl $33554432, %eax # imm = 0x2000000
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    sarl $24, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32_i8_extra_use1:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $24, %edi
; X64-NEXT:    leal 33554432(%rdi), %eax
; X64-NEXT:    movl %eax, (%rsi)
; X64-NEXT:    sarl $24, %eax
; X64-NEXT:    retq
  %conv = shl i32 %r, 24
  %sext = add i32 %conv, 33554432
  store i32 %sext, ptr %p
  %conv1 = ashr i32 %sext, 24
  ret i32 %conv1
}

define dso_local i32 @ashr_add_shl_i32_i8_extra_use2(i32 %r, ptr %p) nounwind {
; X86-LABEL: ashr_add_shl_i32_i8_extra_use2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $24, %eax
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    addl $33554432, %eax # imm = 0x2000000
; X86-NEXT:    sarl $24, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32_i8_extra_use2:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $24, %edi
; X64-NEXT:    movl %edi, (%rsi)
; X64-NEXT:    leal 33554432(%rdi), %eax
; X64-NEXT:    sarl $24, %eax
; X64-NEXT:    retq
  %conv = shl i32 %r, 24
  store i32 %conv, ptr %p
  %sext = add i32 %conv, 33554432
  %conv1 = ashr i32 %sext, 24
  ret i32 %conv1
}

define dso_local i32 @ashr_add_shl_i32_i8_extra_use3(i32 %r, ptr %p1, ptr %p2) nounwind {
; X86-LABEL: ashr_add_shl_i32_i8_extra_use3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $24, %eax
; X86-NEXT:    movl %eax, (%edx)
; X86-NEXT:    addl $33554432, %eax # imm = 0x2000000
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    sarl $24, %eax
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_shl_i32_i8_extra_use3:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $24, %edi
; X64-NEXT:    movl %edi, (%rsi)
; X64-NEXT:    leal 33554432(%rdi), %eax
; X64-NEXT:    movl %eax, (%rdx)
; X64-NEXT:    sarl $24, %eax
; X64-NEXT:    retq
  %conv = shl i32 %r, 24
  store i32 %conv, ptr %p1
  %sext = add i32 %conv, 33554432
  store i32 %sext, ptr %p2
  %conv1 = ashr i32 %sext, 24
  ret i32 %conv1
}

%"class.QPainterPath" = type { double, double, i32 }

define dso_local void @PR42880(i32 %t0) {
; X86-LABEL: PR42880:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    testb %al, %al
; X86-NEXT:    je .LBB16_1
; X86-NEXT:  # %bb.2: # %if
; X86-NEXT:  .LBB16_1: # %then
;
; X64-LABEL: PR42880:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    je .LBB16_1
; X64-NEXT:  # %bb.2: # %if
; X64-NEXT:  .LBB16_1: # %then
  %sub = add nsw i32 %t0, -1
  %add.ptr.i94 = getelementptr inbounds %"class.QPainterPath", ptr null, i32 %sub
  %x = ptrtoint ptr %add.ptr.i94 to i32
  %sub2 = sub i32 %x, 0
  %div = sdiv exact i32 %sub2, 24
  br i1 poison, label %if, label %then

then:
  %t1 = xor i32 %div, -1
  unreachable

if:
  unreachable
}

; The mul here is the equivalent of (neg (shl X, 32)).
define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
; X86-LABEL: ashr_add_neg_shl_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl $1, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_neg_shl_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    cltq
; X64-NEXT:    retq
  %conv = mul i64 %r, -4294967296
  %sext = add i64 %conv, 4294967296
  %conv1 = ashr i64 %sext, 32
  ret i64 %conv1
}

; The mul here is the equivalent of (neg (shl X, 56)).
define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
; X86-LABEL: ashr_add_neg_shl_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb $2, %al
; X86-NEXT:    subb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    retl
;
; X64-LABEL: ashr_add_neg_shl_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb $2, %al
; X64-NEXT:    subb %dil, %al
; X64-NEXT:    movsbq %al, %rax
; X64-NEXT:    retq
  %conv = mul i64 %r, -72057594037927936
  %sext = add i64 %conv, 144115188075855872
  %conv1 = ashr i64 %sext, 56
  ret i64 %conv1
}

; The mul here is the equivalent of (neg (shl X, 24)).
define <4 x i32> @ashr_add_neg_shl_v4i8(<4 x i32> %r) nounwind {
; X86-LABEL: ashr_add_neg_shl_v4i8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $1, %cl
; X86-NEXT:    movb $1, %dl
; X86-NEXT:    subb {{[0-9]+}}(%esp), %dl
; X86-NEXT:    movsbl %dl, %edx
; X86-NEXT:    movb $1, %ch
; X86-NEXT:    subb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    movsbl %ch, %esi
; X86-NEXT:    movb $1, %ch
; X86-NEXT:    subb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    movsbl %ch, %edi
; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movsbl %cl, %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edi, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: ashr_add_neg_shl_v4i8:
; X64:       # %bb.0:
; X64-NEXT:    pslld $24, %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16777216,16777216,16777216,16777216]
; X64-NEXT:    psubd %xmm0, %xmm1
; X64-NEXT:    psrad $24, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %conv = mul <4 x i32> %r, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
  %sext = add <4 x i32> %conv, <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
  %conv1 = ashr <4 x i32> %sext, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %conv1
}

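; Logic trees whose leaves share a shift amount reassociate so only one
; shift is needed: (op (op (shl a, s), b), (op (shl c, s), d)) becomes
; (op (shl (op a, c), s), (op b, d)). The mismatching variants check
; that differing shift amounts block the fold.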
define i32 @or_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: or_tree_with_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: or_tree_with_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    orl %edx, %edi
; X64-NEXT:    shll $16, %edi
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, 16
  %c.shifted = shl i32 %c, 16
  %or.ab = or i32 %a.shifted, %b
  %or.cd = or i32 %c.shifted, %d
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define i32 @xor_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: xor_tree_with_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $16, %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: xor_tree_with_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    xorl %edx, %edi
; X64-NEXT:    shrl $16, %edi
; X64-NEXT:    xorl %ecx, %eax
; X64-NEXT:    xorl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = lshr i32 %a, 16
  %c.shifted = lshr i32 %c, 16
  %xor.ab = xor i32 %a.shifted, %b
  %xor.cd = xor i32 %d, %c.shifted
  %r = xor i32 %xor.ab, %xor.cd
  ret i32 %r
}

define i32 @and_tree_with_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: and_tree_with_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: and_tree_with_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    andl %edx, %edi
; X64-NEXT:    sarl $16, %edi
; X64-NEXT:    andl %ecx, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = ashr i32 %a, 16
  %c.shifted = ashr i32 %c, 16
  %and.ab = and i32 %b, %a.shifted
  %and.cd = and i32 %c.shifted, %d
  %r = and i32 %and.ab, %and.cd
  ret i32 %r
}

define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %s) {
; X86-LABEL: logic_tree_with_shifts_var_i32:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: logic_tree_with_shifts_var_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    orl %edx, %edi
; X64-NEXT:    movl %r8d, %ecx
; X64-NEXT:    shll %cl, %edi
; X64-NEXT:    orl %esi, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, %s
  %c.shifted = shl i32 %c, %s
  %or.ab = or i32 %b, %a.shifted
  %or.cd = or i32 %d, %c.shifted
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define i32 @logic_tree_with_mismatching_shifts_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: logic_tree_with_mismatching_shifts_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $15, %ecx
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: logic_tree_with_mismatching_shifts_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %eax
; X64-NEXT:    shll $15, %edi
; X64-NEXT:    shll $16, %eax
; X64-NEXT:    orl %esi, %edi
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, 15
  %c.shifted = shl i32 %c, 16
  %or.ab = or i32 %a.shifted, %b
  %or.cd = or i32 %c.shifted, %d
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

define i32 @logic_tree_with_mismatching_shifts2_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; X86-LABEL: logic_tree_with_mismatching_shifts2_i32:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $16, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: logic_tree_with_mismatching_shifts2_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %eax
; X64-NEXT:    shll $16, %edi
; X64-NEXT:    shrl $16, %eax
; X64-NEXT:    orl %esi, %edi
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %a.shifted = shl i32 %a, 16
  %c.shifted = lshr i32 %c, 16
  %or.ab = or i32 %a.shifted, %b
  %or.cd = or i32 %c.shifted, %d
  %r = or i32 %or.ab, %or.cd
  ret i32 %r
}

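; Vector forms of the same reassociation: matching splat shift amounts
; combine into a single pslld, while mismatched amounts keep both shifts.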
define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; X86-LABEL: or_tree_with_shifts_vec_i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $16, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $16, %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    shll $16, %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    shll $16, %edi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    movl %esi, 8(%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl $4
;
; X64-LABEL: or_tree_with_shifts_vec_i32:
; X64:       # %bb.0:
; X64-NEXT:    por %xmm2, %xmm0
; X64-NEXT:    pslld $16, %xmm0
; X64-NEXT:    por %xmm3, %xmm1
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    retq
  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
  %c.shifted = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %or.ab = or <4 x i32> %a.shifted, %b
  %or.cd = or <4 x i32> %c.shifted, %d
  %r = or <4 x i32> %or.ab, %or.cd
  ret <4 x i32> %r
}

define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; X86-LABEL: or_tree_with_mismatching_shifts_vec_i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %esi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    orl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    shll $17, %edi
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    orl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl $4
;
; X64-LABEL: or_tree_with_mismatching_shifts_vec_i32:
; X64:       # %bb.0:
; X64-NEXT:    pslld $16, %xmm0
; X64-NEXT:    pslld $17, %xmm2
; X64-NEXT:    por %xmm1, %xmm0
; X64-NEXT:    por %xmm3, %xmm2
; X64-NEXT:    por %xmm2, %xmm0
; X64-NEXT:    retq
  %a.shifted = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
  %c.shifted = shl <4 x i32> %c, <i32 17, i32 17, i32 17, i32 17>
  %or.ab = or <4 x i32> %a.shifted, %b
  %or.cd = or <4 x i32> %c.shifted, %d
  %r = or <4 x i32> %or.ab, %or.cd
  ret <4 x i32> %r
}

; Reproducer for a DAGCombiner::combineShiftOfShiftedLogic bug. DAGCombiner
; needs to check that the sum of the shift amounts fits in i8, which is the
; legal type used to describe X86 shift amounts. Verify that we do not try to
; create a shift with 130+160 as the shift amount (290 does not fit in i8),
; and verify that the stored value does not depend on %a1 (a total left shift
; of 290 bits moves it entirely out of the 192-bit result).
define void @combineShiftOfShiftedLogic(i128 %a1, i32 %a2, ptr %p) {
; X86-LABEL: combineShiftOfShiftedLogic:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, 20(%ecx)
; X86-NEXT:    movl $0, 16(%ecx)
; X86-NEXT:    movl $0, 12(%ecx)
; X86-NEXT:    movl $0, 8(%ecx)
; X86-NEXT:    movl $0, 4(%ecx)
; X86-NEXT:    movl $0, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: combineShiftOfShiftedLogic:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edx killed $edx def $rdx
; X64-NEXT:    shlq $32, %rdx
; X64-NEXT:    movq %rdx, 16(%rcx)
; X64-NEXT:    movq $0, 8(%rcx)
; X64-NEXT:    movq $0, (%rcx)
; X64-NEXT:    retq
  %zext1 = zext i128 %a1 to i192
  %zext2 = zext i32 %a2 to i192
  %shl = shl i192 %zext1, 130
  %or = or i192 %shl, %zext2
  %res = shl i192 %or, 160
  store i192 %res, ptr %p, align 8
  ret void
}