; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck %s

; After type legalization this function offers several chances for
; SimplifyDemandedBits: two on the chain that feeds the LHS of the shl and one
; on the shift amount. The DAGCombiner worklist used to be managed incorrectly,
; so the opportunity on the RHS was missed.
define i32 @foo(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mul a0, a0, a0
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    mul a0, a0, a0
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    sllw a0, a0, a1
; CHECK-NEXT:    ret
  %sq = mul i32 %x, %x
  %sq.inc = add i32 %sq, 1
  %sq2 = mul i32 %sq.inc, %sq.inc
  %sum = add i32 %sq2, %z
  %sum.inc = add i32 %sum, 1
  %res = shl i32 %sum.inc, %y
  ret i32 %res
}

; An nsw multiply of a value with itself has a zero sign bit. Check that this
; fact is used to turn the AND constant into -8.
define i64 @mul_self_nsw_sign(i64 %x) {
; CHECK-LABEL: mul_self_nsw_sign:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mul a0, a0, a0
; CHECK-NEXT:    andi a0, a0, -8
; CHECK-NEXT:    ret
  %sq = mul nsw i64 %x, %x
  %masked = and i64 %sq, 9223372036854775800
  ret i64 %masked
}

; The constant must be sign extended after type legalization so that ori can
; be used.
define void @ori(ptr nocapture noundef %0) {
; CHECK-LABEL: ori:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    ori a1, a1, -2
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  %val = load i32, ptr %0, align 4
  %res = or i32 %val, -2
  store i32 %res, ptr %0, align 4
  ret void
}

; The constant must be sign extended after type legalization so that xori can
; be used.
define void @xori(ptr nocapture noundef %0) {
; CHECK-LABEL: xori:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    xori a1, a1, -5
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  %val = load i32, ptr %0, align 4
  %res = xor i32 %val, -5
  store i32 %res, ptr %0, align 4
  ret void
}

; The constant must be sign extended after type legalization so that the
; shorter constant materialization can be used.
define void @or_signbit(ptr nocapture noundef %0) {
; CHECK-LABEL: or_signbit:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    lui a2, 524288
; CHECK-NEXT:    or a1, a1, a2
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  %val = load i32, ptr %0, align 4
  %res = or i32 %val, -2147483648
  store i32 %res, ptr %0, align 4
  ret void
}

; The constant must be sign extended after type legalization so that the
; shorter constant materialization can be used.
define void @xor_signbit(ptr nocapture noundef %0) {
; CHECK-LABEL: xor_signbit:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    lui a2, 524288
; CHECK-NEXT:    xor a1, a1, a2
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  %val = load i32, ptr %0, align 4
  %res = xor i32 %val, -2147483648
  store i32 %res, ptr %0, align 4
  ret void
}

; Type legalization places a sext_inreg after the sub, which causes the AND
; constant to become 0xfffffff8. SimplifyDemandedBits then drops the
; sext_inreg on the path to the store, so TargetShrinkDemandedConstant can no
; longer restore the upper bits of the and mask that andi needs. ISel recovers
; the lost sext_inreg through hasAllWUsers, and hasAllWUsers is also used to
; recover the ANDI.
define signext i32 @andi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: andi_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a0, a0, -8
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %masked = and i32 %0, -8
  %diff = sub i32 %masked, %1
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; Variant of andi_sub_cse that uses add instead of and. The checked output
; performs the subw first and applies the -8 afterwards with addiw.
define signext i32 @addi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: addi_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    addiw a0, a0, -8
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %biased = add i32 %0, -8
  %diff = sub i32 %biased, %1
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; Variant of andi_sub_cse that uses xor instead of and. The checked output
; keeps -8 as the xori immediate ahead of the subw.
define signext i32 @xori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: xori_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xori a0, a0, -8
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %flipped = xor i32 %0, -8
  %diff = sub i32 %flipped, %1
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; Variant of andi_sub_cse that uses or instead of and. The checked output
; keeps -8 as the ori immediate ahead of the subw.
define signext i32 @ori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: ori_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ori a0, a0, -8
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %merged = or i32 %0, -8
  %diff = sub i32 %merged, %1
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; SimplifyDemandedBits breaks the ANDI by rewriting -8 as 0xfffffff8. That AND
; is then CSEd with the AND needed to type legalize the lshr, which raises the
; use count of the AND with 0xfffffff8. As a result
; TargetShrinkDemandedConstant cannot restore 0xffffffff for the lshr and -8
; for the AND so that ANDI could be used.
; Instead, ISel is relied on to form srliw even though the AND has multiple
; uses and the mask is missing 1s in positions where bits will be shifted out.
; This lowers the use count of the AND, and hasAllWUsers can then be used to
; form ANDI.
define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) {
; CHECK-LABEL: andi_srliw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a3, a0, -8
; CHECK-NEXT:    srliw a4, a0, 3
; CHECK-NEXT:    addw a0, a3, a2
; CHECK-NEXT:    sw a4, 0(a1)
; CHECK-NEXT:    ret
  %masked = and i32 %0, -8
  %shifted = lshr i32 %0, 3
  store i32 %shifted, ptr %1, align 4
  %sum = add i32 %masked, %2
  ret i32 %sum
}

; Masks %x with 0xff00 and then sets the low byte. The checked output is an
; ori with 255 followed by an slli/srli pair by 48.
define i32 @and_or(i32 signext %x) {
; CHECK-LABEL: and_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    ori a0, a0, 255
; CHECK-NEXT:    slli a0, a0, 48
; CHECK-NEXT:    srli a0, a0, 48
; CHECK-NEXT:    ret
entry:
  %masked = and i32 %x, 65280
  %res = or i32 %masked, 255
  ret i32 %res
}

; %x is zero extended and, after subtracting one, used only as a shift amount.
; The checked output contains no separate instruction for the zero extension.
define i64 @and_allones(i32 signext %x) {
; CHECK-LABEL: and_allones:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    li a1, 1
; CHECK-NEXT:    sll a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %x.zext = zext i32 %x to i64
  %amt = add nsw i64 %x.zext, -1
  %res = shl i64 1, %amt
  ret i64 %res
}