xref: /llvm-project/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll (revision 2c9e2e9f0b75d6b023a388564092cc852ba29bd5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck %s
3
4; This test has multiple opportunities for SimplifyDemandedBits after type
5; legalization. There are 2 opportunities on the chain feeding the LHS of the
6; shl, and one opportunity on the shift amount (the RHS). We previously weren't
7; managing the DAGCombiner worklist correctly and failed to get the RHS.
8define i32 @foo(i32 %x, i32 %y, i32 %z) {
9; CHECK-LABEL: foo:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    mul a0, a0, a0
12; CHECK-NEXT:    addi a0, a0, 1
13; CHECK-NEXT:    mul a0, a0, a0
14; CHECK-NEXT:    add a0, a0, a2
15; CHECK-NEXT:    addi a0, a0, 1
16; CHECK-NEXT:    sllw a0, a0, a1
17; CHECK-NEXT:    ret
; %b..%f form the chain feeding the LHS of the shl. The assertions above expect
; the plain mul/addi/add forms for all of it, with no extra extension or
; masking instructions in between.
18  %b = mul i32 %x, %x
19  %c = add i32 %b, 1
20  %d = mul i32 %c, %c
21  %e = add i32 %d, %z
22  %f = add i32 %e, 1
23  %g = shl i32 %f, %y ; %y is the shift amount (RHS); expected to lower to a single sllw
24  ret i32 %g
25}
26
27; The sign bit of an nsw self multiply is 0. Make sure we can use this to
28; convert the AND constant to -8.
29define i64 @mul_self_nsw_sign(i64 %x) {
30; CHECK-LABEL: mul_self_nsw_sign:
31; CHECK:       # %bb.0:
32; CHECK-NEXT:    mul a0, a0, a0
33; CHECK-NEXT:    andi a0, a0, -8
34; CHECK-NEXT:    ret
35  %a = mul nsw i64 %x, %x ; self multiply with nsw
; The mask below is 0x7ffffffffffffff8: all ones except the sign bit and the
; low three bits. It differs from -8 (0xfffffffffffffff8) only in the sign bit,
; which is why the assertions expect a single andi with -8.
36  %b = and i64 %a, 9223372036854775800
37  ret i64 %b
38}
39
40; Make sure we sign extend the constant after type legalization to allow the
41; use of ori.
42define void @ori(ptr nocapture noundef %0) {
43; CHECK-LABEL: ori:
44; CHECK:       # %bb.0:
45; CHECK-NEXT:    lw a1, 0(a0)
46; CHECK-NEXT:    ori a1, a1, -2
47; CHECK-NEXT:    sw a1, 0(a0)
48; CHECK-NEXT:    ret
; Read-modify-write of the i32 at %0: load, OR with -2, store back.
49  %2 = load i32, ptr %0, align 4
50  %3 = or i32 %2, -2 ; expected to be selected as ori with immediate -2
51  store i32 %3, ptr %0, align 4
52  ret void
53}
54
55; Make sure we sign extend the constant after type legalization to allow the
56; use of xori.
57define void @xori(ptr nocapture noundef %0) {
58; CHECK-LABEL: xori:
59; CHECK:       # %bb.0:
60; CHECK-NEXT:    lw a1, 0(a0)
61; CHECK-NEXT:    xori a1, a1, -5
62; CHECK-NEXT:    sw a1, 0(a0)
63; CHECK-NEXT:    ret
; Read-modify-write of the i32 at %0: load, XOR with -5, store back.
64  %2 = load i32, ptr %0, align 4
65  %3 = xor i32 %2, -5 ; expected to be selected as xori with immediate -5
66  store i32 %3, ptr %0, align 4
67  ret void
68}
69
70; Make sure we sign extend the constant after type legalization to allow the
71; shorter constant materialization.
72define void @or_signbit(ptr nocapture noundef %0) {
73; CHECK-LABEL: or_signbit:
74; CHECK:       # %bb.0:
75; CHECK-NEXT:    lw a1, 0(a0)
76; CHECK-NEXT:    lui a2, 524288
77; CHECK-NEXT:    or a1, a1, a2
78; CHECK-NEXT:    sw a1, 0(a0)
79; CHECK-NEXT:    ret
; Read-modify-write of the i32 at %0 that sets the i32 sign bit.
80  %2 = load i32, ptr %0, align 4
; -2147483648 is 0x80000000 as an i32. The assertions expect it to be
; materialized with a single lui (524288 == 0x80000) and no further fix-up.
81  %3 = or i32 %2, -2147483648
82  store i32 %3, ptr %0, align 4
83  ret void
84}
85
86; Make sure we sign extend the constant after type legalization to allow the
87; shorter constant materialization.
88define void @xor_signbit(ptr nocapture noundef %0) {
89; CHECK-LABEL: xor_signbit:
90; CHECK:       # %bb.0:
91; CHECK-NEXT:    lw a1, 0(a0)
92; CHECK-NEXT:    lui a2, 524288
93; CHECK-NEXT:    xor a1, a1, a2
94; CHECK-NEXT:    sw a1, 0(a0)
95; CHECK-NEXT:    ret
; Read-modify-write of the i32 at %0 that flips the i32 sign bit.
96  %2 = load i32, ptr %0, align 4
; -2147483648 is 0x80000000 as an i32. The assertions expect it to be
; materialized with a single lui (524288 == 0x80000) and no further fix-up.
97  %3 = xor i32 %2, -2147483648
98  store i32 %3, ptr %0, align 4
99  ret void
100}
101
102; Type legalization inserts a sext_inreg after the sub. This causes the
103; constant for the AND to be turned into 0xfffffff8. Then SimplifyDemandedBits
104; removes the sext_inreg from the path to the store. This prevents
105; TargetShrinkDemandedConstant from being able to restore the lost upper bits
106; from the and mask to allow andi. ISel is able to recover the lost sext_inreg
107; using hasAllWUsers. We also use hasAllWUsers to recover the ANDI.
108define signext i32 @andi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
109; CHECK-LABEL: andi_sub_cse:
110; CHECK:       # %bb.0:
111; CHECK-NEXT:    andi a0, a0, -8
112; CHECK-NEXT:    subw a0, a0, a1
113; CHECK-NEXT:    sw a0, 0(a2)
114; CHECK-NEXT:    ret
115  %4 = and i32 %0, -8 ; expected to stay a single andi with -8
116  %5 = sub i32 %4, %1
; %5 has two users: the i32 store and the signext return value.
117  store i32 %5, ptr %2, align 4
118  ret i32 %5
119}
120
; Same shape as andi_sub_cse, with add in place of and: the result of the sub
; is both stored and returned. The assertions expect the -8 to be applied after
; the subtraction (subw, then addiw).
121define signext i32 @addi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
122; CHECK-LABEL: addi_sub_cse:
123; CHECK:       # %bb.0:
124; CHECK-NEXT:    subw a0, a0, a1
125; CHECK-NEXT:    addiw a0, a0, -8
126; CHECK-NEXT:    sw a0, 0(a2)
127; CHECK-NEXT:    ret
128  %4 = add i32 %0, -8
129  %5 = sub i32 %4, %1
; %5 has two users: the i32 store and the signext return value.
130  store i32 %5, ptr %2, align 4
131  ret i32 %5
132}
133
; Same shape as andi_sub_cse, with xor in place of and: the result of the sub
; is both stored and returned. The assertions expect xori with -8 followed by
; subw.
134define signext i32 @xori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
135; CHECK-LABEL: xori_sub_cse:
136; CHECK:       # %bb.0:
137; CHECK-NEXT:    xori a0, a0, -8
138; CHECK-NEXT:    subw a0, a0, a1
139; CHECK-NEXT:    sw a0, 0(a2)
140; CHECK-NEXT:    ret
141  %4 = xor i32 %0, -8
142  %5 = sub i32 %4, %1
; %5 has two users: the i32 store and the signext return value.
143  store i32 %5, ptr %2, align 4
144  ret i32 %5
145}
146
; Same shape as andi_sub_cse, with or in place of and: the result of the sub
; is both stored and returned. The assertions expect ori with -8 followed by
; subw.
147define signext i32 @ori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
148; CHECK-LABEL: ori_sub_cse:
149; CHECK:       # %bb.0:
150; CHECK-NEXT:    ori a0, a0, -8
151; CHECK-NEXT:    subw a0, a0, a1
152; CHECK-NEXT:    sw a0, 0(a2)
153; CHECK-NEXT:    ret
154  %4 = or i32 %0, -8
155  %5 = sub i32 %4, %1
; %5 has two users: the i32 store and the signext return value.
156  store i32 %5, ptr %2, align 4
157  ret i32 %5
158}
159
160; SimplifyDemandedBits breaks the ANDI by turning -8 into 0xfffffff8. This
161; gets CSEd with the AND needed for type legalizing the lshr. This increases
162; the use count of the AND with 0xfffffff8 making TargetShrinkDemandedConstant
163; unable to restore it to 0xffffffff for the lshr and -8 for the AND to use
164; ANDI.
165; Instead we rely on ISel to form srliw even though the AND has multiple uses
166; and the mask has missing 1s where bits will be shifted out. This reduces the
167; use count of the AND and we can use hasAllWUsers to form ANDI.
168define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) {
169; CHECK-LABEL: andi_srliw:
170; CHECK:       # %bb.0:
171; CHECK-NEXT:    andi a3, a0, -8
172; CHECK-NEXT:    srliw a4, a0, 3
173; CHECK-NEXT:    addw a0, a3, a2
174; CHECK-NEXT:    sw a4, 0(a1)
175; CHECK-NEXT:    ret
; %0 feeds both the and and the lshr below. The assertions expect both to read
; the original argument register (a0): andi for the mask, srliw for the shift.
176  %4 = and i32 %0, -8
177  %5 = lshr i32 %0, 3
178  store i32 %5, ptr %1, align 4 ; the shifted value is only stored
179  %6 = add i32 %4, %2 ; the masked value feeds the returned sum (addw)
180  ret i32 %6
181}
182
; Computes (x & 0xff00) | 0xff. The assertions expect the OR to be done first
; with ori, followed by an slli/srli pair by 48 that keeps only the low 16 bits
; of the result.
183define i32 @and_or(i32 signext %x) {
184; CHECK-LABEL: and_or:
185; CHECK:       # %bb.0: # %entry
186; CHECK-NEXT:    ori a0, a0, 255
187; CHECK-NEXT:    slli a0, a0, 48
188; CHECK-NEXT:    srli a0, a0, 48
189; CHECK-NEXT:    ret
190entry:
191  %and = and i32 %x, 65280 ; 65280 == 0xff00
192  %or = or i32 %and, 255 ; 255 == 0xff
193  ret i32 %or
194}
195
; The shift amount is a zero-extended i32 plus -1. The assertions expect no
; instruction for the zero extension: just addi, li and sll.
; NOTE(review): the name presumably refers to the all-ones (0xffffffff) mask
; implied by the zext - confirm against the commit that added this test.
196define i64 @and_allones(i32 signext %x) {
197; CHECK-LABEL: and_allones:
198; CHECK:       # %bb.0: # %entry
199; CHECK-NEXT:    addi a0, a0, -1
200; CHECK-NEXT:    li a1, 1
201; CHECK-NEXT:    sll a0, a1, a0
202; CHECK-NEXT:    ret
203entry:
204  %y = zext i32 %x to i64
205  %shamt = add nsw i64 %y, -1
206  %ret = shl i64 1, %shamt ; %shamt is used only as a shift amount
207  ret i64 %ret
208}
209