; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I

; Test that we turn (sra (shl X, 32), 32-C) into (slli (sext.w X), C)
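; For example, in test1 the ashr amount is 30, so C = 32 - 30 = 2 and the
; expected lowering is sext.w followed by a slli by 2.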

define i64 @test1(i64 %a) nounwind {
; RV64I-LABEL: test1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    ret
  %1 = shl i64 %a, 32
  %2 = ashr i64 %1, 30
  ret i64 %2
}

define i64 @test2(i32 signext %a) nounwind {
; RV64I-LABEL: test2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 3
; RV64I-NEXT:    ret
  %1 = zext i32 %a to i64
  %2 = shl i64 %1, 32
  %3 = ashr i64 %2, 29
  ret i64 %3
}

define i64 @test3(ptr %a) nounwind {
; RV64I-LABEL: test3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    slli a0, a0, 4
; RV64I-NEXT:    ret
  %1 = load i32, ptr %a
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 28
  ret i64 %4
}

define i64 @test4(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: test4:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addw a0, a0, a1
; RV64I-NEXT:    slli a0, a0, 30
; RV64I-NEXT:    ret
  %1 = add i32 %a, %b
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 2
  ret i64 %4
}

define i64 @test5(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: test5:
; RV64I:       # %bb.0:
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    slli a0, a0, 31
; RV64I-NEXT:    ret
  %1 = xor i32 %a, %b
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 1
  ret i64 %4
}

define i64 @test6(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: test6:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sllw a0, a0, a1
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    ret
  %1 = shl i32 %a, %b
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 16
  ret i64 %4
}

; The ashr+add+shl is canonical IR from InstCombine for
; (sext (add (trunc X to i32), 1) to i64).
; That can be implemented as addiw; make sure we recover it.
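; Specifically, ((X << 32) + (1 << 32)) >>s 32 sign extends the low 32 bits of
; X + 1, which is exactly what addiw computes.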
define i64 @test7(ptr %0, i64 %1) {
; RV64I-LABEL: test7:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addiw a0, a1, 1
; RV64I-NEXT:    ret
  %3 = shl i64 %1, 32
  %4 = add i64 %3, 4294967296
  %5 = ashr exact i64 %4, 32
  ret i64 %5
}

; The ashr+add+shl is canonical IR from InstCombine for
; (sext (sub 1, (trunc X to i32)) to i64).
; That can be implemented as (li 1)+subw; make sure we recover it.
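; Specifically, ((X * -(1 << 32)) + (1 << 32)) >>s 32 sign extends the low 32
; bits of 1 - X, which is what li 1 followed by subw computes.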
define i64 @test8(ptr %0, i64 %1) {
; RV64I-LABEL: test8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    subw a0, a0, a1
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 4294967296
  %5 = ashr exact i64 %4, 32
  ret i64 %5
}

; The gep is here to introduce a shl by 2 after the ashr; that shift will get
; folded in and make this harder to recover.
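; Here ((X << 32) + (4097 << 32)) >>s 32 sign extends the low 32 bits of
; X + 4097, and the index is then scaled by 4 for the i32 gep, so the expected
; code builds 4097 with lui+addi, does an addw, and keeps a slli by 2 folded
; from the gep.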
define signext i32 @test9(ptr %0, i64 %1) {
; RV64I-LABEL: test9:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:    addi a2, a2, 1
; RV64I-NEXT:    addw a1, a1, a2
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
  %3 = shl i64 %1, 32
  %4 = add i64 %3, 17596481011712 ; 4097 << 32
  %5 = ashr exact i64 %4, 32
  %6 = getelementptr inbounds i32, ptr %0, i64 %5
  %7 = load i32, ptr %6, align 4
  ret i32 %7
}

; The gep is here to introduce a shl by 2 after the ashr; that shift will get
; folded in and make this harder to recover.
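; Here the recovered value is the sign extension of 123456789 - X; 123456789
; is materialized with lui 30141 + addi -747 (30141 << 12 - 747), followed by
; a subw and a slli by 2 folded from the gep.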
define signext i32 @test10(ptr %0, i64 %1) {
; RV64I-LABEL: test10:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 30141
; RV64I-NEXT:    addi a2, a2, -747
; RV64I-NEXT:    subw a2, a2, a1
; RV64I-NEXT:    slli a2, a2, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 530242871224172544 ; 123456789 << 32
  %5 = ashr exact i64 %4, 32
  %6 = getelementptr inbounds i32, ptr %0, i64 %5
  %7 = load i32, ptr %6, align 4
  ret i32 %7
}

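; Same sub pattern as test8, but the 32-bit constant is 0x80000000, which can
; be materialized with a single lui.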
define i64 @test11(ptr %0, i64 %1) {
; RV64I-LABEL: test11:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a0, 524288
; RV64I-NEXT:    subw a0, a0, a1
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 9223372036854775808 ; 0x8000'0000'0000'0000
  %5 = ashr exact i64 %4, 32
  ret i64 %5
}

; Make sure we use slli+srai to enable the possibility of compressed instructions.
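; The i32 shl by 17 / ashr by 15 becomes a 64-bit slli by 49 / srai by 47
; (both amounts increased by 32); slli and srai have compressed encodings
; (c.slli/c.srai), while the word-sized shifts do not.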
define i32 @test12(i32 signext %0) {
; RV64I-LABEL: test12:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 49
; RV64I-NEXT:    srai a0, a0, 47
; RV64I-NEXT:    ret
  %2 = shl i32 %0, 17
  %3 = ashr i32 %2, 15
  ret i32 %3
}

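; The same (mul X, -(1 << 32)) feeds two add+ashr exact chains, giving the
; indices 1 - X and 2 - X; both are expected to lower to li+subw.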
define i8 @test13(ptr %0, i64 %1) {
; RV64I-LABEL: test13:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    li a3, 2
; RV64I-NEXT:    subw a2, a2, a1
; RV64I-NEXT:    subw a3, a3, a1
; RV64I-NEXT:    add a2, a0, a2
; RV64I-NEXT:    add a0, a0, a3
; RV64I-NEXT:    lbu a1, 0(a2)
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 4294967296 ; 1 << 32
  %5 = ashr exact i64 %4, 32
  %6 = getelementptr inbounds i8, ptr %0, i64 %5
  %7 = load i8, ptr %6, align 4
  %8 = add i64 %3, 8589934592 ; 2 << 32
  %9 = ashr exact i64 %8, 32
  %10 = getelementptr inbounds i8, ptr %0, i64 %9
  %11 = load i8, ptr %10, align 4
  %12 = add i8 %7, %11
  ret i8 %12
}

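; The recovered index 1 - X is used both as an i8 gep index and, scaled by 4,
; as an i32 gep index; a single subw result should feed both addresses.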
define signext i32 @test14(ptr %0, ptr %1, i64 %2) {
; RV64I-LABEL: test14:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a3, 1
; RV64I-NEXT:    subw a3, a3, a2
; RV64I-NEXT:    add a0, a0, a3
; RV64I-NEXT:    slli a3, a3, 2
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    add a1, a1, a3
; RV64I-NEXT:    lw a1, 0(a1)
; RV64I-NEXT:    addw a0, a0, a1
; RV64I-NEXT:    ret
  %4 = mul i64 %2, -4294967296
  %5 = add i64 %4, 4294967296 ; 1 << 32
  %6 = ashr exact i64 %5, 32
  %7 = getelementptr inbounds i8, ptr %0, i64 %6
  %8 = load i8, ptr %7, align 4
  %9 = zext i8 %8 to i32
  %10 = getelementptr inbounds i32, ptr %1, i64 %6
  %11 = load i32, ptr %10, align 4
  %12 = add i32 %9, %11
  ret i32 %12
}