xref: /llvm-project/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll (revision f1ec0d12bb0843f0deab83ef2b5cf1339cbc4f0b)
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

; Target description shared by every function in this file. With this layout
; an i64 occupies 8 bytes, so the "i64 5" element index used by the tests
; below corresponds to the byte offset 40 that the CHECK lines look for.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

; External global: a one-element array of two-element arrays of <4 x float>.
@x = external global [1 x [2 x <4 x float>]]
8
; Simplest case: the address is computed in the entry block but its only user
; is a load in a conditionally executed block. The computation is expected to
; be rewritten as an i8 GEP with byte offset 40.
define void @test1(i1 %cond, ptr %base) {
; CHECK-LABEL: @test1
; CHECK: getelementptr inbounds i8, {{.+}} 40
start:
  ; Element 5 of an i64 array.
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %guarded, label %done

guarded:
  ; Only user of %addr.
  %val = load i32, ptr %addr, align 4
  br label %done

done:
  ret void
}
24
; External function taking one i32; the tests call it with loaded values.
declare void @foo(i32)
26
; One address, two loads living in two different blocks: each of those blocks
; is expected to end up with its own copy of the address computation.
define void @test2(i1 %cond, ptr %base) {
; CHECK-LABEL: @test2
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %done

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %first = load i32, ptr %addr, align 4
  call void @foo(i32 %first)
  %first.is.zero = icmp eq i32 %first, 0
  br i1 %first.is.zero, label %next, label %done

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %second = load i32, ptr %addr, align 4
  call void @foo(i32 %second)
  br label %done

done:
  ret void
}
52
; Two loads of the same address inside a single block: one copy of the
; address computation is enough there, since instruction selection will
; duplicate it if that turns out to be needed.
define void @test3(i1 %cond, ptr %base) {
; CHECK-LABEL: @test3
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %done

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %first = load i32, ptr %addr, align 4
  call void @foo(i32 %first)
; The second load must not get another copy of the address.
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %second = load i32, ptr %addr, align 4
  call void @foo(i32 %second)
  br label %done

done:
  ret void
}
74
; The address feeds a load and is also passed, as a pointer, to a call marked
; cold. Sinking is still expected: one copy next to the load and one copy in
; the block holding the cold call.
define void @test4(i1 %cond, ptr %base) {
; CHECK-LABEL: @test4
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %done

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %loaded = load i32, ptr %addr, align 4
  call void @foo(i32 %loaded)
  %is.zero = icmp eq i32 %loaded, 0
  br i1 %is.zero, label %rare.1, label %done

done:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %loaded, ptr %addr) cold
  br label %done
}
100
; Negative test: same shape as @test4, but the call that takes the address is
; not marked cold. The address computation must stay in the entry block and
; must not be duplicated next to the load.
define void @test5(i1 %cond, ptr %base) {
; CHECK-LABEL: @test5
entry:
; CHECK: %addr = getelementptr inbounds
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %done

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %loaded = load i32, ptr %addr, align 4
  call void @foo(i32 %loaded)
  %is.zero = icmp eq i32 %loaded, 0
  br i1 %is.zero, label %other.use, label %done

done:
  ret void

other.use:
  ; Deliberately NOT marked cold.
  call void @slowpath(i32 %loaded, ptr %addr)
  br label %done
}
124
; Negative test: the function carries the minsize attribute. Even though the
; call taking the address is marked cold, the address computation must stay
; in the entry block and must not be copied next to the load.
define void @test6(i1 %cond, ptr %base) minsize {
; CHECK-LABEL: @test6
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %done

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %loaded = load i32, ptr %addr, align 4
  call void @foo(i32 %loaded)
  %is.zero = icmp eq i32 %loaded, 0
  br i1 %is.zero, label %cold.use, label %done

done:
  ret void

cold.use:
  call void @slowpath(i32 %loaded, ptr %addr) cold
  br label %done
}
148
; Negative test - profile-guided variant of @test6. Instead of the minsize
; attribute, the function carries !prof !14 (a function_entry_count of 0) and
; the RUN line requires the profile summary; presumably this is what makes
; the pass treat the function as optimize-for-size. The address computation
; must stay in the entry block and must not be copied next to the load.
define void @test6_pgso(i1 %cond, ptr %base) !prof !14 {
; Anchor on the full function name: a bare "@test6" only matches this
; function by being a prefix of its name.
; CHECK-LABEL: @test6_pgso
entry:
; CHECK: %addr = getelementptr
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %fallthrough

fallthrough:
  ret void

rare.1:
  call void @slowpath(i32 %v1, ptr %addr) cold
  br label %fallthrough
}
172
; Make sure sinking two copies of addressing mode into different blocks works
; when there are cold paths for each: if.then has the cold path rare.1 and
; next has the cold path rare.2. All four blocks use %addr, and each is
; expected to receive its own copy of the address computation.
define void @test7(i1 %cond, ptr %base) {
; CHECK-LABEL: @test7
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br i1 %cond, label %if.then, label %fallthrough

if.then:
; CHECK-LABEL: if.then:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v1 = load i32, ptr %addr, align 4
  call void @foo(i32 %v1)
  %cmp = icmp eq i32 %v1, 0
  br i1 %cmp, label %rare.1, label %next

next:
; CHECK-LABEL: next:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %v2 = load i32, ptr %addr, align 4
  call void @foo(i32 %v2)
  %cmp2 = icmp eq i32 %v2, 0
  ; Branch to this block's own cold path. Targeting rare.1 here would leave
  ; rare.2 without any predecessor.
  br i1 %cmp2, label %rare.2, label %fallthrough

fallthrough:
  ret void

rare.1:
; CHECK-LABEL: rare.1:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v1, ptr %addr) cold
  br label %next

rare.2:
; CHECK-LABEL: rare.2:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  call void @slowpath(i32 %v2, ptr %addr) cold
  br label %fallthrough
}
212
; External function taking an i32 and a pointer; the tests pass it the sunk
; address, with the call site marked cold in most tests and left unmarked in
; @test5.
declare void @slowpath(i32, ptr)
214
; Regression test: when sinking fails, the pass must still terminate rather
; than loop forever. The address computation is expected to be left exactly
; as written.
define void @test8() {
; CHECK-LABEL: define void @test8
; CHECK: %ptr = getelementptr i8, ptr %aFOO_load_ptr2int_2void, i32 undef
setup:
  %src = load ptr, ptr undef
  %src.int = ptrtoint ptr %src to i64
  ; Second user of the integer form of the pointer.
  %src.int.vec = insertelement <4 x i64> undef, i64 %src.int, i32 0
  %aFOO_load_ptr2int_2void = inttoptr i64 %src.int to ptr
  %ptr = getelementptr i8, ptr %aFOO_load_ptr2int_2void, i32 undef
  br label %do.load

do.load:
  ; The memory user of %ptr lives in a different block than %ptr itself.
  %vec = load <4 x float>, ptr %ptr, align 4
  br label %spin

spin:
  ; Self-loop; the function never returns.
  br label %spin
}
234
; The loaded address reaches the load through two phi nodes that refer to
; each other around the loop. Every incoming value is the same computation
; (element 5 of %base), so the address is still expected to be sunk next to
; the load in the backedge block.
define void @test9(i1 %cond, ptr %base) {
; CHECK-LABEL: @test9
entry:
  %addr = getelementptr inbounds i64, ptr %base, i64 5
  br label %loop.head

loop.head:
  %i = phi i32 [0, %entry], [%i.plus1, %backedge]
  ; Takes its backedge value from the phi in %backedge, which in turn takes
  ; this phi as one of its inputs.
  %p.loop = phi ptr [%addr, %entry], [%p.merged, %backedge]
  br i1 %cond, label %recompute, label %backedge

recompute:
  call void @foo(i32 %i)
  ; Same computation as %addr, repeated inside the loop.
  %addr.again = getelementptr inbounds i64, ptr %base, i64 5
  br label %backedge

backedge:
; CHECK-LABEL: backedge:
; CHECK: getelementptr inbounds i8, {{.+}} 40
  %p.merged = phi ptr [%p.loop, %loop.head], [%addr.again, %recompute]
  %val = load i32, ptr %p.merged, align 4
  call void @foo(i32 %val)
  %i.plus1 = add i32 %i, 1
  %keep.going = icmp slt i32 %i.plus1, 1000
  br i1 %keep.going, label %loop.head, label %exit

exit:
  ret void
}
266
; Both arms of the select compute the same address in different ways
; (5 x i64 and 10 x i32 are both 40 bytes past %base), so the select is
; expected to disappear in favour of a single byte-offset GEP.
define void @test10(i1 %cond, ptr %base) {
; CHECK-LABEL: @test10
; CHECK: getelementptr inbounds i8, {{.+}} 40
; CHECK-NOT: select
entry:
  %via.i64 = getelementptr inbounds i64, ptr %base, i64 5
  %via.i32 = getelementptr inbounds i32, ptr %base, i64 10
  %chosen = select i1 %cond, ptr %via.i64, ptr %via.i32
  %val = load i32, ptr %chosen, align 4
  call void @foo(i32 %val)
  ret void
}
281
; Found by fuzzer, getSExtValue of > 64 bit constant.
; The multiplier below does not fit in 64 bits; the test only requires that
; the pass handles it and still emits the function.
define void @i96_mul(ptr %base, i96 %offset) {
; Anchoring on the function name confirms it is emitted and stops the
; CHECK-NOT from the preceding test from extending over this function.
; CHECK-LABEL: @i96_mul
BB:
  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
  %B84 = mul i96 %offset, 39614081257132168796771975167
  %G23 = getelementptr i1, ptr %base, i96 %B84
  store i1 false, ptr %G23
  ret void
}
291
; Module-level profile summary, attached as a module flag. The RUN line
; requires the profile-summary analysis, which presumably reads this flag.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
; Summary fields, one named node each.
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
; Detailed summary entries (three integers each).
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Entry count of zero; attached to @test6_pgso through !prof !14.
!14 = !{!"function_entry_count", i64 0}
308