; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

@gp = global ptr null, align 8

declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)

define i1 @compare_global_trivialeq() {
; CHECK-LABEL: @compare_global_trivialeq(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp = icmp eq ptr %m, %lgp
  ret i1 %cmp
}

define i1 @compare_global_trivialne() {
; CHECK-LABEL: @compare_global_trivialne(
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp = icmp ne ptr %m, %lgp
  ret i1 %cmp
}


; Although %m is passed as a nocapture deopt operand to the call to @f,
; we cannot remove the allocation site (the call to malloc).
; The comparison should fold to false regardless of whether the call to malloc can be elided.
declare void @f()
define i1 @compare_and_call_with_deopt() {
; CHECK-LABEL: @compare_and_call_with_deopt(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Same function as above with a deopt operand on the call to @f, but the comparison is NE.
define i1 @compare_ne_and_call_with_deopt() {
; CHECK-LABEL: @compare_ne_and_call_with_deopt(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp ne ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Same function as above, but the global is not marked nonnull, so we cannot fold the comparison.
define i1 @compare_ne_global_maybe_null() {
; CHECK-LABEL: @compare_ne_global_maybe_null(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[LGP]], [[M]]
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp
  %cmp = icmp ne ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; FIXME: The comparison should fold to false since %m only escapes (via the call to
; @escape) after the comparison.
declare void @escape(ptr)
define i1 @compare_and_call_after() {
; CHECK-LABEL: @compare_and_call_after(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0:![0-9]+]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
; CHECK:       escape_call:
; CHECK-NEXT:    call void @escape(ptr [[M]])
; CHECK-NEXT:    ret i1 true
; CHECK:       just_return:
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %m, %lgp
  br i1 %cmp, label %escape_call, label %just_return

escape_call:
  call void @escape(ptr %m)
  ret i1 true

just_return:
  ret i1 %cmp
}

define i1 @compare_distinct_mallocs() {
; CHECK-LABEL: @compare_distinct_mallocs(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; The compare is folded to true since both operands are the same pointer (historically
; this required looking through bitcasts). The call to malloc is elided afterwards since
; the allocation has no other uses.
define i1 @compare_samepointer_under_bitcast() {
; CHECK-LABEL: @compare_samepointer_under_bitcast(
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %m
  ret i1 %cmp
}

; The compare is folded to true since both operands are the same pointer.
; The malloc call for %m cannot be elided since %m is used in the call to @f.
define i1 @compare_samepointer_escaped() {
; CHECK-LABEL: @compare_samepointer_escaped(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %m
  call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Technically, we can fold the %cmp2 comparison even though %m escapes through
; the ret statement, since `ret` terminates the function and the comparison cannot
; be reached from the ret.
; FIXME: Folding %cmp2 when %m escapes through the ret could be an issue with
; cross-thread data dependencies, since we do not distinguish between atomic and
; non-atomic loads in capture tracking.
define ptr @compare_ret_escape(ptr %c) {
; CHECK-LABEL: @compare_ret_escape(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[N]], [[C:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[RETST:%.*]], label [[CHK:%.*]]
; CHECK:       retst:
; CHECK-NEXT:    ret ptr [[M]]
; CHECK:       chk:
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
; CHECK:       chk2:
; CHECK-NEXT:    ret ptr [[N]]
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %n, %c
  br i1 %cmp, label %retst, label %chk

retst:
  ret ptr %m

chk:
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp2 = icmp eq ptr %m, %lgp
  br i1 %cmp2, label %retst, label %chk2

chk2:
  ret ptr %n
}

; The malloc call for %m cannot be elided since %m is used in the call to @f.
; However, the cmp can be folded to true since %n doesn't escape and %m and %n are distinct allocations.
define i1 @compare_distinct_pointer_escape() {
; CHECK-LABEL: @compare_distinct_pointer_escape(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  tail call void @f() [ "deopt"(ptr %m) ]
  %cmp = icmp ne ptr %m, %n
  ret i1 %cmp
}

; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* heap layout we choose for the result of
; a malloc call, but we can't simultaneously assume two different ones.  As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address-capturing compares the same way.  This is
; the same point that applies to allocas, applied to noalias/malloc.
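;
; For example (an illustrative sketch modeled on @neg_consistent_fold1 below, not an
; actual test case), consider two compares against what is dynamically the same address:
;   %c1 = icmp eq ptr %m, inttoptr (i64 2048 to ptr) ; RHS visible to instcombine
;   %c2 = icmp eq ptr %m, %forged                    ; %forged is a hypothetical value that
;                                                    ; recomputes (ptr) 2048 behind a call
; Under a layout placing %m at address 2048 both compares are true; under any other
; layout both are false. Folding only %c1 to false while %c2 later evaluates to true
; at runtime would expose two contradictory layout assumptions.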

; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation.  In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
declare ptr @hidden_inttoptr()
declare ptr @hidden_offset(ptr %other)
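;
; Purely for illustration (these bodies are assumed, not part of the test; the point
; of the test is that instcombine never sees them), the helpers could look like:
;   define ptr @hidden_inttoptr() {
;     %p = inttoptr i64 2048 to ptr
;     ret ptr %p
;   }
;   define ptr @hidden_offset(ptr %other) {
;     %p = getelementptr i8, ptr %other, i64 4
;     ret ptr %p
;   }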

; FIXME: Missed opportunity
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 4)
  %rhs = inttoptr i64 2048 to ptr
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

declare void @witness(i1, i1)

define void @neg_consistent_fold1() {
; CHECK-LABEL: @neg_consistent_fold1(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %rhs = inttoptr i64 2048 to ptr
  %rhs2 = call ptr @hidden_inttoptr()
  %cmp1 = icmp eq ptr %m, %rhs
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold2() {
; CHECK-LABEL: @neg_consistent_fold2(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[RHS:%.*]] = getelementptr i8, ptr [[N]], i64 4
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_offset(ptr [[N]])
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], [[RHS]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %rhs2 = call ptr @hidden_offset(ptr %n)
  %cmp1 = icmp eq ptr %m, %rhs
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold3() {
; CHECK-LABEL: @neg_consistent_fold3(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %rhs2 = call ptr @hidden_inttoptr()
  %cmp1 = icmp eq ptr %m, %lgp
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

; FIXME: This appears correct, but the current implementation relies
; on visiting both cmps in the same pass.  We may have a simplification order
; under which one is missed, and that would be a bug.
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT:    call void @witness(i1 false, i1 false)
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp1 = icmp eq ptr %m, %lgp
  %cmp2 = icmp eq ptr %m, %lgp
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

declare void @unknown(ptr)

; Points out that a nocapture call can't cause a consistency issue,
; as it is (by assumption) not able to contain a comparison which might
; capture the address.

define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr captures(none) [[M]])
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %rhs = inttoptr i64 2048 to ptr
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr captures(none) [[M]])
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr captures(none) [[M]])
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %m, %lgp
  ret i1 %cmp
}

; End consistent heap layout tests

; We can fold this by assuming a single heap layout
define i1 @two_nonnull_mallocs() {
; CHECK-LABEL: @two_nonnull_mallocs(
; CHECK-NEXT:    ret i1 false
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; The address of %n is captured, but %m can be arranged to make
; the comparison non-equal.
define i1 @two_nonnull_mallocs2() {
; CHECK-LABEL: @two_nonnull_mallocs2(
; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr nonnull [[N]])
; CHECK-NEXT:    ret i1 false
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  call void @unknown(ptr %n)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; TODO: We can fold this, but don't with the current scheme.
define i1 @two_nonnull_mallocs_hidden() {
; CHECK-LABEL: @two_nonnull_mallocs_hidden(
; CHECK-NEXT:    [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds nuw i8, ptr [[N]], i64 2
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  %gep1 = getelementptr i8, ptr %m, i32 1
  %gep2 = getelementptr i8, ptr %n, i32 2
  %cmp = icmp eq ptr %gep1, %gep2
  ret i1 %cmp
}


!0 = !{}