xref: /llvm-project/llvm/test/Transforms/EarlyCSE/X86/preserve_memoryssa.ll (revision 1ca64c5fb74270661ca2f9ebd821f47dcb3152b4)
1; RUN: opt < %s -passes='early-cse<memssa>' -earlycse-debug-hash -verify-memoryssa -disable-output
2; REQUIRES: asserts
3
; Module-level target description: the tests in this file are compiled with an
; explicit data layout and an x86_64 Linux triple.
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
7; Tests below highlight scenarios where EarlyCSE does not preserve MemorySSA
8; optimized accesses. Current MemorySSA verify will accept these.
9
10; Test 1:
11; AA cannot tell here that the last load does not alias the only store.
12; The first two loads are a common expression, EarlyCSE removes the second one,
13; and then AA can see that the last load is a Use(LoE). Hence it is not
14; optimized as claimed. Note that if we replace the GEP indices 2 and 1, AA
15; sees NoAlias for the last load, before CSE-ing the first 2 loads.
; Declarations for test 1: a struct of three i32 fields and an external global
; holding a pointer.
16%struct.ImageParameters = type { i32, i32, i32 }
17@img = external global ptr, align 8
; @test1_macroblock loads the pointer stored in @img, stores to field 2 of the
; struct it points to, loads @img a second time, and finally loads field 1
; through the second pointer. The "; Memory..." comments in the body give the
; MemorySSA access for the instruction that follows (LoE presumably abbreviates
; MemorySSA's liveOnEntry -- confirm against the MemorySSA docs).
18define void @test1_macroblock() {
19entry:
20  ; MemoryUse(LoE)
  ; First load of the pointer held in @img.
21  %0 = load ptr, ptr @img, align 8
22
  ; Address of field 2, computed from the first loaded pointer.
23  %Pos_2 = getelementptr inbounds %struct.ImageParameters, ptr %0, i64 0, i32 2
24  ; 1 = MemoryDef(LoE)
  ; The only store in this function.
25  store i32 undef, ptr %Pos_2, align 8
26
27  ; MemoryUse(LoE)
  ; Second load of @img; same address and type as %0.
28  %1 = load ptr, ptr @img, align 8
29
  ; Address of field 1, computed from the second loaded pointer.
30  %Pos_1 = getelementptr inbounds %struct.ImageParameters, ptr %1, i64 0, i32 1
31  ; MemoryUse(1) MayAlias
  ; The last load; its defining access is the store above.
32  %2 = load i32, ptr %Pos_1, align 4
33  unreachable
34}
35
36; Test 2:
37; EarlyCSE simplifies %string to undef. Def and Use used to be MustAlias, with
38; undef they are NoAlias. The Use can be optimized further to LoE. We can
39; de-optimize uses of replaced instructions, but in general this is not enough
40; (see next tests).
; Declaration for test 2: five i32 fields followed by one ptr field (index 5).
41%struct.TermS = type { i32, i32, i32, i32, i32, ptr }
; @test2_term_string computes the address of field 5 from an undef base
; pointer, then stores to and loads from that same address.
42define fastcc void @test2_term_string() {
43entry:
  ; Field-5 address derived from an undef base pointer.
44  %string = getelementptr inbounds %struct.TermS, ptr undef, i64 0, i32 5
45  ; 1 = MemoryDef(LoE)
46  store ptr undef, ptr %string, align 8
47  ; MemoryUse(1) MustAlias
  ; Loads through the same %string address the store above wrote to.
48  %0 = load ptr, ptr %string, align 8
49  unreachable
50}
51
52; Test 3:
53; EarlyCSE simplifies %0 to undef. So the second Def now stores to undef.
54; We now find that the second load (Use(2)) can be optimized further to LoE.
55; When replacing instructions, we can deoptimize all uses of the replaced
56; instruction and all uses of transitive accesses. However this does not stop
57; MemorySSA from being tripped by AA (see test4).
; Declarations for test 3. The function below indexes %struct.Grammar down to
; the ptr member of its nested %struct.anon (field 2 of field 2).
; NOTE(review): %struct.Term is not referenced by @test3_term_string; it looks
; like a leftover from test reduction -- confirm before removing.
58%struct.Grammar = type { ptr, ptr, %struct.anon }
59%struct.anon = type { i32, i32, ptr, [3 x ptr] }
60%struct.Term = type { i32 }
61
; @test3_term_string stores to and loads from an undef address, stores a byte
; through the loaded pointer, and then loads a ptr member reached from the
; %g argument.
62define fastcc void @test3_term_string(ptr %g) {
63entry:
64  ; 1 = MemoryDef(LoE)
65  store ptr undef, ptr undef, align 8
66  ; MemoryUse(LoE)
67  %0 = load ptr, ptr undef, align 8
  ; Byte address formed from the loaded pointer with an undef offset.
68  %arrayidx = getelementptr inbounds i8, ptr %0, i64 undef
69  ; 2 = MemoryDef(1)
70  store i8 0, ptr %arrayidx, align 1
  ; Address of %g->field2.field2, the ptr member of the nested struct.
71  %v = getelementptr inbounds %struct.Grammar, ptr %g, i64 0, i32 2, i32 2
72  ; MemoryUse(2) MayAlias
73  %1 = load ptr, ptr %v, align 8
74  unreachable
75}
76
77; Test 4:
78; Removing dead/unused instructions in if.then274 makes AA smarter. Before
79; removal, it finds %4 MayAlias the store above. After removal this can be
80; optimized to LoE. Hence after EarlyCSE, there is an access that claims to be
81; optimized but can still be optimized further.
82
83; We can't escape such cases in general when relying on Alias Analysis.
84; The only fail-safe way to actually preserve MemorySSA when removing or
85; replacing instructions (i.e. get the *same* MemorySSA as if it was computed
86; for the updated IR) is to recompute it from scratch. What we get now is still
87; a correct update, but with accesses that claim to be optimized and can be
88; optimized further if we were to re-run MemorySSA on the IR.
; Declarations for test 4.
; NOTE(review): the %struct.gnode... type is not referenced by @test4_shortest;
; only @gnodeArray is used -- presumably a leftover from test reduction.
89%struct.gnode.0.1.3.6.9.18.20.79 = type { i32, i32, i32, i32, i32, i32, i32, ptr }
90@gnodeArray = external global ptr, align 8
91
; @test4_shortest allocates a [5 x i32] array and branches on %arg. The only
; memory accesses are in for.cond404: one store to element 3 of the array and
; two loads reached through @gnodeArray. Every other getelementptr in the
; function has no users.
92define void @test4_shortest(i1 %arg) {
93entry:
94  %exl.i = alloca [5 x i32], align 16
95  br i1 %arg, label %if.then274, label %for.cond404
96
97if.then274:                                       ; preds = %entry
  ; Eight GEPs into %exl.i, none of which is used.
98  %arrayidx.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
99  %arrayidx1.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
100  %arrayidx2.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
101  %arrayidx3.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
102  %arrayidx.i1034 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
103  %arrayidx1.i1035 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
104  %arrayidx2.i1036 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
105  %arrayidx3.i1037 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
106  unreachable
107
108for.cond404:                                      ; preds = %entry
109  %arrayidx.i960 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
110  %arrayidx1.i961 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
111  %arrayidx2.i962 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
112  ; 1 = MemoryDef(LoE)
  ; The only store in the function: element 3 of the local array.
113  store i32 undef, ptr %arrayidx2.i962, align 4
114  %arrayidx3.i963 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
115
116  ; MemoryUse(LoE)
117  %0 = load ptr, ptr @gnodeArray, align 8
  ; Slot address formed from the loaded pointer with an undef index.
118  %arrayidx6.i968 = getelementptr inbounds ptr, ptr %0, i64 undef
119  ; MemoryUse(1) MayAlias
120  %1 = load ptr, ptr %arrayidx6.i968, align 8
121  br i1 %arg, label %for.cond26.preheader.i974, label %if.then20.for.body_crit_edge.i999
122
123for.cond26.preheader.i974:                        ; preds = %for.cond404
  ; Four more unused GEPs into %exl.i.
124  %arrayidx.i924 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
125  %arrayidx1.i925 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
126  %arrayidx2.i926 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
127  %arrayidx3.i927 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
128  unreachable
129
130if.then20.for.body_crit_edge.i999:                ; preds = %for.cond404
  ; Unused GEP into %exl.i with an undef element index.
131  %arrayidx9.phi.trans.insert.i997 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 undef
132  unreachable
133}
134