; RUN: opt < %s -passes='early-cse<memssa>' -earlycse-debug-hash -verify-memoryssa -disable-output
; REQUIRES: asserts

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; The tests below highlight scenarios where EarlyCSE does not preserve
; MemorySSA optimized accesses. The current MemorySSA verifier accepts these.
; The "; MemoryUse(...)" / "; N = MemoryDef(...)" comments inside the functions
; record the MemorySSA access annotation of the instruction that follows them;
; they are informational only and are not checked by this test.

; Test 1:
; AA cannot tell here that the last load does not alias the only store.
; The first two loads are a common expression; EarlyCSE removes the second one,
; and then AA can see that the last load is a Use(LoE). Hence the last load is
; not as optimized as it claims to be. Note that if we replace the GEP indices
; 2 and 1, AA sees NoAlias for the last load before CSE-ing the first 2 loads.
%struct.ImageParameters = type { i32, i32, i32 }
@img = external global ptr, align 8
define void @test1_macroblock() {
entry:
  ; MemoryUse(LoE)
  %0 = load ptr, ptr @img, align 8

  %Pos_2 = getelementptr inbounds %struct.ImageParameters, ptr %0, i64 0, i32 2
  ; 1 = MemoryDef(LoE)
  store i32 undef, ptr %Pos_2, align 8

  ; MemoryUse(LoE)
  ; Same expression as %0 above; this is the load EarlyCSE removes.
  %1 = load ptr, ptr @img, align 8

  %Pos_1 = getelementptr inbounds %struct.ImageParameters, ptr %1, i64 0, i32 1
  ; MemoryUse(1) MayAlias
  %2 = load i32, ptr %Pos_1, align 4
  unreachable
}

; Test 2:
; EarlyCSE simplifies %string to undef. Def and Use used to be MustAlias; with
; undef they are NoAlias. The Use can be optimized further to LoE. We can
; de-optimize uses of replaced instructions, but in general this is not enough
; (see next tests).
%struct.TermS = type { i32, i32, i32, i32, i32, ptr }
define fastcc void @test2_term_string() {
entry:
  %string = getelementptr inbounds %struct.TermS, ptr undef, i64 0, i32 5
  ; 1 = MemoryDef(LoE)
  store ptr undef, ptr %string, align 8
  ; MemoryUse(1) MustAlias
  %0 = load ptr, ptr %string, align 8
  unreachable
}

; Test 3:
; EarlyCSE simplifies %0 to undef, so the second Def now stores to undef.
; We now find that the second load (Use(2)) can be optimized further to LoE.
; When replacing instructions, we can de-optimize all uses of the replaced
; instruction and all uses of transitive accesses. However, this does not stop
; MemorySSA from being tripped by AA (see test4).
%struct.Grammar = type { ptr, ptr, %struct.anon }
%struct.anon = type { i32, i32, ptr, [3 x ptr] }
%struct.Term = type { i32 }

define fastcc void @test3_term_string(ptr %g) {
entry:
  ; 1 = MemoryDef(LoE)
  store ptr undef, ptr undef, align 8
  ; MemoryUse(LoE)
  %0 = load ptr, ptr undef, align 8
  %arrayidx = getelementptr inbounds i8, ptr %0, i64 undef
  ; 2 = MemoryDef(1)
  store i8 0, ptr %arrayidx, align 1
  %v = getelementptr inbounds %struct.Grammar, ptr %g, i64 0, i32 2, i32 2
  ; MemoryUse(2) MayAlias
  %1 = load ptr, ptr %v, align 8
  unreachable
}

; Test 4:
; Removing dead/unused instructions in if.then274 makes AA smarter. Before
; removal, it finds %4 MayAlias the store above. After removal this can be
; optimized to LoE. Hence after EarlyCSE, there is an access that claims to be
; optimized and yet can be optimized further.
; NOTE(review): there is no %4 in this reduced IR; the text presumably refers
; to the load annotated "MemoryUse(1) MayAlias" in for.cond404 (%1) - confirm.

; We can't escape such cases in general when relying on Alias Analysis.
; The only fail-safe way to actually preserve MemorySSA when removing or
; replacing instructions (i.e. get the *same* MemorySSA as if it was computed
; for the updated IR) is to recompute it from scratch. What we get now is still
; a correct update, but with accesses that claim to be optimized and can be
; optimized further if we were to re-run MemorySSA on the IR.
%struct.gnode.0.1.3.6.9.18.20.79 = type { i32, i32, i32, i32, i32, i32, i32, ptr }
@gnodeArray = external global ptr, align 8

define void @test4_shortest(i1 %arg) {
entry:
  %exl.i = alloca [5 x i32], align 16
  br i1 %arg, label %if.then274, label %for.cond404

if.then274:                                       ; preds = %entry
  ; None of the GEPs in this block has a use; these are the dead instructions
  ; the Test 4 description refers to.
  %arrayidx.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
  %arrayidx1.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
  %arrayidx2.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
  %arrayidx3.i = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
  %arrayidx.i1034 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
  %arrayidx1.i1035 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
  %arrayidx2.i1036 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
  %arrayidx3.i1037 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
  unreachable

for.cond404:                                      ; preds = %entry
  %arrayidx.i960 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
  %arrayidx1.i961 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
  %arrayidx2.i962 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
  ; 1 = MemoryDef(LoE)
  store i32 undef, ptr %arrayidx2.i962, align 4
  %arrayidx3.i963 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4

  ; MemoryUse(LoE)
  %0 = load ptr, ptr @gnodeArray, align 8
  %arrayidx6.i968 = getelementptr inbounds ptr, ptr %0, i64 undef
  ; MemoryUse(1) MayAlias
  %1 = load ptr, ptr %arrayidx6.i968, align 8
  br i1 %arg, label %for.cond26.preheader.i974, label %if.then20.for.body_crit_edge.i999

for.cond26.preheader.i974:                        ; preds = %for.cond404
  %arrayidx.i924 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 1
  %arrayidx1.i925 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 2
  %arrayidx2.i926 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 3
  %arrayidx3.i927 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 4
  unreachable

if.then20.for.body_crit_edge.i999:                ; preds = %for.cond404
  %arrayidx9.phi.trans.insert.i997 = getelementptr inbounds [5 x i32], ptr %exl.i, i64 0, i64 undef
  unreachable
}