; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; Test whether memcpy-memcpy dependence is optimized across
; basic blocks (conditional branches and invokes).

%struct.s = type { i32, i32 }

@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
@i = external constant ptr

declare void @qux()
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1)
declare void @__cxa_throw(ptr, ptr, ptr)
declare i32 @__gxx_personality_v0(...)
declare ptr @__cxa_begin_catch(ptr)

; A simple partial redundancy. Test that the second memcpy is optimized
; to copy directly from the original source rather than from the temporary.

define void @wobble(ptr noalias %dst, ptr %src, i1 %some_condition) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TEMP:%.*]] = alloca i8, i32 64, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TEMP]], ptr nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
; CHECK-NEXT:    br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]]
; CHECK:       out:
; CHECK-NEXT:    call void @qux()
; CHECK-NEXT:    unreachable
; CHECK:       more:
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 64, i1 false)
; CHECK-NEXT:    ret void
;
bb:
  %temp = alloca i8, i32 64
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %temp, ptr nonnull align 8 %src, i64 64, i1 false)
  br i1 %some_condition, label %more, label %out

out:
  call void @qux()
  unreachable

more:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %temp, i64 64, i1 false)
  ret void
}

; A CFG triangle with a partial redundancy targeting an alloca. Test that the
; memcpy inside the triangle is optimized to copy directly from the original
; source rather than from the temporary.

define i32 @foo(i1 %t3) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
; CHECK-NEXT:    [[T:%.*]] = alloca [[STRUCT_S]], align 4
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @s_foo, i64 8, i1 false)
; CHECK-NEXT:    br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]]
; CHECK:       bb4:
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[T]], ptr align 4 @s_foo, i64 8, i1 false)
; CHECK-NEXT:    br label [[BB7]]
; CHECK:       bb7:
; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[T]], align 4
; CHECK-NEXT:    [[T10:%.*]] = getelementptr [[STRUCT_S]], ptr [[T]], i32 0, i32 1
; CHECK-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4
; CHECK-NEXT:    [[T12:%.*]] = add i32 [[T9]], [[T11]]
; CHECK-NEXT:    ret i32 [[T12]]
;
bb:
  %s = alloca %struct.s, align 4
  %t = alloca %struct.s, align 4
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %s, ptr align 4 @s_foo, i64 8, i1 false)
  br i1 %t3, label %bb4, label %bb7

bb4:                                              ; preds = %bb
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %t, ptr align 4 %s, i64 8, i1 false)
  br label %bb7

bb7:                                              ; preds = %bb4, %bb
  %t9 = load i32, ptr %t, align 4
  %t10 = getelementptr %struct.s, ptr %t, i32 0, i32 1
  %t11 = load i32, ptr %t10, align 4
  %t12 = add i32 %t9, %t11
  ret i32 %t12
}

; A CFG diamond with an invoke on one side, and a partially redundant memcpy
; into an alloca on the other. Test that the memcpy inside the diamond is
; optimized to copy directly from the original source rather than from the
; temporary. This more complex test represents a relatively common usage
; pattern.

define i32 @baz(i1 %t5) personality ptr @__gxx_personality_v0 {
; CHECK-LABEL: @baz(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
; CHECK-NEXT:    [[T:%.*]] = alloca [[STRUCT_S]], align 4
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @s_baz, i64 8, i1 false)
; CHECK-NEXT:    br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]]
; CHECK:       bb6:
; CHECK-NEXT:    invoke void @__cxa_throw(ptr null, ptr @i, ptr null)
; CHECK-NEXT:    to label [[BB25:%.*]] unwind label [[BB9:%.*]]
; CHECK:       bb9:
; CHECK-NEXT:    [[T10:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT:    catch ptr null
; CHECK-NEXT:    br label [[BB13:%.*]]
; CHECK:       bb13:
; CHECK-NEXT:    [[T15:%.*]] = call ptr @__cxa_begin_catch(ptr null)
; CHECK-NEXT:    br label [[BB23:%.*]]
; CHECK:       bb22:
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[T]], ptr align 4 @s_baz, i64 8, i1 false)
; CHECK-NEXT:    br label [[BB23]]
; CHECK:       bb23:
; CHECK-NEXT:    [[T18:%.*]] = load i32, ptr [[T]], align 4
; CHECK-NEXT:    [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T]], i32 0, i32 1
; CHECK-NEXT:    [[T20:%.*]] = load i32, ptr [[T19]], align 4
; CHECK-NEXT:    [[T21:%.*]] = add nsw i32 [[T18]], [[T20]]
; CHECK-NEXT:    ret i32 [[T21]]
; CHECK:       bb25:
; CHECK-NEXT:    unreachable
;
bb:
  %s = alloca %struct.s, align 4
  %t = alloca %struct.s, align 4
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %s, ptr align 4 @s_baz, i64 8, i1 false)
  br i1 %t5, label %bb6, label %bb22

bb6:                                              ; preds = %bb
  invoke void @__cxa_throw(ptr null, ptr @i, ptr null)
          to label %bb25 unwind label %bb9

bb9:                                              ; preds = %bb6
  %t10 = landingpad { ptr, i32 }
          catch ptr null
  br label %bb13

bb13:                                             ; preds = %bb9
  %t15 = call ptr @__cxa_begin_catch(ptr null)
  br label %bb23

bb22:                                             ; preds = %bb
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %t, ptr align 4 %s, i64 8, i1 false)
  br label %bb23

bb23:                                             ; preds = %bb22, %bb13
  %t18 = load i32, ptr %t, align 4
  %t19 = getelementptr inbounds %struct.s, ptr %t, i32 0, i32 1
  %t20 = load i32, ptr %t19, align 4
  %t21 = add nsw i32 %t18, %t20
  ret i32 %t21

bb25:                                             ; preds = %bb6
  unreachable
}

; A memcpy-memcpy pair separated by a MemoryPhi, where the clobber on one path
; (the store to %arg) is unrelated to the copied noalias buffers. Test that the
; second memcpy is still optimized to copy directly from the original source.

define void @memphi_with_unrelated_clobber(i1 %cond, ptr %arg, ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; CHECK-LABEL: @memphi_with_unrelated_clobber(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[A:%.*]], ptr [[B:%.*]], i64 16, i1 false)
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK:       then:
; CHECK-NEXT:    store i64 0, ptr [[ARG:%.*]], align 4
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[C:%.*]], ptr [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %b, i64 16, i1 false)
  br i1 %cond, label %then, label %exit

then:
  store i64 0, ptr %arg
  br label %exit

exit:
  call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %a, i64 16, i1 false)
  ret void
}