; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s

; memset -> memcpy forwarding, if memcpy is larger than memset, but trailing
; bytes are known to be undef.

%T = type { i64, i32, i32 }

define void @test_alloca(ptr %result) {
; CHECK-LABEL: @test_alloca(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  ret void
}

define void @test_alloca_with_lifetimes(ptr %result) {
; CHECK-LABEL: @test_alloca_with_lifetimes(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[A]])
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  call void @llvm.lifetime.start.p0(i64 16, ptr %a)
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0(i64 16, ptr %a)
  ret void
}

define void @test_malloc_with_lifetimes(ptr %result) {
; CHECK-LABEL: @test_malloc_with_lifetimes(
; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[A]])
; CHECK-NEXT:    call void @free(ptr [[A]])
; CHECK-NEXT:    ret void
;
  %a = call ptr @malloc(i64 16)
  call void @llvm.lifetime.start.p0(i64 16, ptr %a)
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0(i64 16, ptr %a)
  call void @free(ptr %a)
  ret void
}
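
; For reference, the rewrite exercised by the three tests above (an
; illustrative before/after sketch, not part of the checked output): the
; 16-byte memcpy from %a only carries 12 defined bytes, because %a is freshly
; allocated and bytes 12..15 are never written (hence undef), so the memcpy
; can be replaced by a 12-byte memset of the destination:
;
;   call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
;   call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
; =>
;   call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
;   call void @llvm.memset.p0.i64(ptr %result, i8 0, i64 12, i1 false)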

; memcpy size is larger than lifetime, don't optimize.
define void @test_copy_larger_than_lifetime_size(ptr %result) {
; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr [[A]])
; CHECK-NEXT:    call void @free(ptr [[A]])
; CHECK-NEXT:    ret void
;
  %a = call ptr @malloc(i64 16)
  call void @llvm.lifetime.start.p0(i64 12, ptr %a)
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0(i64 12, ptr %a)
  call void @free(ptr %a)
  ret void
}

; The trailing bytes are not known to be undef, we can't ignore them.
define void @test_not_undef_memory(ptr %result, ptr %input) {
; CHECK-LABEL: @test_not_undef_memory(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[INPUT]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.p0.i64(ptr align 8 %input, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %input, i64 16, i1 false)
  ret void
}

; Memset is volatile, memcpy is not. Can be optimized.
define void @test_volatile_memset(ptr %result) {
; CHECK-LABEL: @test_volatile_memset(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 true)
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 true)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  ret void
}

; Memcpy is volatile, memset is not. Cannot be optimized.
define void @test_volatile_memcpy(ptr %result) {
; CHECK-LABEL: @test_volatile_memcpy(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 true)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 true)
  ret void
}

; Write between memset and memcpy, can't optimize.
define void @test_write_between(ptr %result) {
; CHECK-LABEL: @test_write_between(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    store i8 -1, ptr [[A]], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
  store i8 -1, ptr %a
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  ret void
}
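
; The remaining tests store into the allocation before the memset. Whether the
; fold would be safe depends on where the store lands: inside the memset region
; (overwritten before the memcpy reads, so foldable in principle), inside the
; memcpy-only tail (the stored bytes are observable through the memcpy), or
; both.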

; A write prior to the memset, which is part of the memset region.
; We could optimize this, but currently don't, because the used memory location is imprecise.
define void @test_write_before_memset_in_memset_region(ptr %result) {
; CHECK-LABEL: @test_write_before_memset_in_memset_region(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    store i8 -1, ptr [[A]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 8, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  store i8 -1, ptr %a
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 8, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which is part of the memcpy (but not memset) region.
; This cannot be optimized.
define void @test_write_before_memset_in_memcpy_region(ptr %result) {
; CHECK-LABEL: @test_write_before_memset_in_memcpy_region(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], ptr [[A]], i64 0, i32 2
; CHECK-NEXT:    store i32 -1, ptr [[C]], align 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 8, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %c = getelementptr inbounds %T, ptr %a, i64 0, i32 2
  store i32 -1, ptr %c
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 8, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which is part of both the memset and memcpy regions.
; This cannot be optimized.
define void @test_write_before_memset_in_both_regions(ptr %result) {
; CHECK-LABEL: @test_write_before_memset_in_both_regions(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], ptr [[A]], i64 0, i32 1
; CHECK-NEXT:    store i32 -1, ptr [[C]], align 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 10, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %c = getelementptr inbounds %T, ptr %a, i64 0, i32 1
  store i32 -1, ptr %c
  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 10, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
  ret void
}

declare ptr @malloc(i64)
declare void @free(ptr)

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)

declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)