xref: /llvm-project/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll (revision a11faeed446882a81e79d780125d93e7199df645)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=memcpyopt -S %s -verify-memoryssa | FileCheck %s
3
4; Test memset -> memcpy forwarding when the memcpy is larger than the memset,
5; but the trailing bytes it reads are known to be undef.
6
7
; Aggregate used for the stack buffers in the tests below. Presumably 16 bytes
; (8 + 4 + 4) under the data layout in effect, which matches the 16-byte
; memcpy size and the 16-byte lifetime markers used with it.
8%T = type { i64, i32, i32 }
9
; Baseline case: the source is a fresh alloca of which only the first 12 bytes
; are memset, then 16 bytes are copied out of it. The CHECK lines expect the
; memcpy to be replaced by a 12-byte memset of %result.
10define void @test_alloca(ptr %result) {
11; CHECK-LABEL: @test_alloca(
12; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
13; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
14; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
15; CHECK-NEXT:    ret void
16;
17  %a = alloca %T, align 8
18  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
19  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
20  ret void
21}
22
; Alloca source wrapped in 16-byte lifetime.start/lifetime.end markers. The
; 12-byte memset followed by a 16-byte memcpy is still expected to turn into a
; 12-byte memset of %result, with the lifetime markers left in place.
23define void @test_alloca_with_lifetimes(ptr %result) {
24; CHECK-LABEL: @test_alloca_with_lifetimes(
25; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
26; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[A]])
27; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
28; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
29; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[A]])
30; CHECK-NEXT:    ret void
31;
32  %a = alloca %T, align 8
33  call void @llvm.lifetime.start.p0(i64 16, ptr %a)
34  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
35  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
36  call void @llvm.lifetime.end.p0(i64 16, ptr %a)
37  ret void
38}
39
; The source is a 16-byte malloc'ed buffer wrapped in 16-byte lifetime markers
; and freed afterwards. The CHECK lines expect the 16-byte memcpy to become a
; 12-byte memset of %result, leaving the malloc, lifetime and free calls intact.
40define void @test_malloc_with_lifetimes(ptr %result) {
41; CHECK-LABEL: @test_malloc_with_lifetimes(
42; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 16)
43; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[A]])
44; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
45; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
46; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[A]])
47; CHECK-NEXT:    call void @free(ptr [[A]])
48; CHECK-NEXT:    ret void
49;
50  %a = call ptr @malloc(i64 16)
51  call void @llvm.lifetime.start.p0(i64 16, ptr %a)
52  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
53  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
54  call void @llvm.lifetime.end.p0(i64 16, ptr %a)
55  call void @free(ptr %a)
56  ret void
57}
58
59; memcpy size is larger than lifetime, don't optimize.
; The lifetime markers cover only 12 bytes of the 16-byte malloc'ed buffer,
; while the memcpy reads all 16 bytes. The CHECK lines expect the memcpy to be
; left unchanged.
60define void @test_copy_larger_than_lifetime_size(ptr %result) {
61; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
62; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 16)
63; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr [[A]])
64; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
65; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
66; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr [[A]])
67; CHECK-NEXT:    call void @free(ptr [[A]])
68; CHECK-NEXT:    ret void
69;
70  %a = call ptr @malloc(i64 16)
71  call void @llvm.lifetime.start.p0(i64 12, ptr %a)
72  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
73  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
74  call void @llvm.lifetime.end.p0(i64 12, ptr %a)
75  call void @free(ptr %a)
76  ret void
77}
78
79; The trailing bytes are not known to be undef, we can't ignore them.
; Here the memset/memcpy source is the incoming argument %input rather than a
; buffer allocated inside the function, so bytes 12..15 may hold caller data.
; The CHECK lines expect both calls to be left unchanged.
80define void @test_not_undef_memory(ptr %result, ptr %input) {
81; CHECK-LABEL: @test_not_undef_memory(
82; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
83; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[INPUT]], i64 16, i1 false)
84; CHECK-NEXT:    ret void
85;
86  call void @llvm.memset.p0.i64(ptr align 8 %input, i8 0, i64 12, i1 false)
87  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %input, i64 16, i1 false)
88  ret void
89}
90
91; Memset is volatile, memcpy is not. Can be optimized.
; Only the memset of %a has its volatile flag (final i1 argument) set. The
; CHECK lines expect that volatile memset to be kept as is, and the memcpy to
; be replaced by a non-volatile 12-byte memset of %result.
92define void @test_volatile_memset(ptr %result) {
93; CHECK-LABEL: @test_volatile_memset(
94; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
95; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 true)
96; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 12, i1 false)
97; CHECK-NEXT:    ret void
98;
99  %a = alloca %T, align 8
100  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 true)
101  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
102  ret void
103}
104
105; Memcpy is volatile, memset is not. Cannot be optimized.
; Only the memcpy has its volatile flag (final i1 argument) set. The CHECK
; lines expect the volatile 16-byte memcpy to be left unchanged.
106define void @test_volatile_memcpy(ptr %result) {
107; CHECK-LABEL: @test_volatile_memcpy(
108; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
109; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
110; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 true)
111; CHECK-NEXT:    ret void
112;
113  %a = alloca %T, align 8
114  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
115  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 true)
116  ret void
117}
118
119; Write between memset and memcpy, can't optimize.
; The i8 store to %a overwrites the first memset byte before the memcpy reads
; it, so the copied data is no longer all zero. The CHECK lines expect the
; memcpy to be left unchanged.
120define void @test_write_between(ptr %result) {
121; CHECK-LABEL: @test_write_between(
122; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
123; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
124; CHECK-NEXT:    store i8 -1, ptr [[A]], align 1
125; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
126; CHECK-NEXT:    ret void
127;
128  %a = alloca %T, align 8
129  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
130  store i8 -1, ptr %a
131  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
132  ret void
133}
134
135; A write prior to the memset, which is part of the memset region.
136; We could optimize this, but currently don't, because the used memory location is imprecise.
; The i8 store hits the first byte of %a, which the following 8-byte memset
; overwrites. The CHECK lines pin the current behavior: the memcpy is kept.
137define void @test_write_before_memset_in_memset_region(ptr %result) {
138; CHECK-LABEL: @test_write_before_memset_in_memset_region(
139; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
140; CHECK-NEXT:    store i8 -1, ptr [[A]], align 1
141; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 8, i1 false)
142; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
143; CHECK-NEXT:    ret void
144;
145  %a = alloca %T, align 8
146  store i8 -1, ptr %a
147  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 8, i1 false)
148  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
149  ret void
150}
151
152; A write prior to the memset, which is part of the memcpy (but not memset) region.
153; This cannot be optimized.
; The i32 store targets the last field of %T (index 2), which lies beyond the
; 8 bytes written by the memset but within the 16 bytes read by the memcpy, so
; the trailing bytes are not undef. The CHECK lines expect the memcpy to stay.
154define void @test_write_before_memset_in_memcpy_region(ptr %result) {
155; CHECK-LABEL: @test_write_before_memset_in_memcpy_region(
156; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
157; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], ptr [[A]], i64 0, i32 2
158; CHECK-NEXT:    store i32 -1, ptr [[C]], align 4
159; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 8, i1 false)
160; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
161; CHECK-NEXT:    ret void
162;
163  %a = alloca %T, align 8
164  %c = getelementptr inbounds %T, ptr %a, i64 0, i32 2
165  store i32 -1, ptr %c
166  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 8, i1 false)
167  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
168  ret void
169}
170
171; A write prior to the memset, which is part of both the memset and memcpy regions.
172; This cannot be optimized.
; The i32 store targets field 1 of %T (presumably bytes 8..11), so it straddles
; the end of the 10-byte memset: part of it is overwritten by the memset and
; part of it survives into the bytes read only by the memcpy. The CHECK lines
; expect the memcpy to stay.
173define void @test_write_before_memset_in_both_regions(ptr %result) {
174; CHECK-LABEL: @test_write_before_memset_in_both_regions(
175; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
176; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], ptr [[A]], i64 0, i32 1
177; CHECK-NEXT:    store i32 -1, ptr [[C]], align 4
178; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 10, i1 false)
179; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
180; CHECK-NEXT:    ret void
181;
182  %a = alloca %T, align 8
183  %c = getelementptr inbounds %T, ptr %a, i64 0, i32 1
184  store i32 -1, ptr %c
185  call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 10, i1 false)
186  call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
187  ret void
188}
189
; Heap allocation routines used by the malloc-based tests.
190declare ptr @malloc(i64)
191declare void @free(ptr)
192
; Memory intrinsics under test; the final i1 argument is the volatile flag.
193declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
194declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
195
; Lifetime markers; the i64 argument is the size in bytes of the marked object.
196declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
197declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
198