xref: /llvm-project/llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll (revision f497a00da968b0ff90d8c98caa184d14b9a92495)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=dse -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK %s
; NOTE(review): the data layout string begins with "E" (big-endian) while the
; triple names powerpc64le. Confirm this combination is intentional before
; relying on any byte-order-sensitive result in this file.
3target datalayout = "E-m:e-i64:64-n32:64"
4target triple = "powerpc64le-unknown-linux"
5
; Aggregate wrapping a pair of floats. @_Z4testSt7complexIfE writes field
; (0, 0) through the base pointer and field (0, 1) through a getelementptr.
6%"struct.std::complex" = type { { float, float } }
7
; Combined partial overwrite of a wide store by two narrower stores.
; The i64 loaded from %ref.tmp is stored whole into %agg.result, and afterwards
; both float fields of %agg.result are stored individually. The expected
; output keeps the two float stores and has no i64 store to %agg.result.
8define void @_Z4testSt7complexIfE(ptr noalias nocapture sret(%"struct.std::complex") %agg.result, i64 %c.coerce) {
9; CHECK-LABEL: @_Z4testSt7complexIfE(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[REF_TMP:%.*]] = alloca i64, align 8
12; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[C_COERCE:%.*]], 32
13; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_SROA_0_0_EXTRACT_SHIFT]] to i32
14; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[C_SROA_0_0_EXTRACT_TRUNC]] to float
15; CHECK-NEXT:    [[C_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_COERCE]] to i32
16; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[C_SROA_2_0_EXTRACT_TRUNC]] to float
17; CHECK-NEXT:    call void @_Z3barSt7complexIfE(ptr nonnull sret(%"struct.std::complex") [[REF_TMP]], i64 [[C_COERCE]])
18; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 8
19; CHECK-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP2]], 32
20; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
21; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
22; CHECK-NEXT:    [[_M_VALUE_IMAGP_I_I:%.*]] = getelementptr inbounds %"struct.std::complex", ptr [[AGG_RESULT:%.*]], i64 0, i32 0, i32 1
23; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP2]] to i32
24; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
25; CHECK-NEXT:    [[MUL_AD_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP1]]
26; CHECK-NEXT:    [[MUL_BC_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP0]]
27; CHECK-NEXT:    [[MUL_I_I_I:%.*]] = fadd fast float [[MUL_AD_I_I]], [[MUL_BC_I_I]]
28; CHECK-NEXT:    [[MUL_AC_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP0]]
29; CHECK-NEXT:    [[MUL_BD_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP1]]
30; CHECK-NEXT:    [[MUL_R_I_I:%.*]] = fsub fast float [[MUL_AC_I_I]], [[MUL_BD_I_I]]
31; CHECK-NEXT:    store float [[MUL_R_I_I]], ptr [[AGG_RESULT]], align 4
32; CHECK-NEXT:    store float [[MUL_I_I_I]], ptr [[_M_VALUE_IMAGP_I_I]], align 4
33; CHECK-NEXT:    ret void
34;
35entry:
36
; Split the incoming i64 into its upper and lower 32 bits, viewed as floats.
37  %ref.tmp = alloca i64, align 8
38  %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32
39  %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32
40  %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float
41  %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32
42  %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float
43  call void @_Z3barSt7complexIfE(ptr nonnull sret(%"struct.std::complex") %ref.tmp, i64 %c.coerce)
44  %2 = load i64, ptr %ref.tmp, align 8
; Wide store under test: no load of %agg.result follows it, and the two float
; stores at the end of the function write both fields of the struct.
45  store i64 %2, ptr %agg.result, align 4
46
; Split the loaded i64 the same way (upper half -> %5, lower half -> %7).
47  %3 = lshr i64 %2, 32
48  %4 = trunc i64 %3 to i32
49  %5 = bitcast i32 %4 to float
50  %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", ptr %agg.result, i64 0, i32 0, i32 1
51  %6 = trunc i64 %2 to i32
52  %7 = bitcast i32 %6 to float
; %mul_i.i.i = %5*%1 + %7*%0 and %mul_r.i.i = %5*%0 - %7*%1.
53  %mul_ad.i.i = fmul fast float %5, %1
54  %mul_bc.i.i = fmul fast float %7, %0
55  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
56  %mul_ac.i.i = fmul fast float %5, %0
57  %mul_bd.i.i = fmul fast float %7, %1
58  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
; Narrow stores to the first and second float field respectively.
59  store float %mul_r.i.i, ptr %agg.result, align 4
60  store float %mul_i.i.i, ptr %_M_value.imagp.i.i, align 4
61  ret void
62}
63
; External callee of @_Z4testSt7complexIfE; takes an sret pointer to a
; %"struct.std::complex" and an i64 argument.
64declare void @_Z3barSt7complexIfE(ptr sret(%"struct.std::complex"), i64)
65
; Stores of mixed widths into the four bytes at %ptr, written as byte ranges
; relative to %ptr:
;   i32 5       [0,4)
;   i8  7       [0,1)
;   i16 -30062  [0,2)
;   i8  25      [2,3)
;   i8  47      [3,4)
;   i16 2020    [1,3)
; The expected output retains only the i16 -30062, i8 47 and i16 2020 stores;
; the i32 5, i8 7 and i8 25 stores and the %bptr2 address computation are gone.
66define void @test1(ptr %ptr) {
67; CHECK-LABEL: @test1(
68; CHECK-NEXT:  entry:
69; CHECK-NEXT:    store i16 -30062, ptr [[PTR:%.*]], align 2
70; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
71; CHECK-NEXT:    store i8 47, ptr [[BPTR3]], align 1
72; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
73; CHECK-NEXT:    store i16 2020, ptr [[BPTR1]], align 1
74; CHECK-NEXT:    ret void
75;
76entry:
77
78  store i32 5, ptr %ptr
79  store i8 7, ptr %ptr
80  store i16 -30062, ptr %ptr
81  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
82  store i8 25, ptr %bptr2
83  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
84  store i8 47, ptr %bptr3
85  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
86  store i16 2020, ptr %bptr1, align 1
87  ret void
88
89
90}
91
; An i32 store to %ptr followed by five overlapping i16 stores at byte offsets
; -1, 0, 1, 2 and 3 from %ptr. Nothing reads %ptr in between. The expected
; output keeps all five i16 stores and no longer contains the i32 store.
92define void @test2(ptr %ptr) {
93; CHECK-LABEL: @test2(
94; CHECK-NEXT:  entry:
95; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 -1
96; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
97; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
98; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
99; CHECK-NEXT:    store i16 1456, ptr [[BPTRM1]], align 1
100; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
101; CHECK-NEXT:    store i16 1756, ptr [[BPTR1]], align 1
102; CHECK-NEXT:    store i16 1126, ptr [[BPTR2]], align 1
103; CHECK-NEXT:    store i16 5656, ptr [[BPTR3]], align 1
104; CHECK-NEXT:    ret void
105;
106entry:
107
; Wide store under test, covering bytes [0,4) of %ptr.
108  store i32 5, ptr %ptr
109
110  %bptrm1 = getelementptr inbounds i8, ptr %ptr, i64 -1
111  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
112  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
113  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
114
115
; Two-byte stores covering [-1,1), [0,2), [1,3), [2,4) and [3,5).
116  store i16 1456, ptr %bptrm1, align 1
117  store i16 1346, ptr %ptr, align 1
118  store i16 1756, ptr %bptr1, align 1
119  store i16 1126, ptr %bptr2, align 1
120  store i16 5656, ptr %bptr3, align 1
121
122
123
124  ret void
125
126}
127
; Same store pattern as @test2, except that an i8 load from %ptr sits between
; the i32 store and the i16 stores, and its result is returned. The expected
; output keeps the i32 store together with the load and all five i16 stores.
128define signext i8 @test3(ptr %ptr) {
129; CHECK-LABEL: @test3(
130; CHECK-NEXT:  entry:
131; CHECK-NEXT:    store i32 5, ptr [[PTR:%.*]], align 4
132; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 -1
133; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
134; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
135; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
136; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[PTR]], align 1
137; CHECK-NEXT:    store i16 1456, ptr [[BPTRM1]], align 1
138; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
139; CHECK-NEXT:    store i16 1756, ptr [[BPTR1]], align 1
140; CHECK-NEXT:    store i16 1126, ptr [[BPTR2]], align 1
141; CHECK-NEXT:    store i16 5656, ptr [[BPTR3]], align 1
142; CHECK-NEXT:    ret i8 [[V]]
143;
144entry:
145
146  store i32 5, ptr %ptr
147
148  %bptrm1 = getelementptr inbounds i8, ptr %ptr, i64 -1
149  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
150  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
151  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
152
153
; This load reads a byte written by the i32 store above, before any of the
; i16 stores below execute.
154  %v = load i8, ptr %ptr, align 1
155  store i16 1456, ptr %bptrm1, align 1
156  store i16 1346, ptr %ptr, align 1
157  store i16 1756, ptr %bptr1, align 1
158  store i16 1126, ptr %bptr2, align 1
159  store i16 5656, ptr %bptr3, align 1
160
161
162  ret i8 %v
163
164}
165
; Five-pointer aggregate that @test4 allocates on the stack.
166%struct.foostruct = type {
167ptr,
168ptr,
169ptr,
170ptr,
171ptr
172}
; Intrinsic that @test4 calls to zero-fill the aggregate.
173declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
; External function that receives the aggregate's address at the end of @test4.
174declare void @goFunc(ptr)
; External function whose address @test4 stores into the aggregate.
175declare i32 @fa(ptr, ptr, i32, i8, ptr)
176
177; A 40-byte memset of %bang is followed by pointer stores to all five fields of
178; %struct.foostruct before %bang is passed to @goFunc.
; The expected output below goes straight from the alloca to the first pointer
; store, i.e. it contains no memset call.
; NOTE(review): this comment previously said the case is missed because of the
; partial-overlap analysis limit; that does not match the current assertions.
179define void @test4()  {
180; CHECK-LABEL: @test4(
181; CHECK-NEXT:  entry:
182; CHECK-NEXT:    [[BANG:%.*]] = alloca [[STRUCT_FOOSTRUCT:%.*]], align 8
183; CHECK-NEXT:    store ptr @fa, ptr [[BANG]], align 8
184; CHECK-NEXT:    [[V3:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 1
185; CHECK-NEXT:    store ptr @fa, ptr [[V3]], align 8
186; CHECK-NEXT:    [[V4:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 2
187; CHECK-NEXT:    store ptr @fa, ptr [[V4]], align 8
188; CHECK-NEXT:    [[V5:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 3
189; CHECK-NEXT:    store ptr @fa, ptr [[V5]], align 8
190; CHECK-NEXT:    [[V6:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 4
191; CHECK-NEXT:    store ptr null, ptr [[V6]], align 8
192; CHECK-NEXT:    call void @goFunc(ptr [[BANG]])
193; CHECK-NEXT:    ret void
;
194entry:
195
196  %bang = alloca %struct.foostruct, align 8
; Store under test: zero-fills 40 bytes starting at %bang.
197  call void @llvm.memset.p0.i64(ptr align 8 %bang, i8 0, i64 40, i1 false)
; Fields 0 through 3 receive @fa; field 4 receives null.
198  store ptr @fa, ptr %bang, align 8
199  %v3 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 1
200  store ptr @fa, ptr %v3, align 8
201  %v4 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 2
202  store ptr @fa, ptr %v4, align 8
203  %v5 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 3
204  store ptr @fa, ptr %v5, align 8
205  %v6 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 4
206  store ptr null, ptr %v6, align 8
207  call void @goFunc(ptr %bang)
208  ret void
209
210}
211
; An i32 store to %ptr followed by three overlapping i16 stores issued from the
; highest offset to the lowest: [2,4), then [1,3), then [0,2). The expected
; output keeps the three i16 stores and no longer contains the i32 store.
; The i16 constant 65535 appears as -1 in the expected output.
212define signext i8 @test5(ptr %ptr) {
213; CHECK-LABEL: @test5(
214; CHECK-NEXT:  entry:
215; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 1
216; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
217; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
218; CHECK-NEXT:    store i16 -1, ptr [[BPTR2]], align 1
219; CHECK-NEXT:    store i16 1456, ptr [[BPTR1]], align 1
220; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
221; CHECK-NEXT:    ret i8 0
222;
223entry:
224
; Wide store under test, covering bytes [0,4) of %ptr.
225  store i32 0, ptr %ptr
226
; %bptr3 is computed but never used as a store address in this function.
227  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
228  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
229  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
230
231
232  store i16 65535, ptr %bptr2, align 1
233  store i16 1456, ptr %bptr1, align 1
234  store i16 1346, ptr %ptr, align 1
235
236
237  ret i8 0
238}
239
; An i32 store to %ptr followed by two non-overlapping i16 stores, first to the
; second i16 element and then to the first. The expected output keeps the two
; i16 stores and no longer contains the i32 store.
240define signext i8 @test6(ptr %ptr) {
241; CHECK-LABEL: @test6(
242; CHECK-NEXT:  entry:
243; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
244; CHECK-NEXT:    store i16 1456, ptr [[BPTR2]], align 1
245; CHECK-NEXT:    store i16 -1, ptr [[PTR]], align 1
246; CHECK-NEXT:    ret i8 0
247;
248entry:
249
; Wide store under test.
250  store i32 0, ptr %ptr
251
; Indexing is in i16 units here, so index 1 is the second half of the i32.
252  %bptr2 = getelementptr inbounds i16, ptr %ptr, i64 1
253
254  store i16 1456, ptr %bptr2, align 1
255  store i16 65535, ptr %ptr, align 1
256
257
258  ret i8 0
259}
260
; An i64 store to %ptr followed by four i16 stores to i16 elements 0, 2, 1 and
; 3 (in that program order). The expected output keeps the four i16 stores and
; no longer contains the i64 store.
261define signext i8 @test7(ptr %ptr) {
262; CHECK-LABEL: @test7(
263; CHECK-NEXT:  entry:
264; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
265; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 2
266; CHECK-NEXT:    [[BPTR4:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 3
267; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
268; CHECK-NEXT:    store i16 1756, ptr [[BPTR3]], align 1
269; CHECK-NEXT:    store i16 1456, ptr [[BPTR2]], align 1
270; CHECK-NEXT:    store i16 5656, ptr [[BPTR4]], align 1
271; CHECK-NEXT:    ret i8 0
272;
273entry:
274
; Wide store under test.
275  store i64 0, ptr %ptr
276
; Indexing is in i16 units: %bptr2, %bptr3 and %bptr4 are elements 1, 2 and 3.
277  %bptr2 = getelementptr inbounds i16, ptr %ptr, i64 1
278  %bptr3 = getelementptr inbounds i16, ptr %ptr, i64 2
279  %bptr4 = getelementptr inbounds i16, ptr %ptr, i64 3
280
; The narrow stores are deliberately not in ascending address order.
281  store i16 1346, ptr %ptr, align 1
282  store i16 1756, ptr %bptr3, align 1
283  store i16 1456, ptr %bptr2, align 1
284  store i16 5656, ptr %bptr4, align 1
285
286
287  ret i8 0
288}
289