xref: /llvm-project/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll (revision 19f4d68252b70c81ebb1686a5a31069eda5373de)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 - < %s | FileCheck %s
3
; Two constant i8 stores to adjacent bytes (%ptr and %ptr + 1).
; The expected output still contains two separate strb instructions, i.e. this
; pair is not combined into one wider store.
4define void @test_simple_2xs8(ptr %ptr) {
5; CHECK-LABEL: test_simple_2xs8:
6; CHECK:       ; %bb.0:
7; CHECK-NEXT:    mov w8, #4 ; =0x4
8; CHECK-NEXT:    mov w9, #5 ; =0x5
9; CHECK-NEXT:    strb w8, [x0]
10; CHECK-NEXT:    strb w9, [x0, #1]
11; CHECK-NEXT:    ret
12  store i8 4, ptr %ptr
13  %addr2 = getelementptr i8, ptr %ptr, i64 1
14  store i8 5, ptr %addr2
15  ret void
16}
17
; Two constant i16 stores to adjacent halfwords (%ptr and %ptr + 2 bytes).
; The expected output is a single 32-bit str of the combined constant:
; 4 in bits [15:0] (mov) and 5 in bits [31:16] (movk ... lsl #16).
18define void @test_simple_2xs16(ptr %ptr) {
19; CHECK-LABEL: test_simple_2xs16:
20; CHECK:       ; %bb.0:
21; CHECK-NEXT:    mov w8, #4 ; =0x4
22; CHECK-NEXT:    movk w8, #5, lsl #16
23; CHECK-NEXT:    str w8, [x0]
24; CHECK-NEXT:    ret
25  store i16 4, ptr %ptr
26  %addr2 = getelementptr i16, ptr %ptr, i64 1
27  store i16 5, ptr %addr2
28  ret void
29}
30
; Four constant i16 stores (4, 5, 9, 14) to consecutive halfwords of %ptr.
; The expected output is a single 64-bit str: each value is placed in its own
; 16-bit lane of x8 (mov + three movk) and written with one instruction.
31define void @test_simple_4xs16(ptr %ptr) {
32; CHECK-LABEL: test_simple_4xs16:
33; CHECK:       ; %bb.0:
34; CHECK-NEXT:    mov x8, #4 ; =0x4
35; CHECK-NEXT:    movk x8, #5, lsl #16
36; CHECK-NEXT:    movk x8, #9, lsl #32
37; CHECK-NEXT:    movk x8, #14, lsl #48
38; CHECK-NEXT:    str x8, [x0]
39; CHECK-NEXT:    ret
40  store i16 4, ptr %ptr
41  %addr2 = getelementptr i16, ptr %ptr, i64 1
42  store i16 5, ptr %addr2
43  %addr3 = getelementptr i16, ptr %ptr, i64 2
44  store i16 9, ptr %addr3
45  %addr4 = getelementptr i16, ptr %ptr, i64 3
46  store i16 14, ptr %addr4
47  ret void
48}
49
; Two constant i32 stores to adjacent words (%ptr and %ptr + 4 bytes).
; The expected output is a single 64-bit str of the combined constant:
; 4 in the low word (mov) and 5 in the high word (movk ... lsl #32).
50define void @test_simple_2xs32(ptr %ptr) {
51; CHECK-LABEL: test_simple_2xs32:
52; CHECK:       ; %bb.0:
53; CHECK-NEXT:    mov x8, #4 ; =0x4
54; CHECK-NEXT:    movk x8, #5, lsl #32
55; CHECK-NEXT:    str x8, [x0]
56; CHECK-NEXT:    ret
57  store i32 4, ptr %ptr
58  %addr2 = getelementptr i32, ptr %ptr, i64 1
59  store i32 5, ptr %addr2
60  ret void
61}
62
; Two constant i64 stores to adjacent doublewords (%ptr and %ptr + 8 bytes).
; The expected output keeps the two values in separate registers and writes
; them with one stp of two x registers, not as a single wider scalar store.
; NOTE(review): the "_illegal" suffix presumably refers to the combined
; 128-bit scalar type not being legal for this target -- confirm.
63define void @test_simple_2xs64_illegal(ptr %ptr) {
64; CHECK-LABEL: test_simple_2xs64_illegal:
65; CHECK:       ; %bb.0:
66; CHECK-NEXT:    mov w8, #4 ; =0x4
67; CHECK-NEXT:    mov w9, #5 ; =0x5
68; CHECK-NEXT:    stp x8, x9, [x0]
69; CHECK-NEXT:    ret
70  store i64 4, ptr %ptr
71  %addr2 = getelementptr i64, ptr %ptr, i64 1
72  store i64 5, ptr %addr2
73  ret void
74}
75
76; Don't merge vectors...yet.
; Two constant <2 x i16> vector stores to adjacent 4-byte slots of %ptr.
; The expected output has four strh instructions, one per vector element
; (4, 7, 5, 8), so no wider combined store is formed. The third strh uses
; pre-indexed writeback ([x0, #4]!), which is why the last one addresses
; [x0, #2] rather than [x0, #6].
77define void @test_simple_vector(ptr %ptr) {
78; CHECK-LABEL: test_simple_vector:
79; CHECK:       ; %bb.0:
80; CHECK-NEXT:    mov w8, #4 ; =0x4
81; CHECK-NEXT:    mov w9, #7 ; =0x7
82; CHECK-NEXT:    strh w8, [x0]
83; CHECK-NEXT:    mov w8, #5 ; =0x5
84; CHECK-NEXT:    strh w9, [x0, #2]
85; CHECK-NEXT:    mov w9, #8 ; =0x8
86; CHECK-NEXT:    strh w8, [x0, #4]!
87; CHECK-NEXT:    strh w9, [x0, #2]
88; CHECK-NEXT:    ret
89  store <2 x i16> <i16 4, i16 7>, ptr %ptr
90  %addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1
91  store <2 x i16> <i16 5, i16 8>, ptr %addr2
92  ret void
93}
94
; A load through %aliasptr sits between two adjacent constant i32 stores to
; %ptr. Neither pointer argument carries a noalias attribute, so nothing in
; the IR rules out the load overlapping the stored locations.
; The expected output keeps both 32-bit str instructions separate, with the
; ldr emitted between them; the loaded value is the function result.
95define i32 @test_unknown_alias(ptr %ptr, ptr %aliasptr) {
96; CHECK-LABEL: test_unknown_alias:
97; CHECK:       ; %bb.0:
98; CHECK-NEXT:    mov w9, #4 ; =0x4
99; CHECK-NEXT:    mov x8, x0
100; CHECK-NEXT:    str w9, [x0]
101; CHECK-NEXT:    mov w9, #5 ; =0x5
102; CHECK-NEXT:    ldr w0, [x1]
103; CHECK-NEXT:    str w9, [x8, #4]
104; CHECK-NEXT:    ret
105  store i32 4, ptr %ptr
106  %ld = load i32, ptr %aliasptr
107  %addr2 = getelementptr i32, ptr %ptr, i64 1
108  store i32 5, ptr %addr2
109  ret i32 %ld
110}
111
; Two back-to-back pairs of adjacent constant i32 stores: (4, 5) based on
; %ptr, then (9, 17) based on %ptr2.
; In the expected output the %ptr2 pair becomes one 64-bit str of the combined
; constant, while the %ptr pair is written with an stp of two w registers.
112define void @test_2x_2xs32(ptr %ptr, ptr %ptr2) {
113; CHECK-LABEL: test_2x_2xs32:
114; CHECK:       ; %bb.0:
115; CHECK-NEXT:    mov w8, #4 ; =0x4
116; CHECK-NEXT:    mov w9, #5 ; =0x5
117; CHECK-NEXT:    stp w8, w9, [x0]
118; CHECK-NEXT:    mov x8, #9 ; =0x9
119; CHECK-NEXT:    movk x8, #17, lsl #32
120; CHECK-NEXT:    str x8, [x1]
121; CHECK-NEXT:    ret
122  store i32 4, ptr %ptr
123  %addr2 = getelementptr i32, ptr %ptr, i64 1
124  store i32 5, ptr %addr2
125
126  store i32 9, ptr %ptr2
127  %addr4 = getelementptr i32, ptr %ptr2, i64 1
128  store i32 17, ptr %addr4
129  ret void
130}
131
; Same layout as the constant 2 x i8 case, but the stored values are the
; function arguments %v1 and %v2 instead of constants.
; The expected output is two separate strb instructions.
132define void @test_simple_var_2xs8(ptr %ptr, i8 %v1, i8 %v2) {
133; CHECK-LABEL: test_simple_var_2xs8:
134; CHECK:       ; %bb.0:
135; CHECK-NEXT:    strb w1, [x0]
136; CHECK-NEXT:    strb w2, [x0, #1]
137; CHECK-NEXT:    ret
138  store i8 %v1, ptr %ptr
139  %addr2 = getelementptr i8, ptr %ptr, i64 1
140  store i8 %v2, ptr %addr2
141  ret void
142}
143
; Two adjacent i16 stores whose values are the function arguments %v1 and %v2
; rather than constants.
; The expected output is two separate strh instructions; the values are not
; packed into one 32-bit store.
144define void @test_simple_var_2xs16(ptr %ptr, i16 %v1, i16 %v2) {
145; CHECK-LABEL: test_simple_var_2xs16:
146; CHECK:       ; %bb.0:
147; CHECK-NEXT:    strh w1, [x0]
148; CHECK-NEXT:    strh w2, [x0, #2]
149; CHECK-NEXT:    ret
150  store i16 %v1, ptr %ptr
151  %addr2 = getelementptr i16, ptr %ptr, i64 1
152  store i16 %v2, ptr %addr2
153  ret void
154}
155
; Two adjacent i32 stores whose values are the function arguments %v1 and %v2
; rather than constants.
; The expected output writes both argument registers with a single stp.
156define void @test_simple_var_2xs32(ptr %ptr, i32 %v1, i32 %v2) {
157; CHECK-LABEL: test_simple_var_2xs32:
158; CHECK:       ; %bb.0:
159; CHECK-NEXT:    stp w1, w2, [x0]
160; CHECK-NEXT:    ret
161  store i32 %v1, ptr %ptr
162  %addr2 = getelementptr i32, ptr %ptr, i64 1
163  store i32 %v2, ptr %addr2
164  ret void
165}
166
167
168; The store to ptr2 prevents merging into a single store.
169; We can still merge the stores to %ptr and %addr2.
; Four constant i16 stores (4, 5, 9, 14) to consecutive halfwords of %ptr,
; with a zero store through %ptr2 placed just before the last one.
; Expected output: 4 and 5 are written together by one 32-bit str to [x0];
; 9 and 14 remain individual strh instructions, and the strh to [x1] is
; emitted between them.
170define void @test_alias_4xs16(ptr %ptr, ptr %ptr2) {
171; CHECK-LABEL: test_alias_4xs16:
172; CHECK:       ; %bb.0:
173; CHECK-NEXT:    mov w8, #4 ; =0x4
174; CHECK-NEXT:    mov w9, #9 ; =0x9
175; CHECK-NEXT:    movk w8, #5, lsl #16
176; CHECK-NEXT:    strh w9, [x0, #4]
177; CHECK-NEXT:    str w8, [x0]
178; CHECK-NEXT:    mov w8, #14 ; =0xe
179; CHECK-NEXT:    strh wzr, [x1]
180; CHECK-NEXT:    strh w8, [x0, #6]
181; CHECK-NEXT:    ret
182  store i16 4, ptr %ptr
183  %addr2 = getelementptr i16, ptr %ptr, i64 1
184  store i16 5, ptr %addr2
185  %addr3 = getelementptr i16, ptr %ptr, i64 2
186  store i16 9, ptr %addr3
187  store i16 0, ptr %ptr2
188  %addr4 = getelementptr i16, ptr %ptr, i64 3
189  store i16 14, ptr %addr4
190  ret void
191}
192
193; Here store of 5 and 9 can be merged, others have aliasing barriers.
; Four constant i16 stores (4, 5, 9, 14) to consecutive halfwords of %ptr.
; A zero store through %ptr3 follows the first one and a zero store through
; %ptr2 precedes the last one.
; Expected output: 5 and 9 are written together by one 32-bit stur at
; [x0, #2]; 4 and 14 stay as individual strh instructions, and the stores
; through x2 and x1 keep their relative position.
194define void @test_alias2_4xs16(ptr %ptr, ptr %ptr2, ptr %ptr3) {
195; CHECK-LABEL: test_alias2_4xs16:
196; CHECK:       ; %bb.0:
197; CHECK-NEXT:    mov w8, #4 ; =0x4
198; CHECK-NEXT:    strh w8, [x0]
199; CHECK-NEXT:    mov w8, #5 ; =0x5
200; CHECK-NEXT:    movk w8, #9, lsl #16
201; CHECK-NEXT:    strh wzr, [x2]
202; CHECK-NEXT:    stur w8, [x0, #2]
203; CHECK-NEXT:    mov w8, #14 ; =0xe
204; CHECK-NEXT:    strh wzr, [x1]
205; CHECK-NEXT:    strh w8, [x0, #6]
206; CHECK-NEXT:    ret
207  store i16 4, ptr %ptr
208  %addr2 = getelementptr i16, ptr %ptr, i64 1
209  store i16 0, ptr %ptr3
210  store i16 5, ptr %addr2
211  %addr3 = getelementptr i16, ptr %ptr, i64 2
212  store i16 9, ptr %addr3
213  store i16 0, ptr %ptr2
214  %addr4 = getelementptr i16, ptr %ptr, i64 3
215  store i16 14, ptr %addr4
216  ret void
217}
218
219; No merging can be done here.
; Four constant i16 stores (4, 5, 9, 14) to consecutive halfwords of %ptr,
; with a zero store through a different pointer argument (%ptr3, %ptr4,
; %ptr2) between each neighbouring pair.
; The expected output has four individual strh instructions to x0-based
; addresses, interleaved with the three strh wzr stores in source order.
220define void @test_alias3_4xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
221; CHECK-LABEL: test_alias3_4xs16:
222; CHECK:       ; %bb.0:
223; CHECK-NEXT:    mov w8, #4 ; =0x4
224; CHECK-NEXT:    strh w8, [x0]
225; CHECK-NEXT:    mov w8, #5 ; =0x5
226; CHECK-NEXT:    strh wzr, [x2]
227; CHECK-NEXT:    strh w8, [x0, #2]
228; CHECK-NEXT:    mov w8, #9 ; =0x9
229; CHECK-NEXT:    strh wzr, [x3]
230; CHECK-NEXT:    strh w8, [x0, #4]
231; CHECK-NEXT:    mov w8, #14 ; =0xe
232; CHECK-NEXT:    strh wzr, [x1]
233; CHECK-NEXT:    strh w8, [x0, #6]
234; CHECK-NEXT:    ret
235  store i16 4, ptr %ptr
236  %addr2 = getelementptr i16, ptr %ptr, i64 1
237  store i16 0, ptr %ptr3
238  store i16 5, ptr %addr2
239  store i16 0, ptr %ptr4
240  %addr3 = getelementptr i16, ptr %ptr, i64 2
241  store i16 9, ptr %addr3
242  store i16 0, ptr %ptr2
243  %addr4 = getelementptr i16, ptr %ptr, i64 3
244  store i16 14, ptr %addr4
245  ret void
246}
247
248; Can merge because the load is from a different alloca and can't alias.
; Two stack objects: %a1 ([6 x i32]) and %a2 (i32). Constant i32 stores go to
; elements 0 and 1 of %a1, with a load from %a2 between them.
; The expected output writes both constants with one 64-bit str to [sp, #8]
; and loads the result from [sp, #4]. The %ptr argument is not used by the
; body.
249define i32 @test_alias_allocas_2xs32(ptr %ptr) {
250; CHECK-LABEL: test_alias_allocas_2xs32:
251; CHECK:       ; %bb.0:
252; CHECK-NEXT:    sub sp, sp, #32
253; CHECK-NEXT:    .cfi_def_cfa_offset 32
254; CHECK-NEXT:    mov x8, #4 ; =0x4
255; CHECK-NEXT:    ldr w0, [sp, #4]
256; CHECK-NEXT:    movk x8, #5, lsl #32
257; CHECK-NEXT:    str x8, [sp, #8]
258; CHECK-NEXT:    add sp, sp, #32
259; CHECK-NEXT:    ret
260  %a1 = alloca [6 x i32]
261  %a2 = alloca i32, align 4
262  store i32 4, ptr %a1
263  %ld = load i32, ptr %a2
264  %addr2 = getelementptr [6 x i32], ptr %a1, i64 0, i32 1
265  store i32 5, ptr %addr2
266  ret i32 %ld
267}
268
; Two adjacent i32 zero stores marked volatile, addressed through a pointer
; loaded from %ptr.
; The expected output keeps them as two separate 32-bit str wzr instructions
; (offsets 0 and 4); they are not combined into one 64-bit store.
269define void @test_volatile(ptr %ptr) {
270; CHECK-LABEL: test_volatile:
271; CHECK:       ; %bb.0: ; %entry
272; CHECK-NEXT:    ldr x8, [x0]
273; CHECK-NEXT:    str wzr, [x8]
274; CHECK-NEXT:    str wzr, [x8, #4]
275; CHECK-NEXT:    ret
276entry:
277  %0 = load ptr, ptr %ptr, align 8
278  store volatile i32 0, ptr %0, align 4;
279  %add.ptr.i.i38 = getelementptr inbounds i32, ptr %0, i64 1
280  store volatile i32 0, ptr %add.ptr.i.i38, align 4
281  ret void
282}
283
; Two adjacent i32 zero stores with atomic release ordering, addressed through
; a pointer loaded from %ptr.
; The expected output keeps them as two separate stlr instructions, with an
; explicit add of 4 to form the second address.
284define void @test_atomic(ptr %ptr) {
285; CHECK-LABEL: test_atomic:
286; CHECK:       ; %bb.0: ; %entry
287; CHECK-NEXT:    ldr x8, [x0]
288; CHECK-NEXT:    stlr wzr, [x8]
289; CHECK-NEXT:    add x8, x8, #4
290; CHECK-NEXT:    stlr wzr, [x8]
291; CHECK-NEXT:    ret
292entry:
293  %0 = load ptr, ptr %ptr, align 8
294  store atomic i32 0, ptr %0 release, align 4;
295  %add.ptr.i.i38 = getelementptr inbounds i32, ptr %0, i64 1
296  store atomic i32 0, ptr %add.ptr.i.i38 release, align 4
297  ret void
298}
299
300; Here store of 9 and 14 can be merged, but the store of 0 prevents the store
301; of 5 from being considered. This checks a corner case where we would skip
302; doing an alias check because of a >= vs > bug, due to the presence of a
303; non-aliasing instruction, in this case the load %safeld.
; Layout of the body: a load from the noalias argument %safe_ptr, then i32
; stores of 5, 9 and 14 to elements 1, 2 and 3 of %ptr, with an i16 zero store
; to the same address as the first i32 store placed right after it.
; Expected output: the last two i32 stores are written by one 64-bit str at
; [x8, #8]; the str of 5 and the strh of zero at [x8, #4] stay separate and in
; source order. %ptr2 and %ptr3 are not used by the body.
304define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_ptr) {
305; CHECK-LABEL: test_alias_3xs16:
306; CHECK:       ; %bb.0:
307; CHECK-NEXT:    mov x8, x0
308; CHECK-NEXT:    mov w9, #5 ; =0x5
309; CHECK-NEXT:    ldr w0, [x3]
310; CHECK-NEXT:    str w9, [x8, #4]
311; CHECK-NEXT:    mov x9, #9 ; =0x9
312; CHECK-NEXT:    movk x9, #14, lsl #32
313; CHECK-NEXT:    strh wzr, [x8, #4]
314; CHECK-NEXT:    str x9, [x8, #8]
315; CHECK-NEXT:    ret
316  %safeld = load i32, ptr %safe_ptr
317  %addr2 = getelementptr i32, ptr %ptr, i64 1
318  store i32 5, ptr %addr2
319  store i16 0, ptr %addr2 ; aliases directly with store above.
320  %addr3 = getelementptr i32, ptr %ptr, i64 2
321  store i32 9, ptr %addr3
322  %addr4 = getelementptr i32, ptr %ptr, i64 3
323  store i32 14, ptr %addr4
324  ret i32 %safeld
325}
326
; External array of ten i32 values; the function below stores into it.
327@G = external global [10 x i32]
328
; Two i32 zero stores into @G: one at the variable index %0, one at the
; constant index 1.
; The expected output keeps two separate str wzr instructions: a
; register-offset store ([x8, x0, lsl #2]) and an immediate-offset store
; ([x8, #4]).
; NOTE(review): the name suggests this guards against treating the
; variable-index address as "base + 0" and pairing it with the index-1 store
; -- confirm against the originating bug report.
329define void @invalid_zero_offset_no_merge(i64 %0) {
330; CHECK-LABEL: invalid_zero_offset_no_merge:
331; CHECK:       ; %bb.0:
332; CHECK-NEXT:  Lloh0:
333; CHECK-NEXT:    adrp x8, _G@GOTPAGE
334; CHECK-NEXT:  Lloh1:
335; CHECK-NEXT:    ldr x8, [x8, _G@GOTPAGEOFF]
336; CHECK-NEXT:    str wzr, [x8, x0, lsl #2]
337; CHECK-NEXT:    str wzr, [x8, #4]
338; CHECK-NEXT:    ret
339; CHECK-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
340  %2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
341  store i32 0, ptr %2, align 4
342  store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
343  ret void
344}
345