; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
; RUN: llc -debugify-and-strip-all-safe -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-postlegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s

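; Simple pre-indexed case: the G_PTR_ADD feeding the load folds into a
; G_INDEXED_LOAD (last operand 1 = pre-indexed), which also produces the
; advanced pointer we return.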
define ptr @test_simple_load_pre(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_load_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 1 :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT:   $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %next
  ret ptr %next
}

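; The advanced pointer is never used (the function returns null), so
; pre-indexing has no benefit and the plain G_PTR_ADD + G_LOAD are kept.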
define ptr @test_unused_load_pre(ptr %ptr) {

  ; CHECK-LABEL: name: test_unused_load_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
  ; CHECK-NEXT:   $x0 = COPY [[C1]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %next
  ret ptr null
}

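; Store analogue of the simple pre-indexed case: the combine forms a
; G_INDEXED_STORE (mode 1) whose result is the written-back address.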
define ptr @test_simple_store_pre(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_store_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
  ; CHECK-NEXT:   [[INDEXED_STORE:%[0-9]+]]:_(p0) = G_INDEXED_STORE [[C1]](s8), [[COPY]], [[C]](s64), 1 :: (volatile store (s8) into %ir.next)
  ; CHECK-NEXT:   $x0 = COPY [[INDEXED_STORE]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  store volatile i8 0, ptr %next
  ret ptr %next
}

; The potentially pre-indexed address is also the value being stored. Converting
; would define the stored value too late (albeit only by one instruction), so the
; combine is rejected and a plain G_STORE remains.
define ptr @test_store_pre_val_loop(ptr %ptr) {

  ; CHECK-LABEL: name: test_store_pre_val_loop
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 336
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; CHECK-NEXT:   G_STORE [[PTR_ADD]](p0), [[PTR_ADD]](p0) :: (volatile store (p0) into %ir.next)
  ; CHECK-NEXT:   $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %next = getelementptr ptr, ptr %ptr, i32 42
  store volatile ptr %next, ptr %next
  ret ptr %next
}

; The potentially pre-indexed address is used (passed to a call) between the GEP
; that computes it and the load, so the load is not converted.
define ptr @test_load_pre_before(ptr %ptr) {

  ; CHECK-LABEL: name: test_load_pre_before
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; CHECK-NEXT:   $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT:   BL @bar, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT:   $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  call void @bar(ptr %next)
  load volatile i8, ptr %next
  ret ptr %next
}

; Materializing the base into a writable register (from sp/fp) would be just as
; bad as the original GEP.
define ptr @test_alloca_load_pre() {

  ; CHECK-LABEL: name: test_alloca_load_pre
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (volatile load (s8) from %ir.next)
  ; CHECK-NEXT:   $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %ptr = alloca i8, i32 128
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %next
  ret ptr %next
}

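; Simple post-indexed case: loading from the original pointer and advancing it
; afterwards combine into a G_INDEXED_LOAD (last operand 0 = post-indexed).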
define ptr @test_simple_load_post(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_load_post
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[C]](s64), 0 :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT:   $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %ptr
  ret ptr %next
}

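; The GEP is emitted after the load it indexes; the combine still forms a
; post-indexed load once the offset from the call is available.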
define ptr @test_simple_load_post_gep_after(ptr %ptr) {

  ; CHECK-LABEL: name: test_simple_load_post_gep_after
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT:   [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT:   $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %offset = call i64 @get_offset()
  load volatile i8, ptr %ptr
  %next = getelementptr i8, ptr %ptr, i64 %offset
  ret ptr %next
}

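; Unrelated uses of the base pointer between the load and the GEP (the ptrtoint
; and the store to @var) do not stop the search: the post-indexed load still forms.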
define ptr @test_load_post_keep_looking(ptr %ptr) {

  ; CHECK-LABEL: name: test_load_post_keep_looking
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT:   [[INDEXED_LOAD:%[0-9]+]]:_(s8), [[INDEXED_LOAD1:%[0-9]+]]:_(p0) = G_INDEXED_LOAD [[COPY]], [[COPY1]](s64), 0 :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0)
  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[PTRTOINT]](s64)
  ; CHECK-NEXT:   [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var
  ; CHECK-NEXT:   [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var
  ; CHECK-NEXT:   G_STORE [[TRUNC]](s8), [[ADD_LOW]](p0) :: (store (s8) into @var)
  ; CHECK-NEXT:   $x0 = COPY [[INDEXED_LOAD1]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %offset = call i64 @get_offset()
  load volatile i8, ptr %ptr
  %intval = ptrtoint ptr %ptr to i8
  store i8 %intval, ptr @var

  %next = getelementptr i8, ptr %ptr, i64 %offset
  ret ptr %next
}

; The base is a frame index, so indexed addressing would need a copy anyway.
define ptr @test_load_post_alloca() {

  ; CHECK-LABEL: name: test_load_post_alloca
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
  ; CHECK-NEXT:   $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  %ptr = alloca i8, i32 128
  %next = getelementptr i8, ptr %ptr, i32 42
  load volatile i8, ptr %ptr
  ret ptr %next
}

; The offset computation does not dominate the load we might be indexing.
define ptr @test_load_post_gep_offset_after(ptr %ptr) {

  ; CHECK-LABEL: name: test_load_post_gep_offset_after
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (volatile load (s8) from %ir.ptr)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   BL @get_offset, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64)
  ; CHECK-NEXT:   $x0 = COPY [[PTR_ADD]](p0)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  load volatile i8, ptr %ptr
  %offset = call i64 @get_offset()
  %next = getelementptr i8, ptr %ptr, i64 %offset
  ret ptr %next
}

declare void @bar(ptr)
declare i64 @get_offset()
@var = global i8 0
@varp8 = global ptr null