; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -stop-after=irtranslator -o - %s | FileCheck %s
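; This test verifies that dereferenceable and dereferenceable_or_null return
; attributes, whether written on the callee declaration or on the call site,
; are reflected as the dereferenceable flag on the MachineMemOperand of the
; load translated from the returned pointer.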

declare align(8) dereferenceable(8) ptr @declared_with_ret_deref() #0
declare align(8) ptr @unknown_decl() #0
declare align(8) dereferenceable(4) ptr @declared_with_ret_deref4() #0
declare align(8) dereferenceable_or_null(8) ptr @declared_with_ret_deref_or_null() #0
declare align(8) nonnull ptr @nonnull_decl() #0
declare align(8) dereferenceable_or_null(4) ptr @declared_with_ret_deref_or_null4() #0

; Should have dereferenceable on mem operand
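; The declaration carries dereferenceable(8), which covers the s64 load even
; though the call site itself adds no attribute.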
define i64 @load_deref_declaration_only() {
  ; CHECK-LABEL: name: load_deref_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call ptr @declared_with_ret_deref()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; No dereferenceable on mem operand
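; @unknown_decl provides no dereferenceability information, so the MMO is a
; plain (load (s64)) without the dereferenceable flag.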
define i64 @load_deref_unknown_decl() {
  ; CHECK-LABEL: name: load_deref_unknown_decl
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call ptr @unknown_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Should have dereferenceable on mem operand
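; Here only the call site carries dereferenceable(8); that alone is enough to
; mark the MMO dereferenceable.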
define i64 @load_deref_callsite_only() {
  ; CHECK-LABEL: name: load_deref_callsite_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call dereferenceable(8) ptr @unknown_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Both loads should have effective dereferenceable(8) since the
; maximum should be used.
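; %call0 combines call-site dereferenceable(4) with declaration
; dereferenceable(8); %call1 combines call-site dereferenceable(8) with
; declaration dereferenceable(4). Taking the maximum gives 8 bytes in both
; cases, enough for each s64 load.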
define i64 @load_deref_maximum_callsite_declaration_only() {
  ; CHECK-LABEL: name: load_deref_maximum_callsite_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1)
  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call0 = call dereferenceable(4) ptr @declared_with_ret_deref()
  %load0 = load i64, ptr %call0, align 8
  %call1 = call dereferenceable(8) ptr @declared_with_ret_deref4()
  %load1 = load i64, ptr %call1, align 8
  %add = add i64 %load0, %load1
  ret i64 %add
}

; Should have dereferenceable on mem operand
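; dereferenceable_or_null(8) on the declaration combined with nonnull on the
; call site implies the pointer is dereferenceable for 8 bytes.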
define i64 @load_deref_or_null_declaration_only() {
  ; CHECK-LABEL: name: load_deref_or_null_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call nonnull ptr @declared_with_ret_deref_or_null()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; No dereferenceable on mem operand
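; nonnull alone says nothing about how many bytes are accessible, so the MMO
; is not marked dereferenceable.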
define i64 @load_deref_or_null_nonnull_decl() {
  ; CHECK-LABEL: name: load_deref_or_null_nonnull_decl
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (load (s64) from %ir.call)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call ptr @nonnull_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Should have dereferenceable on mem operand
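; The call-site dereferenceable_or_null(8) combined with nonnull on the
; declaration implies dereferenceable(8).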
define i64 @load_deref_or_null_callsite_only() {
  ; CHECK-LABEL: name: load_deref_or_null_callsite_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call = call dereferenceable_or_null(8) ptr @nonnull_decl()
  %load = load i64, ptr %call, align 8
  ret i64 %load
}

; Both loads should have effective dereferenceable(8) since the
; maximum should be used.
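; As in the dereferenceable case, the larger of the call-site and declaration
; dereferenceable_or_null values applies, and the nonnull call sites upgrade
; it to an unconditional dereferenceable(8) for both loads.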
define i64 @load_deref_or_null_maximum_callsite_declaration_only() {
  ; CHECK-LABEL: name: load_deref_or_null_maximum_callsite_declaration_only
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV]], 8
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN]](p0) :: (dereferenceable load (s64) from %ir.call0)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[ASSERT_ALIGN1:%[0-9]+]]:_(p0) = G_ASSERT_ALIGN [[MV1]], 8
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[ASSERT_ALIGN1]](p0) :: (dereferenceable load (s64) from %ir.call1)
  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
  %call0 = call dereferenceable_or_null(4) nonnull ptr @declared_with_ret_deref_or_null()
  %load0 = load i64, ptr %call0, align 8
  %call1 = call dereferenceable_or_null(8) nonnull ptr @declared_with_ret_deref_or_null4()
  %load1 = load i64, ptr %call1, align 8
  %add = add i64 %load0, %load1
  ret i64 %add
}

attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }