; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM

; Tests that we can avoid null-pointer checks for addrspacecasts from/to private/local.
;
; In each passing case, the addrspacecast should be replaced with the
; llvm.amdgcn.addrspacecast.nonnull intrinsic, and the resulting code should contain
; no select/cndmask null check for the pointer.
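;
; For contrast, when the source of a cast cannot be proven nonnull, the generic
; expansion keeps the null check. As a rough, illustrative sketch (pseudocode, not a
; pattern this file checks; make_flat stands in for the aperture-based conversion),
; a private-to-flat cast of a possibly-null pointer behaves like:
;   flat = (priv == -1) ? null : make_flat(priv)   ; -1 is the private/local null value
; which lowers to a v_cmp against -1 feeding v_cndmask selects, as in the
; cast_private_to_flat_to_local test at the end of this file.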

define void @local_to_flat_nonnull_arg(ptr addrspace(3) nonnull %ptr) {
; OPT-LABEL: define void @local_to_flat_nonnull_arg(
; OPT-SAME: ptr addrspace(3) nonnull [[PTR:%.*]]) {
; OPT-NEXT:    [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; OPT-NEXT:    store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; ASM-LABEL: local_to_flat_nonnull_arg:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_shared_base
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %x = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

define void @private_to_flat_nonnull_arg(ptr addrspace(5) nonnull %ptr) {
; OPT-LABEL: define void @private_to_flat_nonnull_arg(
; OPT-SAME: ptr addrspace(5) nonnull [[PTR:%.*]]) {
; OPT-NEXT:    [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
; OPT-NEXT:    store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; ASM-LABEL: private_to_flat_nonnull_arg:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %x = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

define void @flat_to_local_nonnull_arg(ptr nonnull %ptr) {
; OPT-LABEL: define void @flat_to_local_nonnull_arg(
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; ASM-LABEL: flat_to_local_nonnull_arg:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    ds_write_b32 v0, v1
; ASM-NEXT:    s_waitcnt lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %x = addrspacecast ptr %ptr to ptr addrspace(3)
  store volatile i32 7, ptr addrspace(3) %x
  ret void
}

define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
; OPT-LABEL: define void @flat_to_private_nonnull_arg(
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; ASM-LABEL: flat_to_private_nonnull_arg:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT:    s_waitcnt vmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %x = addrspacecast ptr %ptr to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %x
  ret void
}

define void @private_alloca_to_flat(ptr %ptr) {
; OPT-LABEL: define void @private_alloca_to_flat(
; OPT-SAME: ptr [[PTR:%.*]]) {
; OPT-NEXT:    [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
; OPT-NEXT:    [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[ALLOCA]])
; OPT-NEXT:    store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; DAGISEL-ASM-LABEL: private_alloca_to_flat:
; DAGISEL-ASM:       ; %bb.0:
; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; DAGISEL-ASM-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, s5
; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
; DAGISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: private_alloca_to_flat:
; GISEL-ASM:       ; %bb.0:
; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT:    s_lshr_b32 s4, s32, 6
; GISEL-ASM-NEXT:    s_mov_b64 s[6:7], src_private_base
; GISEL-ASM-NEXT:    s_mov_b32 s5, s7
; GISEL-ASM-NEXT:    v_mov_b32_e32 v0, s4
; GISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, s5
; GISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
  %alloca = alloca i8, addrspace(5)
  %x = addrspacecast ptr addrspace(5) %alloca to ptr
  store volatile i32 7, ptr %x
  ret void
}

@lds = internal unnamed_addr addrspace(3) global i8 poison, align 4

define void @knownbits_on_flat_to_priv(ptr %ptr) {
; OPT-LABEL: define void @knownbits_on_flat_to_priv(
; OPT-SAME: ptr [[PTR:%.*]]) {
; OPT-NEXT:    [[PTR_INT:%.*]] = ptrtoint ptr [[PTR]] to i64
; OPT-NEXT:    [[PTR_OR:%.*]] = or i64 [[PTR_INT]], 15
; OPT-NEXT:    [[KB_PTR:%.*]] = inttoptr i64 [[PTR_OR]] to ptr
; OPT-NEXT:    [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[KB_PTR]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; ASM-LABEL: knownbits_on_flat_to_priv:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_or_b32_e32 v0, 15, v0
; ASM-NEXT:    v_mov_b32_e32 v1, 7
; ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT:    s_waitcnt vmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %ptr.int = ptrtoint ptr %ptr to i64
  %ptr.or = or i64 %ptr.int, 15 ; set some low bits so the flat pointer cannot be null (0)
  %kb.ptr = inttoptr i64 %ptr.or to ptr
  %x = addrspacecast ptr %kb.ptr to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %x
  ret void
}

define void @knownbits_on_priv_to_flat(ptr addrspace(5) %ptr) {
; OPT-LABEL: define void @knownbits_on_priv_to_flat(
; OPT-SAME: ptr addrspace(5) [[PTR:%.*]]) {
; OPT-NEXT:    [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
; OPT-NEXT:    [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
; OPT-NEXT:    [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
; OPT-NEXT:    [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[KB_PTR]])
; OPT-NEXT:    store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT:    ret void
;
; ASM-LABEL: knownbits_on_priv_to_flat:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; ASM-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; ASM-NEXT:    v_mov_b32_e32 v1, s5
; ASM-NEXT:    v_mov_b32_e32 v2, 7
; ASM-NEXT:    flat_store_dword v[0:1], v2
; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
  %ptr.or = and i32 %ptr.int, 65535 ; ensure only the lower 16 bits can be set, so the pointer cannot be -1 (the private null)
  %kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
  %x = addrspacecast ptr addrspace(5) %kb.ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; OPT-LABEL: define void @recursive_phis(
; OPT-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
; OPT-NEXT:  [[ENTRY:.*]]:
; OPT-NEXT:    [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
; OPT-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
; OPT:       [[THEN]]:
; OPT-NEXT:    [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
; OPT-NEXT:    [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
; OPT-NEXT:    [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
; OPT-NEXT:    br label %[[FINALLY:.*]]
; OPT:       [[ELSE]]:
; OPT-NEXT:    [[OTHER_PHI:%.*]] = phi ptr addrspace(5) [ [[ALLOCA]], %[[ENTRY]] ], [ [[PHI_PTR:%.*]], %[[FINALLY]] ]
; OPT-NEXT:    br label %[[FINALLY]]
; OPT:       [[FINALLY]]:
; OPT-NEXT:    [[PHI_PTR]] = phi ptr addrspace(5) [ [[KB_PTR]], %[[THEN]] ], [ [[OTHER_PHI]], %[[ELSE]] ]
; OPT-NEXT:    [[TMP0:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PHI_PTR]])
; OPT-NEXT:    store volatile i32 7, ptr [[TMP0]], align 4
; OPT-NEXT:    br i1 [[COND]], label %[[ELSE]], label %[[END:.*]]
; OPT:       [[END]]:
; OPT-NEXT:    ret void
;
; DAGISEL-ASM-LABEL: recursive_phis:
; DAGISEL-ASM:       ; %bb.0: ; %entry
; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT:    v_and_b32_e32 v0, 1, v0
; DAGISEL-ASM-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; DAGISEL-ASM-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
; DAGISEL-ASM-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; DAGISEL-ASM-NEXT:  ; %bb.1: ; %then
; DAGISEL-ASM-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; DAGISEL-ASM-NEXT:  ; %bb.2: ; %finallyendcf.split
; DAGISEL-ASM-NEXT:    s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT:    s_xor_b64 s[6:7], vcc, -1
; DAGISEL-ASM-NEXT:    s_mov_b64 s[4:5], 0
; DAGISEL-ASM-NEXT:    s_mov_b64 s[8:9], src_private_base
; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
; DAGISEL-ASM-NEXT:  .LBB7_3: ; %finally
; DAGISEL-ASM-NEXT:    ; =>This Inner Loop Header: Depth=1
; DAGISEL-ASM-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
; DAGISEL-ASM-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, s9
; DAGISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
; DAGISEL-ASM-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT:    s_cbranch_execnz .LBB7_3
; DAGISEL-ASM-NEXT:  ; %bb.4: ; %end
; DAGISEL-ASM-NEXT:    s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: recursive_phis:
; GISEL-ASM:       ; %bb.0: ; %entry
; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT:    v_and_b32_e32 v0, 1, v0
; GISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-ASM-NEXT:    s_lshr_b32 s6, s32, 6
; GISEL-ASM-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GISEL-ASM-NEXT:    v_mov_b32_e32 v0, s6
; GISEL-ASM-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; GISEL-ASM-NEXT:  ; %bb.1: ; %then
; GISEL-ASM-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GISEL-ASM-NEXT:  ; %bb.2: ; %finallyendcf.split
; GISEL-ASM-NEXT:    s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT:    s_mov_b64 s[8:9], src_private_base
; GISEL-ASM-NEXT:    s_mov_b64 s[6:7], 0
; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, s9
; GISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
; GISEL-ASM-NEXT:  .LBB7_3: ; %finally
; GISEL-ASM-NEXT:    ; =>This Inner Loop Header: Depth=1
; GISEL-ASM-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GISEL-ASM-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
; GISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
; GISEL-ASM-NEXT:    s_andn2_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT:    s_cbranch_execnz .LBB7_3
; GISEL-ASM-NEXT:  ; %bb.4: ; %end
; GISEL-ASM-NEXT:    s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %alloca = alloca i8, addrspace(5)
  br i1 %cond, label %then, label %else

then:
  %ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
  %ptr.or = and i32 %ptr.int, 65535 ; ensure only the lower 16 bits can be set, so the pointer cannot be -1 (the private null)
  %kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
  br label %finally

else:
  %other.phi = phi ptr addrspace(5) [%alloca, %entry], [%phi.ptr, %finally]
  br label %finally

finally:
  %phi.ptr = phi ptr addrspace(5) [%kb.ptr, %then], [%other.phi, %else]
  %x = addrspacecast ptr addrspace(5) %phi.ptr to ptr
  store volatile i32 7, ptr %x
  br i1 %cond, label %else, label %end

end:
  ret void
}

; This used to assert due to assuming the size of the source address
; space was larger than the destination.

define i32 @cast_private_to_flat_to_private(ptr addrspace(5) %private.ptr) {
; OPT-LABEL: define i32 @cast_private_to_flat_to_private(
; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
; OPT-NEXT:    [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
; OPT-NEXT:    [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(5)
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(5) [[CAST_BACK]], align 4
; OPT-NEXT:    ret i32 [[LOAD]]
;
; ASM-LABEL: cast_private_to_flat_to_private:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen glc
; ASM-NEXT:    s_waitcnt vmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
  %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(5)
  %load = load volatile i32, ptr addrspace(5) %cast.back
  ret i32 %load
}

; This is UB but shouldn't assert.
define i32 @cast_private_to_flat_to_local(ptr addrspace(5) %private.ptr) {
; OPT-LABEL: define i32 @cast_private_to_flat_to_local(
; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
; OPT-NEXT:    [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
; OPT-NEXT:    [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[CAST_BACK]], align 4
; OPT-NEXT:    ret i32 [[LOAD]]
;
; DAGISEL-ASM-LABEL: cast_private_to_flat_to_local:
; DAGISEL-ASM:       ; %bb.0:
; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, s5
; DAGISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; DAGISEL-ASM-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; DAGISEL-ASM-NEXT:    ds_read_b32 v0, v0
; DAGISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: cast_private_to_flat_to_local:
; GISEL-ASM:       ; %bb.0:
; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, s5
; GISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GISEL-ASM-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; GISEL-ASM-NEXT:    ds_read_b32 v0, v0
; GISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
  %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
  %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(3)
  %load = load volatile i32, ptr addrspace(3) %cast.back
  ret i32 %load
}

; This is UB but shouldn't assert.
define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) {
; OPT-LABEL: define i32 @cast_private_to_flat_to_global(
; OPT-SAME: ptr addrspace(6) [[CONST32_PTR:%.*]]) {
; OPT-NEXT:    [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(6) [[CONST32_PTR]] to ptr
; OPT-NEXT:    [[LOCAL_PTR:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
; OPT-NEXT:    ret i32 [[LOAD]]
;
; ASM-LABEL: cast_private_to_flat_to_global:
; ASM:       ; %bb.0:
; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT:    v_mov_b32_e32 v1, 0
; ASM-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; ASM-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; ASM-NEXT:    ds_read_b32 v0, v0
; ASM-NEXT:    s_waitcnt lgkmcnt(0)
; ASM-NEXT:    s_setpc_b64 s[30:31]
  %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr
  %local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3)
  %load = load volatile i32, ptr addrspace(3) %local.ptr
  ret i32 %load
}