xref: /llvm-project/llvm/test/CodeGen/AMDGPU/addrspacecast.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri -mattr=-promote-alloca < %s | llc | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
2; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 -mattr=-promote-alloca < %s | llc | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s
3
4target triple = "amdgcn-amd-amdhsa"
5
6; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
7
8; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
9; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
10; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
11; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
12; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
13
14; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
15
16; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
17; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
18
19; GFX9: s_cmp_lg_u32 [[PTR]], -1
20; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
21; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
22
23; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
24
25; HSA:  .amdhsa_user_sgpr_private_segment_buffer 1
26; HSA:  .amdhsa_user_sgpr_dispatch_ptr 0
27; CI:   .amdhsa_user_sgpr_queue_ptr 1
28; GFX9: .amdhsa_user_sgpr_queue_ptr 0
29
30; At most 2 digits. Make sure src_shared_base is not counted as a high
31; number SGPR.
32
33; HSA: NumSgprs: {{[0-9]+}}
; Kernel. Casts a group (LDS, addrspace 3) pointer argument to a flat pointer
; and performs a volatile store of 7 through it.
34define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 {
35  %stof = addrspacecast ptr addrspace(3) %ptr to ptr
36  store volatile i32 7, ptr %stof
37  ret void
38}
39
40; Test handling inside a non-kernel
41; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
42; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x10{{$}}
43; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
44; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
45; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
46; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0
47
48; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
49
50; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
51
52; GFX9-DAG: v_mov_b32_e32 v[[VREG_HIBASE:[0-9]+]], s[[HIBASE]]
53; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
54; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
55; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc
56
57; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
; Non-kernel function variant of the group-to-flat cast: same volatile store
; of 7 through the flat pointer, but compiled with the callable-function ABI.
58define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
59  %stof = addrspacecast ptr addrspace(3) %ptr to ptr
60  store volatile i32 7, ptr %stof
61  ret void
62}
63
64; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
65
66; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
67; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
68
69; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
70; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
71; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
72; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
73
74; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
75; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base
76
77; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
78; GFX9: s_cmp_lg_u32 [[PTR]], -1
79; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
80; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
81
82; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
83
84; HSA:  .amdhsa_user_sgpr_private_segment_buffer 1
85; HSA:  .amdhsa_user_sgpr_dispatch_ptr 0
86; CI:   .amdhsa_user_sgpr_queue_ptr 1
87; GFX9: .amdhsa_user_sgpr_queue_ptr 0
88
89; HSA: NumSgprs: {{[0-9]+}}
; Kernel. Casts a private (scratch, addrspace 5) pointer argument to a flat
; pointer and performs a volatile store of 7 through it.
90define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
91  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
92  store volatile i32 7, ptr %stof
93  ret void
94}
95
96; no-op
97; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
98
99; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
100; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
101; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
102; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
103; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
104
105; HSA:  .amdhsa_user_sgpr_queue_ptr 0
; Kernel. Global (addrspace 1) to flat cast is a no-op at the bit level;
; the volatile store goes through the resulting flat pointer.
106define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
107  %stof = addrspacecast ptr addrspace(1) %ptr to ptr
108  store volatile i32 7, ptr %stof
109  ret void
110}
111
112; no-op
113; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
114; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
115; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
116; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
117; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
; Kernel. Constant (addrspace 4) to flat cast, then a volatile load through
; the flat pointer (result intentionally unused).
118define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
119  %stof = addrspacecast ptr addrspace(4) %ptr to ptr
120  %ld = load volatile i32, ptr %stof
121  ret void
122}
123
124; HSA-LABEL: {{^}}use_constant_to_global_addrspacecast:
125; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
126; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
127; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
128; CI: {{flat|global}}_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
129
130; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
131; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]]
; Kernel. Constant (addrspace 4) to global (addrspace 1) cast, then a
; volatile load through the global pointer.
132define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 {
133  %stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1)
134  %ld = load volatile i32, ptr addrspace(1) %stof
135  ret void
136}
137
138; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:
139
140; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
141; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
142; CI-DAG: s_and_b64 s{{[[0-9]+:[0-9]+]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec
143; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1
144; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]]
145; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
146; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
147; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
148; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
149; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]]
150; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]]
151
152; HSA:  .amdhsa_user_sgpr_private_segment_buffer 1
153; HSA:  .amdhsa_user_sgpr_dispatch_ptr 0
154; HSA:  .amdhsa_user_sgpr_queue_ptr 0
; Kernel. Flat to group (addrspace 3) cast, then a volatile store of 0 via
; ds_write; the checks above show null flat input selecting -1.
155define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
156  %ftos = addrspacecast ptr %ptr to ptr addrspace(3)
157  store volatile i32 0, ptr addrspace(3) %ftos
158  ret void
159}
160
161; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:
162
163; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; NOTE(review): the three "CI-DAG" lines below are missing the trailing colon,
; so FileCheck treats them as plain comments and never checks them -- confirm
; whether they are stale or should be reactivated with a colon.
164; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
165; CI-DAG v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
166; CI-DAG v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
167; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
168; CI-DAG: s_and_b64 s{{[[0-9]+:[0-9]+]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec
169; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1
170; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]]
171; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
172; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
173; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
174; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
175; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
176; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
177
178; HSA:  .amdhsa_user_sgpr_private_segment_buffer 1
179; HSA:  .amdhsa_user_sgpr_dispatch_ptr 0
180; HSA:  .amdhsa_user_sgpr_queue_ptr 0
; Kernel. Flat to private (addrspace 5) cast, then a volatile store of 0
; through scratch (buffer_store per the checks above).
181define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
182  %ftos = addrspacecast ptr %ptr to ptr addrspace(5)
183  store volatile i32 0, ptr addrspace(5) %ftos
184  ret void
185}
186
187; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:
188
189; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
190; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
191; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
192; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
193; CI: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
194
195; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
196; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}
197
198; HSA:  .amdhsa_user_sgpr_queue_ptr 0
; Kernel. Flat to global cast is a no-op; volatile store of 0 through the
; global pointer.
199define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
200  %ftos = addrspacecast ptr %ptr to ptr addrspace(1)
201  store volatile i32 0, ptr addrspace(1) %ftos
202  ret void
203}
204
205; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:
206
207; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
208; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0
209
210; HSA:  .amdhsa_user_sgpr_queue_ptr 0
; Kernel. Flat to constant (addrspace 4) cast; the unnamed volatile load
; keeps the access alive without using the value.
211define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
212  %ftos = addrspacecast ptr %ptr to ptr addrspace(4)
213  load volatile i32, ptr addrspace(4) %ftos
214  ret void
215}
216
217; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
218
219; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
220; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
221; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
222; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
; Kernel. Group null cast to flat folds to the flat null pointer (0,0 per
; the checks above); volatile store of 7 through it.
223define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
224  %cast = addrspacecast ptr addrspace(3) null to ptr
225  store volatile i32 7, ptr %cast
226  ret void
227}
228
229; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
230; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
231; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
232; HSA: ds_write_b32 [[PTR]], [[K]]
; Kernel. Flat null cast to group folds to -1 (the group null value per the
; checks above); volatile ds_write of 7 through it.
233define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
234  %cast = addrspacecast ptr null to ptr addrspace(3)
235  store volatile i32 7, ptr addrspace(3) %cast
236  ret void
237}
238
239; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
240; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
241; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
242; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
243; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
; Kernel. Group pointer -1 cast to flat folds to flat null (0,0 per the
; checks above); volatile store of 7 through it.
244define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
245  %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
246  store volatile i32 7, ptr %cast
247  ret void
248}
249
250; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
251; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
252; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
253; HSA: ds_write_b32 [[PTR]], [[K]]
; Kernel. Flat pointer -1 cast to group keeps the -1 bit pattern (checks
; above); volatile ds_write of 7 through it.
254define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
255  %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3)
256  store volatile i32 7, ptr addrspace(3) %cast
257  ret void
258}
259
260; FIXME: Shouldn't need to enable queue ptr
261; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
262; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
263; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
264; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
265; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
; Kernel. Private null cast to flat folds to flat null (0,0 per the checks
; above); volatile store of 7 through it.
266define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
267  %cast = addrspacecast ptr addrspace(5) null to ptr
268  store volatile i32 7, ptr %cast
269  ret void
270}
271
272; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
273; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
274; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
275; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
; Kernel. Flat null cast to private folds to -1 (private null value per the
; checks above); volatile buffer_store of 7 through it.
276define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
277  %cast = addrspacecast ptr null to ptr addrspace(5)
278  store volatile i32 7, ptr addrspace(5) %cast
279  ret void
280}
281
282; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
283
284; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
285; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
286; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
287; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
288
289; CI:  .amdhsa_user_sgpr_queue_ptr 1
290; GFX9:  .amdhsa_user_sgpr_queue_ptr 0
; Kernel. Private pointer -1 cast to flat folds to flat null (0,0 per the
; checks above); volatile store of 7 through it.
291define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
292  %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
293  store volatile i32 7, ptr %cast
294  ret void
295}
296
297; HSA-LABEL: {{^}}cast_neg1_flat_to_private_addrspacecast:
298; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
299; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
300; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
; Kernel. Flat pointer -1 cast to private keeps the -1 bit pattern (checks
; above); volatile buffer_store of 7 through it.
301define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
302  %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5)
303  store volatile i32 7, ptr addrspace(5) %cast
304  ret void
305}
306
307
308; Disable optimizations in case there are optimizations added that
309; specialize away generic pointer accesses.
310
311; HSA-LABEL: {{^}}branch_use_flat_i32:
312; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
313; HSA: s_endpgm
; Kernel. Branches on %c to cast either a group or a global pointer to flat,
; merges the two flat pointers with a phi, and stores %x through the result.
; Exercises flat access whose origin address space is only known dynamically.
314define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 {
315entry:
316  %cmp = icmp ne i32 %c, 0
317  br i1 %cmp, label %local, label %global
318
319local:
320  %flat_local = addrspacecast ptr addrspace(3) %lptr to ptr
321  br label %end
322
323global:
324  %flat_global = addrspacecast ptr addrspace(1) %gptr to ptr
325  br label %end
326
327end:
328  %fptr = phi ptr [ %flat_local, %local ], [ %flat_global, %global ]
329  store volatile i32 %x, ptr %fptr, align 4
330;  %val = load i32, ptr %fptr, align 4
331;  store i32 %val, ptr addrspace(1) %out, align 4
332  ret void
333}
334
335; Check for prologue initializing special SGPRs pointing to scratch.
336; HSA-LABEL: {{^}}store_flat_scratch:
337; CI-DAG: s_mov_b32 flat_scratch_lo, s9
338; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
339; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
340
341; GFX9: s_add_u32 flat_scratch_lo, s6, s9
342; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
343
344; HSA: {{flat|global}}_store_dword
345; HSA: s_barrier
346; HSA: {{flat|global}}_load_dword
; Kernel. Allocates a private array, casts an element pointer to flat, stores
; the workitem id through it, then reloads it after a barrier and writes the
; value out to global memory. Exercises flat access to scratch (the prologue
; checks above verify flat_scratch register setup).
347define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
348  %alloca = alloca i32, i32 9, align 4, addrspace(5)
349  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
350  %pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x
351  %fptr = addrspacecast ptr addrspace(5) %pptr to ptr
352  store volatile i32 %x, ptr %fptr
353  ; Dummy call
354  call void @llvm.amdgcn.s.barrier() #1
355  %reload = load volatile i32, ptr %fptr, align 4
356  store volatile i32 %reload, ptr addrspace(1) %out, align 4
357  ret void
358}
359
360; HSA-LABEL: {{^}}use_constant_to_constant32_addrspacecast
361; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
362; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
363; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
364; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
365; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
366; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
; Kernel. Loads a constant pointer, truncating-casts it to the 32-bit
; constant address space (addrspace 6), offsets it, and loads through it.
367define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
368  %ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr
369  %addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6)
370  %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
371  %load = load volatile i32, ptr addrspace(6) %gep, align 4
372  ret void
373}
374
375; HSA-LABEL: {{^}}use_global_to_constant32_addrspacecast
376; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
377; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
378; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
379; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
380; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
381; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
; Kernel. Same as the constant variant above, but the loaded pointer is a
; global (addrspace 1) pointer cast down to 32-bit constant (addrspace 6).
382define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
383  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) %ptr.ptr
384  %addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6)
385  %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
386  %load = load volatile i32, ptr addrspace(6) %gep, align 4
387  ret void
388}
389
390; GCN-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_0:
391; GCN: s_load_dword [[PTR:s[0-9]+]],
392; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0
393; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
394; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
; Kernel. 32-bit constant (addrspace 6) pointer cast up to flat; the checks
; above show the high half filled with 0 under attribute set #0.
395define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 {
396  %stof = addrspacecast ptr addrspace(6) %ptr to ptr
397  %load = load volatile i32, ptr %stof
398  ret void
399}
400
401; GCN-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_1:
402; GCN: s_load_dword [[PTR:s[0-9]+]],
403; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
404; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
405; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
; Kernel. Same cast as above but under attribute set #3; the checks above
; show the high half filled with 0xffff8000 instead of 0.
406define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 {
407  %stof = addrspacecast ptr addrspace(6) %ptr to ptr
408  %load = load volatile i32, ptr %stof
409  ret void
410}
411
; Non-kernel function. Vector flat-to-private cast: each 64-bit flat lane is
; compared against 0 and the low 32 bits are kept, with null mapping to -1
; (per the generated checks below).
412define <2 x ptr addrspace(5)> @addrspacecast_v2p0_to_v2p5(<2 x ptr> %ptr) {
413; HSA-LABEL: addrspacecast_v2p0_to_v2p5:
414; HSA:       ; %bb.0:
415; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
417; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
418; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
419; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
420; HSA-NEXT:    s_setpc_b64 s[30:31]
421  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(5)>
422  ret <2 x ptr addrspace(5)> %cast
423}
424
; Non-kernel function. 3-wide vector flat-to-private cast; same per-lane
; null-to-(-1) handling as the 2-wide case above.
425define <3 x ptr addrspace(5)> @addrspacecast_v3p0_to_v3p5(<3 x ptr> %ptr) {
426; HSA-LABEL: addrspacecast_v3p0_to_v3p5:
427; HSA:       ; %bb.0:
428; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
430; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
431; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
432; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
433; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
434; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
435; HSA-NEXT:    s_setpc_b64 s[30:31]
436  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(5)>
437  ret <3 x ptr addrspace(5)> %cast
438}
439
; Non-kernel function. 4-wide vector flat-to-private cast; per-lane compare
; against 0 with null mapping to -1.
440define <4 x ptr addrspace(5)> @addrspacecast_v4p0_to_v4p5(<4 x ptr> %ptr) {
441; HSA-LABEL: addrspacecast_v4p0_to_v4p5:
442; HSA:       ; %bb.0:
443; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
445; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
446; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
447; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
448; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
449; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
450; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
451; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
452; HSA-NEXT:    s_setpc_b64 s[30:31]
453  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(5)>
454  ret <4 x ptr addrspace(5)> %cast
455}
456
; Non-kernel function. 8-wide vector flat-to-private cast; per-lane compare
; against 0 with null mapping to -1.
457define <8 x ptr addrspace(5)> @addrspacecast_v8p0_to_v8p5(<8 x ptr> %ptr) {
458; HSA-LABEL: addrspacecast_v8p0_to_v8p5:
459; HSA:       ; %bb.0:
460; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
462; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
463; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
464; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
465; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
466; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
467; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
468; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
469; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
470; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
471; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
472; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
473; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
474; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
475; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
476; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
477; HSA-NEXT:    s_setpc_b64 s[30:31]
478  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(5)>
479  ret <8 x ptr addrspace(5)> %cast
480}
481
; Non-kernel function. 16-wide vector flat-to-private cast; the last lane's
; high half spills to the stack (buffer_load of v31 in the checks below).
; Per-lane compare against 0 with null mapping to -1, as in the smaller cases.
482define <16 x ptr addrspace(5)> @addrspacecast_v16p0_to_v16p5(<16 x ptr> %ptr) {
483; HSA-LABEL: addrspacecast_v16p0_to_v16p5:
484; HSA:       ; %bb.0:
485; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
487; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
488; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
489; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
490; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
491; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
492; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
493; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
494; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
495; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
496; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
497; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
498; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
499; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
500; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
501; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
502; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
503; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
504; HSA-NEXT:    v_cndmask_b32_e32 v8, -1, v16, vcc
505; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
506; HSA-NEXT:    v_cndmask_b32_e32 v9, -1, v18, vcc
507; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
508; HSA-NEXT:    v_cndmask_b32_e32 v10, -1, v20, vcc
509; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[22:23]
510; HSA-NEXT:    v_cndmask_b32_e32 v11, -1, v22, vcc
511; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[24:25]
512; HSA-NEXT:    v_cndmask_b32_e32 v12, -1, v24, vcc
513; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[26:27]
514; HSA-NEXT:    v_cndmask_b32_e32 v13, -1, v26, vcc
515; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[28:29]
516; HSA-NEXT:    v_cndmask_b32_e32 v14, -1, v28, vcc
517; HSA-NEXT:    s_waitcnt vmcnt(0)
518; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[30:31]
519; HSA-NEXT:    v_cndmask_b32_e32 v15, -1, v30, vcc
520; HSA-NEXT:    s_setpc_b64 s[30:31]
521  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(5)>
522  ret <16 x ptr addrspace(5)> %cast
523}
524
; Non-kernel function. Vector private-to-flat cast: each lane compares the
; 32-bit private pointer against -1 (private null) and otherwise pairs it
; with the private aperture high half (queue-ptr load on CI, the
; src_private_base register on GFX9, per the checks below).
525define <2 x ptr> @addrspacecast_v2p5_to_v2p0(<2 x ptr addrspace(5)> %ptr) {
526; CI-LABEL: addrspacecast_v2p5_to_v2p0:
527; CI:       ; %bb.0:
528; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
530; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
531; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
532; CI-NEXT:    s_waitcnt lgkmcnt(0)
533; CI-NEXT:    v_mov_b32_e32 v3, s4
534; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
535; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
536; CI-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
537; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
538; CI-NEXT:    v_mov_b32_e32 v1, v4
539; CI-NEXT:    s_setpc_b64 s[30:31]
540;
541; GFX9-LABEL: addrspacecast_v2p5_to_v2p0:
542; GFX9:       ; %bb.0:
543; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
545; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
546; GFX9-NEXT:    v_mov_b32_e32 v3, s5
547; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
548; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
549; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
550; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
551; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
552; GFX9-NEXT:    v_mov_b32_e32 v1, v4
553; GFX9-NEXT:    s_setpc_b64 s[30:31]
554  %cast = addrspacecast <2 x ptr addrspace(5)> %ptr to <2 x ptr>
555  ret <2 x ptr> %cast
556}
557
; Non-kernel function. 3-wide vector private-to-flat cast; same per-lane
; -1-to-null handling and aperture high half as the 2-wide case above.
558define <3 x ptr> @addrspacecast_v3p5_to_v3p0(<3 x ptr addrspace(5)> %ptr) {
559; CI-LABEL: addrspacecast_v3p5_to_v3p0:
560; CI:       ; %bb.0:
561; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
563; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
564; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
565; CI-NEXT:    s_waitcnt lgkmcnt(0)
566; CI-NEXT:    v_mov_b32_e32 v5, s4
567; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
568; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
569; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
570; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
571; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
572; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
573; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
574; CI-NEXT:    v_mov_b32_e32 v1, v7
575; CI-NEXT:    v_mov_b32_e32 v2, v6
576; CI-NEXT:    s_setpc_b64 s[30:31]
577;
578; GFX9-LABEL: addrspacecast_v3p5_to_v3p0:
579; GFX9:       ; %bb.0:
580; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
582; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
583; GFX9-NEXT:    v_mov_b32_e32 v5, s5
584; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
585; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
586; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
587; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
588; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
589; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
590; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
591; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
592; GFX9-NEXT:    v_mov_b32_e32 v1, v7
593; GFX9-NEXT:    v_mov_b32_e32 v2, v6
594; GFX9-NEXT:    s_setpc_b64 s[30:31]
595  %cast = addrspacecast <3 x ptr addrspace(5)> %ptr to <3 x ptr>
596  ret <3 x ptr> %cast
597}
598
; Non-kernel function. 4-wide vector private-to-flat cast; same per-lane
; -1-to-null handling and aperture high half as the smaller cases above.
599define <4 x ptr> @addrspacecast_v4p5_to_v4p0(<4 x ptr addrspace(5)> %ptr) {
600; CI-LABEL: addrspacecast_v4p5_to_v4p0:
601; CI:       ; %bb.0:
602; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
604; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
605; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
606; CI-NEXT:    s_waitcnt lgkmcnt(0)
607; CI-NEXT:    v_mov_b32_e32 v7, s4
608; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
609; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
610; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
611; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
612; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
613; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
614; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
615; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
616; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
617; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
618; CI-NEXT:    v_mov_b32_e32 v1, v10
619; CI-NEXT:    v_mov_b32_e32 v2, v8
620; CI-NEXT:    v_mov_b32_e32 v3, v9
621; CI-NEXT:    s_setpc_b64 s[30:31]
622;
623; GFX9-LABEL: addrspacecast_v4p5_to_v4p0:
624; GFX9:       ; %bb.0:
625; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
627; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
628; GFX9-NEXT:    v_mov_b32_e32 v7, s5
629; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
630; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
631; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
632; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
633; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
634; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
635; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
636; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
637; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
638; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
639; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
640; GFX9-NEXT:    v_mov_b32_e32 v1, v10
641; GFX9-NEXT:    v_mov_b32_e32 v2, v8
642; GFX9-NEXT:    v_mov_b32_e32 v3, v9
643; GFX9-NEXT:    s_setpc_b64 s[30:31]
644  %cast = addrspacecast <4 x ptr addrspace(5)> %ptr to <4 x ptr>
645  ret <4 x ptr> %cast
646}
647
; Non-kernel function. 8-wide vector private-to-flat cast; same per-lane
; -1-to-null handling and aperture high half as the smaller cases above.
648define <8 x ptr> @addrspacecast_v8p5_to_v8p0(<8 x ptr addrspace(5)> %ptr) {
649; CI-LABEL: addrspacecast_v8p5_to_v8p0:
650; CI:       ; %bb.0:
651; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
653; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
654; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
655; CI-NEXT:    s_waitcnt lgkmcnt(0)
656; CI-NEXT:    v_mov_b32_e32 v15, s4
657; CI-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
658; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
659; CI-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
660; CI-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
661; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
662; CI-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
663; CI-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
664; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
665; CI-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
666; CI-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
667; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
668; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
669; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
670; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
671; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
672; CI-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
673; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
674; CI-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
675; CI-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
676; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
677; CI-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
678; CI-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
679; CI-NEXT:    v_mov_b32_e32 v1, v22
680; CI-NEXT:    v_mov_b32_e32 v2, v16
681; CI-NEXT:    v_mov_b32_e32 v3, v17
682; CI-NEXT:    v_mov_b32_e32 v4, v18
683; CI-NEXT:    v_mov_b32_e32 v5, v19
684; CI-NEXT:    v_mov_b32_e32 v6, v20
685; CI-NEXT:    v_mov_b32_e32 v7, v21
686; CI-NEXT:    s_setpc_b64 s[30:31]
687;
688; GFX9-LABEL: addrspacecast_v8p5_to_v8p0:
689; GFX9:       ; %bb.0:
690; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
692; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
693; GFX9-NEXT:    v_mov_b32_e32 v15, s5
694; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
695; GFX9-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
696; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
697; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
698; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
699; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
700; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
701; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
702; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
703; GFX9-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
704; GFX9-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
705; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
706; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
707; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
708; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
709; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
710; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
711; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
712; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
713; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
714; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
715; GFX9-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
716; GFX9-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
717; GFX9-NEXT:    v_mov_b32_e32 v1, v22
718; GFX9-NEXT:    v_mov_b32_e32 v2, v16
719; GFX9-NEXT:    v_mov_b32_e32 v3, v17
720; GFX9-NEXT:    v_mov_b32_e32 v4, v18
721; GFX9-NEXT:    v_mov_b32_e32 v5, v19
722; GFX9-NEXT:    v_mov_b32_e32 v6, v20
723; GFX9-NEXT:    v_mov_b32_e32 v7, v21
724; GFX9-NEXT:    s_setpc_b64 s[30:31]
725  %cast = addrspacecast <8 x ptr addrspace(5)> %ptr to <8 x ptr>
726  ret <8 x ptr> %cast
727}
728
; private (addrspace 5) -> flat cast, 16 wide: each 32-bit private pointer is
; compared against -1 (the invalid private pointer). Valid lanes are widened
; with the private-aperture high dword (loaded from an implicit argument via
; s_load on CI, taken from src_private_base on GFX9); invalid lanes become the
; 64-bit null flat pointer.
define <16 x ptr> @addrspacecast_v16p5_to_v16p0(<16 x ptr addrspace(5)> %ptr) {
; CI-LABEL: addrspacecast_v16p5_to_v16p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v5
; CI-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v6
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v31, s4
; CI-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v34, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v39, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v35, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v32, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v4
; CI-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v7
; CI-NEXT:    v_cndmask_b32_e32 v36, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e64 v48, 0, v4, s[4:5]
; CI-NEXT:    v_cndmask_b32_e64 v37, 0, v5, s[6:7]
; CI-NEXT:    v_cndmask_b32_e64 v33, 0, v6, s[8:9]
; CI-NEXT:    v_cndmask_b32_e64 v38, 0, v7, s[10:11]
; CI-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v8
; CI-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v9
; CI-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v10
; CI-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v11
; CI-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v12
; CI-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v13
; CI-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v14
; CI-NEXT:    v_cmp_ne_u32_e64 s[26:27], -1, v15
; CI-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[12:13]
; CI-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[14:15]
; CI-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[16:17]
; CI-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[18:19]
; CI-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[20:21]
; CI-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[22:23]
; CI-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[24:25]
; CI-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[26:27]
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v31, vcc
; CI-NEXT:    v_cndmask_b32_e64 v9, 0, v31, s[4:5]
; CI-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[6:7]
; CI-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[8:9]
; CI-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[10:11]
; CI-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[12:13]
; CI-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[14:15]
; CI-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[16:17]
; CI-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[18:19]
; CI-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[20:21]
; CI-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[22:23]
; CI-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[24:25]
; CI-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[26:27]
; CI-NEXT:    v_mov_b32_e32 v1, v49
; CI-NEXT:    v_mov_b32_e32 v2, v34
; CI-NEXT:    v_mov_b32_e32 v3, v39
; CI-NEXT:    v_mov_b32_e32 v4, v35
; CI-NEXT:    v_mov_b32_e32 v5, v32
; CI-NEXT:    v_mov_b32_e32 v6, v36
; CI-NEXT:    v_mov_b32_e32 v8, v48
; CI-NEXT:    v_mov_b32_e32 v10, v37
; CI-NEXT:    v_mov_b32_e32 v12, v33
; CI-NEXT:    v_mov_b32_e32 v14, v38
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v16p5_to_v16p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v31, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v34, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v39, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v35, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v32, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v4
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v5
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v6
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v7
; GFX9-NEXT:    v_cndmask_b32_e32 v36, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v48, 0, v4, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v37, 0, v5, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v33, 0, v6, s[8:9]
; GFX9-NEXT:    v_cndmask_b32_e64 v38, 0, v7, s[10:11]
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v8
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v9
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v10
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v11
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v12
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v13
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v14
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[26:27], -1, v15
; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[12:13]
; GFX9-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[14:15]
; GFX9-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[16:17]
; GFX9-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[18:19]
; GFX9-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[20:21]
; GFX9-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[22:23]
; GFX9-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[24:25]
; GFX9-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[26:27]
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v31, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v31, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[8:9]
; GFX9-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[10:11]
; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[12:13]
; GFX9-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[14:15]
; GFX9-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[16:17]
; GFX9-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[18:19]
; GFX9-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[20:21]
; GFX9-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[22:23]
; GFX9-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[24:25]
; GFX9-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[26:27]
; GFX9-NEXT:    v_mov_b32_e32 v1, v49
; GFX9-NEXT:    v_mov_b32_e32 v2, v34
; GFX9-NEXT:    v_mov_b32_e32 v3, v39
; GFX9-NEXT:    v_mov_b32_e32 v4, v35
; GFX9-NEXT:    v_mov_b32_e32 v5, v32
; GFX9-NEXT:    v_mov_b32_e32 v6, v36
; GFX9-NEXT:    v_mov_b32_e32 v8, v48
; GFX9-NEXT:    v_mov_b32_e32 v10, v37
; GFX9-NEXT:    v_mov_b32_e32 v12, v33
; GFX9-NEXT:    v_mov_b32_e32 v14, v38
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr addrspace(5)> %ptr to <16 x ptr>
  ret <16 x ptr> %cast
}
863
; flat -> group (LDS, addrspace 3) cast, 2 wide: each 64-bit flat pointer is
; compared against null; non-null lanes keep the low 32 bits, null lanes
; become -1 (the invalid group pointer). Same code on CI and GFX9 (HSA).
define <2 x ptr addrspace(3)> @addrspacecast_v2p0_to_v2p3(<2 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v2p0_to_v2p3:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(3)>
  ret <2 x ptr addrspace(3)> %cast
}
876
; flat -> group cast, 3 wide (non-power-of-2 vector): per-lane null check,
; selecting the low dword for non-null lanes and -1 for null lanes.
define <3 x ptr addrspace(3)> @addrspacecast_v3p0_to_v3p3(<3 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v3p0_to_v3p3:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(3)>
  ret <3 x ptr addrspace(3)> %cast
}
891
; flat -> group cast, 4 wide: per-lane null check against the 64-bit flat
; pointer, result is low dword or -1.
define <4 x ptr addrspace(3)> @addrspacecast_v4p0_to_v4p3(<4 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v4p0_to_v4p3:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(3)>
  ret <4 x ptr addrspace(3)> %cast
}
908
; flat -> group cast, 8 wide: per-lane null check; all 16 incoming dwords fit
; in VGPR argument registers v0-v15.
define <8 x ptr addrspace(3)> @addrspacecast_v8p0_to_v8p3(<8 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v8p0_to_v8p3:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(3)>
  ret <8 x ptr addrspace(3)> %cast
}
933
; flat -> group cast, 16 wide: the 32 argument dwords exceed the VGPR argument
; registers, so the final dword (v31) is reloaded from the stack with
; buffer_load before the last lane's null check.
define <16 x ptr addrspace(3)> @addrspacecast_v16p0_to_v16p3(<16 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v16p0_to_v16p3:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
; HSA-NEXT:    v_cndmask_b32_e32 v8, -1, v16, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
; HSA-NEXT:    v_cndmask_b32_e32 v9, -1, v18, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
; HSA-NEXT:    v_cndmask_b32_e32 v10, -1, v20, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[22:23]
; HSA-NEXT:    v_cndmask_b32_e32 v11, -1, v22, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[24:25]
; HSA-NEXT:    v_cndmask_b32_e32 v12, -1, v24, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[26:27]
; HSA-NEXT:    v_cndmask_b32_e32 v13, -1, v26, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[28:29]
; HSA-NEXT:    v_cndmask_b32_e32 v14, -1, v28, vcc
; HSA-NEXT:    s_waitcnt vmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[30:31]
; HSA-NEXT:    v_cndmask_b32_e32 v15, -1, v30, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(3)>
  ret <16 x ptr addrspace(3)> %cast
}
976
; group (LDS) -> flat cast, 2 wide: each 32-bit group pointer is compared
; against -1 (invalid); valid lanes are widened with the shared-aperture high
; dword (s_load from an implicit argument at offset 0x10 on CI,
; src_shared_base on GFX9), invalid lanes become 64-bit null.
define <2 x ptr> @addrspacecast_v2p3_to_v2p0(<2 x ptr addrspace(3)> %ptr) {
; CI-LABEL: addrspacecast_v2p3_to_v2p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v3, s4
; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v4
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v2p3_to_v2p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v3, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr addrspace(3)> %ptr to <2 x ptr>
  ret <2 x ptr> %cast
}
1009
; group -> flat cast, 3 wide: same per-lane select against -1 with the shared
; aperture in the high dword; only the aperture source differs between CI
; (s_load) and GFX9 (src_shared_base).
define <3 x ptr> @addrspacecast_v3p3_to_v3p0(<3 x ptr addrspace(3)> %ptr) {
; CI-LABEL: addrspacecast_v3p3_to_v3p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v5, s4
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v7
; CI-NEXT:    v_mov_b32_e32 v2, v6
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v3p3_to_v3p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v5, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v7
; GFX9-NEXT:    v_mov_b32_e32 v2, v6
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <3 x ptr addrspace(3)> %ptr to <3 x ptr>
  ret <3 x ptr> %cast
}
1050
; group -> flat cast, 4 wide: per-lane compare against -1, widening valid
; lanes with the shared aperture high dword and zeroing invalid lanes.
define <4 x ptr> @addrspacecast_v4p3_to_v4p0(<4 x ptr addrspace(3)> %ptr) {
; CI-LABEL: addrspacecast_v4p3_to_v4p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v7, s4
; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v10
; CI-NEXT:    v_mov_b32_e32 v2, v8
; CI-NEXT:    v_mov_b32_e32 v3, v9
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v4p3_to_v4p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v7, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v10
; GFX9-NEXT:    v_mov_b32_e32 v2, v8
; GFX9-NEXT:    v_mov_b32_e32 v3, v9
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr addrspace(3)> %ptr to <4 x ptr>
  ret <4 x ptr> %cast
}
1099
; group -> flat cast, 8 wide: the aperture high dword is kept in v15 and
; reused for every lane's select; results are shuffled back into v0-v15
; with trailing moves.
define <8 x ptr> @addrspacecast_v8p3_to_v8p0(<8 x ptr addrspace(3)> %ptr) {
; CI-LABEL: addrspacecast_v8p3_to_v8p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v15, s4
; CI-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
; CI-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
; CI-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
; CI-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
; CI-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
; CI-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v22
; CI-NEXT:    v_mov_b32_e32 v2, v16
; CI-NEXT:    v_mov_b32_e32 v3, v17
; CI-NEXT:    v_mov_b32_e32 v4, v18
; CI-NEXT:    v_mov_b32_e32 v5, v19
; CI-NEXT:    v_mov_b32_e32 v6, v20
; CI-NEXT:    v_mov_b32_e32 v7, v21
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v8p3_to_v8p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v15, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
; GFX9-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v22
; GFX9-NEXT:    v_mov_b32_e32 v2, v16
; GFX9-NEXT:    v_mov_b32_e32 v3, v17
; GFX9-NEXT:    v_mov_b32_e32 v4, v18
; GFX9-NEXT:    v_mov_b32_e32 v5, v19
; GFX9-NEXT:    v_mov_b32_e32 v6, v20
; GFX9-NEXT:    v_mov_b32_e32 v7, v21
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr addrspace(3)> %ptr to <8 x ptr>
  ret <8 x ptr> %cast
}
1180
; group -> flat cast, 16 wide: with 16 lanes the single vcc is not enough, so
; later lanes keep their compare results in SGPR pairs (s[4:5]..s[26:27]) and
; use the _e64 cndmask form. Shared-aperture high dword comes from s_load on
; CI and src_shared_base on GFX9.
define <16 x ptr> @addrspacecast_v16p3_to_v16p0(<16 x ptr addrspace(3)> %ptr) {
; CI-LABEL: addrspacecast_v16p3_to_v16p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v5
; CI-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v6
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v31, s4
; CI-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v34, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v39, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v35, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v32, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v4
; CI-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v7
; CI-NEXT:    v_cndmask_b32_e32 v36, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e64 v48, 0, v4, s[4:5]
; CI-NEXT:    v_cndmask_b32_e64 v37, 0, v5, s[6:7]
; CI-NEXT:    v_cndmask_b32_e64 v33, 0, v6, s[8:9]
; CI-NEXT:    v_cndmask_b32_e64 v38, 0, v7, s[10:11]
; CI-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v8
; CI-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v9
; CI-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v10
; CI-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v11
; CI-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v12
; CI-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v13
; CI-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v14
; CI-NEXT:    v_cmp_ne_u32_e64 s[26:27], -1, v15
; CI-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[12:13]
; CI-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[14:15]
; CI-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[16:17]
; CI-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[18:19]
; CI-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[20:21]
; CI-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[22:23]
; CI-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[24:25]
; CI-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[26:27]
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v31, vcc
; CI-NEXT:    v_cndmask_b32_e64 v9, 0, v31, s[4:5]
; CI-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[6:7]
; CI-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[8:9]
; CI-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[10:11]
; CI-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[12:13]
; CI-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[14:15]
; CI-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[16:17]
; CI-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[18:19]
; CI-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[20:21]
; CI-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[22:23]
; CI-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[24:25]
; CI-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[26:27]
; CI-NEXT:    v_mov_b32_e32 v1, v49
; CI-NEXT:    v_mov_b32_e32 v2, v34
; CI-NEXT:    v_mov_b32_e32 v3, v39
; CI-NEXT:    v_mov_b32_e32 v4, v35
; CI-NEXT:    v_mov_b32_e32 v5, v32
; CI-NEXT:    v_mov_b32_e32 v6, v36
; CI-NEXT:    v_mov_b32_e32 v8, v48
; CI-NEXT:    v_mov_b32_e32 v10, v37
; CI-NEXT:    v_mov_b32_e32 v12, v33
; CI-NEXT:    v_mov_b32_e32 v14, v38
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v16p3_to_v16p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v31, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v34, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v39, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v35, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v32, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v4
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v5
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v6
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v7
; GFX9-NEXT:    v_cndmask_b32_e32 v36, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v48, 0, v4, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v37, 0, v5, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v33, 0, v6, s[8:9]
; GFX9-NEXT:    v_cndmask_b32_e64 v38, 0, v7, s[10:11]
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v8
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v9
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v10
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v11
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v12
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v13
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v14
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[26:27], -1, v15
; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[12:13]
; GFX9-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[14:15]
; GFX9-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[16:17]
; GFX9-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[18:19]
; GFX9-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[20:21]
; GFX9-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[22:23]
; GFX9-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[24:25]
; GFX9-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[26:27]
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v31, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v31, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[8:9]
; GFX9-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[10:11]
; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[12:13]
; GFX9-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[14:15]
; GFX9-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[16:17]
; GFX9-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[18:19]
; GFX9-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[20:21]
; GFX9-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[22:23]
; GFX9-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[24:25]
; GFX9-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[26:27]
; GFX9-NEXT:    v_mov_b32_e32 v1, v49
; GFX9-NEXT:    v_mov_b32_e32 v2, v34
; GFX9-NEXT:    v_mov_b32_e32 v3, v39
; GFX9-NEXT:    v_mov_b32_e32 v4, v35
; GFX9-NEXT:    v_mov_b32_e32 v5, v32
; GFX9-NEXT:    v_mov_b32_e32 v6, v36
; GFX9-NEXT:    v_mov_b32_e32 v8, v48
; GFX9-NEXT:    v_mov_b32_e32 v10, v37
; GFX9-NEXT:    v_mov_b32_e32 v12, v33
; GFX9-NEXT:    v_mov_b32_e32 v14, v38
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr addrspace(3)> %ptr to <16 x ptr>
  ret <16 x ptr> %cast
}
1315
; flat -> global cast, 2 wide: flat and global pointers share the same
; representation, so no instructions are needed beyond the return.
define <2 x ptr addrspace(1)> @addrspacecast_v2p0_to_v2p1(<2 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v2p0_to_v2p1:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(1)>
  ret <2 x ptr addrspace(1)> %cast
}
1324
; flat -> global cast, 3 wide: no-op, values stay in their argument registers.
define <3 x ptr addrspace(1)> @addrspacecast_v3p0_to_v3p1(<3 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v3p0_to_v3p1:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(1)>
  ret <3 x ptr addrspace(1)> %cast
}
1333
; flat -> global cast, 4 wide: no-op.
define <4 x ptr addrspace(1)> @addrspacecast_v4p0_to_v4p1(<4 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v4p0_to_v4p1:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(1)>
  ret <4 x ptr addrspace(1)> %cast
}
1342
; flat -> global cast, 8 wide: no-op.
define <8 x ptr addrspace(1)> @addrspacecast_v8p0_to_v8p1(<8 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v8p0_to_v8p1:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(1)>
  ret <8 x ptr addrspace(1)> %cast
}
1351
; flat -> global cast, 16 wide: still a no-op conversion, but the 32nd
; argument dword spills to the stack, so v31 is reloaded and waited on before
; returning.
define <16 x ptr addrspace(1)> @addrspacecast_v16p0_to_v16p1(<16 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v16p0_to_v16p1:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; HSA-NEXT:    s_waitcnt vmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(1)>
  ret <16 x ptr addrspace(1)> %cast
}
1362
; global -> flat cast, 2 wide: no-op in the reverse direction as well.
define <2 x ptr> @addrspacecast_v2p1_to_v2p0(<2 x ptr addrspace(1)> %ptr) {
; HSA-LABEL: addrspacecast_v2p1_to_v2p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr addrspace(1)> %ptr to <2 x ptr>
  ret <2 x ptr> %cast
}
1371
; global -> flat cast, degenerate 1-element vector: no-op.
define <1 x ptr> @addrspacecast_v1p1_to_v1p0(<1 x ptr addrspace(1)> %ptr) {
; HSA-LABEL: addrspacecast_v1p1_to_v1p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <1 x ptr addrspace(1)> %ptr to <1 x ptr>
  ret <1 x ptr> %cast
}
1380
; global -> flat cast, 4 wide: no-op.
define <4 x ptr> @addrspacecast_v4p1_to_v4p0(<4 x ptr addrspace(1)> %ptr) {
; HSA-LABEL: addrspacecast_v4p1_to_v4p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr addrspace(1)> %ptr to <4 x ptr>
  ret <4 x ptr> %cast
}
1389
; global -> flat cast, 8 wide: no-op.
define <8 x ptr> @addrspacecast_v8p1_to_v8p0(<8 x ptr addrspace(1)> %ptr) {
; HSA-LABEL: addrspacecast_v8p1_to_v8p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr addrspace(1)> %ptr to <8 x ptr>
  ret <8 x ptr> %cast
}
1398
; global -> flat cast, 16 wide: no-op conversion; only the stack reload of the
; final argument dword (v31) appears.
define <16 x ptr> @addrspacecast_v16p1_to_v16p0(<16 x ptr addrspace(1)> %ptr) {
; HSA-LABEL: addrspacecast_v16p1_to_v16p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; HSA-NEXT:    s_waitcnt vmcnt(0)
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr addrspace(1)> %ptr to <16 x ptr>
  ret <16 x ptr> %cast
}
1409
; Flat (64-bit) -> addrspace(6) (32-bit) vector cast: each pointer is
; truncated to its low dword, so codegen just compacts the low halves of
; the 64-bit pairs (v0,v2 -> v0,v1); v0 is already in place.
define <2 x ptr addrspace(6)> @addrspacecast_v2p0_to_v2p6(<2 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v2p0_to_v2p6:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v1, v2
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(6)>
  ret <2 x ptr addrspace(6)> %cast
}
1419
; 3-element flat -> addrspace(6) truncating cast: the low dwords of each
; 64-bit pointer (v0, v2, v4) are packed into v0..v2.
define <3 x ptr addrspace(6)> @addrspacecast_v3p0_to_v3p6(<3 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v3p0_to_v3p6:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v1, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v4
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(6)>
  ret <3 x ptr addrspace(6)> %cast
}
1430
; 4-element flat -> addrspace(6) truncating cast: low dwords v0,v2,v4,v6
; are compacted into v0..v3 (v3 moved first so v6 is read before v4 lands
; in v2).
define <4 x ptr addrspace(6)> @addrspacecast_v4p0_to_v4p6(<4 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v4p0_to_v4p6:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v3, v6
; HSA-NEXT:    v_mov_b32_e32 v1, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v4
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(6)>
  ret <4 x ptr addrspace(6)> %cast
}
1442
; 8-element flat -> addrspace(6) truncating cast: the even source dwords
; (low halves v0,v2,...,v14) are compacted into v0..v7; the move order
; avoids overwriting a source register before it is read.
define <8 x ptr addrspace(6)> @addrspacecast_v8p0_to_v8p6(<8 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v8p0_to_v8p6:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v7, v14
; HSA-NEXT:    v_mov_b32_e32 v5, v10
; HSA-NEXT:    v_mov_b32_e32 v3, v6
; HSA-NEXT:    v_mov_b32_e32 v1, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v4
; HSA-NEXT:    v_mov_b32_e32 v4, v8
; HSA-NEXT:    v_mov_b32_e32 v6, v12
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(6)>
  ret <8 x ptr addrspace(6)> %cast
}
1458
; 16-element flat -> addrspace(6) truncating cast: low dwords
; v0,v2,...,v30 are compacted into v0..v15, odd destinations written
; first (high to low) so no source is clobbered before use.
define <16 x ptr addrspace(6)> @addrspacecast_v16p0_to_v16p6(<16 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v16p0_to_v16p6:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v15, v30
; HSA-NEXT:    v_mov_b32_e32 v13, v26
; HSA-NEXT:    v_mov_b32_e32 v11, v22
; HSA-NEXT:    v_mov_b32_e32 v9, v18
; HSA-NEXT:    v_mov_b32_e32 v7, v14
; HSA-NEXT:    v_mov_b32_e32 v5, v10
; HSA-NEXT:    v_mov_b32_e32 v3, v6
; HSA-NEXT:    v_mov_b32_e32 v1, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v4
; HSA-NEXT:    v_mov_b32_e32 v4, v8
; HSA-NEXT:    v_mov_b32_e32 v6, v12
; HSA-NEXT:    v_mov_b32_e32 v8, v16
; HSA-NEXT:    v_mov_b32_e32 v10, v20
; HSA-NEXT:    v_mov_b32_e32 v12, v24
; HSA-NEXT:    v_mov_b32_e32 v14, v28
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(6)>
  ret <16 x ptr addrspace(6)> %cast
}
1482
; addrspace(6) (32-bit) -> flat (64-bit) vector cast: each 32-bit pointer
; is widened to 64 bits with the high dword set to 0 (these functions do
; not carry the "amdgpu-32bit-address-high-bits" attribute, cf. #3 below).
define <2 x ptr> @addrspacecast_v2p6_to_v2p0(<2 x ptr addrspace(6)> %ptr) {
; HSA-LABEL: addrspacecast_v2p6_to_v2p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    v_mov_b32_e32 v2, v1
; HSA-NEXT:    v_mov_b32_e32 v1, 0
; HSA-NEXT:    v_mov_b32_e32 v3, 0
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr addrspace(6)> %ptr to <2 x ptr>
  ret <2 x ptr> %cast
}
1494
; Single-element addrspace(6) -> flat widening cast: only the high dword
; (v1) needs to be zeroed; the low dword is already in v0.
define <1 x ptr> @addrspacecast_v1p6_to_v1p0(<1 x ptr addrspace(6)> %ptr) {
; HSA-LABEL: addrspacecast_v1p6_to_v1p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v1, 0
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <1 x ptr addrspace(6)> %ptr to <1 x ptr>
  ret <1 x ptr> %cast
}
1504
; 4-element addrspace(6) -> flat widening cast: the 32-bit inputs v1..v3
; are spread to the even dwords v2,v4,v6 (high to low so sources survive),
; then every odd (high) dword is zeroed.
define <4 x ptr> @addrspacecast_v4p6_to_v4p0(<4 x ptr addrspace(6)> %ptr) {
; HSA-LABEL: addrspacecast_v4p6_to_v4p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v6, v3
; HSA-NEXT:    v_mov_b32_e32 v4, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v1
; HSA-NEXT:    v_mov_b32_e32 v1, 0
; HSA-NEXT:    v_mov_b32_e32 v3, 0
; HSA-NEXT:    v_mov_b32_e32 v5, 0
; HSA-NEXT:    v_mov_b32_e32 v7, 0
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr addrspace(6)> %ptr to <4 x ptr>
  ret <4 x ptr> %cast
}
1520
; 8-element addrspace(6) -> flat widening cast: inputs v1..v7 are spread
; to even dwords v2..v14 (high to low), then all odd (high) dwords v1..v15
; are zeroed.
define <8 x ptr> @addrspacecast_v8p6_to_v8p0(<8 x ptr addrspace(6)> %ptr) {
; HSA-LABEL: addrspacecast_v8p6_to_v8p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v14, v7
; HSA-NEXT:    v_mov_b32_e32 v12, v6
; HSA-NEXT:    v_mov_b32_e32 v10, v5
; HSA-NEXT:    v_mov_b32_e32 v8, v4
; HSA-NEXT:    v_mov_b32_e32 v6, v3
; HSA-NEXT:    v_mov_b32_e32 v4, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v1
; HSA-NEXT:    v_mov_b32_e32 v1, 0
; HSA-NEXT:    v_mov_b32_e32 v3, 0
; HSA-NEXT:    v_mov_b32_e32 v5, 0
; HSA-NEXT:    v_mov_b32_e32 v7, 0
; HSA-NEXT:    v_mov_b32_e32 v9, 0
; HSA-NEXT:    v_mov_b32_e32 v11, 0
; HSA-NEXT:    v_mov_b32_e32 v13, 0
; HSA-NEXT:    v_mov_b32_e32 v15, 0
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr addrspace(6)> %ptr to <8 x ptr>
  ret <8 x ptr> %cast
}
1544
; 16-element addrspace(6) -> flat widening cast: the 16 input dwords
; v1..v15 are spread across the even dwords of the 32-dword result
; (ordered so each source is read before its register is reused), and
; every odd (high) dword v1..v31 is zeroed.
define <16 x ptr> @addrspacecast_v16p6_to_v16p0(<16 x ptr addrspace(6)> %ptr) {
; HSA-LABEL: addrspacecast_v16p6_to_v16p0:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_mov_b32_e32 v28, v14
; HSA-NEXT:    v_mov_b32_e32 v24, v12
; HSA-NEXT:    v_mov_b32_e32 v20, v10
; HSA-NEXT:    v_mov_b32_e32 v16, v8
; HSA-NEXT:    v_mov_b32_e32 v14, v7
; HSA-NEXT:    v_mov_b32_e32 v12, v6
; HSA-NEXT:    v_mov_b32_e32 v10, v5
; HSA-NEXT:    v_mov_b32_e32 v8, v4
; HSA-NEXT:    v_mov_b32_e32 v6, v3
; HSA-NEXT:    v_mov_b32_e32 v4, v2
; HSA-NEXT:    v_mov_b32_e32 v2, v1
; HSA-NEXT:    v_mov_b32_e32 v1, 0
; HSA-NEXT:    v_mov_b32_e32 v3, 0
; HSA-NEXT:    v_mov_b32_e32 v5, 0
; HSA-NEXT:    v_mov_b32_e32 v7, 0
; HSA-NEXT:    v_mov_b32_e32 v18, v9
; HSA-NEXT:    v_mov_b32_e32 v22, v11
; HSA-NEXT:    v_mov_b32_e32 v26, v13
; HSA-NEXT:    v_mov_b32_e32 v30, v15
; HSA-NEXT:    v_mov_b32_e32 v9, 0
; HSA-NEXT:    v_mov_b32_e32 v11, 0
; HSA-NEXT:    v_mov_b32_e32 v13, 0
; HSA-NEXT:    v_mov_b32_e32 v15, 0
; HSA-NEXT:    v_mov_b32_e32 v17, 0
; HSA-NEXT:    v_mov_b32_e32 v19, 0
; HSA-NEXT:    v_mov_b32_e32 v21, 0
; HSA-NEXT:    v_mov_b32_e32 v23, 0
; HSA-NEXT:    v_mov_b32_e32 v25, 0
; HSA-NEXT:    v_mov_b32_e32 v27, 0
; HSA-NEXT:    v_mov_b32_e32 v29, 0
; HSA-NEXT:    v_mov_b32_e32 v31, 0
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr addrspace(6)> %ptr to <16 x ptr>
  ret <16 x ptr> %cast
}
1584
; Intrinsic declarations and attribute groups referenced by functions in
; this test file (no user of these is required to be visible in this chunk).
declare void @llvm.amdgcn.s.barrier() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind readnone }
; NOTE(review): #3 overrides the high 32 bits assumed when widening 32-bit
; addresses -- confirm which test functions reference it; none are visible here.
attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }

; Pin the amdhsa code object ABI version to 400 so the generated kernel
; descriptor fields checked above do not depend on the llc default.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
