; Checks addrspacecast lowering between the AMDGPU address spaces (flat,
; group/LDS, private/scratch, global, constant and 32-bit constant) on the
; amdhsa OS, comparing a CI-generation target (kaveri) against gfx900,
; which has the src_shared_base / src_private_base aperture registers.
; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri -mattr=-promote-alloca < %s | llc | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 -mattr=-promote-alloca < %s | llc | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s

target triple = "amdgcn-amd-amdhsa"

; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0

; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base

; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}

; GFX9: s_cmp_lg_u32 [[PTR]], -1
; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0

; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0

; At most 2 digits. Make sure src_shared_base is not counted as a high
; number SGPR.

; NOTE(review): the pattern below accepts any digit count, so it no longer
; enforces the 'at most 2 digits' claim above — confirm intent.
; HSA: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %stof
  ret void
}

; Test handling inside a non-kernel
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x10{{$}}
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0

; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base

; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7

; GFX9-DAG: v_mov_b32_e32 v[[VREG_HIBASE:[0-9]+]], s[[HIBASE]]
; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc

; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %stof
  ret void
}

; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:

; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}

; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0

; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base

; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; GFX9: s_cmp_lg_u32 [[PTR]], -1
; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0

; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0

; HSA: NumSgprs: {{[0-9]+}}
define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 7, ptr %stof
  ret void
}

; no-op
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:

; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]

; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(1) %ptr to ptr
  store volatile i32 7, ptr %stof
  ret void
}

; no-op
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(4) %ptr to ptr
  %ld = load volatile i32, ptr %stof
  ret void
}

; HSA-LABEL: {{^}}use_constant_to_global_addrspacecast:
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; CI: {{flat|global}}_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]

; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]]
define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1)
  %ld = load volatile i32, ptr addrspace(1) %stof
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_group_addrspacecast:

; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
; NOTE(review): in the s_and_b64 pattern below, '[[0-9]+:[0-9]+]' inside the
; regex braces is a character class, not an escaped register-pair bracket;
; it was probably meant to be '\[[0-9]+:[0-9]+\]' (same issue recurs in the
; next function) — verify against FileCheck regex rules.
; CI-DAG: s_and_b64 s{{[[0-9]+:[0-9]+]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec
; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1
; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI-DAG: ds_write_b32 [[VCASTPTR]], v[[K]]
; GFX9-DAG: ds_write_b32 [[CASTPTR]], v[[K]]

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
  %ftos = addrspacecast ptr %ptr to ptr addrspace(3)
  store volatile i32 0, ptr addrspace(3) %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast:

; HSA: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]]
; NOTE(review): the next three CI-DAG lines lack the trailing colon, so
; FileCheck ignores them; they appear to be stale leftovers from an older
; v_cndmask-based lowering — confirm and remove.
; CI-DAG v_cmp_ne_u64_e64 vcc, s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
; CI-DAG v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; CI-DAG v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; CI-DAG: v_cmp_ne_u64_e64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], s[[[PTR_LO]]:[[PTR_HI]]], 0{{$}}
; CI-DAG: s_and_b64 s{{[[0-9]+:[0-9]+]}}, s[[[CMP_LO]]:[[CMP_HI]]], exec
; CI-DAG: s_cselect_b32 [[CASTPTR:s[0-9]+]], s[[PTR_LO]], -1
; CI-DAG: v_mov_b32_e32 [[VCASTPTR:v[0-9]+]], [[CASTPTR]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; GFX9-DAG: s_cmp_lg_u64 s[[[CMP_LO:[0-9]+]]:[[CMP_HI:[0-9]+]]], 0
; GFX9-DAG: s_cselect_b32 s[[PTR_LO]], s[[PTR_LO]], -1
; GFX9-DAG: v_mov_b32_e32 [[CASTPTR:v[0-9]+]], s[[PTR_LO]]
; CI: buffer_store_dword v[[K]], [[VCASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; GFX9: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}

; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
  %ftos = addrspacecast ptr %ptr to ptr addrspace(5)
  store volatile i32 0, ptr addrspace(5) %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast:

; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; CI: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]

; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}

; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
  %ftos = addrspacecast ptr %ptr to ptr addrspace(1)
  store volatile i32 0, ptr addrspace(1) %ftos
  ret void
}

; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast:

; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]], s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0

; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
  %ftos = addrspacecast ptr %ptr to ptr addrspace(4)
  load volatile i32, ptr addrspace(4) %ftos
  ret void
}

; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:

; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast ptr addrspace(3) null to ptr
  store volatile i32 7, ptr %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast ptr null to ptr addrspace(3)
  store volatile i32 7, ptr addrspace(3) %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
  %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
  store volatile i32 7, ptr %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
  %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3)
  store volatile i32 7, ptr addrspace(3) %cast
  ret void
}

; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast ptr addrspace(5) null to ptr
  store volatile i32 7, ptr %cast
  ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast ptr null to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:

; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]

; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
  %cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
  store volatile i32 7, ptr %cast
  ret void
}

; HSA-LABEL: {{^}}cast_neg1_flat_to_private_addrspacecast:
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
  %cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %cast
  ret void
}


; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.

; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast ptr addrspace(3) %lptr to ptr
  br label %end

global:
  %flat_global = addrspacecast ptr addrspace(1) %gptr to ptr
  br label %end

end:
  %fptr = phi ptr [ %flat_local, %local ], [ %flat_global, %global ]
  store volatile i32 %x, ptr %fptr, align 4
;  %val = load i32, ptr %fptr, align 4
;  store i32 %val, ptr addrspace(1) %out, align 4
  ret void
}

; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8

; GFX9: s_add_u32 flat_scratch_lo, s6, s9
; GFX9: s_addc_u32 flat_scratch_hi, s7, 0

; HSA: {{flat|global}}_store_dword
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4, addrspace(5)
  %x = call i32 @llvm.amdgcn.workitem.id.x() #2
  %pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x
  %fptr = addrspacecast ptr addrspace(5) %pptr to ptr
  store volatile i32 %x, ptr %fptr
  ; Dummy call
  call void @llvm.amdgcn.s.barrier() #1
  %reload = load volatile i32, ptr %fptr, align 4
  store volatile i32 %reload, ptr addrspace(1) %out, align 4
  ret void
}

; NOTE(review): unlike the labels above, the next two -LABEL patterns omit
; the trailing colon after the symbol name; harmless but inconsistent.
; HSA-LABEL: {{^}}use_constant_to_constant32_addrspacecast
; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
  %ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr
  %addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6)
  %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
  %load = load volatile i32, ptr addrspace(6) %gep, align 4
  ret void
}

; HSA-LABEL: {{^}}use_global_to_constant32_addrspacecast
; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}}
; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}}
; GFX9: s_load_dwordx2 s[[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]], [[PTRPTR]], 0x0{{$}}
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) %ptr.ptr
  %addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6)
  %gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
  %load = load volatile i32, ptr addrspace(6) %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_0:
; GCN: s_load_dword [[PTR:s[0-9]+]],
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 {
  %stof = addrspacecast ptr addrspace(6) %ptr to ptr
  %load = load volatile i32, ptr %stof
  ret void
}

; GCN-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_1:
; GCN: s_load_dword [[PTR:s[0-9]+]],
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 {
  %stof = addrspacecast ptr addrspace(6) %ptr to ptr
  %load = load volatile i32, ptr %stof
  ret void
}

; NOTE(review): the per-instruction -NEXT check bodies below appear to be
; autogenerated (update_llc_test_checks style); prefer regenerating them
; over hand edits.
define <2 x ptr addrspace(5)> @addrspacecast_v2p0_to_v2p5(<2 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v2p0_to_v2p5:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(5)>
  ret <2 x ptr addrspace(5)> %cast
}

define <3 x ptr addrspace(5)> @addrspacecast_v3p0_to_v3p5(<3 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v3p0_to_v3p5:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(5)>
  ret <3 x ptr addrspace(5)> %cast
}

define <4 x ptr addrspace(5)> @addrspacecast_v4p0_to_v4p5(<4 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v4p0_to_v4p5:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(5)>
  ret <4 x ptr addrspace(5)> %cast
}

define <8 x ptr addrspace(5)> @addrspacecast_v8p0_to_v8p5(<8 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v8p0_to_v8p5:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(5)>
  ret <8 x ptr addrspace(5)> %cast
}

define <16 x ptr addrspace(5)> @addrspacecast_v16p0_to_v16p5(<16 x ptr> %ptr) {
; HSA-LABEL: addrspacecast_v16p0_to_v16p5:
; HSA:       ; %bb.0:
; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
; HSA-NEXT:    v_cndmask_b32_e32 v8, -1, v16, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
; HSA-NEXT:    v_cndmask_b32_e32 v9, -1, v18, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
; HSA-NEXT:    v_cndmask_b32_e32 v10, -1, v20, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[22:23]
; HSA-NEXT:    v_cndmask_b32_e32 v11, -1, v22, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[24:25]
; HSA-NEXT:    v_cndmask_b32_e32 v12, -1, v24, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[26:27]
; HSA-NEXT:    v_cndmask_b32_e32 v13, -1, v26, vcc
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[28:29]
; HSA-NEXT:    v_cndmask_b32_e32 v14, -1, v28, vcc
; HSA-NEXT:    s_waitcnt vmcnt(0)
; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[30:31]
; HSA-NEXT:    v_cndmask_b32_e32 v15, -1, v30, vcc
; HSA-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(5)>
  ret <16 x ptr addrspace(5)> %cast
}

define <2 x ptr> @addrspacecast_v2p5_to_v2p0(<2 x ptr addrspace(5)> %ptr) {
; CI-LABEL: addrspacecast_v2p5_to_v2p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v3, s4
; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v4
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v2p5_to_v2p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v3, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <2 x ptr addrspace(5)> %ptr to <2 x ptr>
  ret <2 x ptr> %cast
}

define <3 x ptr> @addrspacecast_v3p5_to_v3p0(<3 x ptr addrspace(5)> %ptr) {
; CI-LABEL: addrspacecast_v3p5_to_v3p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v5, s4
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v7
; CI-NEXT:    v_mov_b32_e32 v2, v6
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v3p5_to_v3p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v5, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v7
; GFX9-NEXT:    v_mov_b32_e32 v2, v6
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <3 x ptr addrspace(5)> %ptr to <3 x ptr>
  ret <3 x ptr> %cast
}

define <4 x ptr> @addrspacecast_v4p5_to_v4p0(<4 x ptr addrspace(5)> %ptr) {
; CI-LABEL: addrspacecast_v4p5_to_v4p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v7, s4
; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v10
; CI-NEXT:    v_mov_b32_e32 v2, v8
; CI-NEXT:    v_mov_b32_e32 v3, v9
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v4p5_to_v4p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v7, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v10
; GFX9-NEXT:    v_mov_b32_e32 v2, v8
; GFX9-NEXT:    v_mov_b32_e32 v3, v9
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <4 x ptr addrspace(5)> %ptr to <4 x ptr>
  ret <4 x ptr> %cast
}

define <8 x ptr> @addrspacecast_v8p5_to_v8p0(<8 x ptr addrspace(5)> %ptr) {
; CI-LABEL: addrspacecast_v8p5_to_v8p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v15, s4
; CI-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
; CI-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
; CI-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
; CI-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
; CI-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
; CI-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
; CI-NEXT:    v_mov_b32_e32 v1, v22
; CI-NEXT:    v_mov_b32_e32 v2, v16
; CI-NEXT:    v_mov_b32_e32 v3, v17
; CI-NEXT:    v_mov_b32_e32 v4, v18
; CI-NEXT:    v_mov_b32_e32 v5, v19
; CI-NEXT:    v_mov_b32_e32 v6, v20
; CI-NEXT:    v_mov_b32_e32 v7, v21
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v8p5_to_v8p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v15, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
; GFX9-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
; GFX9-NEXT:    v_mov_b32_e32 v1, v22
; GFX9-NEXT:    v_mov_b32_e32 v2, v16
; GFX9-NEXT:    v_mov_b32_e32 v3, v17
; GFX9-NEXT:    v_mov_b32_e32 v4, v18
; GFX9-NEXT:    v_mov_b32_e32 v5, v19
; GFX9-NEXT:    v_mov_b32_e32 v6, v20
; GFX9-NEXT:    v_mov_b32_e32 v7, v21
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %cast = addrspacecast <8 x ptr addrspace(5)> %ptr to <8 x ptr>
  ret <8 x ptr> %cast
}

define <16 x ptr> @addrspacecast_v16p5_to_v16p0(<16 x ptr addrspace(5)> %ptr) {
; CI-LABEL: addrspacecast_v16p5_to_v16p0:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; CI-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v5
; CI-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v6
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v31, s4
; CI-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; CI-NEXT:    v_cndmask_b32_e32 v34, 0, v1, vcc
; CI-NEXT:    v_cndmask_b32_e32 v39, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; CI-NEXT:    v_cndmask_b32_e32 v35, 0, v2, vcc
; CI-NEXT:    v_cndmask_b32_e32 v32, 0, v31, vcc
; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; CI-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v4
; CI-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v7
; CI-NEXT:    v_cndmask_b32_e32 v36, 0, v3, vcc
; CI-NEXT:    v_cndmask_b32_e64 v48, 0, v4, s[4:5]
; CI-NEXT:    v_cndmask_b32_e64 v37, 0, v5, s[6:7]
; CI-NEXT:    v_cndmask_b32_e64 v33, 0, v6, s[8:9]
; CI-NEXT:    v_cndmask_b32_e64 v38, 0, v7, s[10:11]
; CI-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v8
; CI-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v9
; CI-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v10
; CI-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v11
; CI-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v12
; CI-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v13
; CI-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v14
; CI-NEXT:    v_cmp_ne_u32_e64 s[26:27], -1, v15
; CI-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[12:13]
; CI-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[14:15]
; CI-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[16:17]
; CI-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[18:19]
; CI-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[20:21]
; CI-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[22:23]
; CI-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[24:25]
; CI-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[26:27]
; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v31, vcc
; CI-NEXT:    v_cndmask_b32_e64 v9, 0, v31, s[4:5]
; CI-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[6:7]
; CI-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[8:9]
; CI-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[10:11]
; CI-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[12:13]
; CI-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[14:15]
; CI-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[16:17]
; CI-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[18:19]
; CI-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[20:21]
; CI-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[22:23]
; CI-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[24:25]
; CI-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[26:27]
; CI-NEXT:    v_mov_b32_e32 v1, v49
; CI-NEXT:    v_mov_b32_e32 v2, v34
; CI-NEXT:    v_mov_b32_e32 v3, v39
; CI-NEXT:    v_mov_b32_e32 v4, v35
; CI-NEXT:    v_mov_b32_e32 v5, v32
; CI-NEXT:    v_mov_b32_e32 v6, v36
; CI-NEXT:    v_mov_b32_e32 v8, v48
; CI-NEXT:    v_mov_b32_e32 v10, v37
; CI-NEXT:    v_mov_b32_e32 v12, v33
; CI-NEXT:    v_mov_b32_e32 v14, v38
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: addrspacecast_v16p5_to_v16p0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
; GFX9-NEXT:    v_mov_b32_e32 v31, s5
; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v34, 0, v1, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v39, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v35, 0, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v32, 0, v31, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v4
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v5
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v6
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v7
; GFX9-NEXT:    v_cndmask_b32_e32 v36, 0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v48, 0, v4, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v37, 0, v5, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v33, 0, v6, s[8:9]
; GFX9-NEXT:    v_cndmask_b32_e64 v38, 0, v7, s[10:11]
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v8
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v9
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v10
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v11
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v12
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v13
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v14
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[26:27], -1, v15
; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[12:13]
; GFX9-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[14:15]
; GFX9-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[16:17]
; GFX9-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[18:19]
; GFX9-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[20:21]
; GFX9-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[22:23]
; GFX9-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[24:25]
; GFX9-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[26:27]
; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v31, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v31, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[8:9]
; GFX9-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[10:11]
; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[12:13]
; GFX9-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[14:15]
; GFX9-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[16:17]
; GFX9-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[18:19]
; GFX9-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[20:21]
; GFX9-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[22:23]
; GFX9-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[24:25]
; GFX9-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[26:27]
; GFX9-NEXT:    v_mov_b32_e32 v1, v49
850; GFX9-NEXT: v_mov_b32_e32 v2, v34 851; GFX9-NEXT: v_mov_b32_e32 v3, v39 852; GFX9-NEXT: v_mov_b32_e32 v4, v35 853; GFX9-NEXT: v_mov_b32_e32 v5, v32 854; GFX9-NEXT: v_mov_b32_e32 v6, v36 855; GFX9-NEXT: v_mov_b32_e32 v8, v48 856; GFX9-NEXT: v_mov_b32_e32 v10, v37 857; GFX9-NEXT: v_mov_b32_e32 v12, v33 858; GFX9-NEXT: v_mov_b32_e32 v14, v38 859; GFX9-NEXT: s_setpc_b64 s[30:31] 860 %cast = addrspacecast <16 x ptr addrspace(5)> %ptr to <16 x ptr> 861 ret <16 x ptr> %cast 862} 863 864define <2 x ptr addrspace(3)> @addrspacecast_v2p0_to_v2p3(<2 x ptr> %ptr) { 865; HSA-LABEL: addrspacecast_v2p0_to_v2p3: 866; HSA: ; %bb.0: 867; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 868; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 869; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 870; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 871; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc 872; HSA-NEXT: s_setpc_b64 s[30:31] 873 %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(3)> 874 ret <2 x ptr addrspace(3)> %cast 875} 876 877define <3 x ptr addrspace(3)> @addrspacecast_v3p0_to_v3p3(<3 x ptr> %ptr) { 878; HSA-LABEL: addrspacecast_v3p0_to_v3p3: 879; HSA: ; %bb.0: 880; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 881; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 882; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 883; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 884; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc 885; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 886; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc 887; HSA-NEXT: s_setpc_b64 s[30:31] 888 %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(3)> 889 ret <3 x ptr addrspace(3)> %cast 890} 891 892define <4 x ptr addrspace(3)> @addrspacecast_v4p0_to_v4p3(<4 x ptr> %ptr) { 893; HSA-LABEL: addrspacecast_v4p0_to_v4p3: 894; HSA: ; %bb.0: 895; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 896; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 897; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 898; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 
899; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc 900; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 901; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc 902; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 903; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc 904; HSA-NEXT: s_setpc_b64 s[30:31] 905 %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(3)> 906 ret <4 x ptr addrspace(3)> %cast 907} 908 909define <8 x ptr addrspace(3)> @addrspacecast_v8p0_to_v8p3(<8 x ptr> %ptr) { 910; HSA-LABEL: addrspacecast_v8p0_to_v8p3: 911; HSA: ; %bb.0: 912; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 914; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 915; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 916; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc 917; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 918; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc 919; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 920; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc 921; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 922; HSA-NEXT: v_cndmask_b32_e32 v4, -1, v8, vcc 923; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 924; HSA-NEXT: v_cndmask_b32_e32 v5, -1, v10, vcc 925; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] 926; HSA-NEXT: v_cndmask_b32_e32 v6, -1, v12, vcc 927; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] 928; HSA-NEXT: v_cndmask_b32_e32 v7, -1, v14, vcc 929; HSA-NEXT: s_setpc_b64 s[30:31] 930 %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(3)> 931 ret <8 x ptr addrspace(3)> %cast 932} 933 934define <16 x ptr addrspace(3)> @addrspacecast_v16p0_to_v16p3(<16 x ptr> %ptr) { 935; HSA-LABEL: addrspacecast_v16p0_to_v16p3: 936; HSA: ; %bb.0: 937; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 938; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 939; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] 940; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc 941; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] 942; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc 943; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 
0, v[4:5] 944; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc 945; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] 946; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc 947; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] 948; HSA-NEXT: v_cndmask_b32_e32 v4, -1, v8, vcc 949; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] 950; HSA-NEXT: v_cndmask_b32_e32 v5, -1, v10, vcc 951; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] 952; HSA-NEXT: v_cndmask_b32_e32 v6, -1, v12, vcc 953; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] 954; HSA-NEXT: v_cndmask_b32_e32 v7, -1, v14, vcc 955; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] 956; HSA-NEXT: v_cndmask_b32_e32 v8, -1, v16, vcc 957; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] 958; HSA-NEXT: v_cndmask_b32_e32 v9, -1, v18, vcc 959; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] 960; HSA-NEXT: v_cndmask_b32_e32 v10, -1, v20, vcc 961; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[22:23] 962; HSA-NEXT: v_cndmask_b32_e32 v11, -1, v22, vcc 963; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[24:25] 964; HSA-NEXT: v_cndmask_b32_e32 v12, -1, v24, vcc 965; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[26:27] 966; HSA-NEXT: v_cndmask_b32_e32 v13, -1, v26, vcc 967; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[28:29] 968; HSA-NEXT: v_cndmask_b32_e32 v14, -1, v28, vcc 969; HSA-NEXT: s_waitcnt vmcnt(0) 970; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[30:31] 971; HSA-NEXT: v_cndmask_b32_e32 v15, -1, v30, vcc 972; HSA-NEXT: s_setpc_b64 s[30:31] 973 %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(3)> 974 ret <16 x ptr addrspace(3)> %cast 975} 976 977define <2 x ptr> @addrspacecast_v2p3_to_v2p0(<2 x ptr addrspace(3)> %ptr) { 978; CI-LABEL: addrspacecast_v2p3_to_v2p0: 979; CI: ; %bb.0: 980; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 981; CI-NEXT: s_load_dword s4, s[6:7], 0x10 982; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 983; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 984; CI-NEXT: s_waitcnt lgkmcnt(0) 985; CI-NEXT: v_mov_b32_e32 v3, s4 986; CI-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc 987; CI-NEXT: 
v_cmp_ne_u32_e32 vcc, -1, v1 988; CI-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 989; CI-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc 990; CI-NEXT: v_mov_b32_e32 v1, v4 991; CI-NEXT: s_setpc_b64 s[30:31] 992; 993; GFX9-LABEL: addrspacecast_v2p3_to_v2p0: 994; GFX9: ; %bb.0: 995; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base 997; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 998; GFX9-NEXT: v_mov_b32_e32 v3, s5 999; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1000; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc 1001; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1002; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 1003; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc 1004; GFX9-NEXT: v_mov_b32_e32 v1, v4 1005; GFX9-NEXT: s_setpc_b64 s[30:31] 1006 %cast = addrspacecast <2 x ptr addrspace(3)> %ptr to <2 x ptr> 1007 ret <2 x ptr> %cast 1008} 1009 1010define <3 x ptr> @addrspacecast_v3p3_to_v3p0(<3 x ptr addrspace(3)> %ptr) { 1011; CI-LABEL: addrspacecast_v3p3_to_v3p0: 1012; CI: ; %bb.0: 1013; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1014; CI-NEXT: s_load_dword s4, s[6:7], 0x10 1015; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1016; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1017; CI-NEXT: s_waitcnt lgkmcnt(0) 1018; CI-NEXT: v_mov_b32_e32 v5, s4 1019; CI-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc 1020; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1021; CI-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc 1022; CI-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 1023; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1024; CI-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 1025; CI-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc 1026; CI-NEXT: v_mov_b32_e32 v1, v7 1027; CI-NEXT: v_mov_b32_e32 v2, v6 1028; CI-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GFX9-LABEL: addrspacecast_v3p3_to_v3p0: 1031; GFX9: ; %bb.0: 1032; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base 1034; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1035; GFX9-NEXT: v_mov_b32_e32 v5, s5 1036; 
GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1037; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc 1038; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1039; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc 1040; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 1041; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1042; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 1043; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc 1044; GFX9-NEXT: v_mov_b32_e32 v1, v7 1045; GFX9-NEXT: v_mov_b32_e32 v2, v6 1046; GFX9-NEXT: s_setpc_b64 s[30:31] 1047 %cast = addrspacecast <3 x ptr addrspace(3)> %ptr to <3 x ptr> 1048 ret <3 x ptr> %cast 1049} 1050 1051define <4 x ptr> @addrspacecast_v4p3_to_v4p0(<4 x ptr addrspace(3)> %ptr) { 1052; CI-LABEL: addrspacecast_v4p3_to_v4p0: 1053; CI: ; %bb.0: 1054; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1055; CI-NEXT: s_load_dword s4, s[6:7], 0x10 1056; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1057; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1058; CI-NEXT: s_waitcnt lgkmcnt(0) 1059; CI-NEXT: v_mov_b32_e32 v7, s4 1060; CI-NEXT: v_cndmask_b32_e32 v10, 0, v7, vcc 1061; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1062; CI-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc 1063; CI-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc 1064; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1065; CI-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 1066; CI-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc 1067; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 1068; CI-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc 1069; CI-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc 1070; CI-NEXT: v_mov_b32_e32 v1, v10 1071; CI-NEXT: v_mov_b32_e32 v2, v8 1072; CI-NEXT: v_mov_b32_e32 v3, v9 1073; CI-NEXT: s_setpc_b64 s[30:31] 1074; 1075; GFX9-LABEL: addrspacecast_v4p3_to_v4p0: 1076; GFX9: ; %bb.0: 1077; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1078; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base 1079; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1080; GFX9-NEXT: v_mov_b32_e32 v7, s5 1081; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1082; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v7, vcc 1083; 
GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1084; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc 1085; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc 1086; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1087; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 1088; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc 1089; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 1090; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc 1091; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc 1092; GFX9-NEXT: v_mov_b32_e32 v1, v10 1093; GFX9-NEXT: v_mov_b32_e32 v2, v8 1094; GFX9-NEXT: v_mov_b32_e32 v3, v9 1095; GFX9-NEXT: s_setpc_b64 s[30:31] 1096 %cast = addrspacecast <4 x ptr addrspace(3)> %ptr to <4 x ptr> 1097 ret <4 x ptr> %cast 1098} 1099 1100define <8 x ptr> @addrspacecast_v8p3_to_v8p0(<8 x ptr addrspace(3)> %ptr) { 1101; CI-LABEL: addrspacecast_v8p3_to_v8p0: 1102; CI: ; %bb.0: 1103; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; CI-NEXT: s_load_dword s4, s[6:7], 0x10 1105; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1106; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1107; CI-NEXT: s_waitcnt lgkmcnt(0) 1108; CI-NEXT: v_mov_b32_e32 v15, s4 1109; CI-NEXT: v_cndmask_b32_e32 v22, 0, v15, vcc 1110; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1111; CI-NEXT: v_cndmask_b32_e32 v16, 0, v1, vcc 1112; CI-NEXT: v_cndmask_b32_e32 v17, 0, v15, vcc 1113; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1114; CI-NEXT: v_cndmask_b32_e32 v18, 0, v2, vcc 1115; CI-NEXT: v_cndmask_b32_e32 v19, 0, v15, vcc 1116; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 1117; CI-NEXT: v_cndmask_b32_e32 v20, 0, v3, vcc 1118; CI-NEXT: v_cndmask_b32_e32 v21, 0, v15, vcc 1119; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 1120; CI-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 1121; CI-NEXT: v_cndmask_b32_e32 v9, 0, v15, vcc 1122; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5 1123; CI-NEXT: v_cndmask_b32_e32 v10, 0, v5, vcc 1124; CI-NEXT: v_cndmask_b32_e32 v11, 0, v15, vcc 1125; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v6 1126; CI-NEXT: v_cndmask_b32_e32 v12, 0, v6, vcc 1127; CI-NEXT: v_cndmask_b32_e32 v13, 0, v15, 
vcc 1128; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v7 1129; CI-NEXT: v_cndmask_b32_e32 v14, 0, v7, vcc 1130; CI-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc 1131; CI-NEXT: v_mov_b32_e32 v1, v22 1132; CI-NEXT: v_mov_b32_e32 v2, v16 1133; CI-NEXT: v_mov_b32_e32 v3, v17 1134; CI-NEXT: v_mov_b32_e32 v4, v18 1135; CI-NEXT: v_mov_b32_e32 v5, v19 1136; CI-NEXT: v_mov_b32_e32 v6, v20 1137; CI-NEXT: v_mov_b32_e32 v7, v21 1138; CI-NEXT: s_setpc_b64 s[30:31] 1139; 1140; GFX9-LABEL: addrspacecast_v8p3_to_v8p0: 1141; GFX9: ; %bb.0: 1142; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1143; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base 1144; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1145; GFX9-NEXT: v_mov_b32_e32 v15, s5 1146; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1147; GFX9-NEXT: v_cndmask_b32_e32 v22, 0, v15, vcc 1148; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1149; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v1, vcc 1150; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v15, vcc 1151; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1152; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v2, vcc 1153; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v15, vcc 1154; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 1155; GFX9-NEXT: v_cndmask_b32_e32 v20, 0, v3, vcc 1156; GFX9-NEXT: v_cndmask_b32_e32 v21, 0, v15, vcc 1157; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 1158; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 1159; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v15, vcc 1160; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5 1161; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v5, vcc 1162; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v15, vcc 1163; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v6 1164; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v6, vcc 1165; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v15, vcc 1166; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v7 1167; GFX9-NEXT: v_cndmask_b32_e32 v14, 0, v7, vcc 1168; GFX9-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc 1169; GFX9-NEXT: v_mov_b32_e32 v1, v22 1170; GFX9-NEXT: v_mov_b32_e32 v2, v16 1171; GFX9-NEXT: v_mov_b32_e32 v3, v17 1172; GFX9-NEXT: v_mov_b32_e32 
v4, v18 1173; GFX9-NEXT: v_mov_b32_e32 v5, v19 1174; GFX9-NEXT: v_mov_b32_e32 v6, v20 1175; GFX9-NEXT: v_mov_b32_e32 v7, v21 1176; GFX9-NEXT: s_setpc_b64 s[30:31] 1177 %cast = addrspacecast <8 x ptr addrspace(3)> %ptr to <8 x ptr> 1178 ret <8 x ptr> %cast 1179} 1180 1181define <16 x ptr> @addrspacecast_v16p3_to_v16p0(<16 x ptr addrspace(3)> %ptr) { 1182; CI-LABEL: addrspacecast_v16p3_to_v16p0: 1183; CI: ; %bb.0: 1184; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1185; CI-NEXT: s_load_dword s4, s[6:7], 0x10 1186; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1187; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1188; CI-NEXT: v_cmp_ne_u32_e64 s[6:7], -1, v5 1189; CI-NEXT: v_cmp_ne_u32_e64 s[8:9], -1, v6 1190; CI-NEXT: s_waitcnt lgkmcnt(0) 1191; CI-NEXT: v_mov_b32_e32 v31, s4 1192; CI-NEXT: v_cndmask_b32_e32 v49, 0, v31, vcc 1193; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1194; CI-NEXT: v_cndmask_b32_e32 v34, 0, v1, vcc 1195; CI-NEXT: v_cndmask_b32_e32 v39, 0, v31, vcc 1196; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 1197; CI-NEXT: v_cndmask_b32_e32 v35, 0, v2, vcc 1198; CI-NEXT: v_cndmask_b32_e32 v32, 0, v31, vcc 1199; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 1200; CI-NEXT: v_cmp_ne_u32_e64 s[4:5], -1, v4 1201; CI-NEXT: v_cmp_ne_u32_e64 s[10:11], -1, v7 1202; CI-NEXT: v_cndmask_b32_e32 v36, 0, v3, vcc 1203; CI-NEXT: v_cndmask_b32_e64 v48, 0, v4, s[4:5] 1204; CI-NEXT: v_cndmask_b32_e64 v37, 0, v5, s[6:7] 1205; CI-NEXT: v_cndmask_b32_e64 v33, 0, v6, s[8:9] 1206; CI-NEXT: v_cndmask_b32_e64 v38, 0, v7, s[10:11] 1207; CI-NEXT: v_cmp_ne_u32_e64 s[12:13], -1, v8 1208; CI-NEXT: v_cmp_ne_u32_e64 s[14:15], -1, v9 1209; CI-NEXT: v_cmp_ne_u32_e64 s[16:17], -1, v10 1210; CI-NEXT: v_cmp_ne_u32_e64 s[18:19], -1, v11 1211; CI-NEXT: v_cmp_ne_u32_e64 s[20:21], -1, v12 1212; CI-NEXT: v_cmp_ne_u32_e64 s[22:23], -1, v13 1213; CI-NEXT: v_cmp_ne_u32_e64 s[24:25], -1, v14 1214; CI-NEXT: v_cmp_ne_u32_e64 s[26:27], -1, v15 1215; CI-NEXT: v_cndmask_b32_e64 v16, 0, v8, s[12:13] 1216; CI-NEXT: v_cndmask_b32_e64 v18, 
0, v9, s[14:15] 1217; CI-NEXT: v_cndmask_b32_e64 v20, 0, v10, s[16:17] 1218; CI-NEXT: v_cndmask_b32_e64 v22, 0, v11, s[18:19] 1219; CI-NEXT: v_cndmask_b32_e64 v24, 0, v12, s[20:21] 1220; CI-NEXT: v_cndmask_b32_e64 v26, 0, v13, s[22:23] 1221; CI-NEXT: v_cndmask_b32_e64 v28, 0, v14, s[24:25] 1222; CI-NEXT: v_cndmask_b32_e64 v30, 0, v15, s[26:27] 1223; CI-NEXT: v_cndmask_b32_e32 v7, 0, v31, vcc 1224; CI-NEXT: v_cndmask_b32_e64 v9, 0, v31, s[4:5] 1225; CI-NEXT: v_cndmask_b32_e64 v11, 0, v31, s[6:7] 1226; CI-NEXT: v_cndmask_b32_e64 v13, 0, v31, s[8:9] 1227; CI-NEXT: v_cndmask_b32_e64 v15, 0, v31, s[10:11] 1228; CI-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[12:13] 1229; CI-NEXT: v_cndmask_b32_e64 v19, 0, v31, s[14:15] 1230; CI-NEXT: v_cndmask_b32_e64 v21, 0, v31, s[16:17] 1231; CI-NEXT: v_cndmask_b32_e64 v23, 0, v31, s[18:19] 1232; CI-NEXT: v_cndmask_b32_e64 v25, 0, v31, s[20:21] 1233; CI-NEXT: v_cndmask_b32_e64 v27, 0, v31, s[22:23] 1234; CI-NEXT: v_cndmask_b32_e64 v29, 0, v31, s[24:25] 1235; CI-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[26:27] 1236; CI-NEXT: v_mov_b32_e32 v1, v49 1237; CI-NEXT: v_mov_b32_e32 v2, v34 1238; CI-NEXT: v_mov_b32_e32 v3, v39 1239; CI-NEXT: v_mov_b32_e32 v4, v35 1240; CI-NEXT: v_mov_b32_e32 v5, v32 1241; CI-NEXT: v_mov_b32_e32 v6, v36 1242; CI-NEXT: v_mov_b32_e32 v8, v48 1243; CI-NEXT: v_mov_b32_e32 v10, v37 1244; CI-NEXT: v_mov_b32_e32 v12, v33 1245; CI-NEXT: v_mov_b32_e32 v14, v38 1246; CI-NEXT: s_setpc_b64 s[30:31] 1247; 1248; GFX9-LABEL: addrspacecast_v16p3_to_v16p0: 1249; GFX9: ; %bb.0: 1250; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1251; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base 1252; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 1253; GFX9-NEXT: v_mov_b32_e32 v31, s5 1254; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1255; GFX9-NEXT: v_cndmask_b32_e32 v49, 0, v31, vcc 1256; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 1257; GFX9-NEXT: v_cndmask_b32_e32 v34, 0, v1, vcc 1258; GFX9-NEXT: v_cndmask_b32_e32 v39, 0, v31, vcc 1259; GFX9-NEXT: 
v_cmp_ne_u32_e32 vcc, -1, v2 1260; GFX9-NEXT: v_cndmask_b32_e32 v35, 0, v2, vcc 1261; GFX9-NEXT: v_cndmask_b32_e32 v32, 0, v31, vcc 1262; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 1263; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], -1, v4 1264; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], -1, v5 1265; GFX9-NEXT: v_cmp_ne_u32_e64 s[8:9], -1, v6 1266; GFX9-NEXT: v_cmp_ne_u32_e64 s[10:11], -1, v7 1267; GFX9-NEXT: v_cndmask_b32_e32 v36, 0, v3, vcc 1268; GFX9-NEXT: v_cndmask_b32_e64 v48, 0, v4, s[4:5] 1269; GFX9-NEXT: v_cndmask_b32_e64 v37, 0, v5, s[6:7] 1270; GFX9-NEXT: v_cndmask_b32_e64 v33, 0, v6, s[8:9] 1271; GFX9-NEXT: v_cndmask_b32_e64 v38, 0, v7, s[10:11] 1272; GFX9-NEXT: v_cmp_ne_u32_e64 s[12:13], -1, v8 1273; GFX9-NEXT: v_cmp_ne_u32_e64 s[14:15], -1, v9 1274; GFX9-NEXT: v_cmp_ne_u32_e64 s[16:17], -1, v10 1275; GFX9-NEXT: v_cmp_ne_u32_e64 s[18:19], -1, v11 1276; GFX9-NEXT: v_cmp_ne_u32_e64 s[20:21], -1, v12 1277; GFX9-NEXT: v_cmp_ne_u32_e64 s[22:23], -1, v13 1278; GFX9-NEXT: v_cmp_ne_u32_e64 s[24:25], -1, v14 1279; GFX9-NEXT: v_cmp_ne_u32_e64 s[26:27], -1, v15 1280; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v8, s[12:13] 1281; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v9, s[14:15] 1282; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, v10, s[16:17] 1283; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, v11, s[18:19] 1284; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, v12, s[20:21] 1285; GFX9-NEXT: v_cndmask_b32_e64 v26, 0, v13, s[22:23] 1286; GFX9-NEXT: v_cndmask_b32_e64 v28, 0, v14, s[24:25] 1287; GFX9-NEXT: v_cndmask_b32_e64 v30, 0, v15, s[26:27] 1288; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v31, vcc 1289; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, v31, s[4:5] 1290; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v31, s[6:7] 1291; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, v31, s[8:9] 1292; GFX9-NEXT: v_cndmask_b32_e64 v15, 0, v31, s[10:11] 1293; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[12:13] 1294; GFX9-NEXT: v_cndmask_b32_e64 v19, 0, v31, s[14:15] 1295; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, v31, s[16:17] 1296; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, 
v31, s[18:19] 1297; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, v31, s[20:21] 1298; GFX9-NEXT: v_cndmask_b32_e64 v27, 0, v31, s[22:23] 1299; GFX9-NEXT: v_cndmask_b32_e64 v29, 0, v31, s[24:25] 1300; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[26:27] 1301; GFX9-NEXT: v_mov_b32_e32 v1, v49 1302; GFX9-NEXT: v_mov_b32_e32 v2, v34 1303; GFX9-NEXT: v_mov_b32_e32 v3, v39 1304; GFX9-NEXT: v_mov_b32_e32 v4, v35 1305; GFX9-NEXT: v_mov_b32_e32 v5, v32 1306; GFX9-NEXT: v_mov_b32_e32 v6, v36 1307; GFX9-NEXT: v_mov_b32_e32 v8, v48 1308; GFX9-NEXT: v_mov_b32_e32 v10, v37 1309; GFX9-NEXT: v_mov_b32_e32 v12, v33 1310; GFX9-NEXT: v_mov_b32_e32 v14, v38 1311; GFX9-NEXT: s_setpc_b64 s[30:31] 1312 %cast = addrspacecast <16 x ptr addrspace(3)> %ptr to <16 x ptr> 1313 ret <16 x ptr> %cast 1314} 1315 1316define <2 x ptr addrspace(1)> @addrspacecast_v2p0_to_v2p1(<2 x ptr> %ptr) { 1317; HSA-LABEL: addrspacecast_v2p0_to_v2p1: 1318; HSA: ; %bb.0: 1319; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1320; HSA-NEXT: s_setpc_b64 s[30:31] 1321 %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(1)> 1322 ret <2 x ptr addrspace(1)> %cast 1323} 1324 1325define <3 x ptr addrspace(1)> @addrspacecast_v3p0_to_v3p1(<3 x ptr> %ptr) { 1326; HSA-LABEL: addrspacecast_v3p0_to_v3p1: 1327; HSA: ; %bb.0: 1328; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1329; HSA-NEXT: s_setpc_b64 s[30:31] 1330 %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(1)> 1331 ret <3 x ptr addrspace(1)> %cast 1332} 1333 1334define <4 x ptr addrspace(1)> @addrspacecast_v4p0_to_v4p1(<4 x ptr> %ptr) { 1335; HSA-LABEL: addrspacecast_v4p0_to_v4p1: 1336; HSA: ; %bb.0: 1337; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1338; HSA-NEXT: s_setpc_b64 s[30:31] 1339 %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(1)> 1340 ret <4 x ptr addrspace(1)> %cast 1341} 1342 1343define <8 x ptr addrspace(1)> @addrspacecast_v8p0_to_v8p1(<8 x ptr> %ptr) { 1344; HSA-LABEL: addrspacecast_v8p0_to_v8p1: 1345; HSA: ; %bb.0: 1346; 
HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1347; HSA-NEXT: s_setpc_b64 s[30:31] 1348 %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(1)> 1349 ret <8 x ptr addrspace(1)> %cast 1350} 1351 1352define <16 x ptr addrspace(1)> @addrspacecast_v16p0_to_v16p1(<16 x ptr> %ptr) { 1353; HSA-LABEL: addrspacecast_v16p0_to_v16p1: 1354; HSA: ; %bb.0: 1355; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1356; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 1357; HSA-NEXT: s_waitcnt vmcnt(0) 1358; HSA-NEXT: s_setpc_b64 s[30:31] 1359 %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(1)> 1360 ret <16 x ptr addrspace(1)> %cast 1361} 1362 1363define <2 x ptr> @addrspacecast_v2p1_to_v2p0(<2 x ptr addrspace(1)> %ptr) { 1364; HSA-LABEL: addrspacecast_v2p1_to_v2p0: 1365; HSA: ; %bb.0: 1366; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; HSA-NEXT: s_setpc_b64 s[30:31] 1368 %cast = addrspacecast <2 x ptr addrspace(1)> %ptr to <2 x ptr> 1369 ret <2 x ptr> %cast 1370} 1371 1372define <1 x ptr> @addrspacecast_v1p1_to_v1p0(<1 x ptr addrspace(1)> %ptr) { 1373; HSA-LABEL: addrspacecast_v1p1_to_v1p0: 1374; HSA: ; %bb.0: 1375; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1376; HSA-NEXT: s_setpc_b64 s[30:31] 1377 %cast = addrspacecast <1 x ptr addrspace(1)> %ptr to <1 x ptr> 1378 ret <1 x ptr> %cast 1379} 1380 1381define <4 x ptr> @addrspacecast_v4p1_to_v4p0(<4 x ptr addrspace(1)> %ptr) { 1382; HSA-LABEL: addrspacecast_v4p1_to_v4p0: 1383; HSA: ; %bb.0: 1384; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; HSA-NEXT: s_setpc_b64 s[30:31] 1386 %cast = addrspacecast <4 x ptr addrspace(1)> %ptr to <4 x ptr> 1387 ret <4 x ptr> %cast 1388} 1389 1390define <8 x ptr> @addrspacecast_v8p1_to_v8p0(<8 x ptr addrspace(1)> %ptr) { 1391; HSA-LABEL: addrspacecast_v8p1_to_v8p0: 1392; HSA: ; %bb.0: 1393; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; HSA-NEXT: s_setpc_b64 s[30:31] 1395 %cast = addrspacecast <8 x ptr addrspace(1)> %ptr to <8 x ptr> 1396 ret 
<8 x ptr> %cast 1397} 1398 1399define <16 x ptr> @addrspacecast_v16p1_to_v16p0(<16 x ptr addrspace(1)> %ptr) { 1400; HSA-LABEL: addrspacecast_v16p1_to_v16p0: 1401; HSA: ; %bb.0: 1402; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1403; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 1404; HSA-NEXT: s_waitcnt vmcnt(0) 1405; HSA-NEXT: s_setpc_b64 s[30:31] 1406 %cast = addrspacecast <16 x ptr addrspace(1)> %ptr to <16 x ptr> 1407 ret <16 x ptr> %cast 1408} 1409 1410define <2 x ptr addrspace(6)> @addrspacecast_v2p0_to_v2p6(<2 x ptr> %ptr) { 1411; HSA-LABEL: addrspacecast_v2p0_to_v2p6: 1412; HSA: ; %bb.0: 1413; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1414; HSA-NEXT: v_mov_b32_e32 v1, v2 1415; HSA-NEXT: s_setpc_b64 s[30:31] 1416 %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(6)> 1417 ret <2 x ptr addrspace(6)> %cast 1418} 1419 1420define <3 x ptr addrspace(6)> @addrspacecast_v3p0_to_v3p6(<3 x ptr> %ptr) { 1421; HSA-LABEL: addrspacecast_v3p0_to_v3p6: 1422; HSA: ; %bb.0: 1423; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1424; HSA-NEXT: v_mov_b32_e32 v1, v2 1425; HSA-NEXT: v_mov_b32_e32 v2, v4 1426; HSA-NEXT: s_setpc_b64 s[30:31] 1427 %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(6)> 1428 ret <3 x ptr addrspace(6)> %cast 1429} 1430 1431define <4 x ptr addrspace(6)> @addrspacecast_v4p0_to_v4p6(<4 x ptr> %ptr) { 1432; HSA-LABEL: addrspacecast_v4p0_to_v4p6: 1433; HSA: ; %bb.0: 1434; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1435; HSA-NEXT: v_mov_b32_e32 v3, v6 1436; HSA-NEXT: v_mov_b32_e32 v1, v2 1437; HSA-NEXT: v_mov_b32_e32 v2, v4 1438; HSA-NEXT: s_setpc_b64 s[30:31] 1439 %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(6)> 1440 ret <4 x ptr addrspace(6)> %cast 1441} 1442 1443define <8 x ptr addrspace(6)> @addrspacecast_v8p0_to_v8p6(<8 x ptr> %ptr) { 1444; HSA-LABEL: addrspacecast_v8p0_to_v8p6: 1445; HSA: ; %bb.0: 1446; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1447; HSA-NEXT: v_mov_b32_e32 v7, v14 
1448; HSA-NEXT: v_mov_b32_e32 v5, v10 1449; HSA-NEXT: v_mov_b32_e32 v3, v6 1450; HSA-NEXT: v_mov_b32_e32 v1, v2 1451; HSA-NEXT: v_mov_b32_e32 v2, v4 1452; HSA-NEXT: v_mov_b32_e32 v4, v8 1453; HSA-NEXT: v_mov_b32_e32 v6, v12 1454; HSA-NEXT: s_setpc_b64 s[30:31] 1455 %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(6)> 1456 ret <8 x ptr addrspace(6)> %cast 1457} 1458 1459define <16 x ptr addrspace(6)> @addrspacecast_v16p0_to_v16p6(<16 x ptr> %ptr) { 1460; HSA-LABEL: addrspacecast_v16p0_to_v16p6: 1461; HSA: ; %bb.0: 1462; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1463; HSA-NEXT: v_mov_b32_e32 v15, v30 1464; HSA-NEXT: v_mov_b32_e32 v13, v26 1465; HSA-NEXT: v_mov_b32_e32 v11, v22 1466; HSA-NEXT: v_mov_b32_e32 v9, v18 1467; HSA-NEXT: v_mov_b32_e32 v7, v14 1468; HSA-NEXT: v_mov_b32_e32 v5, v10 1469; HSA-NEXT: v_mov_b32_e32 v3, v6 1470; HSA-NEXT: v_mov_b32_e32 v1, v2 1471; HSA-NEXT: v_mov_b32_e32 v2, v4 1472; HSA-NEXT: v_mov_b32_e32 v4, v8 1473; HSA-NEXT: v_mov_b32_e32 v6, v12 1474; HSA-NEXT: v_mov_b32_e32 v8, v16 1475; HSA-NEXT: v_mov_b32_e32 v10, v20 1476; HSA-NEXT: v_mov_b32_e32 v12, v24 1477; HSA-NEXT: v_mov_b32_e32 v14, v28 1478; HSA-NEXT: s_setpc_b64 s[30:31] 1479 %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(6)> 1480 ret <16 x ptr addrspace(6)> %cast 1481} 1482 1483define <2 x ptr> @addrspacecast_v2p6_to_v2p0(<2 x ptr addrspace(6)> %ptr) { 1484; HSA-LABEL: addrspacecast_v2p6_to_v2p0: 1485; HSA: ; %bb.0: 1486; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1487; HSA-NEXT: v_mov_b32_e32 v2, v1 1488; HSA-NEXT: v_mov_b32_e32 v1, 0 1489; HSA-NEXT: v_mov_b32_e32 v3, 0 1490; HSA-NEXT: s_setpc_b64 s[30:31] 1491 %cast = addrspacecast <2 x ptr addrspace(6)> %ptr to <2 x ptr> 1492 ret <2 x ptr> %cast 1493} 1494 1495define <1 x ptr> @addrspacecast_v1p6_to_v1p0(<1 x ptr addrspace(6)> %ptr) { 1496; HSA-LABEL: addrspacecast_v1p6_to_v1p0: 1497; HSA: ; %bb.0: 1498; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; HSA-NEXT: v_mov_b32_e32 v1, 
0 1500; HSA-NEXT: s_setpc_b64 s[30:31] 1501 %cast = addrspacecast <1 x ptr addrspace(6)> %ptr to <1 x ptr> 1502 ret <1 x ptr> %cast 1503} 1504 1505define <4 x ptr> @addrspacecast_v4p6_to_v4p0(<4 x ptr addrspace(6)> %ptr) { 1506; HSA-LABEL: addrspacecast_v4p6_to_v4p0: 1507; HSA: ; %bb.0: 1508; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; HSA-NEXT: v_mov_b32_e32 v6, v3 1510; HSA-NEXT: v_mov_b32_e32 v4, v2 1511; HSA-NEXT: v_mov_b32_e32 v2, v1 1512; HSA-NEXT: v_mov_b32_e32 v1, 0 1513; HSA-NEXT: v_mov_b32_e32 v3, 0 1514; HSA-NEXT: v_mov_b32_e32 v5, 0 1515; HSA-NEXT: v_mov_b32_e32 v7, 0 1516; HSA-NEXT: s_setpc_b64 s[30:31] 1517 %cast = addrspacecast <4 x ptr addrspace(6)> %ptr to <4 x ptr> 1518 ret <4 x ptr> %cast 1519} 1520 1521define <8 x ptr> @addrspacecast_v8p6_to_v8p0(<8 x ptr addrspace(6)> %ptr) { 1522; HSA-LABEL: addrspacecast_v8p6_to_v8p0: 1523; HSA: ; %bb.0: 1524; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1525; HSA-NEXT: v_mov_b32_e32 v14, v7 1526; HSA-NEXT: v_mov_b32_e32 v12, v6 1527; HSA-NEXT: v_mov_b32_e32 v10, v5 1528; HSA-NEXT: v_mov_b32_e32 v8, v4 1529; HSA-NEXT: v_mov_b32_e32 v6, v3 1530; HSA-NEXT: v_mov_b32_e32 v4, v2 1531; HSA-NEXT: v_mov_b32_e32 v2, v1 1532; HSA-NEXT: v_mov_b32_e32 v1, 0 1533; HSA-NEXT: v_mov_b32_e32 v3, 0 1534; HSA-NEXT: v_mov_b32_e32 v5, 0 1535; HSA-NEXT: v_mov_b32_e32 v7, 0 1536; HSA-NEXT: v_mov_b32_e32 v9, 0 1537; HSA-NEXT: v_mov_b32_e32 v11, 0 1538; HSA-NEXT: v_mov_b32_e32 v13, 0 1539; HSA-NEXT: v_mov_b32_e32 v15, 0 1540; HSA-NEXT: s_setpc_b64 s[30:31] 1541 %cast = addrspacecast <8 x ptr addrspace(6)> %ptr to <8 x ptr> 1542 ret <8 x ptr> %cast 1543} 1544 1545define <16 x ptr> @addrspacecast_v16p6_to_v16p0(<16 x ptr addrspace(6)> %ptr) { 1546; HSA-LABEL: addrspacecast_v16p6_to_v16p0: 1547; HSA: ; %bb.0: 1548; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1549; HSA-NEXT: v_mov_b32_e32 v28, v14 1550; HSA-NEXT: v_mov_b32_e32 v24, v12 1551; HSA-NEXT: v_mov_b32_e32 v20, v10 1552; HSA-NEXT: v_mov_b32_e32 v16, v8 
1553; HSA-NEXT: v_mov_b32_e32 v14, v7 1554; HSA-NEXT: v_mov_b32_e32 v12, v6 1555; HSA-NEXT: v_mov_b32_e32 v10, v5 1556; HSA-NEXT: v_mov_b32_e32 v8, v4 1557; HSA-NEXT: v_mov_b32_e32 v6, v3 1558; HSA-NEXT: v_mov_b32_e32 v4, v2 1559; HSA-NEXT: v_mov_b32_e32 v2, v1 1560; HSA-NEXT: v_mov_b32_e32 v1, 0 1561; HSA-NEXT: v_mov_b32_e32 v3, 0 1562; HSA-NEXT: v_mov_b32_e32 v5, 0 1563; HSA-NEXT: v_mov_b32_e32 v7, 0 1564; HSA-NEXT: v_mov_b32_e32 v18, v9 1565; HSA-NEXT: v_mov_b32_e32 v22, v11 1566; HSA-NEXT: v_mov_b32_e32 v26, v13 1567; HSA-NEXT: v_mov_b32_e32 v30, v15 1568; HSA-NEXT: v_mov_b32_e32 v9, 0 1569; HSA-NEXT: v_mov_b32_e32 v11, 0 1570; HSA-NEXT: v_mov_b32_e32 v13, 0 1571; HSA-NEXT: v_mov_b32_e32 v15, 0 1572; HSA-NEXT: v_mov_b32_e32 v17, 0 1573; HSA-NEXT: v_mov_b32_e32 v19, 0 1574; HSA-NEXT: v_mov_b32_e32 v21, 0 1575; HSA-NEXT: v_mov_b32_e32 v23, 0 1576; HSA-NEXT: v_mov_b32_e32 v25, 0 1577; HSA-NEXT: v_mov_b32_e32 v27, 0 1578; HSA-NEXT: v_mov_b32_e32 v29, 0 1579; HSA-NEXT: v_mov_b32_e32 v31, 0 1580; HSA-NEXT: s_setpc_b64 s[30:31] 1581 %cast = addrspacecast <16 x ptr addrspace(6)> %ptr to <16 x ptr> 1582 ret <16 x ptr> %cast 1583} 1584 1585declare void @llvm.amdgcn.s.barrier() #1 1586declare i32 @llvm.amdgcn.workitem.id.x() #2 1587 1588attributes #0 = { nounwind } 1589attributes #1 = { nounwind convergent } 1590attributes #2 = { nounwind readnone } 1591attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" } 1592 1593!llvm.module.flags = !{!0} 1594!0 = !{i32 1, !"amdhsa_code_object_version", i32 400} 1595