; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; FIXME: Test should be redundant with constant-address-space-32bit.ll

; Overview: every function below loads through a `ptr addrspace(6)` pointer
; and is compiled with GlobalISel for three subtargets (tahiti, gfx803 and
; gfx900). The expected-output lines were produced by the script named on the
; first line; regenerate them with that script instead of editing them by hand.

; It's important to check with gfx8 and gfx9 to check access through global and flat.

; Custom lowering needs to swap out the MMO address space
;
; Loads one float from element %arg of an array based at a null addrspace(6)
; pointer. %arg is not `inreg`, and the expected output has it in v0. In the
; expected output the index is shifted left by 2 (a multiply by 4) and used as
; the low half of a 64-bit address whose high half (v1) is set to 0. The memory
; instruction differs per subtarget: buffer_load_dword with addr64 on tahiti,
; flat_load_dword on gfx803, global_load_dword on gfx900.
define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) {
; GFX6-LABEL: load_constant32bit_vgpr_offset:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: load_constant32bit_vgpr_offset:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: flat_load_dword v0, v[0:1]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_constant32bit_vgpr_offset:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
entry:
  %gep = getelementptr <{ [4294967295 x float] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg
  %load = load float, ptr addrspace(6) %gep, align 4
  ret float %load
}

; Same indexed access as above, but for an i32 element and with the index
; marked `inreg`; the expected output has it in s0. All three subtargets are
; expected to emit the same scalar sequence (shift left by 2, zero s1 as the
; high half of the address, then s_load_dword), so the assertions use only the
; shared GCN prefix.
define amdgpu_ps i32 @load_constant32bit_sgpr_offset(i32 inreg %arg) {
; GCN-LABEL: load_constant32bit_sgpr_offset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: ; return to shader part epilog
entry:
  %gep = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg
  %load = load i32, ptr addrspace(6) %gep, align 4
  ret i32 %load
}

; This gets split during regbankselect
;
; Loads a 32-byte <8 x float> through a non-`inreg` addrspace(6) pointer. The
; expected output performs it as two dwordx4 loads, the second one 16 bytes
; after the first. On tahiti and gfx900 the 16 is an `offset:16` immediate on
; the load itself; on gfx803 it is added to the address beforehand with a
; v_add_u32 / v_addc_u32 pair and the second flat load takes no offset.
define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %arg) {
; GFX6-LABEL: load_constant32bit_vgpr_v8f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: v_mov_b32_e32 v4, v0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v5, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
; GFX6-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[0:3], 0 addr64 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: load_constant32bit_vgpr_v8f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_mov_b32_e32 v4, v0
; GFX8-NEXT: v_mov_b32_e32 v5, 0
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[4:5]
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 16, v4
; GFX8-NEXT: v_addc_u32_e64 v5, s[0:1], 0, 0, vcc
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: load_constant32bit_vgpr_v8f32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: v_mov_b32_e32 v8, v0
; GFX9-NEXT: v_mov_b32_e32 v9, 0
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[8:9], off
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[8:9], off offset:16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
entry:
  %load = load <8 x float>, ptr addrspace(6) %arg, align 32
  ret <8 x float> %load
}