; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/gds-allocation.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; One GDS object (address space 2) and two LDS objects (address space 3).
; All three are [4 x i32] (16 bytes); the LDS objects differ only in their
; requested alignment (128 vs. 256).
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
@lds1 = internal addrspace(3) global [4 x i32] undef, align 256

; These two objects should be allocated at the same constant offsets
; from the base.
;
; Both atomics target element 3 of their array, i.e. byte 12 of the object.
; The expected ISA uses offset:12 for the GDS access and for the LDS access,
; so @gds0 and @lds0 each start at offset 0 of their own region. m0 is
; expected to be loaded with 16, which matches the 16-byte size of @gds0.
; NOTE(review): no `attributes #1` group is defined in the visible part of
; this file - confirm the dangling reference is intentional.
define amdgpu_kernel void @alloc_lds_gds(ptr addrspace(1) %out) #1 {
; GCN-LABEL: alloc_lds_gds:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  %gep.gds = getelementptr [4 x i32], ptr addrspace(2) @gds0, i32 0, i32 3
  %val0 = atomicrmw add ptr addrspace(2) %gep.gds, i32 5 acq_rel
  %gep.lds = getelementptr [4 x i32], ptr addrspace(3) @lds0, i32 0, i32 3
  %val1 = atomicrmw add ptr addrspace(3) %gep.lds, i32 5 acq_rel
  ret void
}

; The LDS alignment shouldn't change offset of GDS.
;
; Both LDS objects are used here. The expected ISA addresses element 3 of
; @lds0 (align 128) at offset:140 (128 + 12) and element 3 of @lds1
; (align 256) at offset:12, i.e. @lds1 sits at LDS offset 0 and @lds0 at 128.
; The GDS access is still expected at offset:12 with m0 = 16, the same values
; as in @alloc_lds_gds.
; NOTE(review): no `attributes #1` group is defined in the visible part of
; this file - confirm the dangling reference is intentional.
define amdgpu_kernel void @alloc_lds_gds_align(ptr addrspace(1) %out) #1 {
; GCN-LABEL: alloc_lds_gds_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:140
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  %gep.gds = getelementptr [4 x i32], ptr addrspace(2) @gds0, i32 0, i32 3
  %val0 = atomicrmw add ptr addrspace(2) %gep.gds, i32 5 acq_rel

  %gep.lds0 = getelementptr [4 x i32], ptr addrspace(3) @lds0, i32 0, i32 3
  %val1 = atomicrmw add ptr addrspace(3) %gep.lds0, i32 5 acq_rel

  %gep.lds1 = getelementptr [4 x i32], ptr addrspace(3) @lds1, i32 0, i32 3
  %val2 = atomicrmw add ptr addrspace(3) %gep.lds1, i32 5 acq_rel
  ret void
}

; Two 16-byte GDS objects (address space 2) that differ only in their
; requested alignment (8 vs. 32).
@gds_align8 = internal addrspace(2) global [4 x i32] undef, align 8
@gds_align32 = internal addrspace(2) global [4 x i32] undef, align 32

; Two GDS globals with different alignments, used from a kernel that carries
; no attribute group.
;
; The expected ISA reaches element 3 of @gds_align8 at offset:28 (16 + 12) and
; element 3 of @gds_align32 at offset:12, i.e. the align-32 object is placed
; at GDS offset 0 and the align-8 object directly after it at offset 16.
; m0 is expected to be loaded with 32, the combined size of the two objects.
define amdgpu_kernel void @gds_global_align(ptr addrspace(1) %out) {
; GCN-LABEL: gds_global_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 32
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:28 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  %gep.gds0 = getelementptr [4 x i32], ptr addrspace(2) @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel
  %gep.gds1 = getelementptr [4 x i32], ptr addrspace(2) @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add ptr addrspace(2) %gep.gds1, i32 5 acq_rel
  ret void
}

; Same body as @gds_global_align, but the kernel carries attribute group #0
; ("amdgpu-gds-size"="1024").
;
; Every expected GDS value is 1024 larger than in @gds_global_align:
; offset:1052 (1024 + 28), offset:1036 (1024 + 12) and m0 = 0x420 (1024 + 32).
; The two globals are therefore expected to be placed after the 1024 bytes
; named by the attribute.
define amdgpu_kernel void @gds_global_align_plus_attr(ptr addrspace(1) %out) #0 {
; GCN-LABEL: gds_global_align_plus_attr:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_movk_i32 m0, 0x420
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1052 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1036 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  %gep.gds0 = getelementptr [4 x i32], ptr addrspace(2) @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel
  %gep.gds1 = getelementptr [4 x i32], ptr addrspace(2) @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add ptr addrspace(2) %gep.gds1, i32 5 acq_rel
  ret void
}

; A 1-byte GDS object (address space 2) and an external zero-length array.
; NOTE(review): @gds.external is declared in addrspace(3), unlike the other
; gds* globals, and no function in the visible part of this file references
; it - confirm that both are intended.
@small.gds = internal addrspace(2) global i8 undef, align 1
@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4

; Mixes a GDS global with a GDS pointer passed as a kernel argument, in a
; kernel that carries attribute group #0 ("amdgpu-gds-size"="1024").
;
; The inline asm consumes the address of @small.gds, which the expected ISA
; materializes as 0x400 (1024). m0 is expected to be 0x401 (1024 + the 1-byte
; @small.gds). The atomic goes through %gds.arg: its value is loaded from the
; kernel arguments and used as the base register, with offset:12 selecting
; element 3.
define amdgpu_kernel void @gds_extern_align(ptr addrspace(1) %out, ptr addrspace(2) %gds.arg) #0 {
; GCN-LABEL: gds_extern_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[4:5], 0x8
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    s_movk_i32 m0, 0x401
; GCN-NEXT:    s_movk_i32 s1, 0x400
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s1
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  call void asm sideeffect "; use $0","s"(ptr addrspace(2) @small.gds)
  %gep.gds0 = getelementptr [4 x i32], ptr addrspace(2) %gds.arg, i32 0, i32 3
  %val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel
  ret void
}

; Attribute group referenced by @gds_global_align_plus_attr and
; @gds_extern_align. In the expected output of both kernels, GDS globals start
; at address 1024 (0x400) rather than 0.
attributes #0 = { "amdgpu-gds-size"="1024" }