xref: /llvm-project/llvm/test/CodeGen/AMDGPU/ptr-buffer-alias-scheduling.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefixes=SDAG
3; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefixes=GISEL
4
; Both buffer resource arguments are marked noalias. The body is four groups,
; each loading a float from %a, squaring it, and storing the result to %b at
; the same offset (0, 4, 8, 12). For both check prefixes the expected output
; has a single buffer_load_dwordx4 ahead of the four multiplies and a single
; buffer_store_dwordx4 after them (contrast with @buffers_might_alias).
5define amdgpu_kernel void @buffers_dont_alias(ptr addrspace(8) noalias %a, ptr addrspace(8) noalias %b) {
6; SDAG-LABEL: buffers_dont_alias:
7; SDAG:       ; %bb.0:
8; SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
9; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
10; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
11; SDAG-NEXT:    s_waitcnt vmcnt(0)
12; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v0
13; SDAG-NEXT:    v_mul_f32_e32 v1, v1, v1
14; SDAG-NEXT:    v_mul_f32_e32 v2, v2, v2
15; SDAG-NEXT:    v_mul_f32_e32 v3, v3, v3
16; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[12:15], 0
17; SDAG-NEXT:    s_endpgm
18;
19; GISEL-LABEL: buffers_dont_alias:
20; GISEL:       ; %bb.0:
21; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
22; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
23; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
24; GISEL-NEXT:    s_waitcnt vmcnt(0)
25; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v0
26; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v1
27; GISEL-NEXT:    v_mul_f32_e32 v2, v2, v2
28; GISEL-NEXT:    v_mul_f32_e32 v3, v3, v3
29; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[12:15], 0
30; GISEL-NEXT:    s_endpgm
31  %l0 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 0, i32 0, i32 0)
32  %s0 = fmul float %l0, %l0
33  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s0, ptr addrspace(8) %b, i32 0, i32 0, i32 0)
34
35  %l1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 4, i32 0, i32 0)
36  %s1 = fmul float %l1, %l1
37  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s1, ptr addrspace(8) %b, i32 4, i32 0, i32 0)
38
39  %l2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 8, i32 0, i32 0)
40  %s2 = fmul float %l2, %l2
41  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s2, ptr addrspace(8) %b, i32 8, i32 0, i32 0)
42
43  %l3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 12, i32 0, i32 0)
44  %s3 = fmul float %l3, %l3
45  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s3, ptr addrspace(8) %b, i32 12, i32 0, i32 0)
46
47  ret void
48}
49
; Here the noalias attribute is on the flat pointer arguments; the two buffer
; resources %a and %b are derived from them with
; llvm.amdgcn.make.buffer.rsrc.p0. The load / square / store groups are the
; same as in @buffers_dont_alias, and the expected output again has one
; buffer_load_dwordx4 and one buffer_store_dwordx4 for both check prefixes.
50define amdgpu_kernel void @buffers_from_flat_dont_alias(ptr noalias %a.flat, ptr noalias %b.flat) {
51; SDAG-LABEL: buffers_from_flat_dont_alias:
52; SDAG:       ; %bb.0:
53; SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
54; SDAG-NEXT:    s_mov_b32 s7, 0
55; SDAG-NEXT:    s_mov_b32 s6, 16
56; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
57; SDAG-NEXT:    s_and_b32 s5, s1, 0xffff
58; SDAG-NEXT:    s_mov_b32 s4, s0
59; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
60; SDAG-NEXT:    s_and_b32 s5, s3, 0xffff
61; SDAG-NEXT:    s_mov_b32 s4, s2
62; SDAG-NEXT:    s_waitcnt vmcnt(0)
63; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v0
64; SDAG-NEXT:    v_mul_f32_e32 v1, v1, v1
65; SDAG-NEXT:    v_mul_f32_e32 v2, v2, v2
66; SDAG-NEXT:    v_mul_f32_e32 v3, v3, v3
67; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
68; SDAG-NEXT:    s_endpgm
69;
70; GISEL-LABEL: buffers_from_flat_dont_alias:
71; GISEL:       ; %bb.0:
72; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
73; GISEL-NEXT:    s_mov_b32 s7, 0
74; GISEL-NEXT:    s_mov_b32 s6, 16
75; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
76; GISEL-NEXT:    s_and_b32 s5, s1, 0xffff
77; GISEL-NEXT:    s_mov_b32 s4, s0
78; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
79; GISEL-NEXT:    s_and_b32 s5, s3, 0xffff
80; GISEL-NEXT:    s_mov_b32 s4, s2
81; GISEL-NEXT:    s_waitcnt vmcnt(0)
82; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v0
83; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v1
84; GISEL-NEXT:    v_mul_f32_e32 v2, v2, v2
85; GISEL-NEXT:    v_mul_f32_e32 v3, v3, v3
86; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
87; GISEL-NEXT:    s_endpgm
88  %a = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %a.flat, i16 0, i32 16, i32 0)
89  %b = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %b.flat, i16 0, i32 16, i32 0)
90
91  %l0 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 0, i32 0, i32 0)
92  %s0 = fmul float %l0, %l0
93  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s0, ptr addrspace(8) %b, i32 0, i32 0, i32 0)
94
95  %l1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 4, i32 0, i32 0)
96  %s1 = fmul float %l1, %l1
97  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s1, ptr addrspace(8) %b, i32 4, i32 0, i32 0)
98
99  %l2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 8, i32 0, i32 0)
100  %s2 = fmul float %l2, %l2
101  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s2, ptr addrspace(8) %b, i32 8, i32 0, i32 0)
102
103  %l3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 12, i32 0, i32 0)
104  %s3 = fmul float %l3, %l3
105  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s3, ptr addrspace(8) %b, i32 12, i32 0, i32 0)
106
107  ret void
108}
109
; Neither buffer resource argument is marked noalias. The IR body is the same
; four load / square / store groups as in @buffers_dont_alias, but the
; expected output keeps four separate buffer_load_dword / v_mul_f32 /
; buffer_store_dword sequences in IR order, with an s_waitcnt vmcnt(0) after
; each load, instead of one dwordx4 load and one dwordx4 store.
110define amdgpu_kernel void @buffers_might_alias(ptr addrspace(8) %a, ptr addrspace(8) %b) {
111; SDAG-LABEL: buffers_might_alias:
112; SDAG:       ; %bb.0:
113; SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
114; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
115; SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
116; SDAG-NEXT:    s_waitcnt vmcnt(0)
117; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v0
118; SDAG-NEXT:    buffer_store_dword v0, off, s[12:15], 0
119; SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:4
120; SDAG-NEXT:    s_waitcnt vmcnt(0)
121; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v0
122; SDAG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:4
123; SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:8
124; SDAG-NEXT:    s_waitcnt vmcnt(0)
125; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v0
126; SDAG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:8
127; SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:12
128; SDAG-NEXT:    s_waitcnt vmcnt(0)
129; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v0
130; SDAG-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:12
131; SDAG-NEXT:    s_endpgm
132;
133; GISEL-LABEL: buffers_might_alias:
134; GISEL:       ; %bb.0:
135; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
136; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
137; GISEL-NEXT:    buffer_load_dword v0, off, s[8:11], 0
138; GISEL-NEXT:    s_waitcnt vmcnt(0)
139; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v0
140; GISEL-NEXT:    buffer_store_dword v0, off, s[12:15], 0
141; GISEL-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:4
142; GISEL-NEXT:    s_waitcnt vmcnt(0)
143; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v0
144; GISEL-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:4
145; GISEL-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:8
146; GISEL-NEXT:    s_waitcnt vmcnt(0)
147; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v0
148; GISEL-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:8
149; GISEL-NEXT:    buffer_load_dword v0, off, s[8:11], 0 offset:12
150; GISEL-NEXT:    s_waitcnt vmcnt(0)
151; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v0
152; GISEL-NEXT:    buffer_store_dword v0, off, s[12:15], 0 offset:12
153; GISEL-NEXT:    s_endpgm
154  %l0 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 0, i32 0, i32 0)
155  %s0 = fmul float %l0, %l0
156  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s0, ptr addrspace(8) %b, i32 0, i32 0, i32 0)
157
158  %l1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 4, i32 0, i32 0)
159  %s1 = fmul float %l1, %l1
160  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s1, ptr addrspace(8) %b, i32 4, i32 0, i32 0)
161
162  %l2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 8, i32 0, i32 0)
163  %s2 = fmul float %l2, %l2
164  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s2, ptr addrspace(8) %b, i32 8, i32 0, i32 0)
165
166  %l3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 12, i32 0, i32 0)
167  %s3 = fmul float %l3, %l3
168  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %s3, ptr addrspace(8) %b, i32 12, i32 0, i32 0)
169
170  ret void
171}
172
; All three accesses go through the same buffer resource %a, with
; %idx = workitem.id.x << 2: store 1.0 at %idx, load from %idx + 4, then store
; the loaded value at %idx + 8. In the expected output the load
; (offen offset:4) is emitted before the store of 1.0 (offen, no offset),
; i.e. in the opposite order from the IR; the final store (offen offset:8)
; follows an s_waitcnt vmcnt(1).
173define amdgpu_kernel void @independent_offsets(ptr addrspace(8) %a) {
174; SDAG-LABEL: independent_offsets:
175; SDAG:       ; %bb.0:
176; SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
177; SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
178; SDAG-NEXT:    v_mov_b32_e32 v2, 1.0
179; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
180; SDAG-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:4
181; SDAG-NEXT:    s_nop 0
182; SDAG-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
183; SDAG-NEXT:    s_waitcnt vmcnt(1)
184; SDAG-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
185; SDAG-NEXT:    s_endpgm
186;
187; GISEL-LABEL: independent_offsets:
188; GISEL:       ; %bb.0:
189; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
190; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
191; GISEL-NEXT:    v_mov_b32_e32 v2, 1.0
192; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
193; GISEL-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:4
194; GISEL-NEXT:    s_nop 0
195; GISEL-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
196; GISEL-NEXT:    s_waitcnt vmcnt(1)
197; GISEL-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
198; GISEL-NEXT:    s_endpgm
199  %lane = call i32 @llvm.amdgcn.workitem.id.x()
200  %idx = shl i32 %lane, 2
201  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float 1.0, ptr addrspace(8) %a, i32 %idx, i32 0, i32 0)
202  %idx.1 = add i32 %idx, 4
203  %idx.2 = add i32 %idx, 8
204  %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %a, i32 %idx.1, i32 0, i32 0)
205  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %a, i32 %idx.2, i32 0, i32 0)
206
207  ret void
208}
209
; Declarations of the AMDGPU intrinsics called by the kernels in this file.
210declare i32 @llvm.amdgcn.workitem.id.x()
211
212declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32)
213declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
214declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr readnone nocapture, i16, i32, i32)
215