xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
5
6; GCN-LABEL: {{^}}lds_param_load:
7; GCN: s_mov_b32 m0
8; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x
9; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y
10; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z
11; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w
12; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x
13; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.x
14; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.y
15; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.z
16; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr0.w
17; GFX12-DAG: ds_param_load v{{[0-9]+}}, attr1.x
18; GFX11-DAG: s_waitcnt expcnt(4)
19; GCN: v_add_f32
20; GCN: buffer_store_b32
21; GFX11-DAG: s_waitcnt expcnt(3)
22; GFX12-DAG: s_wait_expcnt 0x3
23; GCN: buffer_store_b32
24; GFX11-DAG: s_waitcnt expcnt(2)
25; GFX12-DAG: s_wait_expcnt 0x2
26; GCN: buffer_store_b32
27; GFX11-DAG: s_waitcnt expcnt(1)
28; GFX12-DAG: s_wait_expcnt 0x1
29; GCN: buffer_store_b32
30; GFX11-DAG: s_waitcnt expcnt(0)
31; GFX12-DAG: s_wait_expcnt 0x0
32; GCN: buffer_store_b32
33; GCN: buffer_store_b32
; Test body: a pixel-shader-convention (amdgpu_ps) function that issues five
; llvm.amdgcn.lds.param.load calls and writes six floats to the buffer %buf.
; Both %buf and %arg are passed 'inreg'. %arg is forwarded unchanged as the
; third operand of every param load.
34define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
35main_body:
  ; First param load (operands 0, 0). Its result feeds the fadd below and is
  ; also stored unmodified by the last buffer store.
36  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
37  ; Ensure memory clustering is occurring for lds_param_load
  ; The fadd is placed between the first load and the remaining four in the
  ; IR; the check lines above expect all five param loads to appear before
  ; the v_add_f32 in the generated code.
38  %p5 = fadd float %p0, 1.0
  ; Remaining loads: first operand 1, 2, 3 with second operand 0, then
  ; first operand 0 with second operand 1.
39  %p1 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %arg)
40  %p2 = call float @llvm.amdgcn.lds.param.load(i32 2, i32 0, i32 %arg)
41  %p3 = call float @llvm.amdgcn.lds.param.load(i32 3, i32 0, i32 %arg)
42  %p4 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %arg)
  ; Six stores to %buf. They differ only in the stored value and in the
  ; fourth operand (0 through 5); the third operand is always 4 and the
  ; last is always 0.
43  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p5, ptr addrspace(8) %buf, i32 4, i32 0, i32 0)
44  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p1, ptr addrspace(8) %buf, i32 4, i32 1, i32 0)
45  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p2, ptr addrspace(8) %buf, i32 4, i32 2, i32 0)
46  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p3, ptr addrspace(8) %buf, i32 4, i32 3, i32 0)
47  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p4, ptr addrspace(8) %buf, i32 4, i32 4, i32 0)
  ; Store the original (pre-fadd) %p0 as well, so both %p0 and %p5 are used.
48  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p0, ptr addrspace(8) %buf, i32 4, i32 5, i32 0)
49  ret void
50}
51
; Intrinsic under test: takes three i32 operands and returns a float.
; NOTE(review): matching the calls in @lds_param_load against the expected
; attrN.{x,y,z,w} operands suggests the first operand selects the channel
; (0..3 -> x..w) and the second the attribute index - confirm against the
; AMDGPU intrinsic definition. The meaning of the third operand is not
; visible from this file.
52declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
; Buffer store intrinsic called by @lds_param_load to write each float to %buf.
53declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
54
; Attribute group #0 is applied to the test function; #1 to the param-load
; intrinsic declaration.
55attributes #0 = { nounwind }
56attributes #1 = { nounwind readnone }
57