; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s

; FIXME: Not a great error
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.ptr.buffer.load.lds),

; Intrinsic under test. After the buffer resource and the LDS destination
; pointer, the operands are (in order): size, voffset, soffset, offset, aux.
; The tests below use sizes 12 and 16; the gfx940 runs above expect llc to
; fail on this file.
declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

;---------------------------------------------------------------------
; dwordx3
;---------------------------------------------------------------------

; Three 12-byte (dwordx3) buffer-to-LDS loads with no voffset/soffset, using
; immediate offsets 0, 4 and 8 and aux values 0, 1 and 2 (printed as no
; modifier, sc0 and nt respectively), followed by a float read of the LDS
; destination. The size operand must be 12 so that the dwordx3 opcode is what
; gets selected; regenerate the assertions with
; utils/update_llc_test_checks.py after any change to the IR.
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx3:
; GFX950:       ; %bb.0: ; %main_body
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], 0 lds
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], 0 offset:4 sc0 lds
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], 0 offset:8 nt lds
; GFX950-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    ds_read_b32 v0, v0
; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; 12-byte buffer-to-LDS load whose voffset operand is the constant 2048.
; The expected output materializes the constant in v0 (0x800) and uses the
; offen addressing form.
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_voffset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; 12-byte buffer-to-LDS load whose voffset comes from a non-inreg i32
; argument; the expected output passes it in v0 with offen.
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; 12-byte buffer-to-LDS load whose soffset comes from an inreg i32 argument;
; the expected output uses s5 as the scalar offset and no vector offset.
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], s5 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; 12-byte buffer-to-LDS load with both a variable voffset (non-inreg argument)
; and a variable soffset (inreg argument); the expected output uses v0 with
; offen together with s5.
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], s5 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; 12-byte buffer-to-LDS load with variable voffset and soffset plus an
; immediate offset operand of 2048, which the expected output prints as
; offset:2048 on the instruction.
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], s5 offen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}

;---------------------------------------------------------------------
; dwordx4
;---------------------------------------------------------------------

; Three 16-byte (dwordx4) buffer-to-LDS loads with no voffset/soffset, using
; immediate offsets 0, 4 and 8 and aux values 0, 1 and 2 (printed as no
; modifier, sc0 and nt respectively), followed by a float read of the LDS
; destination. The size operand must be 16 so that the dwordx4 opcode is what
; gets selected; regenerate the assertions with
; utils/update_llc_test_checks.py after any change to the IR.
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx4:
; GFX950:       ; %bb.0: ; %main_body
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], 0 lds
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], 0 offset:4 sc0 lds
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], 0 offset:8 nt lds
; GFX950-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    ds_read_b32 v0, v0
; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; 16-byte buffer-to-LDS load whose voffset operand is the constant 2048.
; The expected output materializes the constant in v0 (0x800) and uses the
; offen addressing form.
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_voffset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; 16-byte buffer-to-LDS load whose voffset comes from a non-inreg i32
; argument; the expected output passes it in v0 with offen.
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; 16-byte buffer-to-LDS load whose soffset comes from an inreg i32 argument;
; the expected output uses s5 as the scalar offset and no vector offset.
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], s5 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; 16-byte buffer-to-LDS load with both a variable voffset (non-inreg argument)
; and a variable soffset (inreg argument); the expected output uses v0 with
; offen together with s5.
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], s5 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; 16-byte buffer-to-LDS load with variable voffset and soffset plus an
; immediate offset operand of 2048, which the expected output prints as
; offset:2048 on the instruction.
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], s5 offen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX950-GISEL: {{.*}}
; GFX950-SDAG: {{.*}}