; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=CHECK
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=CHECK

; Atomic struct buffer load (cachepolicy bit 0 -> glc): the buffer_load must
; stay inside the loop and execute on every iteration (not hoisted).
define amdgpu_kernel void @struct_atomic_buffer_load_i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB0_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB0_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; Same as above but with a constant vindex (15): the index is materialized
; into a VGPR and the atomic load still repeats inside the loop.
define amdgpu_kernel void @struct_atomic_buffer_load_i32_const_idx(<4 x i32> %addr) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_const_idx:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:  .LBB1_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB1_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 15, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; Atomic load with a zero immediate offset: codegen matches the base case
; (no offset field emitted on the buffer_load).
define amdgpu_kernel void @struct_atomic_buffer_load_i32_off(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_off:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB2_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB2_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; Atomic load with both a voffset (4) and an soffset (4): both appear on the
; buffer_load as the SGPR-offset operand and the offset:4 immediate.
define amdgpu_kernel void @struct_atomic_buffer_load_i32_soff(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_soff:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB3_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 4 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 4, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}
; Cachepolicy = 4 selects the dlc bit instead of glc on gfx11; the load is
; still emitted inside the loop.
define amdgpu_kernel void @struct_atomic_buffer_load_i32_dlc(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_dlc:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB4_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen offset:4 dlc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB4_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 4)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; Contrast case: the NON-atomic struct.buffer.load is loop-invariant, so the
; buffer_load is hoisted out of the loop (it appears before .LBB5_1).
define amdgpu_kernel void @struct_nonatomic_buffer_load_i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_nonatomic_buffer_load_i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_dual_mov_b32 v1, s6 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    buffer_load_b32 v1, v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_mov_b32 s0, 0
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v0
; CHECK-NEXT:  .LBB5_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_and_b32 s1, exec_lo, vcc_lo
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT:    s_or_b32 s0, s1, s0
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
; CHECK-NEXT:    s_cbranch_execnz .LBB5_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; 64-bit atomic variant: buffer_load_b64 inside the loop, compared against the
; zero-extended workitem id with v_cmp_ne_u64.
define amdgpu_kernel void @struct_atomic_buffer_load_i64(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i64:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v2, s6
; CHECK-NEXT:  .LBB6_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[3:4], v2, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, v[3:4], v[0:1]
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB6_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %id.zext = zext i32 %id to i64
  br label %bb1
bb1:
  %load = call i64 @llvm.amdgcn.struct.atomic.buffer.load.i64(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %cmp = icmp eq i64 %load, %id.zext
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; <2 x i16> result bitcast to i32: lowered as a single buffer_load_b32 in the
; loop, no repacking needed.
define amdgpu_kernel void @struct_atomic_buffer_load_v2i16(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v2i16:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB7_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB7_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <2 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v2i16(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %bitcast = bitcast <2 x i16> %load to i32
  %cmp = icmp eq i32 %bitcast, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; <4 x i16> result shuffled down to elements 0 and 2: a buffer_load_b64 in the
; loop followed by and/lshl_or repacking of the two lanes before the compare.
define amdgpu_kernel void @struct_atomic_buffer_load_v4i16(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v4i16:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB8_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[2:3], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; CHECK-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB8_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <4 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v4i16(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %shortened = shufflevector <4 x i16> %load, <4 x i16> poison, <2 x i32> <i32 0, i32 2>
  %bitcast = bitcast <2 x i16> %shortened to i32
  %cmp = icmp eq i32 %bitcast, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; <4 x i32> result: a full buffer_load_b128 in the loop; only element 3 (v5)
; feeds the compare.
define amdgpu_kernel void @struct_atomic_buffer_load_v4i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v4i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB9_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b128 v[2:5], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v5, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB9_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <4 x i32> @llvm.amdgcn.struct.atomic.buffer.load.v4i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %extracted = extractelement <4 x i32> %load, i32 3
  %cmp = icmp eq i32 %extracted, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; Pointer result: buffer_load_b64 fetches the pointer each iteration, then a
; flat_load_b32 dereferences it before the compare.
define amdgpu_kernel void @struct_atomic_buffer_load_ptr(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_ptr:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB10_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[2:3], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    flat_load_b32 v2, v[2:3]
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB10_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call ptr @llvm.amdgcn.struct.atomic.buffer.load.ptr(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %elem = load i32, ptr %load
  %cmp = icmp eq i32 %elem, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; Intrinsic declarations. NOTE(review): the originals declared
; "llvm.amdgcn.struct.atom.buffer.load.*", which did not match the
; "llvm.amdgcn.struct.atomic.buffer.load.*" names actually called above;
; fixed so the declarations correspond to the call sites.
declare i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i64 @llvm.amdgcn.struct.atomic.buffer.load.i64(<4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v2i16(<4 x i32>, i32, i32, i32, i32 immarg)
declare <4 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v4i16(<4 x i32>, i32, i32, i32, i32 immarg)
declare <4 x i32> @llvm.amdgcn.struct.atomic.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare ptr @llvm.amdgcn.struct.atomic.buffer.load.ptr(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.workitem.id.x()