xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll (revision 3277c7cd28154e33637a168acb26cea7ac1f7fff)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX678,GFX67,GFX6
3; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX67,GFX78,GFX7
4; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX678,GFX789,GFX8910,GFX78,GFX89,GFX8
5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX6789,GFX78910,GFX789,GFX8910,GFX89,GFX910,GFX9
6; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX678910,GFX78910,GFX8910,GFX910,GFX10
7; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
8; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12
9
; Loads one i32 via llvm.amdgcn.s.buffer.load.i32 from the inreg descriptor
; %desc at the constant offset 4, then exports it as a float.
; Every run expects an s_buffer_load of one dword with an immediate offset
; operand. The tahiti and hawaii runs expect the immediate 0x1, all other runs
; expect 0x4 (consistent with the older targets encoding the immediate in
; dwords rather than bytes -- TODO confirm against the ISA documentation).
10define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
11; GFX67-LABEL: s_buffer_load_imm:
12; GFX67:       ; %bb.0: ; %main_body
13; GFX67-NEXT:    s_buffer_load_dword s0, s[0:3], 0x1
14; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
15; GFX67-NEXT:    v_mov_b32_e32 v0, s0
16; GFX67-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
17; GFX67-NEXT:    s_endpgm
18;
19; GFX8910-LABEL: s_buffer_load_imm:
20; GFX8910:       ; %bb.0: ; %main_body
21; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x4
22; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
23; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
24; GFX8910-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
25; GFX8910-NEXT:    s_endpgm
26;
27; GFX11-LABEL: s_buffer_load_imm:
28; GFX11:       ; %bb.0: ; %main_body
29; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x4
30; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
31; GFX11-NEXT:    v_mov_b32_e32 v0, s0
32; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
33; GFX11-NEXT:    s_endpgm
34;
35; GFX12-LABEL: s_buffer_load_imm:
36; GFX12:       ; %bb.0: ; %main_body
37; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x4
38; GFX12-NEXT:    s_wait_kmcnt 0x0
39; GFX12-NEXT:    v_mov_b32_e32 v0, s0
40; GFX12-NEXT:    export mrt0 v0, v0, v0, v0 done
41; GFX12-NEXT:    s_endpgm
42main_body:
43  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
44  %bitcast = bitcast i32 %load to float
  ; Only the first export channel carries the loaded value; the other three
  ; are undef, and the checks show v0 being reused for them.
45  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
46  ret void
47}
48
; Loads one i32 via llvm.amdgcn.s.buffer.load.i32 using the inreg argument
; %index as the offset, then exports it as a float.
; Every run expects an s_buffer_load of one dword whose offset operand is s4.
; The tahiti, hawaii and tonga runs expect only "s4"; the gfx900, gfx1010,
; gfx1100 and gfx1200 runs expect "s4 offset:0x0".
49define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
50; GFX678-LABEL: s_buffer_load_index:
51; GFX678:       ; %bb.0: ; %main_body
52; GFX678-NEXT:    s_buffer_load_dword s0, s[0:3], s4
53; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
54; GFX678-NEXT:    v_mov_b32_e32 v0, s0
55; GFX678-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
56; GFX678-NEXT:    s_endpgm
57;
58; GFX910-LABEL: s_buffer_load_index:
59; GFX910:       ; %bb.0: ; %main_body
60; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
61; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
62; GFX910-NEXT:    v_mov_b32_e32 v0, s0
63; GFX910-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
64; GFX910-NEXT:    s_endpgm
65;
66; GFX11-LABEL: s_buffer_load_index:
67; GFX11:       ; %bb.0: ; %main_body
68; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
69; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
70; GFX11-NEXT:    v_mov_b32_e32 v0, s0
71; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
72; GFX11-NEXT:    s_endpgm
73;
74; GFX12-LABEL: s_buffer_load_index:
75; GFX12:       ; %bb.0: ; %main_body
76; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
77; GFX12-NEXT:    s_wait_kmcnt 0x0
78; GFX12-NEXT:    v_mov_b32_e32 v0, s0
79; GFX12-NEXT:    export mrt0 v0, v0, v0, v0 done
80; GFX12-NEXT:    s_endpgm
81main_body:
82  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
83  %bitcast = bitcast i32 %load to float
84  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
85  ret void
86}
87
; Loads one i32 via llvm.amdgcn.s.buffer.load.i32 where %index is a plain
; (non-inreg) argument, then exports it as a float.
; No run expects an s_buffer_load here: each expects a one-dword buffer_load
; addressed by v0 with "offen", followed by s_waitcnt vmcnt(0)
; (s_wait_loadcnt 0x0 on gfx1200). The gfx1200 run prints "null" where the
; other runs print 0 before "offen".
88define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
89; GFX678910-LABEL: s_buffer_load_index_divergent:
90; GFX678910:       ; %bb.0: ; %main_body
91; GFX678910-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
92; GFX678910-NEXT:    s_waitcnt vmcnt(0)
93; GFX678910-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
94; GFX678910-NEXT:    s_endpgm
95;
96; GFX11-LABEL: s_buffer_load_index_divergent:
97; GFX11:       ; %bb.0: ; %main_body
98; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
99; GFX11-NEXT:    s_waitcnt vmcnt(0)
100; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
101; GFX11-NEXT:    s_endpgm
102;
103; GFX12-LABEL: s_buffer_load_index_divergent:
104; GFX12:       ; %bb.0: ; %main_body
105; GFX12-NEXT:    buffer_load_b32 v0, v0, s[0:3], null offen
106; GFX12-NEXT:    s_wait_loadcnt 0x0
107; GFX12-NEXT:    export mrt0 v0, v0, v0, v0 done
108; GFX12-NEXT:    s_endpgm
109main_body:
110  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
111  %bitcast = bitcast i32 %load to float
112  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
113  ret void
114}
115
; Loads <2 x i32> via llvm.amdgcn.s.buffer.load.v2i32 from the inreg
; descriptor %desc at the constant offset 64, then exports both elements as
; floats in the first two export channels.
; Every run expects a two-dword s_buffer_load into s[0:1] with an immediate
; offset operand: 0x10 for the tahiti and hawaii runs, 0x40 for all others.
116define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
117; GFX67-LABEL: s_buffer_loadx2_imm:
118; GFX67:       ; %bb.0: ; %main_body
119; GFX67-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x10
120; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
121; GFX67-NEXT:    v_mov_b32_e32 v0, s0
122; GFX67-NEXT:    v_mov_b32_e32 v1, s1
123; GFX67-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
124; GFX67-NEXT:    s_endpgm
125;
126; GFX8910-LABEL: s_buffer_loadx2_imm:
127; GFX8910:       ; %bb.0: ; %main_body
128; GFX8910-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x40
129; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
130; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
131; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
132; GFX8910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
133; GFX8910-NEXT:    s_endpgm
134;
135; GFX11-LABEL: s_buffer_loadx2_imm:
136; GFX11:       ; %bb.0: ; %main_body
137; GFX11-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x40
138; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
139; GFX11-NEXT:    v_mov_b32_e32 v0, s0
140; GFX11-NEXT:    v_mov_b32_e32 v1, s1
141; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
142; GFX11-NEXT:    s_endpgm
143;
144; GFX12-LABEL: s_buffer_loadx2_imm:
145; GFX12:       ; %bb.0: ; %main_body
146; GFX12-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x40
147; GFX12-NEXT:    s_wait_kmcnt 0x0
148; GFX12-NEXT:    v_mov_b32_e32 v0, s0
149; GFX12-NEXT:    v_mov_b32_e32 v1, s1
150; GFX12-NEXT:    export mrt0 v0, v1, v0, v0 done
151; GFX12-NEXT:    s_endpgm
152main_body:
153  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
154  %bitcast = bitcast <2 x i32> %load to <2 x float>
155  %x = extractelement <2 x float> %bitcast, i32 0
156  %y = extractelement <2 x float> %bitcast, i32 1
157  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
158  ret void
159}
160
; Loads <2 x i32> via llvm.amdgcn.s.buffer.load.v2i32 using the inreg
; argument %index as the offset, then exports both elements as floats.
; Every run expects a two-dword s_buffer_load into s[0:1] whose offset operand
; is s4. The tahiti, hawaii and tonga runs expect only "s4"; the gfx900,
; gfx1010, gfx1100 and gfx1200 runs expect "s4 offset:0x0".
161define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
162; GFX678-LABEL: s_buffer_loadx2_index:
163; GFX678:       ; %bb.0: ; %main_body
164; GFX678-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], s4
165; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
166; GFX678-NEXT:    v_mov_b32_e32 v0, s0
167; GFX678-NEXT:    v_mov_b32_e32 v1, s1
168; GFX678-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
169; GFX678-NEXT:    s_endpgm
170;
171; GFX910-LABEL: s_buffer_loadx2_index:
172; GFX910:       ; %bb.0: ; %main_body
173; GFX910-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], s4 offset:0x0
174; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
175; GFX910-NEXT:    v_mov_b32_e32 v0, s0
176; GFX910-NEXT:    v_mov_b32_e32 v1, s1
177; GFX910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
178; GFX910-NEXT:    s_endpgm
179;
180; GFX11-LABEL: s_buffer_loadx2_index:
181; GFX11:       ; %bb.0: ; %main_body
182; GFX11-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
183; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
184; GFX11-NEXT:    v_mov_b32_e32 v0, s0
185; GFX11-NEXT:    v_mov_b32_e32 v1, s1
186; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
187; GFX11-NEXT:    s_endpgm
188;
189; GFX12-LABEL: s_buffer_loadx2_index:
190; GFX12:       ; %bb.0: ; %main_body
191; GFX12-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], s4 offset:0x0
192; GFX12-NEXT:    s_wait_kmcnt 0x0
193; GFX12-NEXT:    v_mov_b32_e32 v0, s0
194; GFX12-NEXT:    v_mov_b32_e32 v1, s1
195; GFX12-NEXT:    export mrt0 v0, v1, v0, v0 done
196; GFX12-NEXT:    s_endpgm
197main_body:
198  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
199  %bitcast = bitcast <2 x i32> %load to <2 x float>
200  %x = extractelement <2 x float> %bitcast, i32 0
201  %y = extractelement <2 x float> %bitcast, i32 1
202  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
203  ret void
204}
205
; Loads <2 x i32> via llvm.amdgcn.s.buffer.load.v2i32 where %index is a plain
; (non-inreg) argument, then exports both elements as floats.
; No run expects an s_buffer_load here: each expects a two-dword buffer_load
; into v[0:1] addressed by v0 with "offen", followed by s_waitcnt vmcnt(0)
; (s_wait_loadcnt 0x0 on gfx1200).
206define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
207; GFX678910-LABEL: s_buffer_loadx2_index_divergent:
208; GFX678910:       ; %bb.0: ; %main_body
209; GFX678910-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
210; GFX678910-NEXT:    s_waitcnt vmcnt(0)
211; GFX678910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
212; GFX678910-NEXT:    s_endpgm
213;
214; GFX11-LABEL: s_buffer_loadx2_index_divergent:
215; GFX11:       ; %bb.0: ; %main_body
216; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
217; GFX11-NEXT:    s_waitcnt vmcnt(0)
218; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
219; GFX11-NEXT:    s_endpgm
220;
221; GFX12-LABEL: s_buffer_loadx2_index_divergent:
222; GFX12:       ; %bb.0: ; %main_body
223; GFX12-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], null offen
224; GFX12-NEXT:    s_wait_loadcnt 0x0
225; GFX12-NEXT:    export mrt0 v0, v1, v0, v0 done
226; GFX12-NEXT:    s_endpgm
227main_body:
228  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
229  %bitcast = bitcast <2 x i32> %load to <2 x float>
230  %x = extractelement <2 x float> %bitcast, i32 0
231  %y = extractelement <2 x float> %bitcast, i32 1
232  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
233  ret void
234}
235
; Loads <3 x i32> via llvm.amdgcn.s.buffer.load.v3i32 from the inreg
; descriptor %desc at the constant offset 64, then exports the three elements
; as floats.
; All runs except gfx1200 expect a four-dword load (dwordx4 / b128) into
; s[0:3], of which only s0-s2 are copied to VGPRs; the gfx1200 run expects
; s_buffer_load_b96 into s[0:2]. The immediate is 0x10 for the tahiti and
; hawaii runs and 0x40 for all others.
236define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
237; GFX67-LABEL: s_buffer_loadx3_imm:
238; GFX67:       ; %bb.0: ; %main_body
239; GFX67-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x10
240; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
241; GFX67-NEXT:    v_mov_b32_e32 v0, s0
242; GFX67-NEXT:    v_mov_b32_e32 v1, s1
243; GFX67-NEXT:    v_mov_b32_e32 v2, s2
244; GFX67-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
245; GFX67-NEXT:    s_endpgm
246;
247; GFX8910-LABEL: s_buffer_loadx3_imm:
248; GFX8910:       ; %bb.0: ; %main_body
249; GFX8910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x40
250; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
251; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
252; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
253; GFX8910-NEXT:    v_mov_b32_e32 v2, s2
254; GFX8910-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
255; GFX8910-NEXT:    s_endpgm
256;
257; GFX11-LABEL: s_buffer_loadx3_imm:
258; GFX11:       ; %bb.0: ; %main_body
259; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0x40
260; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
261; GFX11-NEXT:    v_mov_b32_e32 v0, s0
262; GFX11-NEXT:    v_mov_b32_e32 v1, s1
263; GFX11-NEXT:    v_mov_b32_e32 v2, s2
264; GFX11-NEXT:    exp mrt0 v0, v1, v2, v0 done
265; GFX11-NEXT:    s_endpgm
266;
267; GFX12-LABEL: s_buffer_loadx3_imm:
268; GFX12:       ; %bb.0: ; %main_body
269; GFX12-NEXT:    s_buffer_load_b96 s[0:2], s[0:3], 0x40
270; GFX12-NEXT:    s_wait_kmcnt 0x0
271; GFX12-NEXT:    v_mov_b32_e32 v0, s0
272; GFX12-NEXT:    v_mov_b32_e32 v1, s1
273; GFX12-NEXT:    v_mov_b32_e32 v2, s2
274; GFX12-NEXT:    export mrt0 v0, v1, v2, v0 done
275; GFX12-NEXT:    s_endpgm
276main_body:
277  %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
278  %bitcast = bitcast <3 x i32> %load to <3 x float>
279  %x = extractelement <3 x float> %bitcast, i32 0
280  %y = extractelement <3 x float> %bitcast, i32 1
281  %z = extractelement <3 x float> %bitcast, i32 2
282  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
283  ret void
284}
285
; Loads <3 x i32> via llvm.amdgcn.s.buffer.load.v3i32 using the inreg
; argument %index as the offset, then exports the three elements as floats.
; All runs except gfx1200 expect a four-dword load (dwordx4 / b128) into
; s[0:3], of which only s0-s2 are copied to VGPRs; the gfx1200 run expects
; s_buffer_load_b96 into s[0:2]. The offset operand is s4, printed with an
; extra "offset:0x0" on the gfx900, gfx1010, gfx1100 and gfx1200 runs.
286define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
287; GFX678-LABEL: s_buffer_loadx3_index:
288; GFX678:       ; %bb.0: ; %main_body
289; GFX678-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4
290; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
291; GFX678-NEXT:    v_mov_b32_e32 v0, s0
292; GFX678-NEXT:    v_mov_b32_e32 v1, s1
293; GFX678-NEXT:    v_mov_b32_e32 v2, s2
294; GFX678-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
295; GFX678-NEXT:    s_endpgm
296;
297; GFX910-LABEL: s_buffer_loadx3_index:
298; GFX910:       ; %bb.0: ; %main_body
299; GFX910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
300; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
301; GFX910-NEXT:    v_mov_b32_e32 v0, s0
302; GFX910-NEXT:    v_mov_b32_e32 v1, s1
303; GFX910-NEXT:    v_mov_b32_e32 v2, s2
304; GFX910-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
305; GFX910-NEXT:    s_endpgm
306;
307; GFX11-LABEL: s_buffer_loadx3_index:
308; GFX11:       ; %bb.0: ; %main_body
309; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
310; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
311; GFX11-NEXT:    v_mov_b32_e32 v0, s0
312; GFX11-NEXT:    v_mov_b32_e32 v1, s1
313; GFX11-NEXT:    v_mov_b32_e32 v2, s2
314; GFX11-NEXT:    exp mrt0 v0, v1, v2, v0 done
315; GFX11-NEXT:    s_endpgm
316;
317; GFX12-LABEL: s_buffer_loadx3_index:
318; GFX12:       ; %bb.0: ; %main_body
319; GFX12-NEXT:    s_buffer_load_b96 s[0:2], s[0:3], s4 offset:0x0
320; GFX12-NEXT:    s_wait_kmcnt 0x0
321; GFX12-NEXT:    v_mov_b32_e32 v0, s0
322; GFX12-NEXT:    v_mov_b32_e32 v1, s1
323; GFX12-NEXT:    v_mov_b32_e32 v2, s2
324; GFX12-NEXT:    export mrt0 v0, v1, v2, v0 done
325; GFX12-NEXT:    s_endpgm
326main_body:
327  %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
328  %bitcast = bitcast <3 x i32> %load to <3 x float>
329  %x = extractelement <3 x float> %bitcast, i32 0
330  %y = extractelement <3 x float> %bitcast, i32 1
331  %z = extractelement <3 x float> %bitcast, i32 2
332  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
333  ret void
334}
335
; Loads <3 x i32> via llvm.amdgcn.s.buffer.load.v3i32 where %index is a plain
; (non-inreg) argument, then exports the three elements as floats.
; No run expects an s_buffer_load here. The tahiti run expects a four-dword
; buffer_load_dwordx4 into v[0:3]; the hawaii, tonga, gfx900 and gfx1010 runs
; expect buffer_load_dwordx3 into v[0:2]; the gfx1100 and gfx1200 runs expect
; buffer_load_b96 into v[0:2]. All are addressed by v0 with "offen".
336define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
337; GFX6-LABEL: s_buffer_loadx3_index_divergent:
338; GFX6:       ; %bb.0: ; %main_body
339; GFX6-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
340; GFX6-NEXT:    s_waitcnt vmcnt(0)
341; GFX6-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
342; GFX6-NEXT:    s_endpgm
343;
344; GFX78910-LABEL: s_buffer_loadx3_index_divergent:
345; GFX78910:       ; %bb.0: ; %main_body
346; GFX78910-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[0:3], 0 offen
347; GFX78910-NEXT:    s_waitcnt vmcnt(0)
348; GFX78910-NEXT:    exp mrt0 v0, v1, v2, v0 done vm
349; GFX78910-NEXT:    s_endpgm
350;
351; GFX11-LABEL: s_buffer_loadx3_index_divergent:
352; GFX11:       ; %bb.0: ; %main_body
353; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen
354; GFX11-NEXT:    s_waitcnt vmcnt(0)
355; GFX11-NEXT:    exp mrt0 v0, v1, v2, v0 done
356; GFX11-NEXT:    s_endpgm
357;
358; GFX12-LABEL: s_buffer_loadx3_index_divergent:
359; GFX12:       ; %bb.0: ; %main_body
360; GFX12-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], null offen
361; GFX12-NEXT:    s_wait_loadcnt 0x0
362; GFX12-NEXT:    export mrt0 v0, v1, v2, v0 done
363; GFX12-NEXT:    s_endpgm
364main_body:
365  %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
366  %bitcast = bitcast <3 x i32> %load to <3 x float>
367  %x = extractelement <3 x float> %bitcast, i32 0
368  %y = extractelement <3 x float> %bitcast, i32 1
369  %z = extractelement <3 x float> %bitcast, i32 2
370  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
371  ret void
372}
373
; Loads <4 x i32> via llvm.amdgcn.s.buffer.load.v4i32 from the inreg
; descriptor %desc at the constant offset 200, then exports all four elements
; as floats.
; Every run expects a four-dword s_buffer_load into s[0:3] with an immediate
; offset operand: 0x32 for the tahiti and hawaii runs, 0xc8 for all others.
374define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
375; GFX67-LABEL: s_buffer_loadx4_imm:
376; GFX67:       ; %bb.0: ; %main_body
377; GFX67-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x32
378; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
379; GFX67-NEXT:    v_mov_b32_e32 v0, s0
380; GFX67-NEXT:    v_mov_b32_e32 v1, s1
381; GFX67-NEXT:    v_mov_b32_e32 v2, s2
382; GFX67-NEXT:    v_mov_b32_e32 v3, s3
383; GFX67-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
384; GFX67-NEXT:    s_endpgm
385;
386; GFX8910-LABEL: s_buffer_loadx4_imm:
387; GFX8910:       ; %bb.0: ; %main_body
388; GFX8910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0xc8
389; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
390; GFX8910-NEXT:    v_mov_b32_e32 v0, s0
391; GFX8910-NEXT:    v_mov_b32_e32 v1, s1
392; GFX8910-NEXT:    v_mov_b32_e32 v2, s2
393; GFX8910-NEXT:    v_mov_b32_e32 v3, s3
394; GFX8910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
395; GFX8910-NEXT:    s_endpgm
396;
397; GFX11-LABEL: s_buffer_loadx4_imm:
398; GFX11:       ; %bb.0: ; %main_body
399; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0xc8
400; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
401; GFX11-NEXT:    v_mov_b32_e32 v0, s0
402; GFX11-NEXT:    v_mov_b32_e32 v1, s1
403; GFX11-NEXT:    v_mov_b32_e32 v2, s2
404; GFX11-NEXT:    v_mov_b32_e32 v3, s3
405; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
406; GFX11-NEXT:    s_endpgm
407;
408; GFX12-LABEL: s_buffer_loadx4_imm:
409; GFX12:       ; %bb.0: ; %main_body
410; GFX12-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0xc8
411; GFX12-NEXT:    s_wait_kmcnt 0x0
412; GFX12-NEXT:    v_mov_b32_e32 v0, s0
413; GFX12-NEXT:    v_mov_b32_e32 v1, s1
414; GFX12-NEXT:    v_mov_b32_e32 v2, s2
415; GFX12-NEXT:    v_mov_b32_e32 v3, s3
416; GFX12-NEXT:    export mrt0 v0, v1, v2, v3 done
417; GFX12-NEXT:    s_endpgm
418main_body:
419  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
420  %bitcast = bitcast <4 x i32> %load to <4 x float>
421  %x = extractelement <4 x float> %bitcast, i32 0
422  %y = extractelement <4 x float> %bitcast, i32 1
423  %z = extractelement <4 x float> %bitcast, i32 2
424  %w = extractelement <4 x float> %bitcast, i32 3
425  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
426  ret void
427}
428
; Loads <4 x i32> via llvm.amdgcn.s.buffer.load.v4i32 using the inreg
; argument %index as the offset, then exports all four elements as floats.
; Every run expects a four-dword s_buffer_load into s[0:3] whose offset
; operand is s4. The tahiti, hawaii and tonga runs expect only "s4"; the
; gfx900, gfx1010, gfx1100 and gfx1200 runs expect "s4 offset:0x0".
429define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
430; GFX678-LABEL: s_buffer_loadx4_index:
431; GFX678:       ; %bb.0: ; %main_body
432; GFX678-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4
433; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
434; GFX678-NEXT:    v_mov_b32_e32 v0, s0
435; GFX678-NEXT:    v_mov_b32_e32 v1, s1
436; GFX678-NEXT:    v_mov_b32_e32 v2, s2
437; GFX678-NEXT:    v_mov_b32_e32 v3, s3
438; GFX678-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
439; GFX678-NEXT:    s_endpgm
440;
441; GFX910-LABEL: s_buffer_loadx4_index:
442; GFX910:       ; %bb.0: ; %main_body
443; GFX910-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], s4 offset:0x0
444; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
445; GFX910-NEXT:    v_mov_b32_e32 v0, s0
446; GFX910-NEXT:    v_mov_b32_e32 v1, s1
447; GFX910-NEXT:    v_mov_b32_e32 v2, s2
448; GFX910-NEXT:    v_mov_b32_e32 v3, s3
449; GFX910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
450; GFX910-NEXT:    s_endpgm
451;
452; GFX11-LABEL: s_buffer_loadx4_index:
453; GFX11:       ; %bb.0: ; %main_body
454; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
455; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
456; GFX11-NEXT:    v_mov_b32_e32 v0, s0
457; GFX11-NEXT:    v_mov_b32_e32 v1, s1
458; GFX11-NEXT:    v_mov_b32_e32 v2, s2
459; GFX11-NEXT:    v_mov_b32_e32 v3, s3
460; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
461; GFX11-NEXT:    s_endpgm
462;
463; GFX12-LABEL: s_buffer_loadx4_index:
464; GFX12:       ; %bb.0: ; %main_body
465; GFX12-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], s4 offset:0x0
466; GFX12-NEXT:    s_wait_kmcnt 0x0
467; GFX12-NEXT:    v_mov_b32_e32 v0, s0
468; GFX12-NEXT:    v_mov_b32_e32 v1, s1
469; GFX12-NEXT:    v_mov_b32_e32 v2, s2
470; GFX12-NEXT:    v_mov_b32_e32 v3, s3
471; GFX12-NEXT:    export mrt0 v0, v1, v2, v3 done
472; GFX12-NEXT:    s_endpgm
473main_body:
474  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
475  %bitcast = bitcast <4 x i32> %load to <4 x float>
476  %x = extractelement <4 x float> %bitcast, i32 0
477  %y = extractelement <4 x float> %bitcast, i32 1
478  %z = extractelement <4 x float> %bitcast, i32 2
479  %w = extractelement <4 x float> %bitcast, i32 3
480  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
481  ret void
482}
483
; Loads <4 x i32> via llvm.amdgcn.s.buffer.load.v4i32 where %index is a plain
; (non-inreg) argument, then exports all four elements as floats.
; No run expects an s_buffer_load here: each expects a four-dword buffer_load
; into v[0:3] addressed by v0 with "offen", followed by s_waitcnt vmcnt(0)
; (s_wait_loadcnt 0x0 on gfx1200).
484define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
485; GFX678910-LABEL: s_buffer_loadx4_index_divergent:
486; GFX678910:       ; %bb.0: ; %main_body
487; GFX678910-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
488; GFX678910-NEXT:    s_waitcnt vmcnt(0)
489; GFX678910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
490; GFX678910-NEXT:    s_endpgm
491;
492; GFX11-LABEL: s_buffer_loadx4_index_divergent:
493; GFX11:       ; %bb.0: ; %main_body
494; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
495; GFX11-NEXT:    s_waitcnt vmcnt(0)
496; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
497; GFX11-NEXT:    s_endpgm
498;
499; GFX12-LABEL: s_buffer_loadx4_index_divergent:
500; GFX12:       ; %bb.0: ; %main_body
501; GFX12-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], null offen
502; GFX12-NEXT:    s_wait_loadcnt 0x0
503; GFX12-NEXT:    export mrt0 v0, v1, v2, v3 done
504; GFX12-NEXT:    s_endpgm
505main_body:
506  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
507  %bitcast = bitcast <4 x i32> %load to <4 x float>
508  %x = extractelement <4 x float> %bitcast, i32 0
509  %y = extractelement <4 x float> %bitcast, i32 1
510  %z = extractelement <4 x float> %bitcast, i32 2
511  %w = extractelement <4 x float> %bitcast, i32 3
512  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
513  ret void
514}
515
; Issues two separate i32 loads via llvm.amdgcn.s.buffer.load.i32 from the
; same descriptor at the constant offsets 4 and 8, then exports both as floats.
; Every run expects a single two-dword s_buffer_load instead of two one-dword
; loads. The immediate is 0x1 for the tahiti and hawaii runs and 0x4 for all
; others. The gfx900 and gfx1010 runs expect the result in s[4:5]; the other
; runs expect s[0:1].
516define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
517; GFX67-LABEL: s_buffer_load_imm_mergex2:
518; GFX67:       ; %bb.0: ; %main_body
519; GFX67-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x1
520; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
521; GFX67-NEXT:    v_mov_b32_e32 v0, s0
522; GFX67-NEXT:    v_mov_b32_e32 v1, s1
523; GFX67-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
524; GFX67-NEXT:    s_endpgm
525;
526; GFX8-LABEL: s_buffer_load_imm_mergex2:
527; GFX8:       ; %bb.0: ; %main_body
528; GFX8-NEXT:    s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
529; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
530; GFX8-NEXT:    v_mov_b32_e32 v0, s0
531; GFX8-NEXT:    v_mov_b32_e32 v1, s1
532; GFX8-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
533; GFX8-NEXT:    s_endpgm
534;
535; GFX910-LABEL: s_buffer_load_imm_mergex2:
536; GFX910:       ; %bb.0: ; %main_body
537; GFX910-NEXT:    s_buffer_load_dwordx2 s[4:5], s[0:3], 0x4
538; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
539; GFX910-NEXT:    v_mov_b32_e32 v0, s4
540; GFX910-NEXT:    v_mov_b32_e32 v1, s5
541; GFX910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
542; GFX910-NEXT:    s_endpgm
543;
544; GFX11-LABEL: s_buffer_load_imm_mergex2:
545; GFX11:       ; %bb.0: ; %main_body
546; GFX11-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x4
547; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
548; GFX11-NEXT:    v_mov_b32_e32 v0, s0
549; GFX11-NEXT:    v_mov_b32_e32 v1, s1
550; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
551; GFX11-NEXT:    s_endpgm
552;
553; GFX12-LABEL: s_buffer_load_imm_mergex2:
554; GFX12:       ; %bb.0: ; %main_body
555; GFX12-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x4
556; GFX12-NEXT:    s_wait_kmcnt 0x0
557; GFX12-NEXT:    v_mov_b32_e32 v0, s0
558; GFX12-NEXT:    v_mov_b32_e32 v1, s1
559; GFX12-NEXT:    export mrt0 v0, v1, v0, v0 done
560; GFX12-NEXT:    s_endpgm
561main_body:
  ; Two adjacent dwords (offsets 4 and 8) loaded by separate intrinsic calls.
562  %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
563  %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
564  %x = bitcast i32 %load0 to float
565  %y = bitcast i32 %load1 to float
566  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
567  ret void
568}
569
; Issues four separate i32 loads via llvm.amdgcn.s.buffer.load.i32 from the
; same descriptor at the constant offsets 8, 12, 16 and 20, then exports all
; four as floats.
; Every run expects a single four-dword s_buffer_load instead of four
; one-dword loads. The immediate is 0x2 for the tahiti and hawaii runs and 0x8
; for all others. The gfx900 and gfx1010 runs expect the result in s[4:7]; the
; other runs expect s[0:3].
570define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
571; GFX67-LABEL: s_buffer_load_imm_mergex4:
572; GFX67:       ; %bb.0: ; %main_body
573; GFX67-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x2
574; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
575; GFX67-NEXT:    v_mov_b32_e32 v0, s0
576; GFX67-NEXT:    v_mov_b32_e32 v1, s1
577; GFX67-NEXT:    v_mov_b32_e32 v2, s2
578; GFX67-NEXT:    v_mov_b32_e32 v3, s3
579; GFX67-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
580; GFX67-NEXT:    s_endpgm
581;
582; GFX8-LABEL: s_buffer_load_imm_mergex4:
583; GFX8:       ; %bb.0: ; %main_body
584; GFX8-NEXT:    s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
585; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
586; GFX8-NEXT:    v_mov_b32_e32 v0, s0
587; GFX8-NEXT:    v_mov_b32_e32 v1, s1
588; GFX8-NEXT:    v_mov_b32_e32 v2, s2
589; GFX8-NEXT:    v_mov_b32_e32 v3, s3
590; GFX8-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
591; GFX8-NEXT:    s_endpgm
592;
593; GFX910-LABEL: s_buffer_load_imm_mergex4:
594; GFX910:       ; %bb.0: ; %main_body
595; GFX910-NEXT:    s_buffer_load_dwordx4 s[4:7], s[0:3], 0x8
596; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
597; GFX910-NEXT:    v_mov_b32_e32 v0, s4
598; GFX910-NEXT:    v_mov_b32_e32 v1, s5
599; GFX910-NEXT:    v_mov_b32_e32 v2, s6
600; GFX910-NEXT:    v_mov_b32_e32 v3, s7
601; GFX910-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
602; GFX910-NEXT:    s_endpgm
603;
604; GFX11-LABEL: s_buffer_load_imm_mergex4:
605; GFX11:       ; %bb.0: ; %main_body
606; GFX11-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0x8
607; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
608; GFX11-NEXT:    v_mov_b32_e32 v0, s0
609; GFX11-NEXT:    v_mov_b32_e32 v1, s1
610; GFX11-NEXT:    v_mov_b32_e32 v2, s2
611; GFX11-NEXT:    v_mov_b32_e32 v3, s3
612; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
613; GFX11-NEXT:    s_endpgm
614;
615; GFX12-LABEL: s_buffer_load_imm_mergex4:
616; GFX12:       ; %bb.0: ; %main_body
617; GFX12-NEXT:    s_buffer_load_b128 s[0:3], s[0:3], 0x8
618; GFX12-NEXT:    s_wait_kmcnt 0x0
619; GFX12-NEXT:    v_mov_b32_e32 v0, s0
620; GFX12-NEXT:    v_mov_b32_e32 v1, s1
621; GFX12-NEXT:    v_mov_b32_e32 v2, s2
622; GFX12-NEXT:    v_mov_b32_e32 v3, s3
623; GFX12-NEXT:    export mrt0 v0, v1, v2, v3 done
624; GFX12-NEXT:    s_endpgm
625main_body:
  ; Four adjacent dwords (offsets 8..20) loaded by separate intrinsic calls.
626  %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
627  %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
628  %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
629  %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
630  %x = bitcast i32 %load0 to float
631  %y = bitcast i32 %load1 to float
632  %z = bitcast i32 %load2 to float
633  %w = bitcast i32 %load3 to float
634  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
635  ret void
636}
637
; External i32 global in address space 1; @s_buffer_load_index_across_bb
; stores its shifted index here.
638@gv = external addrspace(1) global i32
639
; Computes the load offset across two basic blocks with a non-inreg %index:
; main_body shifts %index left by 4 and stores the result to @gv, then bb1
; ORs in 8 and uses that as the offset of a one-dword
; llvm.amdgcn.s.buffer.load.i32.
; Every run expects the address of gv to be formed with s_getpc_b64 plus
; gotpcrel32 relocations, the shifted value to be stored, and then a separate
; "v_or_b32_e32 v0, 8, v0" followed by a one-dword buffer_load addressed by v0
; with "offen" (no "offset:" field is printed on the load).
; The store differs per run: buffer_store_dword on tahiti and hawaii,
; flat_store_dword on tonga, and a global store on gfx900 and later. Only the
; tahiti run expects an s_waitcnt expcnt(0) after the store.
640define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
641; GFX6-LABEL: s_buffer_load_index_across_bb:
642; GFX6:       ; %bb.0: ; %main_body
643; GFX6-NEXT:    s_getpc_b64 s[4:5]
644; GFX6-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
645; GFX6-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
646; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
647; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
648; GFX6-NEXT:    s_mov_b32 s7, 0xf000
649; GFX6-NEXT:    s_mov_b32 s6, -1
650; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
651; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
652; GFX6-NEXT:    s_waitcnt expcnt(0)
653; GFX6-NEXT:    v_or_b32_e32 v0, 8, v0
654; GFX6-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
655; GFX6-NEXT:    s_waitcnt vmcnt(0)
656; GFX6-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
657; GFX6-NEXT:    s_endpgm
658;
659; GFX7-LABEL: s_buffer_load_index_across_bb:
660; GFX7:       ; %bb.0: ; %main_body
661; GFX7-NEXT:    s_getpc_b64 s[4:5]
662; GFX7-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
663; GFX7-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
664; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
665; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
666; GFX7-NEXT:    s_mov_b32 s7, 0xf000
667; GFX7-NEXT:    s_mov_b32 s6, -1
668; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
669; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0
670; GFX7-NEXT:    v_or_b32_e32 v0, 8, v0
671; GFX7-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
672; GFX7-NEXT:    s_waitcnt vmcnt(0)
673; GFX7-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
674; GFX7-NEXT:    s_endpgm
675;
676; GFX8-LABEL: s_buffer_load_index_across_bb:
677; GFX8:       ; %bb.0: ; %main_body
678; GFX8-NEXT:    s_getpc_b64 s[4:5]
679; GFX8-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
680; GFX8-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
681; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
682; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
683; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
684; GFX8-NEXT:    v_mov_b32_e32 v1, s4
685; GFX8-NEXT:    v_mov_b32_e32 v2, s5
686; GFX8-NEXT:    flat_store_dword v[1:2], v0
687; GFX8-NEXT:    v_or_b32_e32 v0, 8, v0
688; GFX8-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
689; GFX8-NEXT:    s_waitcnt vmcnt(0)
690; GFX8-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
691; GFX8-NEXT:    s_endpgm
692;
693; GFX9-LABEL: s_buffer_load_index_across_bb:
694; GFX9:       ; %bb.0: ; %main_body
695; GFX9-NEXT:    s_getpc_b64 s[4:5]
696; GFX9-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
697; GFX9-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
698; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
699; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
700; GFX9-NEXT:    v_mov_b32_e32 v1, 0
701; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
702; GFX9-NEXT:    global_store_dword v1, v0, s[4:5]
703; GFX9-NEXT:    v_or_b32_e32 v0, 8, v0
704; GFX9-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
705; GFX9-NEXT:    s_waitcnt vmcnt(0)
706; GFX9-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
707; GFX9-NEXT:    s_endpgm
708;
709; GFX10-LABEL: s_buffer_load_index_across_bb:
710; GFX10:       ; %bb.0: ; %main_body
711; GFX10-NEXT:    s_getpc_b64 s[4:5]
712; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
713; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
714; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
715; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
716; GFX10-NEXT:    v_mov_b32_e32 v1, 0
717; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
718; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
719; GFX10-NEXT:    v_or_b32_e32 v0, 8, v0
720; GFX10-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
721; GFX10-NEXT:    s_waitcnt vmcnt(0)
722; GFX10-NEXT:    exp mrt0 v0, v0, v0, v0 done vm
723; GFX10-NEXT:    s_endpgm
724;
725; GFX11-LABEL: s_buffer_load_index_across_bb:
726; GFX11:       ; %bb.0: ; %main_body
727; GFX11-NEXT:    s_getpc_b64 s[4:5]
728; GFX11-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
729; GFX11-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
730; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
731; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
732; GFX11-NEXT:    v_mov_b32_e32 v1, 0
733; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
734; GFX11-NEXT:    global_store_b32 v1, v0, s[4:5]
735; GFX11-NEXT:    v_or_b32_e32 v0, 8, v0
736; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
737; GFX11-NEXT:    s_waitcnt vmcnt(0)
738; GFX11-NEXT:    exp mrt0 v0, v0, v0, v0 done
739; GFX11-NEXT:    s_endpgm
740;
741; GFX12-LABEL: s_buffer_load_index_across_bb:
742; GFX12:       ; %bb.0: ; %main_body
743; GFX12-NEXT:    s_getpc_b64 s[4:5]
744; GFX12-NEXT:    s_sext_i32_i16 s5, s5
745; GFX12-NEXT:    s_add_co_u32 s4, s4, gv@gotpcrel32@lo+8
746; GFX12-NEXT:    s_add_co_ci_u32 s5, s5, gv@gotpcrel32@hi+16
747; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
748; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
749; GFX12-NEXT:    v_mov_b32_e32 v1, 0
750; GFX12-NEXT:    s_wait_kmcnt 0x0
751; GFX12-NEXT:    global_store_b32 v1, v0, s[4:5]
752; GFX12-NEXT:    v_or_b32_e32 v0, 8, v0
753; GFX12-NEXT:    buffer_load_b32 v0, v0, s[0:3], null offen
754; GFX12-NEXT:    s_wait_loadcnt 0x0
755; GFX12-NEXT:    export mrt0 v0, v0, v0, v0 done
756; GFX12-NEXT:    s_endpgm
757main_body:
  ; The shifted index is stored to @gv in this block and reused in bb1.
758  %tmp = shl i32 %index, 4
759  store i32 %tmp, ptr addrspace(1) @gv
760  br label %bb1
761
762bb1:                                              ; preds = %main_body
  ; The low four bits of %tmp are zero after the shift by 4, so this OR sets
  ; a bit that is known to be clear.
763  %tmp1 = or i32 %tmp, 8
764  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
765  %bitcast = bitcast i32 %load to float
766  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
767  ret void
768}
769
; Two dword s.buffer.load calls in %bb1 whose offsets are (%index << 4) | 8 and
; that value | 4, where the shift is computed in the preceding block and %index
; is not an inreg argument. Every target is expected to emit a single 64-bit
; buffer load using "offen offset:8" on the shifted index.
770define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
771; GFX678910-LABEL: s_buffer_load_index_across_bb_merged:
772; GFX678910:       ; %bb.0: ; %main_body
773; GFX678910-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
774; GFX678910-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen offset:8
775; GFX678910-NEXT:    s_waitcnt vmcnt(0)
776; GFX678910-NEXT:    exp mrt0 v0, v1, v0, v0 done vm
777; GFX678910-NEXT:    s_endpgm
778;
779; GFX11-LABEL: s_buffer_load_index_across_bb_merged:
780; GFX11:       ; %bb.0: ; %main_body
781; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
782; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:8
783; GFX11-NEXT:    s_waitcnt vmcnt(0)
784; GFX11-NEXT:    exp mrt0 v0, v1, v0, v0 done
785; GFX11-NEXT:    s_endpgm
786;
787; GFX12-LABEL: s_buffer_load_index_across_bb_merged:
788; GFX12:       ; %bb.0: ; %main_body
789; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
790; GFX12-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], null offen offset:8
791; GFX12-NEXT:    s_wait_loadcnt 0x0
792; GFX12-NEXT:    export mrt0 v0, v1, v0, v0 done
793; GFX12-NEXT:    s_endpgm
794main_body:
795  %tmp = shl i32 %index, 4
796  br label %bb1
797
798bb1:                                              ; preds = %main_body
799  %tmp1 = or i32 %tmp, 8
800  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
801  %tmp2 = or i32 %tmp1, 4
802  %load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0)
803  %bitcast = bitcast i32 %load to float
804  %bitcast2 = bitcast i32 %load2 to float
805  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true)
806  ret void
807}
808
; Constant offset -1. No target is expected to fold it into the instruction:
; each one materializes -1 in s4 and uses that SGPR as the offset operand
; (GFX6 with an extra s_nop 3; GFX9 and newer also print "offset:0x0").
809define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
810; GFX6-LABEL: s_buffer_load_imm_neg1:
811; GFX6:       ; %bb.0:
812; GFX6-NEXT:    s_mov_b32 s4, -1
813; GFX6-NEXT:    s_nop 3
814; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
815; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
816; GFX6-NEXT:    ; return to shader part epilog
817;
818; GFX78-LABEL: s_buffer_load_imm_neg1:
819; GFX78:       ; %bb.0:
820; GFX78-NEXT:    s_mov_b32 s4, -1
821; GFX78-NEXT:    s_buffer_load_dword s0, s[0:3], s4
822; GFX78-NEXT:    s_waitcnt lgkmcnt(0)
823; GFX78-NEXT:    ; return to shader part epilog
824;
825; GFX910-LABEL: s_buffer_load_imm_neg1:
826; GFX910:       ; %bb.0:
827; GFX910-NEXT:    s_mov_b32 s4, -1
828; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
829; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
830; GFX910-NEXT:    ; return to shader part epilog
831;
832; GFX11-LABEL: s_buffer_load_imm_neg1:
833; GFX11:       ; %bb.0:
834; GFX11-NEXT:    s_mov_b32 s4, -1
835; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
836; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
837; GFX11-NEXT:    ; return to shader part epilog
838;
839; GFX12-LABEL: s_buffer_load_imm_neg1:
840; GFX12:       ; %bb.0:
841; GFX12-NEXT:    s_mov_b32 s4, -1
842; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
843; GFX12-NEXT:    s_wait_kmcnt 0x0
844; GFX12-NEXT:    ; return to shader part epilog
845  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
846  ret i32 %load
847}
848
; Constant offset -4 (0xfffffffc). GFX7 is expected to encode it as the literal
; 0x3fffffff (the offset shifted right by two); every other target is expected
; to materialize -4 in s4 and use the SGPR as the offset operand.
849define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
850; GFX6-LABEL: s_buffer_load_imm_neg4:
851; GFX6:       ; %bb.0:
852; GFX6-NEXT:    s_mov_b32 s4, -4
853; GFX6-NEXT:    s_nop 3
854; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
855; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
856; GFX6-NEXT:    ; return to shader part epilog
857;
858; GFX7-LABEL: s_buffer_load_imm_neg4:
859; GFX7:       ; %bb.0:
860; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fffffff
861; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
862; GFX7-NEXT:    ; return to shader part epilog
863;
864; GFX8-LABEL: s_buffer_load_imm_neg4:
865; GFX8:       ; %bb.0:
866; GFX8-NEXT:    s_mov_b32 s4, -4
867; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
868; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
869; GFX8-NEXT:    ; return to shader part epilog
870;
871; GFX910-LABEL: s_buffer_load_imm_neg4:
872; GFX910:       ; %bb.0:
873; GFX910-NEXT:    s_mov_b32 s4, -4
874; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
875; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
876; GFX910-NEXT:    ; return to shader part epilog
877;
878; GFX11-LABEL: s_buffer_load_imm_neg4:
879; GFX11:       ; %bb.0:
880; GFX11-NEXT:    s_mov_b32 s4, -4
881; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
882; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
883; GFX11-NEXT:    ; return to shader part epilog
884;
885; GFX12-LABEL: s_buffer_load_imm_neg4:
886; GFX12:       ; %bb.0:
887; GFX12-NEXT:    s_mov_b32 s4, -4
888; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
889; GFX12-NEXT:    s_wait_kmcnt 0x0
890; GFX12-NEXT:    ; return to shader part epilog
891  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
892  ret i32 %load
893}
894
; Constant offset -8 (0xfffffff8). GFX7 is expected to encode it as the literal
; 0x3ffffffe (the offset shifted right by two); every other target is expected
; to materialize -8 in s4 and use the SGPR as the offset operand.
895define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
896; GFX6-LABEL: s_buffer_load_imm_neg8:
897; GFX6:       ; %bb.0:
898; GFX6-NEXT:    s_mov_b32 s4, -8
899; GFX6-NEXT:    s_nop 3
900; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
901; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
902; GFX6-NEXT:    ; return to shader part epilog
903;
904; GFX7-LABEL: s_buffer_load_imm_neg8:
905; GFX7:       ; %bb.0:
906; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3ffffffe
907; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
908; GFX7-NEXT:    ; return to shader part epilog
909;
910; GFX8-LABEL: s_buffer_load_imm_neg8:
911; GFX8:       ; %bb.0:
912; GFX8-NEXT:    s_mov_b32 s4, -8
913; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
914; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
915; GFX8-NEXT:    ; return to shader part epilog
916;
917; GFX910-LABEL: s_buffer_load_imm_neg8:
918; GFX910:       ; %bb.0:
919; GFX910-NEXT:    s_mov_b32 s4, -8
920; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
921; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
922; GFX910-NEXT:    ; return to shader part epilog
923;
924; GFX11-LABEL: s_buffer_load_imm_neg8:
925; GFX11:       ; %bb.0:
926; GFX11-NEXT:    s_mov_b32 s4, -8
927; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
928; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
929; GFX11-NEXT:    ; return to shader part epilog
930;
931; GFX12-LABEL: s_buffer_load_imm_neg8:
932; GFX12:       ; %bb.0:
933; GFX12-NEXT:    s_mov_b32 s4, -8
934; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
935; GFX12-NEXT:    s_wait_kmcnt 0x0
936; GFX12-NEXT:    ; return to shader part epilog
937  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
938  ret i32 %load
939}
940
; Constant offset with only bit 31 set (-2147483648 = 0x80000000). GFX7 is
; expected to encode it as the literal 0x20000000; every other target is
; expected to build the value in s4 with "s_brev_b32 s4, 1" and use the SGPR.
941define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
942; GFX6-LABEL: s_buffer_load_imm_bit31:
943; GFX6:       ; %bb.0:
944; GFX6-NEXT:    s_brev_b32 s4, 1
945; GFX6-NEXT:    s_nop 3
946; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
947; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
948; GFX6-NEXT:    ; return to shader part epilog
949;
950; GFX7-LABEL: s_buffer_load_imm_bit31:
951; GFX7:       ; %bb.0:
952; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x20000000
953; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
954; GFX7-NEXT:    ; return to shader part epilog
955;
956; GFX8-LABEL: s_buffer_load_imm_bit31:
957; GFX8:       ; %bb.0:
958; GFX8-NEXT:    s_brev_b32 s4, 1
959; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
960; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
961; GFX8-NEXT:    ; return to shader part epilog
962;
963; GFX910-LABEL: s_buffer_load_imm_bit31:
964; GFX910:       ; %bb.0:
965; GFX910-NEXT:    s_brev_b32 s4, 1
966; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
967; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
968; GFX910-NEXT:    ; return to shader part epilog
969;
970; GFX11-LABEL: s_buffer_load_imm_bit31:
971; GFX11:       ; %bb.0:
972; GFX11-NEXT:    s_brev_b32 s4, 1
973; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
974; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
975; GFX11-NEXT:    ; return to shader part epilog
976;
977; GFX12-LABEL: s_buffer_load_imm_bit31:
978; GFX12:       ; %bb.0:
979; GFX12-NEXT:    s_brev_b32 s4, 1
980; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
981; GFX12-NEXT:    s_wait_kmcnt 0x0
982; GFX12-NEXT:    ; return to shader part epilog
983  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
984  ret i32 %load
985}
986
; Constant offset with only bit 30 set (1073741824 = 0x40000000). GFX7 is
; expected to encode it as the literal 0x10000000; every other target is
; expected to move the value into s4, printed as the inline constant 2.0
; (whose bit pattern is 0x40000000), and use the SGPR as the offset operand.
987define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
988; GFX6-LABEL: s_buffer_load_imm_bit30:
989; GFX6:       ; %bb.0:
990; GFX6-NEXT:    s_mov_b32 s4, 2.0
991; GFX6-NEXT:    s_nop 3
992; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
993; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
994; GFX6-NEXT:    ; return to shader part epilog
995;
996; GFX7-LABEL: s_buffer_load_imm_bit30:
997; GFX7:       ; %bb.0:
998; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x10000000
999; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1000; GFX7-NEXT:    ; return to shader part epilog
1001;
1002; GFX8-LABEL: s_buffer_load_imm_bit30:
1003; GFX8:       ; %bb.0:
1004; GFX8-NEXT:    s_mov_b32 s4, 2.0
1005; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1006; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1007; GFX8-NEXT:    ; return to shader part epilog
1008;
1009; GFX910-LABEL: s_buffer_load_imm_bit30:
1010; GFX910:       ; %bb.0:
1011; GFX910-NEXT:    s_mov_b32 s4, 2.0
1012; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1013; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
1014; GFX910-NEXT:    ; return to shader part epilog
1015;
1016; GFX11-LABEL: s_buffer_load_imm_bit30:
1017; GFX11:       ; %bb.0:
1018; GFX11-NEXT:    s_mov_b32 s4, 2.0
1019; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1020; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1021; GFX11-NEXT:    ; return to shader part epilog
1022;
1023; GFX12-LABEL: s_buffer_load_imm_bit30:
1024; GFX12:       ; %bb.0:
1025; GFX12-NEXT:    s_mov_b32 s4, 2.0
1026; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1027; GFX12-NEXT:    s_wait_kmcnt 0x0
1028; GFX12-NEXT:    ; return to shader part epilog
1029  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
1030  ret i32 %load
1031}
1032
; Constant offset with only bit 29 set (536870912 = 0x20000000). GFX7 is
; expected to encode it as the literal 0x8000000; every other target is
; expected to build the value in s4 with "s_brev_b32 s4, 4" and use the SGPR.
1033define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
1034; GFX6-LABEL: s_buffer_load_imm_bit29:
1035; GFX6:       ; %bb.0:
1036; GFX6-NEXT:    s_brev_b32 s4, 4
1037; GFX6-NEXT:    s_nop 3
1038; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1039; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1040; GFX6-NEXT:    ; return to shader part epilog
1041;
1042; GFX7-LABEL: s_buffer_load_imm_bit29:
1043; GFX7:       ; %bb.0:
1044; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x8000000
1045; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1046; GFX7-NEXT:    ; return to shader part epilog
1047;
1048; GFX8-LABEL: s_buffer_load_imm_bit29:
1049; GFX8:       ; %bb.0:
1050; GFX8-NEXT:    s_brev_b32 s4, 4
1051; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1052; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1053; GFX8-NEXT:    ; return to shader part epilog
1054;
1055; GFX910-LABEL: s_buffer_load_imm_bit29:
1056; GFX910:       ; %bb.0:
1057; GFX910-NEXT:    s_brev_b32 s4, 4
1058; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1059; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
1060; GFX910-NEXT:    ; return to shader part epilog
1061;
1062; GFX11-LABEL: s_buffer_load_imm_bit29:
1063; GFX11:       ; %bb.0:
1064; GFX11-NEXT:    s_brev_b32 s4, 4
1065; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1066; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1067; GFX11-NEXT:    ; return to shader part epilog
1068;
1069; GFX12-LABEL: s_buffer_load_imm_bit29:
1070; GFX12:       ; %bb.0:
1071; GFX12-NEXT:    s_brev_b32 s4, 4
1072; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1073; GFX12-NEXT:    s_wait_kmcnt 0x0
1074; GFX12-NEXT:    ; return to shader part epilog
1075  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
1076  ret i32 %load
1077}
1078
; Constant offset with only bit 21 set (2097152 = 0x200000). GFX12 is expected
; to use it directly as the immediate 0x200000 and GFX7 as the literal 0x80000;
; GFX6, GFX8, GFX9, GFX10 and GFX11 are expected to pass it through s4.
1079define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
1080; GFX6-LABEL: s_buffer_load_imm_bit21:
1081; GFX6:       ; %bb.0:
1082; GFX6-NEXT:    s_mov_b32 s4, 0x200000
1083; GFX6-NEXT:    s_nop 3
1084; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1085; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1086; GFX6-NEXT:    ; return to shader part epilog
1087;
1088; GFX7-LABEL: s_buffer_load_imm_bit21:
1089; GFX7:       ; %bb.0:
1090; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x80000
1091; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1092; GFX7-NEXT:    ; return to shader part epilog
1093;
1094; GFX8-LABEL: s_buffer_load_imm_bit21:
1095; GFX8:       ; %bb.0:
1096; GFX8-NEXT:    s_mov_b32 s4, 0x200000
1097; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1098; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1099; GFX8-NEXT:    ; return to shader part epilog
1100;
1101; GFX910-LABEL: s_buffer_load_imm_bit21:
1102; GFX910:       ; %bb.0:
1103; GFX910-NEXT:    s_mov_b32 s4, 0x200000
1104; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1105; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
1106; GFX910-NEXT:    ; return to shader part epilog
1107;
1108; GFX11-LABEL: s_buffer_load_imm_bit21:
1109; GFX11:       ; %bb.0:
1110; GFX11-NEXT:    s_mov_b32 s4, 0x200000
1111; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1112; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1113; GFX11-NEXT:    ; return to shader part epilog
1114;
1115; GFX12-LABEL: s_buffer_load_imm_bit21:
1116; GFX12:       ; %bb.0:
1117; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x200000
1118; GFX12-NEXT:    s_wait_kmcnt 0x0
1119; GFX12-NEXT:    ; return to shader part epilog
1120  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
1121  ret i32 %load
1122}
1123
; Constant offset with only bit 20 set (1048576 = 0x100000). GFX12 is expected
; to use it directly as the immediate 0x100000 and GFX7 as the literal 0x40000;
; GFX6, GFX8, GFX9, GFX10 and GFX11 are expected to pass it through s4.
1124define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
1125; GFX6-LABEL: s_buffer_load_imm_bit20:
1126; GFX6:       ; %bb.0:
1127; GFX6-NEXT:    s_mov_b32 s4, 0x100000
1128; GFX6-NEXT:    s_nop 3
1129; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1130; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1131; GFX6-NEXT:    ; return to shader part epilog
1132;
1133; GFX7-LABEL: s_buffer_load_imm_bit20:
1134; GFX7:       ; %bb.0:
1135; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x40000
1136; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1137; GFX7-NEXT:    ; return to shader part epilog
1138;
1139; GFX8-LABEL: s_buffer_load_imm_bit20:
1140; GFX8:       ; %bb.0:
1141; GFX8-NEXT:    s_mov_b32 s4, 0x100000
1142; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1143; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1144; GFX8-NEXT:    ; return to shader part epilog
1145;
1146; GFX910-LABEL: s_buffer_load_imm_bit20:
1147; GFX910:       ; %bb.0:
1148; GFX910-NEXT:    s_mov_b32 s4, 0x100000
1149; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1150; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
1151; GFX910-NEXT:    ; return to shader part epilog
1152;
1153; GFX11-LABEL: s_buffer_load_imm_bit20:
1154; GFX11:       ; %bb.0:
1155; GFX11-NEXT:    s_mov_b32 s4, 0x100000
1156; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1157; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1158; GFX11-NEXT:    ; return to shader part epilog
1159;
1160; GFX12-LABEL: s_buffer_load_imm_bit20:
1161; GFX12:       ; %bb.0:
1162; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x100000
1163; GFX12-NEXT:    s_wait_kmcnt 0x0
1164; GFX12-NEXT:    ; return to shader part epilog
1165  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
1166  ret i32 %load
1167}
1168
; Negative constant offset -1048576 (0xfff00000). GFX7 is expected to encode it
; as the literal 0x3ffc0000; every other target, GFX12 included, is expected to
; materialize 0xfff00000 in s4 and use the SGPR as the offset operand.
1169define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
1170; GFX6-LABEL: s_buffer_load_imm_neg_bit20:
1171; GFX6:       ; %bb.0:
1172; GFX6-NEXT:    s_mov_b32 s4, 0xfff00000
1173; GFX6-NEXT:    s_nop 3
1174; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1175; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1176; GFX6-NEXT:    ; return to shader part epilog
1177;
1178; GFX7-LABEL: s_buffer_load_imm_neg_bit20:
1179; GFX7:       ; %bb.0:
1180; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3ffc0000
1181; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1182; GFX7-NEXT:    ; return to shader part epilog
1183;
1184; GFX8-LABEL: s_buffer_load_imm_neg_bit20:
1185; GFX8:       ; %bb.0:
1186; GFX8-NEXT:    s_mov_b32 s4, 0xfff00000
1187; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1188; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1189; GFX8-NEXT:    ; return to shader part epilog
1190;
1191; GFX910-LABEL: s_buffer_load_imm_neg_bit20:
1192; GFX910:       ; %bb.0:
1193; GFX910-NEXT:    s_mov_b32 s4, 0xfff00000
1194; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1195; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
1196; GFX910-NEXT:    ; return to shader part epilog
1197;
1198; GFX11-LABEL: s_buffer_load_imm_neg_bit20:
1199; GFX11:       ; %bb.0:
1200; GFX11-NEXT:    s_mov_b32 s4, 0xfff00000
1201; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1202; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1203; GFX11-NEXT:    ; return to shader part epilog
1204;
1205; GFX12-LABEL: s_buffer_load_imm_neg_bit20:
1206; GFX12:       ; %bb.0:
1207; GFX12-NEXT:    s_mov_b32 s4, 0xfff00000
1208; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1209; GFX12-NEXT:    s_wait_kmcnt 0x0
1210; GFX12-NEXT:    ; return to shader part epilog
1211  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32  -1048576, i32 0)
1212  ret i32 %load
1213}
1214
; Constant offset with only bit 19 set (524288 = 0x80000). GFX6 is expected to
; pass it through s4 and GFX7 to encode the literal 0x20000; GFX8 and newer are
; expected to use 0x80000 directly as the immediate offset.
1215define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
1216; GFX6-LABEL: s_buffer_load_imm_bit19:
1217; GFX6:       ; %bb.0:
1218; GFX6-NEXT:    s_mov_b32 s4, 0x80000
1219; GFX6-NEXT:    s_nop 3
1220; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1221; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1222; GFX6-NEXT:    ; return to shader part epilog
1223;
1224; GFX7-LABEL: s_buffer_load_imm_bit19:
1225; GFX7:       ; %bb.0:
1226; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x20000
1227; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1228; GFX7-NEXT:    ; return to shader part epilog
1229;
1230; GFX8910-LABEL: s_buffer_load_imm_bit19:
1231; GFX8910:       ; %bb.0:
1232; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x80000
1233; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1234; GFX8910-NEXT:    ; return to shader part epilog
1235;
1236; GFX11-LABEL: s_buffer_load_imm_bit19:
1237; GFX11:       ; %bb.0:
1238; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x80000
1239; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1240; GFX11-NEXT:    ; return to shader part epilog
1241;
1242; GFX12-LABEL: s_buffer_load_imm_bit19:
1243; GFX12:       ; %bb.0:
1244; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x80000
1245; GFX12-NEXT:    s_wait_kmcnt 0x0
1246; GFX12-NEXT:    ; return to shader part epilog
1247  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
1248  ret i32 %load
1249}
1250
; Negative constant offset -524288 (0xfff80000). GFX7 is expected to encode it
; as the literal 0x3ffe0000; every other target is expected to materialize
; 0xfff80000 in s4 and use the SGPR as the offset operand.
1251define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
1252; GFX6-LABEL: s_buffer_load_imm_neg_bit19:
1253; GFX6:       ; %bb.0:
1254; GFX6-NEXT:    s_mov_b32 s4, 0xfff80000
1255; GFX6-NEXT:    s_nop 3
1256; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1257; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1258; GFX6-NEXT:    ; return to shader part epilog
1259;
1260; GFX7-LABEL: s_buffer_load_imm_neg_bit19:
1261; GFX7:       ; %bb.0:
1262; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3ffe0000
1263; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1264; GFX7-NEXT:    ; return to shader part epilog
1265;
1266; GFX8-LABEL: s_buffer_load_imm_neg_bit19:
1267; GFX8:       ; %bb.0:
1268; GFX8-NEXT:    s_mov_b32 s4, 0xfff80000
1269; GFX8-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1270; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1271; GFX8-NEXT:    ; return to shader part epilog
1272;
1273; GFX910-LABEL: s_buffer_load_imm_neg_bit19:
1274; GFX910:       ; %bb.0:
1275; GFX910-NEXT:    s_mov_b32 s4, 0xfff80000
1276; GFX910-NEXT:    s_buffer_load_dword s0, s[0:3], s4 offset:0x0
1277; GFX910-NEXT:    s_waitcnt lgkmcnt(0)
1278; GFX910-NEXT:    ; return to shader part epilog
1279;
1280; GFX11-LABEL: s_buffer_load_imm_neg_bit19:
1281; GFX11:       ; %bb.0:
1282; GFX11-NEXT:    s_mov_b32 s4, 0xfff80000
1283; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1284; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1285; GFX11-NEXT:    ; return to shader part epilog
1286;
1287; GFX12-LABEL: s_buffer_load_imm_neg_bit19:
1288; GFX12:       ; %bb.0:
1289; GFX12-NEXT:    s_mov_b32 s4, 0xfff80000
1290; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], s4 offset:0x0
1291; GFX12-NEXT:    s_wait_kmcnt 0x0
1292; GFX12-NEXT:    ; return to shader part epilog
1293  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
1294  ret i32 %load
1295}
1296
; Constant offset 255, which is not a multiple of 4. GFX6 and GFX7 are expected
; to pass it through s4 (s_movk_i32 s4, 0xff); GFX8 and newer are expected to
; use 0xff directly as the immediate offset.
1297define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
1298; GFX6-LABEL: s_buffer_load_imm_255:
1299; GFX6:       ; %bb.0:
1300; GFX6-NEXT:    s_movk_i32 s4, 0xff
1301; GFX6-NEXT:    s_nop 3
1302; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1303; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1304; GFX6-NEXT:    ; return to shader part epilog
1305;
1306; GFX7-LABEL: s_buffer_load_imm_255:
1307; GFX7:       ; %bb.0:
1308; GFX7-NEXT:    s_movk_i32 s4, 0xff
1309; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1310; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1311; GFX7-NEXT:    ; return to shader part epilog
1312;
1313; GFX8910-LABEL: s_buffer_load_imm_255:
1314; GFX8910:       ; %bb.0:
1315; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0xff
1316; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1317; GFX8910-NEXT:    ; return to shader part epilog
1318;
1319; GFX11-LABEL: s_buffer_load_imm_255:
1320; GFX11:       ; %bb.0:
1321; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0xff
1322; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1323; GFX11-NEXT:    ; return to shader part epilog
1324;
1325; GFX12-LABEL: s_buffer_load_imm_255:
1326; GFX12:       ; %bb.0:
1327; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0xff
1328; GFX12-NEXT:    s_wait_kmcnt 0x0
1329; GFX12-NEXT:    ; return to shader part epilog
1330  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
1331  ret i32 %load
1332}
1333
; Constant offset 256. GFX6 and GFX7 are expected to encode it as the immediate
; 0x40 (256 / 4); GFX8 and newer are expected to encode it as 0x100.
1334define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
1335; GFX67-LABEL: s_buffer_load_imm_256:
1336; GFX67:       ; %bb.0:
1337; GFX67-NEXT:    s_buffer_load_dword s0, s[0:3], 0x40
1338; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1339; GFX67-NEXT:    ; return to shader part epilog
1340;
1341; GFX8910-LABEL: s_buffer_load_imm_256:
1342; GFX8910:       ; %bb.0:
1343; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x100
1344; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1345; GFX8910-NEXT:    ; return to shader part epilog
1346;
1347; GFX11-LABEL: s_buffer_load_imm_256:
1348; GFX11:       ; %bb.0:
1349; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x100
1350; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1351; GFX11-NEXT:    ; return to shader part epilog
1352;
1353; GFX12-LABEL: s_buffer_load_imm_256:
1354; GFX12:       ; %bb.0:
1355; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x100
1356; GFX12-NEXT:    s_wait_kmcnt 0x0
1357; GFX12-NEXT:    ; return to shader part epilog
1358  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
1359  ret i32 %load
1360}
1361
; Constant offset 1016. GFX6 and GFX7 are expected to encode it as the
; immediate 0xfe (1016 / 4); GFX8 and newer are expected to encode it as 0x3f8.
1362define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
1363; GFX67-LABEL: s_buffer_load_imm_1016:
1364; GFX67:       ; %bb.0:
1365; GFX67-NEXT:    s_buffer_load_dword s0, s[0:3], 0xfe
1366; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1367; GFX67-NEXT:    ; return to shader part epilog
1368;
1369; GFX8910-LABEL: s_buffer_load_imm_1016:
1370; GFX8910:       ; %bb.0:
1371; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3f8
1372; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1373; GFX8910-NEXT:    ; return to shader part epilog
1374;
1375; GFX11-LABEL: s_buffer_load_imm_1016:
1376; GFX11:       ; %bb.0:
1377; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3f8
1378; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1379; GFX11-NEXT:    ; return to shader part epilog
1380;
1381; GFX12-LABEL: s_buffer_load_imm_1016:
1382; GFX12:       ; %bb.0:
1383; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3f8
1384; GFX12-NEXT:    s_wait_kmcnt 0x0
1385; GFX12-NEXT:    ; return to shader part epilog
1386  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
1387  ret i32 %load
1388}
1389
; Constant offset 1020. GFX6 and GFX7 are expected to encode it as the
; immediate 0xff (1020 / 4); GFX8 and newer are expected to encode it as 0x3fc.
1390define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
1391; GFX67-LABEL: s_buffer_load_imm_1020:
1392; GFX67:       ; %bb.0:
1393; GFX67-NEXT:    s_buffer_load_dword s0, s[0:3], 0xff
1394; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1395; GFX67-NEXT:    ; return to shader part epilog
1396;
1397; GFX8910-LABEL: s_buffer_load_imm_1020:
1398; GFX8910:       ; %bb.0:
1399; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fc
1400; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1401; GFX8910-NEXT:    ; return to shader part epilog
1402;
1403; GFX11-LABEL: s_buffer_load_imm_1020:
1404; GFX11:       ; %bb.0:
1405; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3fc
1406; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1407; GFX11-NEXT:    ; return to shader part epilog
1408;
1409; GFX12-LABEL: s_buffer_load_imm_1020:
1410; GFX12:       ; %bb.0:
1411; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3fc
1412; GFX12-NEXT:    s_wait_kmcnt 0x0
1413; GFX12-NEXT:    ; return to shader part epilog
1414  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
1415  ret i32 %load
1416}
1417
; Constant offset 1021, which is not a multiple of 4. GFX6 and GFX7 are
; expected to pass it through s4 (s_movk_i32 s4, 0x3fd); GFX8 and newer are
; expected to use 0x3fd directly as the immediate offset.
1418define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
1419; GFX6-LABEL: s_buffer_load_imm_1021:
1420; GFX6:       ; %bb.0:
1421; GFX6-NEXT:    s_movk_i32 s4, 0x3fd
1422; GFX6-NEXT:    s_nop 3
1423; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1424; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1425; GFX6-NEXT:    ; return to shader part epilog
1426;
1427; GFX7-LABEL: s_buffer_load_imm_1021:
1428; GFX7:       ; %bb.0:
1429; GFX7-NEXT:    s_movk_i32 s4, 0x3fd
1430; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1431; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1432; GFX7-NEXT:    ; return to shader part epilog
1433;
1434; GFX8910-LABEL: s_buffer_load_imm_1021:
1435; GFX8910:       ; %bb.0:
1436; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x3fd
1437; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1438; GFX8910-NEXT:    ; return to shader part epilog
1439;
1440; GFX11-LABEL: s_buffer_load_imm_1021:
1441; GFX11:       ; %bb.0:
1442; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3fd
1443; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1444; GFX11-NEXT:    ; return to shader part epilog
1445;
1446; GFX12-LABEL: s_buffer_load_imm_1021:
1447; GFX12:       ; %bb.0:
1448; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x3fd
1449; GFX12-NEXT:    s_wait_kmcnt 0x0
1450; GFX12-NEXT:    ; return to shader part epilog
1451  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
1452  ret i32 %load
1453}
1454
; Constant offset 1024. GFX6 is expected to pass it through s4
; (s_movk_i32 s4, 0x400), GFX7 to encode the literal 0x100 (1024 / 4), and
; GFX8 and newer to use 0x400 directly as the immediate offset.
1455define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
1456; GFX6-LABEL: s_buffer_load_imm_1024:
1457; GFX6:       ; %bb.0:
1458; GFX6-NEXT:    s_movk_i32 s4, 0x400
1459; GFX6-NEXT:    s_nop 3
1460; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1461; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1462; GFX6-NEXT:    ; return to shader part epilog
1463;
1464; GFX7-LABEL: s_buffer_load_imm_1024:
1465; GFX7:       ; %bb.0:
1466; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x100
1467; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1468; GFX7-NEXT:    ; return to shader part epilog
1469;
1470; GFX8910-LABEL: s_buffer_load_imm_1024:
1471; GFX8910:       ; %bb.0:
1472; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x400
1473; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1474; GFX8910-NEXT:    ; return to shader part epilog
1475;
1476; GFX11-LABEL: s_buffer_load_imm_1024:
1477; GFX11:       ; %bb.0:
1478; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x400
1479; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1480; GFX11-NEXT:    ; return to shader part epilog
1481;
1482; GFX12-LABEL: s_buffer_load_imm_1024:
1483; GFX12:       ; %bb.0:
1484; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x400
1485; GFX12-NEXT:    s_wait_kmcnt 0x0
1486; GFX12-NEXT:    ; return to shader part epilog
1487  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
1488  ret i32 %load
1489}
1490
; Constant offset 1025, which is not a multiple of 4. GFX6 and GFX7 are
; expected to pass it through s4 (s_movk_i32 s4, 0x401); GFX8 and newer are
; expected to use 0x401 directly as the immediate offset.
1491define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
1492; GFX6-LABEL: s_buffer_load_imm_1025:
1493; GFX6:       ; %bb.0:
1494; GFX6-NEXT:    s_movk_i32 s4, 0x401
1495; GFX6-NEXT:    s_nop 3
1496; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1497; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1498; GFX6-NEXT:    ; return to shader part epilog
1499;
1500; GFX7-LABEL: s_buffer_load_imm_1025:
1501; GFX7:       ; %bb.0:
1502; GFX7-NEXT:    s_movk_i32 s4, 0x401
1503; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1504; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1505; GFX7-NEXT:    ; return to shader part epilog
1506;
1507; GFX8910-LABEL: s_buffer_load_imm_1025:
1508; GFX8910:       ; %bb.0:
1509; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x401
1510; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1511; GFX8910-NEXT:    ; return to shader part epilog
1512;
1513; GFX11-LABEL: s_buffer_load_imm_1025:
1514; GFX11:       ; %bb.0:
1515; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x401
1516; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1517; GFX11-NEXT:    ; return to shader part epilog
1518;
1519; GFX12-LABEL: s_buffer_load_imm_1025:
1520; GFX12:       ; %bb.0:
1521; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x401
1522; GFX12-NEXT:    s_wait_kmcnt 0x0
1523; GFX12-NEXT:    ; return to shader part epilog
1524  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
1525  ret i32 %load
1526}
1527
; Constant offset 1028, a multiple of 4 just above 1024. The IR previously
; passed 1024, which made this test an exact duplicate of
; s_buffer_load_imm_1024 despite its name, so offset 1028 was never exercised.
; Expected forms mirror the 1024 case: GFX6 passes the byte offset through s4,
; GFX7 encodes the literal 0x101 (1028 / 4), and GFX8 and newer use 0x404
; directly as the immediate offset.
; NOTE(review): the expected lines below were updated by hand by analogy with
; the 1024 and 1025 cases; regenerate them with utils/update_llc_test_checks.py
; to confirm against llc output.
1528define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
1529; GFX6-LABEL: s_buffer_load_imm_1028:
1530; GFX6:       ; %bb.0:
1531; GFX6-NEXT:    s_movk_i32 s4, 0x404
1532; GFX6-NEXT:    s_nop 3
1533; GFX6-NEXT:    s_buffer_load_dword s0, s[0:3], s4
1534; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1535; GFX6-NEXT:    ; return to shader part epilog
1536;
1537; GFX7-LABEL: s_buffer_load_imm_1028:
1538; GFX7:       ; %bb.0:
1539; GFX7-NEXT:    s_buffer_load_dword s0, s[0:3], 0x101
1540; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1541; GFX7-NEXT:    ; return to shader part epilog
1542;
1543; GFX8910-LABEL: s_buffer_load_imm_1028:
1544; GFX8910:       ; %bb.0:
1545; GFX8910-NEXT:    s_buffer_load_dword s0, s[0:3], 0x404
1546; GFX8910-NEXT:    s_waitcnt lgkmcnt(0)
1547; GFX8910-NEXT:    ; return to shader part epilog
1548;
1549; GFX11-LABEL: s_buffer_load_imm_1028:
1550; GFX11:       ; %bb.0:
1551; GFX11-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x404
1552; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1553; GFX11-NEXT:    ; return to shader part epilog
1554;
1555; GFX12-LABEL: s_buffer_load_imm_1028:
1556; GFX12:       ; %bb.0:
1557; GFX12-NEXT:    s_buffer_load_b32 s0, s[0:3], 0x404
1558; GFX12-NEXT:    s_wait_kmcnt 0x0
1559; GFX12-NEXT:    ; return to shader part epilog
1560  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
1561  ret i32 %load
1562}
1563
; Declarations of the intrinsics called by the tests: the f32 export and the
; s.buffer.load overloads returning i32, <2 x i32>, <3 x i32> and <4 x i32>.
; Each load takes a <4 x i32> operand followed by two i32 operands.
1564declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
1565declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
1566declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
1567declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
1568declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
1569
1570;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1571; GFX6789: {{.*}}
1572; GFX789: {{.*}}
1573; GFX89: {{.*}}
1574