xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; The gfx1100 invocation below shares its check prefix with the gfx1010
; invocation, so the gfx11-only s_delay_alu insertion and VOPD (v_dual_*)
; formation are switched off there to keep its output comparable with gfx1010.
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX68 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GFX68 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefix=NOPRT %s
6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
7; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
8
; Scalar float 1D image load with the intrinsic's first operand set to 1.
; The checks expect a single image_load writing v0 with dmask:0x1, using the
; inreg %rsrc copied from s2..s9 into s[0:7]; the gfx1200 checks drop the
; unorm modifier and wait with s_wait_loadcnt instead of s_waitcnt vmcnt.
9define amdgpu_ps float @load_1d_f32_x(<8 x i32> inreg %rsrc, i32 %s) {
10; GFX68-LABEL: load_1d_f32_x:
11; GFX68:       ; %bb.0:
12; GFX68-NEXT:    s_mov_b32 s0, s2
13; GFX68-NEXT:    s_mov_b32 s1, s3
14; GFX68-NEXT:    s_mov_b32 s2, s4
15; GFX68-NEXT:    s_mov_b32 s3, s5
16; GFX68-NEXT:    s_mov_b32 s4, s6
17; GFX68-NEXT:    s_mov_b32 s5, s7
18; GFX68-NEXT:    s_mov_b32 s6, s8
19; GFX68-NEXT:    s_mov_b32 s7, s9
20; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm
21; GFX68-NEXT:    s_waitcnt vmcnt(0)
22; GFX68-NEXT:    ; return to shader part epilog
23;
24; GFX10-LABEL: load_1d_f32_x:
25; GFX10:       ; %bb.0:
26; GFX10-NEXT:    s_mov_b32 s0, s2
27; GFX10-NEXT:    s_mov_b32 s1, s3
28; GFX10-NEXT:    s_mov_b32 s2, s4
29; GFX10-NEXT:    s_mov_b32 s3, s5
30; GFX10-NEXT:    s_mov_b32 s4, s6
31; GFX10-NEXT:    s_mov_b32 s5, s7
32; GFX10-NEXT:    s_mov_b32 s6, s8
33; GFX10-NEXT:    s_mov_b32 s7, s9
34; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
35; GFX10-NEXT:    s_waitcnt vmcnt(0)
36; GFX10-NEXT:    ; return to shader part epilog
37;
38; NOPRT-LABEL: load_1d_f32_x:
39; NOPRT:       ; %bb.0:
40; NOPRT-NEXT:    s_mov_b32 s0, s2
41; NOPRT-NEXT:    s_mov_b32 s1, s3
42; NOPRT-NEXT:    s_mov_b32 s2, s4
43; NOPRT-NEXT:    s_mov_b32 s3, s5
44; NOPRT-NEXT:    s_mov_b32 s4, s6
45; NOPRT-NEXT:    s_mov_b32 s5, s7
46; NOPRT-NEXT:    s_mov_b32 s6, s8
47; NOPRT-NEXT:    s_mov_b32 s7, s9
48; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
49; NOPRT-NEXT:    s_waitcnt vmcnt(0)
50; NOPRT-NEXT:    ; return to shader part epilog
51;
52; GFX12-LABEL: load_1d_f32_x:
53; GFX12:       ; %bb.0:
54; GFX12-NEXT:    s_mov_b32 s0, s2
55; GFX12-NEXT:    s_mov_b32 s1, s3
56; GFX12-NEXT:    s_mov_b32 s2, s4
57; GFX12-NEXT:    s_mov_b32 s3, s5
58; GFX12-NEXT:    s_mov_b32 s4, s6
59; GFX12-NEXT:    s_mov_b32 s5, s7
60; GFX12-NEXT:    s_mov_b32 s6, s8
61; GFX12-NEXT:    s_mov_b32 s7, s9
62; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
63; GFX12-NEXT:    s_wait_loadcnt 0x0
64; GFX12-NEXT:    ; return to shader part epilog
65  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
66  ret float %v
67}
68
; Scalar float 1D image load with the intrinsic's first operand set to 2.
; Identical to the dmask-1 case except that every expected image_load carries
; dmask:0x2; the result is still a single register (v0).
69define amdgpu_ps float @load_1d_f32_y(<8 x i32> inreg %rsrc, i32 %s) {
70; GFX68-LABEL: load_1d_f32_y:
71; GFX68:       ; %bb.0:
72; GFX68-NEXT:    s_mov_b32 s0, s2
73; GFX68-NEXT:    s_mov_b32 s1, s3
74; GFX68-NEXT:    s_mov_b32 s2, s4
75; GFX68-NEXT:    s_mov_b32 s3, s5
76; GFX68-NEXT:    s_mov_b32 s4, s6
77; GFX68-NEXT:    s_mov_b32 s5, s7
78; GFX68-NEXT:    s_mov_b32 s6, s8
79; GFX68-NEXT:    s_mov_b32 s7, s9
80; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm
81; GFX68-NEXT:    s_waitcnt vmcnt(0)
82; GFX68-NEXT:    ; return to shader part epilog
83;
84; GFX10-LABEL: load_1d_f32_y:
85; GFX10:       ; %bb.0:
86; GFX10-NEXT:    s_mov_b32 s0, s2
87; GFX10-NEXT:    s_mov_b32 s1, s3
88; GFX10-NEXT:    s_mov_b32 s2, s4
89; GFX10-NEXT:    s_mov_b32 s3, s5
90; GFX10-NEXT:    s_mov_b32 s4, s6
91; GFX10-NEXT:    s_mov_b32 s5, s7
92; GFX10-NEXT:    s_mov_b32 s6, s8
93; GFX10-NEXT:    s_mov_b32 s7, s9
94; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
95; GFX10-NEXT:    s_waitcnt vmcnt(0)
96; GFX10-NEXT:    ; return to shader part epilog
97;
98; NOPRT-LABEL: load_1d_f32_y:
99; NOPRT:       ; %bb.0:
100; NOPRT-NEXT:    s_mov_b32 s0, s2
101; NOPRT-NEXT:    s_mov_b32 s1, s3
102; NOPRT-NEXT:    s_mov_b32 s2, s4
103; NOPRT-NEXT:    s_mov_b32 s3, s5
104; NOPRT-NEXT:    s_mov_b32 s4, s6
105; NOPRT-NEXT:    s_mov_b32 s5, s7
106; NOPRT-NEXT:    s_mov_b32 s6, s8
107; NOPRT-NEXT:    s_mov_b32 s7, s9
108; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm
109; NOPRT-NEXT:    s_waitcnt vmcnt(0)
110; NOPRT-NEXT:    ; return to shader part epilog
111;
112; GFX12-LABEL: load_1d_f32_y:
113; GFX12:       ; %bb.0:
114; GFX12-NEXT:    s_mov_b32 s0, s2
115; GFX12-NEXT:    s_mov_b32 s1, s3
116; GFX12-NEXT:    s_mov_b32 s2, s4
117; GFX12-NEXT:    s_mov_b32 s3, s5
118; GFX12-NEXT:    s_mov_b32 s4, s6
119; GFX12-NEXT:    s_mov_b32 s5, s7
120; GFX12-NEXT:    s_mov_b32 s6, s8
121; GFX12-NEXT:    s_mov_b32 s7, s9
122; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D
123; GFX12-NEXT:    s_wait_loadcnt 0x0
124; GFX12-NEXT:    ; return to shader part epilog
125  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
126  ret float %v
127}
128
; Scalar float 1D image load with the intrinsic's first operand set to 4.
; Every expected image_load carries dmask:0x4 and writes the single result
; register v0.
129define amdgpu_ps float @load_1d_f32_z(<8 x i32> inreg %rsrc, i32 %s) {
130; GFX68-LABEL: load_1d_f32_z:
131; GFX68:       ; %bb.0:
132; GFX68-NEXT:    s_mov_b32 s0, s2
133; GFX68-NEXT:    s_mov_b32 s1, s3
134; GFX68-NEXT:    s_mov_b32 s2, s4
135; GFX68-NEXT:    s_mov_b32 s3, s5
136; GFX68-NEXT:    s_mov_b32 s4, s6
137; GFX68-NEXT:    s_mov_b32 s5, s7
138; GFX68-NEXT:    s_mov_b32 s6, s8
139; GFX68-NEXT:    s_mov_b32 s7, s9
140; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm
141; GFX68-NEXT:    s_waitcnt vmcnt(0)
142; GFX68-NEXT:    ; return to shader part epilog
143;
144; GFX10-LABEL: load_1d_f32_z:
145; GFX10:       ; %bb.0:
146; GFX10-NEXT:    s_mov_b32 s0, s2
147; GFX10-NEXT:    s_mov_b32 s1, s3
148; GFX10-NEXT:    s_mov_b32 s2, s4
149; GFX10-NEXT:    s_mov_b32 s3, s5
150; GFX10-NEXT:    s_mov_b32 s4, s6
151; GFX10-NEXT:    s_mov_b32 s5, s7
152; GFX10-NEXT:    s_mov_b32 s6, s8
153; GFX10-NEXT:    s_mov_b32 s7, s9
154; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
155; GFX10-NEXT:    s_waitcnt vmcnt(0)
156; GFX10-NEXT:    ; return to shader part epilog
157;
158; NOPRT-LABEL: load_1d_f32_z:
159; NOPRT:       ; %bb.0:
160; NOPRT-NEXT:    s_mov_b32 s0, s2
161; NOPRT-NEXT:    s_mov_b32 s1, s3
162; NOPRT-NEXT:    s_mov_b32 s2, s4
163; NOPRT-NEXT:    s_mov_b32 s3, s5
164; NOPRT-NEXT:    s_mov_b32 s4, s6
165; NOPRT-NEXT:    s_mov_b32 s5, s7
166; NOPRT-NEXT:    s_mov_b32 s6, s8
167; NOPRT-NEXT:    s_mov_b32 s7, s9
168; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm
169; NOPRT-NEXT:    s_waitcnt vmcnt(0)
170; NOPRT-NEXT:    ; return to shader part epilog
171;
172; GFX12-LABEL: load_1d_f32_z:
173; GFX12:       ; %bb.0:
174; GFX12-NEXT:    s_mov_b32 s0, s2
175; GFX12-NEXT:    s_mov_b32 s1, s3
176; GFX12-NEXT:    s_mov_b32 s2, s4
177; GFX12-NEXT:    s_mov_b32 s3, s5
178; GFX12-NEXT:    s_mov_b32 s4, s6
179; GFX12-NEXT:    s_mov_b32 s5, s7
180; GFX12-NEXT:    s_mov_b32 s6, s8
181; GFX12-NEXT:    s_mov_b32 s7, s9
182; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D
183; GFX12-NEXT:    s_wait_loadcnt 0x0
184; GFX12-NEXT:    ; return to shader part epilog
185  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
186  ret float %v
187}
188
; Scalar float 1D image load with the intrinsic's first operand set to 8.
; Every expected image_load carries dmask:0x8 and writes the single result
; register v0.
189define amdgpu_ps float @load_1d_f32_w(<8 x i32> inreg %rsrc, i32 %s) {
190; GFX68-LABEL: load_1d_f32_w:
191; GFX68:       ; %bb.0:
192; GFX68-NEXT:    s_mov_b32 s0, s2
193; GFX68-NEXT:    s_mov_b32 s1, s3
194; GFX68-NEXT:    s_mov_b32 s2, s4
195; GFX68-NEXT:    s_mov_b32 s3, s5
196; GFX68-NEXT:    s_mov_b32 s4, s6
197; GFX68-NEXT:    s_mov_b32 s5, s7
198; GFX68-NEXT:    s_mov_b32 s6, s8
199; GFX68-NEXT:    s_mov_b32 s7, s9
200; GFX68-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
201; GFX68-NEXT:    s_waitcnt vmcnt(0)
202; GFX68-NEXT:    ; return to shader part epilog
203;
204; GFX10-LABEL: load_1d_f32_w:
205; GFX10:       ; %bb.0:
206; GFX10-NEXT:    s_mov_b32 s0, s2
207; GFX10-NEXT:    s_mov_b32 s1, s3
208; GFX10-NEXT:    s_mov_b32 s2, s4
209; GFX10-NEXT:    s_mov_b32 s3, s5
210; GFX10-NEXT:    s_mov_b32 s4, s6
211; GFX10-NEXT:    s_mov_b32 s5, s7
212; GFX10-NEXT:    s_mov_b32 s6, s8
213; GFX10-NEXT:    s_mov_b32 s7, s9
214; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
215; GFX10-NEXT:    s_waitcnt vmcnt(0)
216; GFX10-NEXT:    ; return to shader part epilog
217;
218; NOPRT-LABEL: load_1d_f32_w:
219; NOPRT:       ; %bb.0:
220; NOPRT-NEXT:    s_mov_b32 s0, s2
221; NOPRT-NEXT:    s_mov_b32 s1, s3
222; NOPRT-NEXT:    s_mov_b32 s2, s4
223; NOPRT-NEXT:    s_mov_b32 s3, s5
224; NOPRT-NEXT:    s_mov_b32 s4, s6
225; NOPRT-NEXT:    s_mov_b32 s5, s7
226; NOPRT-NEXT:    s_mov_b32 s6, s8
227; NOPRT-NEXT:    s_mov_b32 s7, s9
228; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm
229; NOPRT-NEXT:    s_waitcnt vmcnt(0)
230; NOPRT-NEXT:    ; return to shader part epilog
231;
232; GFX12-LABEL: load_1d_f32_w:
233; GFX12:       ; %bb.0:
234; GFX12-NEXT:    s_mov_b32 s0, s2
235; GFX12-NEXT:    s_mov_b32 s1, s3
236; GFX12-NEXT:    s_mov_b32 s2, s4
237; GFX12-NEXT:    s_mov_b32 s3, s5
238; GFX12-NEXT:    s_mov_b32 s4, s6
239; GFX12-NEXT:    s_mov_b32 s5, s7
240; GFX12-NEXT:    s_mov_b32 s6, s8
241; GFX12-NEXT:    s_mov_b32 s7, s9
242; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D
243; GFX12-NEXT:    s_wait_loadcnt 0x0
244; GFX12-NEXT:    ; return to shader part epilog
245  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
246  ret float %v
247}
248
; Two-element float vector 1D image load with the intrinsic's first operand
; set to 3. The checks expect one image_load with dmask:0x3 whose result
; occupies the register pair v[0:1].
249define amdgpu_ps <2 x float> @load_1d_v2f32_xy(<8 x i32> inreg %rsrc, i32 %s) {
250; GFX68-LABEL: load_1d_v2f32_xy:
251; GFX68:       ; %bb.0:
252; GFX68-NEXT:    s_mov_b32 s0, s2
253; GFX68-NEXT:    s_mov_b32 s1, s3
254; GFX68-NEXT:    s_mov_b32 s2, s4
255; GFX68-NEXT:    s_mov_b32 s3, s5
256; GFX68-NEXT:    s_mov_b32 s4, s6
257; GFX68-NEXT:    s_mov_b32 s5, s7
258; GFX68-NEXT:    s_mov_b32 s6, s8
259; GFX68-NEXT:    s_mov_b32 s7, s9
260; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm
261; GFX68-NEXT:    s_waitcnt vmcnt(0)
262; GFX68-NEXT:    ; return to shader part epilog
263;
264; GFX10-LABEL: load_1d_v2f32_xy:
265; GFX10:       ; %bb.0:
266; GFX10-NEXT:    s_mov_b32 s0, s2
267; GFX10-NEXT:    s_mov_b32 s1, s3
268; GFX10-NEXT:    s_mov_b32 s2, s4
269; GFX10-NEXT:    s_mov_b32 s3, s5
270; GFX10-NEXT:    s_mov_b32 s4, s6
271; GFX10-NEXT:    s_mov_b32 s5, s7
272; GFX10-NEXT:    s_mov_b32 s6, s8
273; GFX10-NEXT:    s_mov_b32 s7, s9
274; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
275; GFX10-NEXT:    s_waitcnt vmcnt(0)
276; GFX10-NEXT:    ; return to shader part epilog
277;
278; NOPRT-LABEL: load_1d_v2f32_xy:
279; NOPRT:       ; %bb.0:
280; NOPRT-NEXT:    s_mov_b32 s0, s2
281; NOPRT-NEXT:    s_mov_b32 s1, s3
282; NOPRT-NEXT:    s_mov_b32 s2, s4
283; NOPRT-NEXT:    s_mov_b32 s3, s5
284; NOPRT-NEXT:    s_mov_b32 s4, s6
285; NOPRT-NEXT:    s_mov_b32 s5, s7
286; NOPRT-NEXT:    s_mov_b32 s6, s8
287; NOPRT-NEXT:    s_mov_b32 s7, s9
288; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
289; NOPRT-NEXT:    s_waitcnt vmcnt(0)
290; NOPRT-NEXT:    ; return to shader part epilog
291;
292; GFX12-LABEL: load_1d_v2f32_xy:
293; GFX12:       ; %bb.0:
294; GFX12-NEXT:    s_mov_b32 s0, s2
295; GFX12-NEXT:    s_mov_b32 s1, s3
296; GFX12-NEXT:    s_mov_b32 s2, s4
297; GFX12-NEXT:    s_mov_b32 s3, s5
298; GFX12-NEXT:    s_mov_b32 s4, s6
299; GFX12-NEXT:    s_mov_b32 s5, s7
300; GFX12-NEXT:    s_mov_b32 s6, s8
301; GFX12-NEXT:    s_mov_b32 s7, s9
302; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
303; GFX12-NEXT:    s_wait_loadcnt 0x0
304; GFX12-NEXT:    ; return to shader part epilog
305  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
306  ret <2 x float> %v
307}
308
; Two-element float vector 1D image load with the intrinsic's first operand
; set to 5 (two non-adjacent mask bits). The checks expect dmask:0x5 with the
; result still packed into the consecutive pair v[0:1].
309define amdgpu_ps <2 x float> @load_1d_v2f32_xz(<8 x i32> inreg %rsrc, i32 %s) {
310; GFX68-LABEL: load_1d_v2f32_xz:
311; GFX68:       ; %bb.0:
312; GFX68-NEXT:    s_mov_b32 s0, s2
313; GFX68-NEXT:    s_mov_b32 s1, s3
314; GFX68-NEXT:    s_mov_b32 s2, s4
315; GFX68-NEXT:    s_mov_b32 s3, s5
316; GFX68-NEXT:    s_mov_b32 s4, s6
317; GFX68-NEXT:    s_mov_b32 s5, s7
318; GFX68-NEXT:    s_mov_b32 s6, s8
319; GFX68-NEXT:    s_mov_b32 s7, s9
320; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm
321; GFX68-NEXT:    s_waitcnt vmcnt(0)
322; GFX68-NEXT:    ; return to shader part epilog
323;
324; GFX10-LABEL: load_1d_v2f32_xz:
325; GFX10:       ; %bb.0:
326; GFX10-NEXT:    s_mov_b32 s0, s2
327; GFX10-NEXT:    s_mov_b32 s1, s3
328; GFX10-NEXT:    s_mov_b32 s2, s4
329; GFX10-NEXT:    s_mov_b32 s3, s5
330; GFX10-NEXT:    s_mov_b32 s4, s6
331; GFX10-NEXT:    s_mov_b32 s5, s7
332; GFX10-NEXT:    s_mov_b32 s6, s8
333; GFX10-NEXT:    s_mov_b32 s7, s9
334; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
335; GFX10-NEXT:    s_waitcnt vmcnt(0)
336; GFX10-NEXT:    ; return to shader part epilog
337;
338; NOPRT-LABEL: load_1d_v2f32_xz:
339; NOPRT:       ; %bb.0:
340; NOPRT-NEXT:    s_mov_b32 s0, s2
341; NOPRT-NEXT:    s_mov_b32 s1, s3
342; NOPRT-NEXT:    s_mov_b32 s2, s4
343; NOPRT-NEXT:    s_mov_b32 s3, s5
344; NOPRT-NEXT:    s_mov_b32 s4, s6
345; NOPRT-NEXT:    s_mov_b32 s5, s7
346; NOPRT-NEXT:    s_mov_b32 s6, s8
347; NOPRT-NEXT:    s_mov_b32 s7, s9
348; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm
349; NOPRT-NEXT:    s_waitcnt vmcnt(0)
350; NOPRT-NEXT:    ; return to shader part epilog
351;
352; GFX12-LABEL: load_1d_v2f32_xz:
353; GFX12:       ; %bb.0:
354; GFX12-NEXT:    s_mov_b32 s0, s2
355; GFX12-NEXT:    s_mov_b32 s1, s3
356; GFX12-NEXT:    s_mov_b32 s2, s4
357; GFX12-NEXT:    s_mov_b32 s3, s5
358; GFX12-NEXT:    s_mov_b32 s4, s6
359; GFX12-NEXT:    s_mov_b32 s5, s7
360; GFX12-NEXT:    s_mov_b32 s6, s8
361; GFX12-NEXT:    s_mov_b32 s7, s9
362; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D
363; GFX12-NEXT:    s_wait_loadcnt 0x0
364; GFX12-NEXT:    ; return to shader part epilog
365  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
366  ret <2 x float> %v
367}
368
; Two-element float vector 1D image load with the intrinsic's first operand
; set to 9 (lowest and highest mask bits). The checks expect dmask:0x9 with
; the result packed into the consecutive pair v[0:1].
369define amdgpu_ps <2 x float> @load_1d_v2f32_xw(<8 x i32> inreg %rsrc, i32 %s) {
370; GFX68-LABEL: load_1d_v2f32_xw:
371; GFX68:       ; %bb.0:
372; GFX68-NEXT:    s_mov_b32 s0, s2
373; GFX68-NEXT:    s_mov_b32 s1, s3
374; GFX68-NEXT:    s_mov_b32 s2, s4
375; GFX68-NEXT:    s_mov_b32 s3, s5
376; GFX68-NEXT:    s_mov_b32 s4, s6
377; GFX68-NEXT:    s_mov_b32 s5, s7
378; GFX68-NEXT:    s_mov_b32 s6, s8
379; GFX68-NEXT:    s_mov_b32 s7, s9
380; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
381; GFX68-NEXT:    s_waitcnt vmcnt(0)
382; GFX68-NEXT:    ; return to shader part epilog
383;
384; GFX10-LABEL: load_1d_v2f32_xw:
385; GFX10:       ; %bb.0:
386; GFX10-NEXT:    s_mov_b32 s0, s2
387; GFX10-NEXT:    s_mov_b32 s1, s3
388; GFX10-NEXT:    s_mov_b32 s2, s4
389; GFX10-NEXT:    s_mov_b32 s3, s5
390; GFX10-NEXT:    s_mov_b32 s4, s6
391; GFX10-NEXT:    s_mov_b32 s5, s7
392; GFX10-NEXT:    s_mov_b32 s6, s8
393; GFX10-NEXT:    s_mov_b32 s7, s9
394; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
395; GFX10-NEXT:    s_waitcnt vmcnt(0)
396; GFX10-NEXT:    ; return to shader part epilog
397;
398; NOPRT-LABEL: load_1d_v2f32_xw:
399; NOPRT:       ; %bb.0:
400; NOPRT-NEXT:    s_mov_b32 s0, s2
401; NOPRT-NEXT:    s_mov_b32 s1, s3
402; NOPRT-NEXT:    s_mov_b32 s2, s4
403; NOPRT-NEXT:    s_mov_b32 s3, s5
404; NOPRT-NEXT:    s_mov_b32 s4, s6
405; NOPRT-NEXT:    s_mov_b32 s5, s7
406; NOPRT-NEXT:    s_mov_b32 s6, s8
407; NOPRT-NEXT:    s_mov_b32 s7, s9
408; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm
409; NOPRT-NEXT:    s_waitcnt vmcnt(0)
410; NOPRT-NEXT:    ; return to shader part epilog
411;
412; GFX12-LABEL: load_1d_v2f32_xw:
413; GFX12:       ; %bb.0:
414; GFX12-NEXT:    s_mov_b32 s0, s2
415; GFX12-NEXT:    s_mov_b32 s1, s3
416; GFX12-NEXT:    s_mov_b32 s2, s4
417; GFX12-NEXT:    s_mov_b32 s3, s5
418; GFX12-NEXT:    s_mov_b32 s4, s6
419; GFX12-NEXT:    s_mov_b32 s5, s7
420; GFX12-NEXT:    s_mov_b32 s6, s8
421; GFX12-NEXT:    s_mov_b32 s7, s9
422; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D
423; GFX12-NEXT:    s_wait_loadcnt 0x0
424; GFX12-NEXT:    ; return to shader part epilog
425  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
426  ret <2 x float> %v
427}
428
; Two-element float vector 1D image load with the intrinsic's first operand
; set to 6 (lowest mask bit clear). The checks expect dmask:0x6 with the
; result packed into the consecutive pair v[0:1].
429define amdgpu_ps <2 x float> @load_1d_v2f32_yz(<8 x i32> inreg %rsrc, i32 %s) {
430; GFX68-LABEL: load_1d_v2f32_yz:
431; GFX68:       ; %bb.0:
432; GFX68-NEXT:    s_mov_b32 s0, s2
433; GFX68-NEXT:    s_mov_b32 s1, s3
434; GFX68-NEXT:    s_mov_b32 s2, s4
435; GFX68-NEXT:    s_mov_b32 s3, s5
436; GFX68-NEXT:    s_mov_b32 s4, s6
437; GFX68-NEXT:    s_mov_b32 s5, s7
438; GFX68-NEXT:    s_mov_b32 s6, s8
439; GFX68-NEXT:    s_mov_b32 s7, s9
440; GFX68-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm
441; GFX68-NEXT:    s_waitcnt vmcnt(0)
442; GFX68-NEXT:    ; return to shader part epilog
443;
444; GFX10-LABEL: load_1d_v2f32_yz:
445; GFX10:       ; %bb.0:
446; GFX10-NEXT:    s_mov_b32 s0, s2
447; GFX10-NEXT:    s_mov_b32 s1, s3
448; GFX10-NEXT:    s_mov_b32 s2, s4
449; GFX10-NEXT:    s_mov_b32 s3, s5
450; GFX10-NEXT:    s_mov_b32 s4, s6
451; GFX10-NEXT:    s_mov_b32 s5, s7
452; GFX10-NEXT:    s_mov_b32 s6, s8
453; GFX10-NEXT:    s_mov_b32 s7, s9
454; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
455; GFX10-NEXT:    s_waitcnt vmcnt(0)
456; GFX10-NEXT:    ; return to shader part epilog
457;
458; NOPRT-LABEL: load_1d_v2f32_yz:
459; NOPRT:       ; %bb.0:
460; NOPRT-NEXT:    s_mov_b32 s0, s2
461; NOPRT-NEXT:    s_mov_b32 s1, s3
462; NOPRT-NEXT:    s_mov_b32 s2, s4
463; NOPRT-NEXT:    s_mov_b32 s3, s5
464; NOPRT-NEXT:    s_mov_b32 s4, s6
465; NOPRT-NEXT:    s_mov_b32 s5, s7
466; NOPRT-NEXT:    s_mov_b32 s6, s8
467; NOPRT-NEXT:    s_mov_b32 s7, s9
468; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm
469; NOPRT-NEXT:    s_waitcnt vmcnt(0)
470; NOPRT-NEXT:    ; return to shader part epilog
471;
472; GFX12-LABEL: load_1d_v2f32_yz:
473; GFX12:       ; %bb.0:
474; GFX12-NEXT:    s_mov_b32 s0, s2
475; GFX12-NEXT:    s_mov_b32 s1, s3
476; GFX12-NEXT:    s_mov_b32 s2, s4
477; GFX12-NEXT:    s_mov_b32 s3, s5
478; GFX12-NEXT:    s_mov_b32 s4, s6
479; GFX12-NEXT:    s_mov_b32 s5, s7
480; GFX12-NEXT:    s_mov_b32 s6, s8
481; GFX12-NEXT:    s_mov_b32 s7, s9
482; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D
483; GFX12-NEXT:    s_wait_loadcnt 0x0
484; GFX12-NEXT:    ; return to shader part epilog
485  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
486  ret <2 x float> %v
487}
488
; Three-element float vector 1D image load with the intrinsic's first operand
; set to 7. The checks expect one image_load with dmask:0x7 whose result
; occupies the register triple v[0:2].
489define amdgpu_ps <3 x float> @load_1d_v3f32_xyz(<8 x i32> inreg %rsrc, i32 %s) {
490; GFX68-LABEL: load_1d_v3f32_xyz:
491; GFX68:       ; %bb.0:
492; GFX68-NEXT:    s_mov_b32 s0, s2
493; GFX68-NEXT:    s_mov_b32 s1, s3
494; GFX68-NEXT:    s_mov_b32 s2, s4
495; GFX68-NEXT:    s_mov_b32 s3, s5
496; GFX68-NEXT:    s_mov_b32 s4, s6
497; GFX68-NEXT:    s_mov_b32 s5, s7
498; GFX68-NEXT:    s_mov_b32 s6, s8
499; GFX68-NEXT:    s_mov_b32 s7, s9
500; GFX68-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm
501; GFX68-NEXT:    s_waitcnt vmcnt(0)
502; GFX68-NEXT:    ; return to shader part epilog
503;
504; GFX10-LABEL: load_1d_v3f32_xyz:
505; GFX10:       ; %bb.0:
506; GFX10-NEXT:    s_mov_b32 s0, s2
507; GFX10-NEXT:    s_mov_b32 s1, s3
508; GFX10-NEXT:    s_mov_b32 s2, s4
509; GFX10-NEXT:    s_mov_b32 s3, s5
510; GFX10-NEXT:    s_mov_b32 s4, s6
511; GFX10-NEXT:    s_mov_b32 s5, s7
512; GFX10-NEXT:    s_mov_b32 s6, s8
513; GFX10-NEXT:    s_mov_b32 s7, s9
514; GFX10-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
515; GFX10-NEXT:    s_waitcnt vmcnt(0)
516; GFX10-NEXT:    ; return to shader part epilog
517;
518; NOPRT-LABEL: load_1d_v3f32_xyz:
519; NOPRT:       ; %bb.0:
520; NOPRT-NEXT:    s_mov_b32 s0, s2
521; NOPRT-NEXT:    s_mov_b32 s1, s3
522; NOPRT-NEXT:    s_mov_b32 s2, s4
523; NOPRT-NEXT:    s_mov_b32 s3, s5
524; NOPRT-NEXT:    s_mov_b32 s4, s6
525; NOPRT-NEXT:    s_mov_b32 s5, s7
526; NOPRT-NEXT:    s_mov_b32 s6, s8
527; NOPRT-NEXT:    s_mov_b32 s7, s9
528; NOPRT-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm
529; NOPRT-NEXT:    s_waitcnt vmcnt(0)
530; NOPRT-NEXT:    ; return to shader part epilog
531;
532; GFX12-LABEL: load_1d_v3f32_xyz:
533; GFX12:       ; %bb.0:
534; GFX12-NEXT:    s_mov_b32 s0, s2
535; GFX12-NEXT:    s_mov_b32 s1, s3
536; GFX12-NEXT:    s_mov_b32 s2, s4
537; GFX12-NEXT:    s_mov_b32 s3, s5
538; GFX12-NEXT:    s_mov_b32 s4, s6
539; GFX12-NEXT:    s_mov_b32 s5, s7
540; GFX12-NEXT:    s_mov_b32 s6, s8
541; GFX12-NEXT:    s_mov_b32 s7, s9
542; GFX12-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D
543; GFX12-NEXT:    s_wait_loadcnt 0x0
544; GFX12-NEXT:    ; return to shader part epilog
545  %v = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
546  ret <3 x float> %v
547}
548
; Four-element float vector 1D image load with the intrinsic's first operand
; set to 15. The checks expect one image_load with dmask:0xf whose result
; occupies the register quad v[0:3].
549define amdgpu_ps <4 x float> @load_1d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
550; GFX68-LABEL: load_1d_v4f32_xyzw:
551; GFX68:       ; %bb.0:
552; GFX68-NEXT:    s_mov_b32 s0, s2
553; GFX68-NEXT:    s_mov_b32 s1, s3
554; GFX68-NEXT:    s_mov_b32 s2, s4
555; GFX68-NEXT:    s_mov_b32 s3, s5
556; GFX68-NEXT:    s_mov_b32 s4, s6
557; GFX68-NEXT:    s_mov_b32 s5, s7
558; GFX68-NEXT:    s_mov_b32 s6, s8
559; GFX68-NEXT:    s_mov_b32 s7, s9
560; GFX68-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
561; GFX68-NEXT:    s_waitcnt vmcnt(0)
562; GFX68-NEXT:    ; return to shader part epilog
563;
564; GFX10-LABEL: load_1d_v4f32_xyzw:
565; GFX10:       ; %bb.0:
566; GFX10-NEXT:    s_mov_b32 s0, s2
567; GFX10-NEXT:    s_mov_b32 s1, s3
568; GFX10-NEXT:    s_mov_b32 s2, s4
569; GFX10-NEXT:    s_mov_b32 s3, s5
570; GFX10-NEXT:    s_mov_b32 s4, s6
571; GFX10-NEXT:    s_mov_b32 s5, s7
572; GFX10-NEXT:    s_mov_b32 s6, s8
573; GFX10-NEXT:    s_mov_b32 s7, s9
574; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
575; GFX10-NEXT:    s_waitcnt vmcnt(0)
576; GFX10-NEXT:    ; return to shader part epilog
577;
578; NOPRT-LABEL: load_1d_v4f32_xyzw:
579; NOPRT:       ; %bb.0:
580; NOPRT-NEXT:    s_mov_b32 s0, s2
581; NOPRT-NEXT:    s_mov_b32 s1, s3
582; NOPRT-NEXT:    s_mov_b32 s2, s4
583; NOPRT-NEXT:    s_mov_b32 s3, s5
584; NOPRT-NEXT:    s_mov_b32 s4, s6
585; NOPRT-NEXT:    s_mov_b32 s5, s7
586; NOPRT-NEXT:    s_mov_b32 s6, s8
587; NOPRT-NEXT:    s_mov_b32 s7, s9
588; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
589; NOPRT-NEXT:    s_waitcnt vmcnt(0)
590; NOPRT-NEXT:    ; return to shader part epilog
591;
592; GFX12-LABEL: load_1d_v4f32_xyzw:
593; GFX12:       ; %bb.0:
594; GFX12-NEXT:    s_mov_b32 s0, s2
595; GFX12-NEXT:    s_mov_b32 s1, s3
596; GFX12-NEXT:    s_mov_b32 s2, s4
597; GFX12-NEXT:    s_mov_b32 s3, s5
598; GFX12-NEXT:    s_mov_b32 s4, s6
599; GFX12-NEXT:    s_mov_b32 s5, s7
600; GFX12-NEXT:    s_mov_b32 s6, s8
601; GFX12-NEXT:    s_mov_b32 s7, s9
602; GFX12-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
603; GFX12-NEXT:    s_wait_loadcnt 0x0
604; GFX12-NEXT:    ; return to shader part epilog
605  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
606  ret <4 x float> %v
607}
608
; TFE variant of the single-channel load: the intrinsic returns { float, i32 }
; and is called with its fourth operand set to 1 (0 in the non-TFE tests),
; which shows up as the tfe modifier on the expected image_load. Only the
; trailing i32 struct field is used; it is bitcast to float and returned.
; The load result is one register wider than the data (two registers here).
; With default attributes the checks zero both destination registers before
; the load; the -enable-prt-strict-null run zeroes only the last register and
; loads in place over v[0:1].
609define amdgpu_ps float @load_1d_f32_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
610; GFX68-LABEL: load_1d_f32_tfe_dmask_x:
611; GFX68:       ; %bb.0:
612; GFX68-NEXT:    v_mov_b32_e32 v1, 0
613; GFX68-NEXT:    s_mov_b32 s0, s2
614; GFX68-NEXT:    s_mov_b32 s1, s3
615; GFX68-NEXT:    s_mov_b32 s2, s4
616; GFX68-NEXT:    s_mov_b32 s3, s5
617; GFX68-NEXT:    s_mov_b32 s4, s6
618; GFX68-NEXT:    s_mov_b32 s5, s7
619; GFX68-NEXT:    s_mov_b32 s6, s8
620; GFX68-NEXT:    s_mov_b32 s7, s9
621; GFX68-NEXT:    v_mov_b32_e32 v2, v1
622; GFX68-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
623; GFX68-NEXT:    s_waitcnt vmcnt(0)
624; GFX68-NEXT:    v_mov_b32_e32 v0, v2
625; GFX68-NEXT:    ; return to shader part epilog
626;
627; GFX10-LABEL: load_1d_f32_tfe_dmask_x:
628; GFX10:       ; %bb.0:
629; GFX10-NEXT:    v_mov_b32_e32 v1, 0
630; GFX10-NEXT:    s_mov_b32 s0, s2
631; GFX10-NEXT:    s_mov_b32 s1, s3
632; GFX10-NEXT:    s_mov_b32 s2, s4
633; GFX10-NEXT:    s_mov_b32 s3, s5
634; GFX10-NEXT:    s_mov_b32 s4, s6
635; GFX10-NEXT:    s_mov_b32 s5, s7
636; GFX10-NEXT:    s_mov_b32 s6, s8
637; GFX10-NEXT:    s_mov_b32 s7, s9
638; GFX10-NEXT:    v_mov_b32_e32 v2, v1
639; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
640; GFX10-NEXT:    s_waitcnt vmcnt(0)
641; GFX10-NEXT:    v_mov_b32_e32 v0, v2
642; GFX10-NEXT:    ; return to shader part epilog
643;
644; NOPRT-LABEL: load_1d_f32_tfe_dmask_x:
645; NOPRT:       ; %bb.0:
646; NOPRT-NEXT:    s_mov_b32 s0, s2
647; NOPRT-NEXT:    s_mov_b32 s1, s3
648; NOPRT-NEXT:    s_mov_b32 s2, s4
649; NOPRT-NEXT:    s_mov_b32 s3, s5
650; NOPRT-NEXT:    s_mov_b32 s4, s6
651; NOPRT-NEXT:    s_mov_b32 s5, s7
652; NOPRT-NEXT:    s_mov_b32 s6, s8
653; NOPRT-NEXT:    s_mov_b32 s7, s9
654; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
655; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
656; NOPRT-NEXT:    s_waitcnt vmcnt(0)
657; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
658; NOPRT-NEXT:    ; return to shader part epilog
659;
660; GFX12-LABEL: load_1d_f32_tfe_dmask_x:
661; GFX12:       ; %bb.0:
662; GFX12-NEXT:    v_mov_b32_e32 v1, 0
663; GFX12-NEXT:    s_mov_b32 s0, s2
664; GFX12-NEXT:    s_mov_b32 s1, s3
665; GFX12-NEXT:    s_mov_b32 s2, s4
666; GFX12-NEXT:    s_mov_b32 s3, s5
667; GFX12-NEXT:    s_mov_b32 s4, s6
668; GFX12-NEXT:    s_mov_b32 s5, s7
669; GFX12-NEXT:    s_mov_b32 s6, s8
670; GFX12-NEXT:    s_mov_b32 s7, s9
671; GFX12-NEXT:    v_mov_b32_e32 v2, v1
672; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
673; GFX12-NEXT:    s_wait_loadcnt 0x0
674; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
675; GFX12-NEXT:    v_mov_b32_e32 v0, v2
676; GFX12-NEXT:    ; return to shader part epilog
677  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
678  %v.err = extractvalue { float, i32 } %v, 1
679  %vv = bitcast i32 %v.err to float
680  ret float %vv
681}
682
; TFE variant of the two-channel load: the intrinsic returns
; { <2 x float>, i32 } and is called with dmask 3 and its fourth operand set
; to 1, which shows up as the tfe modifier on the expected image_load. Only
; the trailing i32 struct field is used; it is bitcast to float and returned.
; The load result spans three registers. With default attributes the checks
; zero all three before the load (gfx1200 pairs two of the copies into one
; v_dual_mov_b32); the -enable-prt-strict-null run zeroes only the last
; register and loads in place over v[0:2].
683define amdgpu_ps float @load_1d_v2f32_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
684; GFX68-LABEL: load_1d_v2f32_tfe_dmask_xy:
685; GFX68:       ; %bb.0:
686; GFX68-NEXT:    v_mov_b32_e32 v1, 0
687; GFX68-NEXT:    s_mov_b32 s0, s2
688; GFX68-NEXT:    s_mov_b32 s1, s3
689; GFX68-NEXT:    s_mov_b32 s2, s4
690; GFX68-NEXT:    s_mov_b32 s3, s5
691; GFX68-NEXT:    s_mov_b32 s4, s6
692; GFX68-NEXT:    s_mov_b32 s5, s7
693; GFX68-NEXT:    s_mov_b32 s6, s8
694; GFX68-NEXT:    s_mov_b32 s7, s9
695; GFX68-NEXT:    v_mov_b32_e32 v2, v1
696; GFX68-NEXT:    v_mov_b32_e32 v3, v1
697; GFX68-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe
698; GFX68-NEXT:    s_waitcnt vmcnt(0)
699; GFX68-NEXT:    v_mov_b32_e32 v0, v3
700; GFX68-NEXT:    ; return to shader part epilog
701;
702; GFX10-LABEL: load_1d_v2f32_tfe_dmask_xy:
703; GFX10:       ; %bb.0:
704; GFX10-NEXT:    v_mov_b32_e32 v1, 0
705; GFX10-NEXT:    s_mov_b32 s0, s2
706; GFX10-NEXT:    s_mov_b32 s1, s3
707; GFX10-NEXT:    s_mov_b32 s2, s4
708; GFX10-NEXT:    s_mov_b32 s3, s5
709; GFX10-NEXT:    s_mov_b32 s4, s6
710; GFX10-NEXT:    s_mov_b32 s5, s7
711; GFX10-NEXT:    s_mov_b32 s6, s8
712; GFX10-NEXT:    s_mov_b32 s7, s9
713; GFX10-NEXT:    v_mov_b32_e32 v2, v1
714; GFX10-NEXT:    v_mov_b32_e32 v3, v1
715; GFX10-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
716; GFX10-NEXT:    s_waitcnt vmcnt(0)
717; GFX10-NEXT:    v_mov_b32_e32 v0, v3
718; GFX10-NEXT:    ; return to shader part epilog
719;
720; NOPRT-LABEL: load_1d_v2f32_tfe_dmask_xy:
721; NOPRT:       ; %bb.0:
722; NOPRT-NEXT:    s_mov_b32 s0, s2
723; NOPRT-NEXT:    s_mov_b32 s1, s3
724; NOPRT-NEXT:    s_mov_b32 s2, s4
725; NOPRT-NEXT:    s_mov_b32 s3, s5
726; NOPRT-NEXT:    s_mov_b32 s4, s6
727; NOPRT-NEXT:    s_mov_b32 s5, s7
728; NOPRT-NEXT:    s_mov_b32 s6, s8
729; NOPRT-NEXT:    s_mov_b32 s7, s9
730; NOPRT-NEXT:    v_mov_b32_e32 v2, 0
731; NOPRT-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe
732; NOPRT-NEXT:    s_waitcnt vmcnt(0)
733; NOPRT-NEXT:    v_mov_b32_e32 v0, v2
734; NOPRT-NEXT:    ; return to shader part epilog
735;
736; GFX12-LABEL: load_1d_v2f32_tfe_dmask_xy:
737; GFX12:       ; %bb.0:
738; GFX12-NEXT:    v_mov_b32_e32 v1, 0
739; GFX12-NEXT:    s_mov_b32 s0, s2
740; GFX12-NEXT:    s_mov_b32 s1, s3
741; GFX12-NEXT:    s_mov_b32 s2, s4
742; GFX12-NEXT:    s_mov_b32 s3, s5
743; GFX12-NEXT:    s_mov_b32 s4, s6
744; GFX12-NEXT:    s_mov_b32 s5, s7
745; GFX12-NEXT:    s_mov_b32 s6, s8
746; GFX12-NEXT:    s_mov_b32 s7, s9
747; GFX12-NEXT:    v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
748; GFX12-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
749; GFX12-NEXT:    s_wait_loadcnt 0x0
750; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
751; GFX12-NEXT:    v_mov_b32_e32 v0, v3
752; GFX12-NEXT:    ; return to shader part epilog
753  %v = call { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
754  %v.err = extractvalue { <2 x float>, i32 } %v, 1
755  %vv = bitcast i32 %v.err to float
756  ret float %vv
757}
758
; TFE variant of the three-channel load: the intrinsic returns
; { <3 x float>, i32 } and is called with dmask 7 and its fourth operand set
; to 1, which shows up as the tfe modifier on the expected image_load. Only
; the trailing i32 struct field is used; it is bitcast to float and returned.
; The load result spans four registers. With default attributes the checks
; zero all four before the load; the -enable-prt-strict-null run zeroes only
; the last register and loads in place over v[0:3].
759define amdgpu_ps float @load_1d_v3f32_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
760; GFX68-LABEL: load_1d_v3f32_tfe_dmask_xyz:
761; GFX68:       ; %bb.0:
762; GFX68-NEXT:    v_mov_b32_e32 v1, 0
763; GFX68-NEXT:    s_mov_b32 s0, s2
764; GFX68-NEXT:    s_mov_b32 s1, s3
765; GFX68-NEXT:    s_mov_b32 s2, s4
766; GFX68-NEXT:    s_mov_b32 s3, s5
767; GFX68-NEXT:    s_mov_b32 s4, s6
768; GFX68-NEXT:    s_mov_b32 s5, s7
769; GFX68-NEXT:    s_mov_b32 s6, s8
770; GFX68-NEXT:    s_mov_b32 s7, s9
771; GFX68-NEXT:    v_mov_b32_e32 v2, v1
772; GFX68-NEXT:    v_mov_b32_e32 v3, v1
773; GFX68-NEXT:    v_mov_b32_e32 v4, v1
774; GFX68-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe
775; GFX68-NEXT:    s_waitcnt vmcnt(0)
776; GFX68-NEXT:    v_mov_b32_e32 v0, v4
777; GFX68-NEXT:    ; return to shader part epilog
778;
779; GFX10-LABEL: load_1d_v3f32_tfe_dmask_xyz:
780; GFX10:       ; %bb.0:
781; GFX10-NEXT:    v_mov_b32_e32 v1, 0
782; GFX10-NEXT:    s_mov_b32 s0, s2
783; GFX10-NEXT:    s_mov_b32 s1, s3
784; GFX10-NEXT:    s_mov_b32 s2, s4
785; GFX10-NEXT:    s_mov_b32 s3, s5
786; GFX10-NEXT:    s_mov_b32 s4, s6
787; GFX10-NEXT:    s_mov_b32 s5, s7
788; GFX10-NEXT:    s_mov_b32 s6, s8
789; GFX10-NEXT:    s_mov_b32 s7, s9
790; GFX10-NEXT:    v_mov_b32_e32 v2, v1
791; GFX10-NEXT:    v_mov_b32_e32 v3, v1
792; GFX10-NEXT:    v_mov_b32_e32 v4, v1
793; GFX10-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
794; GFX10-NEXT:    s_waitcnt vmcnt(0)
795; GFX10-NEXT:    v_mov_b32_e32 v0, v4
796; GFX10-NEXT:    ; return to shader part epilog
797;
798; NOPRT-LABEL: load_1d_v3f32_tfe_dmask_xyz:
799; NOPRT:       ; %bb.0:
800; NOPRT-NEXT:    s_mov_b32 s0, s2
801; NOPRT-NEXT:    s_mov_b32 s1, s3
802; NOPRT-NEXT:    s_mov_b32 s2, s4
803; NOPRT-NEXT:    s_mov_b32 s3, s5
804; NOPRT-NEXT:    s_mov_b32 s4, s6
805; NOPRT-NEXT:    s_mov_b32 s5, s7
806; NOPRT-NEXT:    s_mov_b32 s6, s8
807; NOPRT-NEXT:    s_mov_b32 s7, s9
808; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
809; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe
810; NOPRT-NEXT:    s_waitcnt vmcnt(0)
811; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
812; NOPRT-NEXT:    ; return to shader part epilog
813;
814; GFX12-LABEL: load_1d_v3f32_tfe_dmask_xyz:
815; GFX12:       ; %bb.0:
816; GFX12-NEXT:    v_mov_b32_e32 v1, 0
817; GFX12-NEXT:    s_mov_b32 s0, s2
818; GFX12-NEXT:    s_mov_b32 s1, s3
819; GFX12-NEXT:    s_mov_b32 s2, s4
820; GFX12-NEXT:    s_mov_b32 s3, s5
821; GFX12-NEXT:    s_mov_b32 s4, s6
822; GFX12-NEXT:    s_mov_b32 s5, s7
823; GFX12-NEXT:    s_mov_b32 s6, s8
824; GFX12-NEXT:    s_mov_b32 s7, s9
825; GFX12-NEXT:    v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
826; GFX12-NEXT:    v_mov_b32_e32 v4, v1
827; GFX12-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D tfe
828; GFX12-NEXT:    s_wait_loadcnt 0x0
829; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
830; GFX12-NEXT:    v_mov_b32_e32 v0, v4
831; GFX12-NEXT:    ; return to shader part epilog
832  %v = call { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
833  %v.err = extractvalue { <3 x float>, i32 } %v, 1
834  %vv = bitcast i32 %v.err to float
835  ret float %vv
836}
837
; Test: 1D image load returning { <4 x float>, i32 }; only the trailing i32
; member is used, bitcast to float and returned. Every check block expects the
; `tfe` modifier on image_load.
; NOTE(review): the function name says "xyzw", but the dmask immediate passed to
; the intrinsic is 16 (0x10), and the checks below expect `dmask:0x10` with only
; a two-register destination. A four-channel mask would be 15 (0xf). Confirm
; whether 16 is intentional (an out-of-range dmask test) or a typo for 15; if it
; is changed, the assertions must be regenerated with update_llc_test_checks.py.
838define amdgpu_ps float @load_1d_v4f32_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
839; GFX68-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
840; GFX68:       ; %bb.0:
841; GFX68-NEXT:    v_mov_b32_e32 v1, 0
842; GFX68-NEXT:    s_mov_b32 s0, s2
843; GFX68-NEXT:    s_mov_b32 s1, s3
844; GFX68-NEXT:    s_mov_b32 s2, s4
845; GFX68-NEXT:    s_mov_b32 s3, s5
846; GFX68-NEXT:    s_mov_b32 s4, s6
847; GFX68-NEXT:    s_mov_b32 s5, s7
848; GFX68-NEXT:    s_mov_b32 s6, s8
849; GFX68-NEXT:    s_mov_b32 s7, s9
850; GFX68-NEXT:    v_mov_b32_e32 v2, v1
851; GFX68-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe
852; GFX68-NEXT:    s_waitcnt vmcnt(0)
853; GFX68-NEXT:    v_mov_b32_e32 v0, v2
854; GFX68-NEXT:    ; return to shader part epilog
855;
856; GFX10-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
857; GFX10:       ; %bb.0:
858; GFX10-NEXT:    v_mov_b32_e32 v1, 0
859; GFX10-NEXT:    s_mov_b32 s0, s2
860; GFX10-NEXT:    s_mov_b32 s1, s3
861; GFX10-NEXT:    s_mov_b32 s2, s4
862; GFX10-NEXT:    s_mov_b32 s3, s5
863; GFX10-NEXT:    s_mov_b32 s4, s6
864; GFX10-NEXT:    s_mov_b32 s5, s7
865; GFX10-NEXT:    s_mov_b32 s6, s8
866; GFX10-NEXT:    s_mov_b32 s7, s9
867; GFX10-NEXT:    v_mov_b32_e32 v2, v1
868; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe
869; GFX10-NEXT:    s_waitcnt vmcnt(0)
870; GFX10-NEXT:    v_mov_b32_e32 v0, v2
871; GFX10-NEXT:    ; return to shader part epilog
872;
873; NOPRT-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
874; NOPRT:       ; %bb.0:
875; NOPRT-NEXT:    s_mov_b32 s0, s2
876; NOPRT-NEXT:    s_mov_b32 s1, s3
877; NOPRT-NEXT:    s_mov_b32 s2, s4
878; NOPRT-NEXT:    s_mov_b32 s3, s5
879; NOPRT-NEXT:    s_mov_b32 s4, s6
880; NOPRT-NEXT:    s_mov_b32 s5, s7
881; NOPRT-NEXT:    s_mov_b32 s6, s8
882; NOPRT-NEXT:    s_mov_b32 s7, s9
883; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
884; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe
885; NOPRT-NEXT:    s_waitcnt vmcnt(0)
886; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
887; NOPRT-NEXT:    ; return to shader part epilog
888;
889; GFX12-LABEL: load_1d_v4f32_tfe_dmask_xyzw:
890; GFX12:       ; %bb.0:
891; GFX12-NEXT:    v_mov_b32_e32 v1, 0
892; GFX12-NEXT:    s_mov_b32 s0, s2
893; GFX12-NEXT:    s_mov_b32 s1, s3
894; GFX12-NEXT:    s_mov_b32 s2, s4
895; GFX12-NEXT:    s_mov_b32 s3, s5
896; GFX12-NEXT:    s_mov_b32 s4, s6
897; GFX12-NEXT:    s_mov_b32 s5, s7
898; GFX12-NEXT:    s_mov_b32 s6, s8
899; GFX12-NEXT:    s_mov_b32 s7, s9
900; GFX12-NEXT:    v_mov_b32_e32 v2, v1
901; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D tfe
902; GFX12-NEXT:    s_wait_loadcnt 0x0
903; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
904; GFX12-NEXT:    v_mov_b32_e32 v0, v2
905; GFX12-NEXT:    ; return to shader part epilog
906  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; Struct element 1 is the i32 member that follows the <4 x float> data.
907  %v.err = extractvalue { <4 x float>, i32 } %v, 1
908  %vv = bitcast i32 %v.err to float
909  ret float %vv
910}
911
; Test: 1D image load returning { float, i32 } with a dmask immediate of 0.
; Only the trailing i32 member is used; it is bitcast to float and returned.
; Although the IR passes dmask 0, every check block below expects the emitted
; image_load to carry `dmask:0x1` together with `tfe` and a two-register
; destination, with the second register copied to v0 as the return value.
912define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
913; GFX68-LABEL: load_1d_f32_tfe_dmask_0:
914; GFX68:       ; %bb.0:
915; GFX68-NEXT:    v_mov_b32_e32 v1, 0
916; GFX68-NEXT:    s_mov_b32 s0, s2
917; GFX68-NEXT:    s_mov_b32 s1, s3
918; GFX68-NEXT:    s_mov_b32 s2, s4
919; GFX68-NEXT:    s_mov_b32 s3, s5
920; GFX68-NEXT:    s_mov_b32 s4, s6
921; GFX68-NEXT:    s_mov_b32 s5, s7
922; GFX68-NEXT:    s_mov_b32 s6, s8
923; GFX68-NEXT:    s_mov_b32 s7, s9
924; GFX68-NEXT:    v_mov_b32_e32 v2, v1
925; GFX68-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
926; GFX68-NEXT:    s_waitcnt vmcnt(0)
927; GFX68-NEXT:    v_mov_b32_e32 v0, v2
928; GFX68-NEXT:    ; return to shader part epilog
929;
930; GFX10-LABEL: load_1d_f32_tfe_dmask_0:
931; GFX10:       ; %bb.0:
932; GFX10-NEXT:    v_mov_b32_e32 v1, 0
933; GFX10-NEXT:    s_mov_b32 s0, s2
934; GFX10-NEXT:    s_mov_b32 s1, s3
935; GFX10-NEXT:    s_mov_b32 s2, s4
936; GFX10-NEXT:    s_mov_b32 s3, s5
937; GFX10-NEXT:    s_mov_b32 s4, s6
938; GFX10-NEXT:    s_mov_b32 s5, s7
939; GFX10-NEXT:    s_mov_b32 s6, s8
940; GFX10-NEXT:    s_mov_b32 s7, s9
941; GFX10-NEXT:    v_mov_b32_e32 v2, v1
942; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
943; GFX10-NEXT:    s_waitcnt vmcnt(0)
944; GFX10-NEXT:    v_mov_b32_e32 v0, v2
945; GFX10-NEXT:    ; return to shader part epilog
946;
947; NOPRT-LABEL: load_1d_f32_tfe_dmask_0:
948; NOPRT:       ; %bb.0:
949; NOPRT-NEXT:    s_mov_b32 s0, s2
950; NOPRT-NEXT:    s_mov_b32 s1, s3
951; NOPRT-NEXT:    s_mov_b32 s2, s4
952; NOPRT-NEXT:    s_mov_b32 s3, s5
953; NOPRT-NEXT:    s_mov_b32 s4, s6
954; NOPRT-NEXT:    s_mov_b32 s5, s7
955; NOPRT-NEXT:    s_mov_b32 s6, s8
956; NOPRT-NEXT:    s_mov_b32 s7, s9
957; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
958; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe
959; NOPRT-NEXT:    s_waitcnt vmcnt(0)
960; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
961; NOPRT-NEXT:    ; return to shader part epilog
962;
963; GFX12-LABEL: load_1d_f32_tfe_dmask_0:
964; GFX12:       ; %bb.0:
965; GFX12-NEXT:    v_mov_b32_e32 v1, 0
966; GFX12-NEXT:    s_mov_b32 s0, s2
967; GFX12-NEXT:    s_mov_b32 s1, s3
968; GFX12-NEXT:    s_mov_b32 s2, s4
969; GFX12-NEXT:    s_mov_b32 s3, s5
970; GFX12-NEXT:    s_mov_b32 s4, s6
971; GFX12-NEXT:    s_mov_b32 s5, s7
972; GFX12-NEXT:    s_mov_b32 s6, s8
973; GFX12-NEXT:    s_mov_b32 s7, s9
974; GFX12-NEXT:    v_mov_b32_e32 v2, v1
975; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
976; GFX12-NEXT:    s_wait_loadcnt 0x0
977; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
978; GFX12-NEXT:    v_mov_b32_e32 v0, v2
979; GFX12-NEXT:    ; return to shader part epilog
980  %v = call { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
; Struct element 1 is the i32 member that follows the float data.
981  %v.err = extractvalue { float, i32 } %v, 1
982  %vv = bitcast i32 %v.err to float
983  ret float %vv
984}
985
; Overloads of the 1D image-load intrinsic that return only the loaded data
; (float through <4 x float>). The first, fourth and fifth operands are immarg,
; i.e. they must be compile-time constants.
986declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
987declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
988declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
989declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
990
; Overloads that return a { data, i32 } struct; the tfe tests in this file
; extract the trailing i32 member as %v.err.
991declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
992declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
993declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
994declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
995
; Attribute group shared by every intrinsic declaration above.
996attributes #0 = { nounwind readonly }
997