xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-PACKED %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s
6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS %s
7; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
8
; Test: 1D image load returning a single half via
; @llvm.amdgcn.image.load.1d.half.i32. The first intrinsic operand (i32 1)
; appears as dmask:0x1 on the expected image_load, and every checked prefix
; expects the d16 modifier. Judging by the function name this presumably
; selects the X component -- TODO confirm.
; All prefixes expect the eight inreg %rsrc dwords to be copied from s2-s9
; into s0-s7 before the load. The check lines below are autogenerated (see the
; NOTE at the top of the file); regenerate them instead of editing by hand.
9define amdgpu_ps half @load_1d_f16_x(<8 x i32> inreg %rsrc, i32 %s) {
10; GFX8-UNPACKED-LABEL: load_1d_f16_x:
11; GFX8-UNPACKED:       ; %bb.0:
12; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
13; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
14; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
15; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
16; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
17; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
18; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
19; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
20; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
21; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
22; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
23;
24; GFX8-PACKED-LABEL: load_1d_f16_x:
25; GFX8-PACKED:       ; %bb.0:
26; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
27; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
28; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
29; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
30; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
31; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
32; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
33; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
34; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
35; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
36; GFX8-PACKED-NEXT:    ; return to shader part epilog
37;
38; GFX9-LABEL: load_1d_f16_x:
39; GFX9:       ; %bb.0:
40; GFX9-NEXT:    s_mov_b32 s0, s2
41; GFX9-NEXT:    s_mov_b32 s1, s3
42; GFX9-NEXT:    s_mov_b32 s2, s4
43; GFX9-NEXT:    s_mov_b32 s3, s5
44; GFX9-NEXT:    s_mov_b32 s4, s6
45; GFX9-NEXT:    s_mov_b32 s5, s7
46; GFX9-NEXT:    s_mov_b32 s6, s8
47; GFX9-NEXT:    s_mov_b32 s7, s9
48; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 unorm d16
49; GFX9-NEXT:    s_waitcnt vmcnt(0)
50; GFX9-NEXT:    ; return to shader part epilog
51;
52; GFX10PLUS-LABEL: load_1d_f16_x:
53; GFX10PLUS:       ; %bb.0:
54; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
55; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
56; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
57; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
58; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
59; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
60; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
61; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
62; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm d16
63; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
64; GFX10PLUS-NEXT:    ; return to shader part epilog
65;
66; GFX12-LABEL: load_1d_f16_x:
67; GFX12:       ; %bb.0:
68; GFX12-NEXT:    s_mov_b32 s0, s2
69; GFX12-NEXT:    s_mov_b32 s1, s3
70; GFX12-NEXT:    s_mov_b32 s2, s4
71; GFX12-NEXT:    s_mov_b32 s3, s5
72; GFX12-NEXT:    s_mov_b32 s4, s6
73; GFX12-NEXT:    s_mov_b32 s5, s7
74; GFX12-NEXT:    s_mov_b32 s6, s8
75; GFX12-NEXT:    s_mov_b32 s7, s9
76; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D d16
77; GFX12-NEXT:    s_wait_loadcnt 0x0
78; GFX12-NEXT:    ; return to shader part epilog
79  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
80  ret half %v
81}
82
; Test: 1D image load returning a single half via
; @llvm.amdgcn.image.load.1d.half.i32. The first intrinsic operand (i32 2)
; appears as dmask:0x2 on the expected image_load, and every checked prefix
; expects the d16 modifier. Judging by the function name this presumably
; selects the Y component -- TODO confirm.
; All prefixes expect the eight inreg %rsrc dwords to be copied from s2-s9
; into s0-s7 before the load. The check lines below are autogenerated (see the
; NOTE at the top of the file); regenerate them instead of editing by hand.
83define amdgpu_ps half @load_1d_f16_y(<8 x i32> inreg %rsrc, i32 %s) {
84; GFX8-UNPACKED-LABEL: load_1d_f16_y:
85; GFX8-UNPACKED:       ; %bb.0:
86; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
87; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
88; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
89; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
90; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
91; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
92; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
93; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
94; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
95; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
96; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
97;
98; GFX8-PACKED-LABEL: load_1d_f16_y:
99; GFX8-PACKED:       ; %bb.0:
100; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
101; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
102; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
103; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
104; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
105; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
106; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
107; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
108; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
109; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
110; GFX8-PACKED-NEXT:    ; return to shader part epilog
111;
112; GFX9-LABEL: load_1d_f16_y:
113; GFX9:       ; %bb.0:
114; GFX9-NEXT:    s_mov_b32 s0, s2
115; GFX9-NEXT:    s_mov_b32 s1, s3
116; GFX9-NEXT:    s_mov_b32 s2, s4
117; GFX9-NEXT:    s_mov_b32 s3, s5
118; GFX9-NEXT:    s_mov_b32 s4, s6
119; GFX9-NEXT:    s_mov_b32 s5, s7
120; GFX9-NEXT:    s_mov_b32 s6, s8
121; GFX9-NEXT:    s_mov_b32 s7, s9
122; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 unorm d16
123; GFX9-NEXT:    s_waitcnt vmcnt(0)
124; GFX9-NEXT:    ; return to shader part epilog
125;
126; GFX10PLUS-LABEL: load_1d_f16_y:
127; GFX10PLUS:       ; %bb.0:
128; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
129; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
130; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
131; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
132; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
133; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
134; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
135; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
136; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm d16
137; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
138; GFX10PLUS-NEXT:    ; return to shader part epilog
139;
140; GFX12-LABEL: load_1d_f16_y:
141; GFX12:       ; %bb.0:
142; GFX12-NEXT:    s_mov_b32 s0, s2
143; GFX12-NEXT:    s_mov_b32 s1, s3
144; GFX12-NEXT:    s_mov_b32 s2, s4
145; GFX12-NEXT:    s_mov_b32 s3, s5
146; GFX12-NEXT:    s_mov_b32 s4, s6
147; GFX12-NEXT:    s_mov_b32 s5, s7
148; GFX12-NEXT:    s_mov_b32 s6, s8
149; GFX12-NEXT:    s_mov_b32 s7, s9
150; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D d16
151; GFX12-NEXT:    s_wait_loadcnt 0x0
152; GFX12-NEXT:    ; return to shader part epilog
153  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
154  ret half %v
155}
156
; Test: 1D image load returning a single half via
; @llvm.amdgcn.image.load.1d.half.i32. The first intrinsic operand (i32 4)
; appears as dmask:0x4 on the expected image_load, and every checked prefix
; expects the d16 modifier. Judging by the function name this presumably
; selects the Z component -- TODO confirm.
; All prefixes expect the eight inreg %rsrc dwords to be copied from s2-s9
; into s0-s7 before the load. The check lines below are autogenerated (see the
; NOTE at the top of the file); regenerate them instead of editing by hand.
157define amdgpu_ps half @load_1d_f16_z(<8 x i32> inreg %rsrc, i32 %s) {
158; GFX8-UNPACKED-LABEL: load_1d_f16_z:
159; GFX8-UNPACKED:       ; %bb.0:
160; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
161; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
162; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
163; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
164; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
165; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
166; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
167; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
168; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
169; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
170; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
171;
172; GFX8-PACKED-LABEL: load_1d_f16_z:
173; GFX8-PACKED:       ; %bb.0:
174; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
175; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
176; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
177; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
178; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
179; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
180; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
181; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
182; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
183; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
184; GFX8-PACKED-NEXT:    ; return to shader part epilog
185;
186; GFX9-LABEL: load_1d_f16_z:
187; GFX9:       ; %bb.0:
188; GFX9-NEXT:    s_mov_b32 s0, s2
189; GFX9-NEXT:    s_mov_b32 s1, s3
190; GFX9-NEXT:    s_mov_b32 s2, s4
191; GFX9-NEXT:    s_mov_b32 s3, s5
192; GFX9-NEXT:    s_mov_b32 s4, s6
193; GFX9-NEXT:    s_mov_b32 s5, s7
194; GFX9-NEXT:    s_mov_b32 s6, s8
195; GFX9-NEXT:    s_mov_b32 s7, s9
196; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 unorm d16
197; GFX9-NEXT:    s_waitcnt vmcnt(0)
198; GFX9-NEXT:    ; return to shader part epilog
199;
200; GFX10PLUS-LABEL: load_1d_f16_z:
201; GFX10PLUS:       ; %bb.0:
202; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
203; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
204; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
205; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
206; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
207; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
208; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
209; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
210; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D unorm d16
211; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
212; GFX10PLUS-NEXT:    ; return to shader part epilog
213;
214; GFX12-LABEL: load_1d_f16_z:
215; GFX12:       ; %bb.0:
216; GFX12-NEXT:    s_mov_b32 s0, s2
217; GFX12-NEXT:    s_mov_b32 s1, s3
218; GFX12-NEXT:    s_mov_b32 s2, s4
219; GFX12-NEXT:    s_mov_b32 s3, s5
220; GFX12-NEXT:    s_mov_b32 s4, s6
221; GFX12-NEXT:    s_mov_b32 s5, s7
222; GFX12-NEXT:    s_mov_b32 s6, s8
223; GFX12-NEXT:    s_mov_b32 s7, s9
224; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_1D d16
225; GFX12-NEXT:    s_wait_loadcnt 0x0
226; GFX12-NEXT:    ; return to shader part epilog
227  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 4, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
228  ret half %v
229}
230
; Test: 1D image load returning a single half via
; @llvm.amdgcn.image.load.1d.half.i32. The first intrinsic operand (i32 8)
; appears as dmask:0x8 on the expected image_load, and every checked prefix
; expects the d16 modifier. Judging by the function name this presumably
; selects the W component -- TODO confirm.
; All prefixes expect the eight inreg %rsrc dwords to be copied from s2-s9
; into s0-s7 before the load. The check lines below are autogenerated (see the
; NOTE at the top of the file); regenerate them instead of editing by hand.
231define amdgpu_ps half @load_1d_f16_w(<8 x i32> inreg %rsrc, i32 %s) {
232; GFX8-UNPACKED-LABEL: load_1d_f16_w:
233; GFX8-UNPACKED:       ; %bb.0:
234; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
235; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
236; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
237; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
238; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
239; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
240; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
241; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
242; GFX8-UNPACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
243; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
244; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
245;
246; GFX8-PACKED-LABEL: load_1d_f16_w:
247; GFX8-PACKED:       ; %bb.0:
248; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
249; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
250; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
251; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
252; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
253; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
254; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
255; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
256; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
257; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
258; GFX8-PACKED-NEXT:    ; return to shader part epilog
259;
260; GFX9-LABEL: load_1d_f16_w:
261; GFX9:       ; %bb.0:
262; GFX9-NEXT:    s_mov_b32 s0, s2
263; GFX9-NEXT:    s_mov_b32 s1, s3
264; GFX9-NEXT:    s_mov_b32 s2, s4
265; GFX9-NEXT:    s_mov_b32 s3, s5
266; GFX9-NEXT:    s_mov_b32 s4, s6
267; GFX9-NEXT:    s_mov_b32 s5, s7
268; GFX9-NEXT:    s_mov_b32 s6, s8
269; GFX9-NEXT:    s_mov_b32 s7, s9
270; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm d16
271; GFX9-NEXT:    s_waitcnt vmcnt(0)
272; GFX9-NEXT:    ; return to shader part epilog
273;
274; GFX10PLUS-LABEL: load_1d_f16_w:
275; GFX10PLUS:       ; %bb.0:
276; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
277; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
278; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
279; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
280; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
281; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
282; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
283; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
284; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm d16
285; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
286; GFX10PLUS-NEXT:    ; return to shader part epilog
287;
288; GFX12-LABEL: load_1d_f16_w:
289; GFX12:       ; %bb.0:
290; GFX12-NEXT:    s_mov_b32 s0, s2
291; GFX12-NEXT:    s_mov_b32 s1, s3
292; GFX12-NEXT:    s_mov_b32 s2, s4
293; GFX12-NEXT:    s_mov_b32 s3, s5
294; GFX12-NEXT:    s_mov_b32 s4, s6
295; GFX12-NEXT:    s_mov_b32 s5, s7
296; GFX12-NEXT:    s_mov_b32 s6, s8
297; GFX12-NEXT:    s_mov_b32 s7, s9
298; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D d16
299; GFX12-NEXT:    s_wait_loadcnt 0x0
300; GFX12-NEXT:    ; return to shader part epilog
301  %v = call half @llvm.amdgcn.image.load.1d.half.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
302  ret half %v
303}
304
; Test: 1D image load returning <2 x half> via
; @llvm.amdgcn.image.load.1d.v2f16.i32. The first intrinsic operand (i32 3)
; appears as dmask:0x3 on the expected image_load with the d16 modifier.
; Judging by the function name this presumably selects X and Y -- TODO confirm.
; For the GFX8-UNPACKED prefix the load is expected to write two VGPRs
; (v[0:1]), which the following v_and / v_lshlrev / v_or_b32_sdwa sequence
; merges into v0. All other prefixes expect a single-VGPR load with nothing
; between the wait and the epilog.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing by hand.
305define amdgpu_ps <2 x half> @load_1d_v2f16_xy(<8 x i32> inreg %rsrc, i32 %s) {
306; GFX8-UNPACKED-LABEL: load_1d_v2f16_xy:
307; GFX8-UNPACKED:       ; %bb.0:
308; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
309; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
310; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
311; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
312; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
313; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
314; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
315; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
316; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x3 unorm d16
317; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
318; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
319; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
320; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
321; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
322;
323; GFX8-PACKED-LABEL: load_1d_v2f16_xy:
324; GFX8-PACKED:       ; %bb.0:
325; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
326; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
327; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
328; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
329; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
330; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
331; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
332; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
333; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
334; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
335; GFX8-PACKED-NEXT:    ; return to shader part epilog
336;
337; GFX9-LABEL: load_1d_v2f16_xy:
338; GFX9:       ; %bb.0:
339; GFX9-NEXT:    s_mov_b32 s0, s2
340; GFX9-NEXT:    s_mov_b32 s1, s3
341; GFX9-NEXT:    s_mov_b32 s2, s4
342; GFX9-NEXT:    s_mov_b32 s3, s5
343; GFX9-NEXT:    s_mov_b32 s4, s6
344; GFX9-NEXT:    s_mov_b32 s5, s7
345; GFX9-NEXT:    s_mov_b32 s6, s8
346; GFX9-NEXT:    s_mov_b32 s7, s9
347; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 unorm d16
348; GFX9-NEXT:    s_waitcnt vmcnt(0)
349; GFX9-NEXT:    ; return to shader part epilog
350;
351; GFX10PLUS-LABEL: load_1d_v2f16_xy:
352; GFX10PLUS:       ; %bb.0:
353; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
354; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
355; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
356; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
357; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
358; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
359; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
360; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
361; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm d16
362; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
363; GFX10PLUS-NEXT:    ; return to shader part epilog
364;
365; GFX12-LABEL: load_1d_v2f16_xy:
366; GFX12:       ; %bb.0:
367; GFX12-NEXT:    s_mov_b32 s0, s2
368; GFX12-NEXT:    s_mov_b32 s1, s3
369; GFX12-NEXT:    s_mov_b32 s2, s4
370; GFX12-NEXT:    s_mov_b32 s3, s5
371; GFX12-NEXT:    s_mov_b32 s4, s6
372; GFX12-NEXT:    s_mov_b32 s5, s7
373; GFX12-NEXT:    s_mov_b32 s6, s8
374; GFX12-NEXT:    s_mov_b32 s7, s9
375; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D d16
376; GFX12-NEXT:    s_wait_loadcnt 0x0
377; GFX12-NEXT:    ; return to shader part epilog
378  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
379  ret <2 x half> %v
380}
381
; Test: 1D image load returning <2 x half> via
; @llvm.amdgcn.image.load.1d.v2f16.i32. The first intrinsic operand (i32 5)
; appears as dmask:0x5 on the expected image_load with the d16 modifier.
; Judging by the function name this presumably selects X and Z -- TODO confirm.
; For the GFX8-UNPACKED prefix the load is expected to write two VGPRs
; (v[0:1]), which the following v_and / v_lshlrev / v_or_b32_sdwa sequence
; merges into v0. All other prefixes expect a single-VGPR load with nothing
; between the wait and the epilog.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing by hand.
382define amdgpu_ps <2 x half> @load_1d_v2f16_xz(<8 x i32> inreg %rsrc, i32 %s) {
383; GFX8-UNPACKED-LABEL: load_1d_v2f16_xz:
384; GFX8-UNPACKED:       ; %bb.0:
385; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
386; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
387; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
388; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
389; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
390; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
391; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
392; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
393; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x5 unorm d16
394; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
395; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
396; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
397; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
398; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
399;
400; GFX8-PACKED-LABEL: load_1d_v2f16_xz:
401; GFX8-PACKED:       ; %bb.0:
402; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
403; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
404; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
405; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
406; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
407; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
408; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
409; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
410; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
411; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
412; GFX8-PACKED-NEXT:    ; return to shader part epilog
413;
414; GFX9-LABEL: load_1d_v2f16_xz:
415; GFX9:       ; %bb.0:
416; GFX9-NEXT:    s_mov_b32 s0, s2
417; GFX9-NEXT:    s_mov_b32 s1, s3
418; GFX9-NEXT:    s_mov_b32 s2, s4
419; GFX9-NEXT:    s_mov_b32 s3, s5
420; GFX9-NEXT:    s_mov_b32 s4, s6
421; GFX9-NEXT:    s_mov_b32 s5, s7
422; GFX9-NEXT:    s_mov_b32 s6, s8
423; GFX9-NEXT:    s_mov_b32 s7, s9
424; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 unorm d16
425; GFX9-NEXT:    s_waitcnt vmcnt(0)
426; GFX9-NEXT:    ; return to shader part epilog
427;
428; GFX10PLUS-LABEL: load_1d_v2f16_xz:
429; GFX10PLUS:       ; %bb.0:
430; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
431; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
432; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
433; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
434; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
435; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
436; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
437; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
438; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D unorm d16
439; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
440; GFX10PLUS-NEXT:    ; return to shader part epilog
441;
442; GFX12-LABEL: load_1d_v2f16_xz:
443; GFX12:       ; %bb.0:
444; GFX12-NEXT:    s_mov_b32 s0, s2
445; GFX12-NEXT:    s_mov_b32 s1, s3
446; GFX12-NEXT:    s_mov_b32 s2, s4
447; GFX12-NEXT:    s_mov_b32 s3, s5
448; GFX12-NEXT:    s_mov_b32 s4, s6
449; GFX12-NEXT:    s_mov_b32 s5, s7
450; GFX12-NEXT:    s_mov_b32 s6, s8
451; GFX12-NEXT:    s_mov_b32 s7, s9
452; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x5 dim:SQ_RSRC_IMG_1D d16
453; GFX12-NEXT:    s_wait_loadcnt 0x0
454; GFX12-NEXT:    ; return to shader part epilog
455  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 5, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
456  ret <2 x half> %v
457}
458
; Test: 1D image load returning <2 x half> via
; @llvm.amdgcn.image.load.1d.v2f16.i32. The first intrinsic operand (i32 9)
; appears as dmask:0x9 on the expected image_load with the d16 modifier.
; Judging by the function name this presumably selects X and W -- TODO confirm.
; For the GFX8-UNPACKED prefix the load is expected to write two VGPRs
; (v[0:1]), which the following v_and / v_lshlrev / v_or_b32_sdwa sequence
; merges into v0. All other prefixes expect a single-VGPR load with nothing
; between the wait and the epilog.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing by hand.
459define amdgpu_ps <2 x half> @load_1d_v2f16_xw(<8 x i32> inreg %rsrc, i32 %s) {
460; GFX8-UNPACKED-LABEL: load_1d_v2f16_xw:
461; GFX8-UNPACKED:       ; %bb.0:
462; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
463; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
464; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
465; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
466; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
467; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
468; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
469; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
470; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm d16
471; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
472; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
473; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
474; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
475; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
476;
477; GFX8-PACKED-LABEL: load_1d_v2f16_xw:
478; GFX8-PACKED:       ; %bb.0:
479; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
480; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
481; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
482; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
483; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
484; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
485; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
486; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
487; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
488; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
489; GFX8-PACKED-NEXT:    ; return to shader part epilog
490;
491; GFX9-LABEL: load_1d_v2f16_xw:
492; GFX9:       ; %bb.0:
493; GFX9-NEXT:    s_mov_b32 s0, s2
494; GFX9-NEXT:    s_mov_b32 s1, s3
495; GFX9-NEXT:    s_mov_b32 s2, s4
496; GFX9-NEXT:    s_mov_b32 s3, s5
497; GFX9-NEXT:    s_mov_b32 s4, s6
498; GFX9-NEXT:    s_mov_b32 s5, s7
499; GFX9-NEXT:    s_mov_b32 s6, s8
500; GFX9-NEXT:    s_mov_b32 s7, s9
501; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 unorm d16
502; GFX9-NEXT:    s_waitcnt vmcnt(0)
503; GFX9-NEXT:    ; return to shader part epilog
504;
505; GFX10PLUS-LABEL: load_1d_v2f16_xw:
506; GFX10PLUS:       ; %bb.0:
507; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
508; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
509; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
510; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
511; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
512; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
513; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
514; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
515; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm d16
516; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
517; GFX10PLUS-NEXT:    ; return to shader part epilog
518;
519; GFX12-LABEL: load_1d_v2f16_xw:
520; GFX12:       ; %bb.0:
521; GFX12-NEXT:    s_mov_b32 s0, s2
522; GFX12-NEXT:    s_mov_b32 s1, s3
523; GFX12-NEXT:    s_mov_b32 s2, s4
524; GFX12-NEXT:    s_mov_b32 s3, s5
525; GFX12-NEXT:    s_mov_b32 s4, s6
526; GFX12-NEXT:    s_mov_b32 s5, s7
527; GFX12-NEXT:    s_mov_b32 s6, s8
528; GFX12-NEXT:    s_mov_b32 s7, s9
529; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D d16
530; GFX12-NEXT:    s_wait_loadcnt 0x0
531; GFX12-NEXT:    ; return to shader part epilog
532  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
533  ret <2 x half> %v
534}
535
; Test: 1D image load returning <2 x half> via
; @llvm.amdgcn.image.load.1d.v2f16.i32. The first intrinsic operand (i32 6)
; appears as dmask:0x6 on the expected image_load with the d16 modifier.
; Judging by the function name this presumably selects Y and Z -- TODO confirm.
; For the GFX8-UNPACKED prefix the load is expected to write two VGPRs
; (v[0:1]), which the following v_and / v_lshlrev / v_or_b32_sdwa sequence
; merges into v0. All other prefixes expect a single-VGPR load with nothing
; between the wait and the epilog.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing by hand.
536define amdgpu_ps <2 x half> @load_1d_v2f16_yz(<8 x i32> inreg %rsrc, i32 %s) {
537; GFX8-UNPACKED-LABEL: load_1d_v2f16_yz:
538; GFX8-UNPACKED:       ; %bb.0:
539; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
540; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
541; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
542; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
543; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
544; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
545; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
546; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
547; GFX8-UNPACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x6 unorm d16
548; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
549; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
550; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
551; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
552; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
553;
554; GFX8-PACKED-LABEL: load_1d_v2f16_yz:
555; GFX8-PACKED:       ; %bb.0:
556; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
557; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
558; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
559; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
560; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
561; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
562; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
563; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
564; GFX8-PACKED-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
565; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
566; GFX8-PACKED-NEXT:    ; return to shader part epilog
567;
568; GFX9-LABEL: load_1d_v2f16_yz:
569; GFX9:       ; %bb.0:
570; GFX9-NEXT:    s_mov_b32 s0, s2
571; GFX9-NEXT:    s_mov_b32 s1, s3
572; GFX9-NEXT:    s_mov_b32 s2, s4
573; GFX9-NEXT:    s_mov_b32 s3, s5
574; GFX9-NEXT:    s_mov_b32 s4, s6
575; GFX9-NEXT:    s_mov_b32 s5, s7
576; GFX9-NEXT:    s_mov_b32 s6, s8
577; GFX9-NEXT:    s_mov_b32 s7, s9
578; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 unorm d16
579; GFX9-NEXT:    s_waitcnt vmcnt(0)
580; GFX9-NEXT:    ; return to shader part epilog
581;
582; GFX10PLUS-LABEL: load_1d_v2f16_yz:
583; GFX10PLUS:       ; %bb.0:
584; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
585; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
586; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
587; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
588; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
589; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
590; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
591; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
592; GFX10PLUS-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm d16
593; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
594; GFX10PLUS-NEXT:    ; return to shader part epilog
595;
596; GFX12-LABEL: load_1d_v2f16_yz:
597; GFX12:       ; %bb.0:
598; GFX12-NEXT:    s_mov_b32 s0, s2
599; GFX12-NEXT:    s_mov_b32 s1, s3
600; GFX12-NEXT:    s_mov_b32 s2, s4
601; GFX12-NEXT:    s_mov_b32 s3, s5
602; GFX12-NEXT:    s_mov_b32 s4, s6
603; GFX12-NEXT:    s_mov_b32 s5, s7
604; GFX12-NEXT:    s_mov_b32 s6, s8
605; GFX12-NEXT:    s_mov_b32 s7, s9
606; GFX12-NEXT:    image_load v0, v0, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16
607; GFX12-NEXT:    s_wait_loadcnt 0x0
608; GFX12-NEXT:    ; return to shader part epilog
609  %v = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
610  ret <2 x half> %v
611}
612
; Test: 1D image load returning <3 x half> via
; @llvm.amdgcn.image.load.1d.v3f16.i32. The first intrinsic operand (i32 7)
; appears as dmask:0x7 on the expected image_load with the d16 modifier.
; Judging by the function name this presumably selects X, Y and Z -- TODO
; confirm.
; Expected post-load handling differs per prefix:
;  - GFX8-UNPACKED loads three VGPRs (v[0:2]); the second is masked and shifted
;    and then OR-ed into v0, while the third is masked to 16 bits into v1.
;  - GFX8-PACKED loads two VGPRs (v[0:1]) and masks v1 with 0xffff.
;  - The GFX9, GFX10PLUS and GFX12 prefixes load v[0:1] with nothing between
;    the wait and the epilog.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing by hand.
613define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
614; GFX8-UNPACKED-LABEL: load_1d_v3f16_xyz:
615; GFX8-UNPACKED:       ; %bb.0:
616; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
617; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
618; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
619; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
620; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
621; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
622; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
623; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
624; GFX8-UNPACKED-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16
625; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
626; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, 0xffff, v1
627; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v2
628; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
629; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
630; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
631;
632; GFX8-PACKED-LABEL: load_1d_v3f16_xyz:
633; GFX8-PACKED:       ; %bb.0:
634; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
635; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
636; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
637; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
638; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
639; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
640; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
641; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
642; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
643; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
644; GFX8-PACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
645; GFX8-PACKED-NEXT:    ; return to shader part epilog
646;
647; GFX9-LABEL: load_1d_v3f16_xyz:
648; GFX9:       ; %bb.0:
649; GFX9-NEXT:    s_mov_b32 s0, s2
650; GFX9-NEXT:    s_mov_b32 s1, s3
651; GFX9-NEXT:    s_mov_b32 s2, s4
652; GFX9-NEXT:    s_mov_b32 s3, s5
653; GFX9-NEXT:    s_mov_b32 s4, s6
654; GFX9-NEXT:    s_mov_b32 s5, s7
655; GFX9-NEXT:    s_mov_b32 s6, s8
656; GFX9-NEXT:    s_mov_b32 s7, s9
657; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16
658; GFX9-NEXT:    s_waitcnt vmcnt(0)
659; GFX9-NEXT:    ; return to shader part epilog
660;
661; GFX10PLUS-LABEL: load_1d_v3f16_xyz:
662; GFX10PLUS:       ; %bb.0:
663; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
664; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
665; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
666; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
667; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
668; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
669; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
670; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
671; GFX10PLUS-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
672; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
673; GFX10PLUS-NEXT:    ; return to shader part epilog
674;
675; GFX12-LABEL: load_1d_v3f16_xyz:
676; GFX12:       ; %bb.0:
677; GFX12-NEXT:    s_mov_b32 s0, s2
678; GFX12-NEXT:    s_mov_b32 s1, s3
679; GFX12-NEXT:    s_mov_b32 s2, s4
680; GFX12-NEXT:    s_mov_b32 s3, s5
681; GFX12-NEXT:    s_mov_b32 s4, s6
682; GFX12-NEXT:    s_mov_b32 s5, s7
683; GFX12-NEXT:    s_mov_b32 s6, s8
684; GFX12-NEXT:    s_mov_b32 s7, s9
685; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D d16
686; GFX12-NEXT:    s_wait_loadcnt 0x0
687; GFX12-NEXT:    ; return to shader part epilog
688  %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
689  ret <3 x half> %v
690}
691
; Test: 1D image load returning <4 x half> via
; @llvm.amdgcn.image.load.1d.v4f16.i32. The first intrinsic operand (i32 15)
; appears as dmask:0xf on the expected image_load with the d16 modifier.
; Judging by the function name this presumably selects all four components
; -- TODO confirm.
; For the GFX8-UNPACKED prefix the load is expected to write four VGPRs
; (v[0:3]); the and / shift / or sequences then merge v0 with v1 into v0 and
; v2 with v3 into v1. All other prefixes expect a two-VGPR load (v[0:1]) with
; nothing between the wait and the epilog.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them instead of editing by hand.
692define amdgpu_ps <4 x half> @load_1d_v4f16_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
693; GFX8-UNPACKED-LABEL: load_1d_v4f16_xyzw:
694; GFX8-UNPACKED:       ; %bb.0:
695; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
696; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
697; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
698; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
699; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
700; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
701; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
702; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
703; GFX8-UNPACKED-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm d16
704; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
705; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v1, 0xffff, v1
706; GFX8-UNPACKED-NEXT:    v_and_b32_e32 v3, 0xffff, v3
707; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
708; GFX8-UNPACKED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
709; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
710; GFX8-UNPACKED-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
711; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
712;
713; GFX8-PACKED-LABEL: load_1d_v4f16_xyzw:
714; GFX8-PACKED:       ; %bb.0:
715; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
716; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
717; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
718; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
719; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
720; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
721; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
722; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
723; GFX8-PACKED-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
724; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
725; GFX8-PACKED-NEXT:    ; return to shader part epilog
726;
727; GFX9-LABEL: load_1d_v4f16_xyzw:
728; GFX9:       ; %bb.0:
729; GFX9-NEXT:    s_mov_b32 s0, s2
730; GFX9-NEXT:    s_mov_b32 s1, s3
731; GFX9-NEXT:    s_mov_b32 s2, s4
732; GFX9-NEXT:    s_mov_b32 s3, s5
733; GFX9-NEXT:    s_mov_b32 s4, s6
734; GFX9-NEXT:    s_mov_b32 s5, s7
735; GFX9-NEXT:    s_mov_b32 s6, s8
736; GFX9-NEXT:    s_mov_b32 s7, s9
737; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf unorm d16
738; GFX9-NEXT:    s_waitcnt vmcnt(0)
739; GFX9-NEXT:    ; return to shader part epilog
740;
741; GFX10PLUS-LABEL: load_1d_v4f16_xyzw:
742; GFX10PLUS:       ; %bb.0:
743; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
744; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
745; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
746; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
747; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
748; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
749; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
750; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
751; GFX10PLUS-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16
752; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
753; GFX10PLUS-NEXT:    ; return to shader part epilog
754;
755; GFX12-LABEL: load_1d_v4f16_xyzw:
756; GFX12:       ; %bb.0:
757; GFX12-NEXT:    s_mov_b32 s0, s2
758; GFX12-NEXT:    s_mov_b32 s1, s3
759; GFX12-NEXT:    s_mov_b32 s2, s4
760; GFX12-NEXT:    s_mov_b32 s3, s5
761; GFX12-NEXT:    s_mov_b32 s4, s6
762; GFX12-NEXT:    s_mov_b32 s5, s7
763; GFX12-NEXT:    s_mov_b32 s6, s8
764; GFX12-NEXT:    s_mov_b32 s7, s9
765; GFX12-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16
766; GFX12-NEXT:    s_wait_loadcnt 0x0
767; GFX12-NEXT:    ; return to shader part epilog
768  %v = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
769  ret <4 x half> %v
770}
771
; Scalar d16 image load with the fourth intrinsic operand set to 1 and dmask 1.
; The intrinsic returns a { half, i32 } pair; only the i32 (struct index 1) is
; used, bitcast to float and returned. In the expected code for every run the
; registers v1 and v2 are zero-filled first, the load writes v[1:2] with the
; tfe modifier, and v2 is then copied into v0.
772define amdgpu_ps float @load_1d_f16_tfe_dmask_x(<8 x i32> inreg %rsrc, i32 %s) {
773; GFX8-UNPACKED-LABEL: load_1d_f16_tfe_dmask_x:
774; GFX8-UNPACKED:       ; %bb.0:
775; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
776; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
777; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
778; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
779; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
780; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
781; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
782; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
783; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
784; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
785; GFX8-UNPACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
786; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
787; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
788; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
789;
790; GFX8-PACKED-LABEL: load_1d_f16_tfe_dmask_x:
791; GFX8-PACKED:       ; %bb.0:
792; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
793; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
794; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
795; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
796; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
797; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
798; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
799; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
800; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
801; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
802; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
803; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
804; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
805; GFX8-PACKED-NEXT:    ; return to shader part epilog
806;
807; GFX9-LABEL: load_1d_f16_tfe_dmask_x:
808; GFX9:       ; %bb.0:
809; GFX9-NEXT:    v_mov_b32_e32 v1, 0
810; GFX9-NEXT:    s_mov_b32 s0, s2
811; GFX9-NEXT:    s_mov_b32 s1, s3
812; GFX9-NEXT:    s_mov_b32 s2, s4
813; GFX9-NEXT:    s_mov_b32 s3, s5
814; GFX9-NEXT:    s_mov_b32 s4, s6
815; GFX9-NEXT:    s_mov_b32 s5, s7
816; GFX9-NEXT:    s_mov_b32 s6, s8
817; GFX9-NEXT:    s_mov_b32 s7, s9
818; GFX9-NEXT:    v_mov_b32_e32 v2, v1
819; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe d16
820; GFX9-NEXT:    s_waitcnt vmcnt(0)
821; GFX9-NEXT:    v_mov_b32_e32 v0, v2
822; GFX9-NEXT:    ; return to shader part epilog
823;
824; GFX10PLUS-LABEL: load_1d_f16_tfe_dmask_x:
825; GFX10PLUS:       ; %bb.0:
826; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
827; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
828; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
829; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
830; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
831; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
832; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
833; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
834; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
835; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
836; GFX10PLUS-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe d16
837; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
838; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v2
839; GFX10PLUS-NEXT:    ; return to shader part epilog
840;
841; GFX12-LABEL: load_1d_f16_tfe_dmask_x:
842; GFX12:       ; %bb.0:
843; GFX12-NEXT:    v_mov_b32_e32 v1, 0
844; GFX12-NEXT:    s_mov_b32 s0, s2
845; GFX12-NEXT:    s_mov_b32 s1, s3
846; GFX12-NEXT:    s_mov_b32 s2, s4
847; GFX12-NEXT:    s_mov_b32 s3, s5
848; GFX12-NEXT:    s_mov_b32 s4, s6
849; GFX12-NEXT:    s_mov_b32 s5, s7
850; GFX12-NEXT:    s_mov_b32 s6, s8
851; GFX12-NEXT:    s_mov_b32 s7, s9
852; GFX12-NEXT:    v_mov_b32_e32 v2, v1
853; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16
854; GFX12-NEXT:    s_wait_loadcnt 0x0
855; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
856; GFX12-NEXT:    v_mov_b32_e32 v0, v2
857; GFX12-NEXT:    ; return to shader part epilog
858  %v = call { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
859  %v.err = extractvalue { half, i32 } %v, 1
860  %vv = bitcast i32 %v.err to float
861  ret float %vv
862}
863
; Two-component d16 image load with the fourth intrinsic operand set to 1 and
; dmask 3. The intrinsic returns a { <2 x half>, i32 } pair; only the i32
; (struct index 1) is used, bitcast to float and returned. The expected code
; differs by run: the GFX8 unpacked run zero-fills v1-v3, loads v[1:3] and
; returns v3, while all other runs zero-fill v1-v2, load v[1:2] and return v2.
864define amdgpu_ps float @load_1d_v2f16_tfe_dmask_xy(<8 x i32> inreg %rsrc, i32 %s) {
865; GFX8-UNPACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
866; GFX8-UNPACKED:       ; %bb.0:
867; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
868; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
869; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
870; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
871; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
872; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
873; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
874; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
875; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
876; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
877; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
878; GFX8-UNPACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x3 unorm tfe d16
879; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
880; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3
881; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
882;
883; GFX8-PACKED-LABEL: load_1d_v2f16_tfe_dmask_xy:
884; GFX8-PACKED:       ; %bb.0:
885; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
886; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
887; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
888; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
889; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
890; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
891; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
892; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
893; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
894; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
895; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
896; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
897; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
898; GFX8-PACKED-NEXT:    ; return to shader part epilog
899;
900; GFX9-LABEL: load_1d_v2f16_tfe_dmask_xy:
901; GFX9:       ; %bb.0:
902; GFX9-NEXT:    v_mov_b32_e32 v1, 0
903; GFX9-NEXT:    s_mov_b32 s0, s2
904; GFX9-NEXT:    s_mov_b32 s1, s3
905; GFX9-NEXT:    s_mov_b32 s2, s4
906; GFX9-NEXT:    s_mov_b32 s3, s5
907; GFX9-NEXT:    s_mov_b32 s4, s6
908; GFX9-NEXT:    s_mov_b32 s5, s7
909; GFX9-NEXT:    s_mov_b32 s6, s8
910; GFX9-NEXT:    s_mov_b32 s7, s9
911; GFX9-NEXT:    v_mov_b32_e32 v2, v1
912; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 unorm tfe d16
913; GFX9-NEXT:    s_waitcnt vmcnt(0)
914; GFX9-NEXT:    v_mov_b32_e32 v0, v2
915; GFX9-NEXT:    ; return to shader part epilog
916;
917; GFX10PLUS-LABEL: load_1d_v2f16_tfe_dmask_xy:
918; GFX10PLUS:       ; %bb.0:
919; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
920; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
921; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
922; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
923; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
924; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
925; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
926; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
927; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
928; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
929; GFX10PLUS-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm tfe d16
930; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
931; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v2
932; GFX10PLUS-NEXT:    ; return to shader part epilog
933;
934; GFX12-LABEL: load_1d_v2f16_tfe_dmask_xy:
935; GFX12:       ; %bb.0:
936; GFX12-NEXT:    v_mov_b32_e32 v1, 0
937; GFX12-NEXT:    s_mov_b32 s0, s2
938; GFX12-NEXT:    s_mov_b32 s1, s3
939; GFX12-NEXT:    s_mov_b32 s2, s4
940; GFX12-NEXT:    s_mov_b32 s3, s5
941; GFX12-NEXT:    s_mov_b32 s4, s6
942; GFX12-NEXT:    s_mov_b32 s5, s7
943; GFX12-NEXT:    s_mov_b32 s6, s8
944; GFX12-NEXT:    s_mov_b32 s7, s9
945; GFX12-NEXT:    v_mov_b32_e32 v2, v1
946; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe d16
947; GFX12-NEXT:    s_wait_loadcnt 0x0
948; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
949; GFX12-NEXT:    v_mov_b32_e32 v0, v2
950; GFX12-NEXT:    ; return to shader part epilog
951  %v = call { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
952  %v.err = extractvalue { <2 x half>, i32 } %v, 1
953  %vv = bitcast i32 %v.err to float
954  ret float %vv
955}
956
; Three-component d16 image load with the fourth intrinsic operand set to 1 and
; dmask 7. The intrinsic returns a { <3 x half>, i32 } pair; only the i32
; (struct index 1) is used, bitcast to float and returned. The expected code
; differs by run: the GFX8 unpacked run zero-fills v1-v4, loads v[1:4] and
; returns v4, while all other runs zero-fill v1-v3, load v[1:3] and return v3.
; The GFX12 run zero-fills v2 and v3 with a single v_dual_mov_b32.
957define amdgpu_ps float @load_1d_v3f16_tfe_dmask_xyz(<8 x i32> inreg %rsrc, i32 %s) {
958; GFX8-UNPACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
959; GFX8-UNPACKED:       ; %bb.0:
960; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
961; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
962; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
963; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
964; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
965; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
966; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
967; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
968; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
969; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
970; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v3, v1
971; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v4, v1
972; GFX8-UNPACKED-NEXT:    image_load v[1:4], v0, s[0:7] dmask:0x7 unorm tfe d16
973; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
974; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v4
975; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
976;
977; GFX8-PACKED-LABEL: load_1d_v3f16_tfe_dmask_xyz:
978; GFX8-PACKED:       ; %bb.0:
979; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
980; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
981; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
982; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
983; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
984; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
985; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
986; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
987; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
988; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
989; GFX8-PACKED-NEXT:    v_mov_b32_e32 v3, v1
990; GFX8-PACKED-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
991; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
992; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v3
993; GFX8-PACKED-NEXT:    ; return to shader part epilog
994;
995; GFX9-LABEL: load_1d_v3f16_tfe_dmask_xyz:
996; GFX9:       ; %bb.0:
997; GFX9-NEXT:    v_mov_b32_e32 v1, 0
998; GFX9-NEXT:    s_mov_b32 s0, s2
999; GFX9-NEXT:    s_mov_b32 s1, s3
1000; GFX9-NEXT:    s_mov_b32 s2, s4
1001; GFX9-NEXT:    s_mov_b32 s3, s5
1002; GFX9-NEXT:    s_mov_b32 s4, s6
1003; GFX9-NEXT:    s_mov_b32 s5, s7
1004; GFX9-NEXT:    s_mov_b32 s6, s8
1005; GFX9-NEXT:    s_mov_b32 s7, s9
1006; GFX9-NEXT:    v_mov_b32_e32 v2, v1
1007; GFX9-NEXT:    v_mov_b32_e32 v3, v1
1008; GFX9-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 unorm tfe d16
1009; GFX9-NEXT:    s_waitcnt vmcnt(0)
1010; GFX9-NEXT:    v_mov_b32_e32 v0, v3
1011; GFX9-NEXT:    ; return to shader part epilog
1012;
1013; GFX10PLUS-LABEL: load_1d_v3f16_tfe_dmask_xyz:
1014; GFX10PLUS:       ; %bb.0:
1015; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
1016; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1017; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1018; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1019; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1020; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1021; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1022; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1023; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1024; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
1025; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v1
1026; GFX10PLUS-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe d16
1027; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1028; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v3
1029; GFX10PLUS-NEXT:    ; return to shader part epilog
1030;
1031; GFX12-LABEL: load_1d_v3f16_tfe_dmask_xyz:
1032; GFX12:       ; %bb.0:
1033; GFX12-NEXT:    v_mov_b32_e32 v1, 0
1034; GFX12-NEXT:    s_mov_b32 s0, s2
1035; GFX12-NEXT:    s_mov_b32 s1, s3
1036; GFX12-NEXT:    s_mov_b32 s2, s4
1037; GFX12-NEXT:    s_mov_b32 s3, s5
1038; GFX12-NEXT:    s_mov_b32 s4, s6
1039; GFX12-NEXT:    s_mov_b32 s5, s7
1040; GFX12-NEXT:    s_mov_b32 s6, s8
1041; GFX12-NEXT:    s_mov_b32 s7, s9
1042; GFX12-NEXT:    v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v3, v1
1043; GFX12-NEXT:    image_load v[1:3], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D tfe d16
1044; GFX12-NEXT:    s_wait_loadcnt 0x0
1045; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1046; GFX12-NEXT:    v_mov_b32_e32 v0, v3
1047; GFX12-NEXT:    ; return to shader part epilog
1048  %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
1049  %v.err = extractvalue { <3 x half>, i32 } %v, 1
1050  %vv = bitcast i32 %v.err to float
1051  ret float %vv
1052}
1053
; d16 image load returning { <4 x half>, i32 } with the fourth intrinsic operand
; set to 1. Only the i32 (struct index 1) is used, bitcast to float and
; returned. The call passes dmask 16, which appears in the expected code as
; dmask:0x10; every run zero-fills v1-v2, loads v[1:2] and returns v2.
; NOTE(review) - the function name says xyzw and the non-TFE v4f16 test in this
; file passes dmask 15 (0xf), so 16 here is presumably a typo for 15. The
; expected output has the same register shape as the single-channel dmask_x
; test. Confirm the intent before changing it; the assertions would have to be
; regenerated with utils/update_llc_test_checks.py afterwards.
1054define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32 %s) {
1055; GFX8-UNPACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
1056; GFX8-UNPACKED:       ; %bb.0:
1057; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v1, 0
1058; GFX8-UNPACKED-NEXT:    s_mov_b32 s0, s2
1059; GFX8-UNPACKED-NEXT:    s_mov_b32 s1, s3
1060; GFX8-UNPACKED-NEXT:    s_mov_b32 s2, s4
1061; GFX8-UNPACKED-NEXT:    s_mov_b32 s3, s5
1062; GFX8-UNPACKED-NEXT:    s_mov_b32 s4, s6
1063; GFX8-UNPACKED-NEXT:    s_mov_b32 s5, s7
1064; GFX8-UNPACKED-NEXT:    s_mov_b32 s6, s8
1065; GFX8-UNPACKED-NEXT:    s_mov_b32 s7, s9
1066; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v2, v1
1067; GFX8-UNPACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
1068; GFX8-UNPACKED-NEXT:    s_waitcnt vmcnt(0)
1069; GFX8-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2
1070; GFX8-UNPACKED-NEXT:    ; return to shader part epilog
1071;
1072; GFX8-PACKED-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
1073; GFX8-PACKED:       ; %bb.0:
1074; GFX8-PACKED-NEXT:    v_mov_b32_e32 v1, 0
1075; GFX8-PACKED-NEXT:    s_mov_b32 s0, s2
1076; GFX8-PACKED-NEXT:    s_mov_b32 s1, s3
1077; GFX8-PACKED-NEXT:    s_mov_b32 s2, s4
1078; GFX8-PACKED-NEXT:    s_mov_b32 s3, s5
1079; GFX8-PACKED-NEXT:    s_mov_b32 s4, s6
1080; GFX8-PACKED-NEXT:    s_mov_b32 s5, s7
1081; GFX8-PACKED-NEXT:    s_mov_b32 s6, s8
1082; GFX8-PACKED-NEXT:    s_mov_b32 s7, s9
1083; GFX8-PACKED-NEXT:    v_mov_b32_e32 v2, v1
1084; GFX8-PACKED-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
1085; GFX8-PACKED-NEXT:    s_waitcnt vmcnt(0)
1086; GFX8-PACKED-NEXT:    v_mov_b32_e32 v0, v2
1087; GFX8-PACKED-NEXT:    ; return to shader part epilog
1088;
1089; GFX9-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
1090; GFX9:       ; %bb.0:
1091; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1092; GFX9-NEXT:    s_mov_b32 s0, s2
1093; GFX9-NEXT:    s_mov_b32 s1, s3
1094; GFX9-NEXT:    s_mov_b32 s2, s4
1095; GFX9-NEXT:    s_mov_b32 s3, s5
1096; GFX9-NEXT:    s_mov_b32 s4, s6
1097; GFX9-NEXT:    s_mov_b32 s5, s7
1098; GFX9-NEXT:    s_mov_b32 s6, s8
1099; GFX9-NEXT:    s_mov_b32 s7, s9
1100; GFX9-NEXT:    v_mov_b32_e32 v2, v1
1101; GFX9-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 unorm tfe d16
1102; GFX9-NEXT:    s_waitcnt vmcnt(0)
1103; GFX9-NEXT:    v_mov_b32_e32 v0, v2
1104; GFX9-NEXT:    ; return to shader part epilog
1105;
1106; GFX10PLUS-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
1107; GFX10PLUS:       ; %bb.0:
1108; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, 0
1109; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1110; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1111; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1112; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1113; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1114; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1115; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1116; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1117; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v1
1118; GFX10PLUS-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D unorm tfe d16
1119; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1120; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v2
1121; GFX10PLUS-NEXT:    ; return to shader part epilog
1122;
1123; GFX12-LABEL: load_1d_v4f16_tfe_dmask_xyzw:
1124; GFX12:       ; %bb.0:
1125; GFX12-NEXT:    v_mov_b32_e32 v1, 0
1126; GFX12-NEXT:    s_mov_b32 s0, s2
1127; GFX12-NEXT:    s_mov_b32 s1, s3
1128; GFX12-NEXT:    s_mov_b32 s2, s4
1129; GFX12-NEXT:    s_mov_b32 s3, s5
1130; GFX12-NEXT:    s_mov_b32 s4, s6
1131; GFX12-NEXT:    s_mov_b32 s5, s7
1132; GFX12-NEXT:    s_mov_b32 s6, s8
1133; GFX12-NEXT:    s_mov_b32 s7, s9
1134; GFX12-NEXT:    v_mov_b32_e32 v2, v1
1135; GFX12-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x10 dim:SQ_RSRC_IMG_1D tfe d16
1136; GFX12-NEXT:    s_wait_loadcnt 0x0
1137; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1138; GFX12-NEXT:    v_mov_b32_e32 v0, v2
1139; GFX12-NEXT:    ; return to shader part epilog
1140  %v = call { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 16, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
1141  %v.err = extractvalue { <4 x half>, i32 } %v, 1
1142  %vv = bitcast i32 %v.err to float
1143  ret float %vv
1144}
1145
; Declarations of the 1D image-load intrinsics. Every signature is
; (i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg); the visible callers pass
; the dmask first, then the coordinate %s and the %rsrc descriptor.
;
; Forms returning 1 to 4 half components directly.
; NOTE(review) - the scalar declaration is suffixed "half" while the vector ones
; use "vNf16"; its call site is outside this section, so confirm the two agree.
1146declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1147declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1148declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1149declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1150
; Forms returning a { data, i32 } struct; the tfe tests call these with the
; fourth operand set to 1 and read the i32 member.
1151declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1152declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1153declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1154declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
1155
; Attribute group #0 is attached to every declaration above.
1156attributes #0 = { nounwind readonly }
1157