xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
5
; sample_d_1d: pixel-shader test that calls llvm.amdgcn.image.sample.d.1d with
; two half derivatives (%dsdh, %dsdv) and one float coordinate (%s), dmask 15.
; The assertions expect a single image_sample_d_g16 (dmask:0xf, dim 1D) on all
; three targets: a contiguous v[0:2] address on gfx1010 and gfx1100, and a
; bracketed per-register list [v0, v1, v2] on gfx1200, each followed by that
; target's wait instruction (s_waitcnt vmcnt(0) or s_wait_samplecnt 0x0).
6define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
7; GFX10-LABEL: sample_d_1d:
8; GFX10:       ; %bb.0: ; %main_body
9; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
10; GFX10-NEXT:    s_waitcnt vmcnt(0)
11; GFX10-NEXT:    ; return to shader part epilog
12;
13; GFX11-LABEL: sample_d_1d:
14; GFX11:       ; %bb.0: ; %main_body
15; GFX11-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
16; GFX11-NEXT:    s_waitcnt vmcnt(0)
17; GFX11-NEXT:    ; return to shader part epilog
18;
19; GFX12-LABEL: sample_d_1d:
20; GFX12:       ; %bb.0: ; %main_body
21; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
22; GFX12-NEXT:    s_wait_samplecnt 0x0
23; GFX12-NEXT:    ; return to shader part epilog
24main_body:
25  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
26  ret <4 x float> %v
27}
28
; sample_d_2d: calls llvm.amdgcn.image.sample.d.2d with four half derivatives
; (%dsdh, %dtdh, %dsdv, %dtdv) and two float coordinates (%s, %t), dmask 15.
; Every target is expected to emit two v_perm_b32 (selector 0x5040100) ahead of
; the sample, each combining two argument registers into one -- presumably
; packing a pair of 16-bit derivatives per VGPR; confirm against the ISA docs.
; The sample itself is image_sample_d_g16 with the address list
; [v0, v2, v4, v5] and dim 2D on all three targets.
29define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
30; GFX10-LABEL: sample_d_2d:
31; GFX10:       ; %bb.0: ; %main_body
32; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
33; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
34; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
35; GFX10-NEXT:    s_waitcnt vmcnt(0)
36; GFX10-NEXT:    ; return to shader part epilog
37;
38; GFX11-LABEL: sample_d_2d:
39; GFX11:       ; %bb.0: ; %main_body
40; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
41; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
42; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
43; GFX11-NEXT:    s_waitcnt vmcnt(0)
44; GFX11-NEXT:    ; return to shader part epilog
45;
46; GFX12-LABEL: sample_d_2d:
47; GFX12:       ; %bb.0: ; %main_body
48; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
49; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
50; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
51; GFX12-NEXT:    s_wait_samplecnt 0x0
52; GFX12-NEXT:    ; return to shader part epilog
53main_body:
54  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
55  ret <4 x float> %v
56}
57
; sample_d_3d: calls llvm.amdgcn.image.sample.d.3d with six half derivatives
; and three float coordinates (%s, %t, %r), dmask 15.
; Expected address forms differ per target:
;   gfx1010 - two v_mov_b32 copies plus two v_perm_b32, then one contiguous
;             v[2:8] address range;
;   gfx1100 - two v_perm_b32, then the list [v0, v2, v3, v5, v[6:8]];
;   gfx1200 - two v_perm_b32, then the list [v0, v2, v3, v[5:8]].
; All three expect image_sample_d_g16 with dim 3D.
58define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
59; GFX10-LABEL: sample_d_3d:
60; GFX10:       ; %bb.0: ; %main_body
61; GFX10-NEXT:    v_mov_b32_e32 v9, v3
62; GFX10-NEXT:    v_mov_b32_e32 v3, v2
63; GFX10-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
64; GFX10-NEXT:    v_perm_b32 v4, v4, v9, 0x5040100
65; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
66; GFX10-NEXT:    s_waitcnt vmcnt(0)
67; GFX10-NEXT:    ; return to shader part epilog
68;
69; GFX11-LABEL: sample_d_3d:
70; GFX11:       ; %bb.0: ; %main_body
71; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
72; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
73; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
74; GFX11-NEXT:    s_waitcnt vmcnt(0)
75; GFX11-NEXT:    ; return to shader part epilog
76;
77; GFX12-LABEL: sample_d_3d:
78; GFX12:       ; %bb.0: ; %main_body
79; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
80; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
81; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
82; GFX12-NEXT:    s_wait_samplecnt 0x0
83; GFX12-NEXT:    ; return to shader part epilog
84main_body:
85  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
86  ret <4 x float> %v
87}
88
; sample_c_d_1d: calls llvm.amdgcn.image.sample.c.d.1d, which takes a leading
; float %zcompare ahead of the two half derivatives and the float coordinate.
; No register shuffling is expected on any target: gfx1010 and gfx1100 use the
; contiguous address v[0:3], gfx1200 uses the list [v0, v1, v2, v3].
; The expected opcode is image_sample_c_d_g16 with dmask:0xf and dim 1D.
89define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
90; GFX10-LABEL: sample_c_d_1d:
91; GFX10:       ; %bb.0: ; %main_body
92; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
93; GFX10-NEXT:    s_waitcnt vmcnt(0)
94; GFX10-NEXT:    ; return to shader part epilog
95;
96; GFX11-LABEL: sample_c_d_1d:
97; GFX11:       ; %bb.0: ; %main_body
98; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
99; GFX11-NEXT:    s_waitcnt vmcnt(0)
100; GFX11-NEXT:    ; return to shader part epilog
101;
102; GFX12-LABEL: sample_c_d_1d:
103; GFX12:       ; %bb.0: ; %main_body
104; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
105; GFX12-NEXT:    s_wait_samplecnt 0x0
106; GFX12-NEXT:    ; return to shader part epilog
107main_body:
108  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
109  ret <4 x float> %v
110}
111
; sample_c_d_2d: calls llvm.amdgcn.image.sample.c.d.2d with float %zcompare,
; four half derivatives and two float coordinates, dmask 15.
; Every target expects two v_perm_b32 (selector 0x5040100) that each merge two
; derivative registers into one, then image_sample_c_d_g16 with dim 2D.
; The address list is [v0, v1, v3, v5, v6] on gfx1010 and gfx1100; gfx1200
; folds the tail into a range: [v0, v1, v3, v[5:6]].
112define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
113; GFX10-LABEL: sample_c_d_2d:
114; GFX10:       ; %bb.0: ; %main_body
115; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
116; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
117; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
118; GFX10-NEXT:    s_waitcnt vmcnt(0)
119; GFX10-NEXT:    ; return to shader part epilog
120;
121; GFX11-LABEL: sample_c_d_2d:
122; GFX11:       ; %bb.0: ; %main_body
123; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
124; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
125; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
126; GFX11-NEXT:    s_waitcnt vmcnt(0)
127; GFX11-NEXT:    ; return to shader part epilog
128;
129; GFX12-LABEL: sample_c_d_2d:
130; GFX12:       ; %bb.0: ; %main_body
131; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
132; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
133; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
134; GFX12-NEXT:    s_wait_samplecnt 0x0
135; GFX12-NEXT:    ; return to shader part epilog
136main_body:
137  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
138  ret <4 x float> %v
139}
140
; sample_d_cl_1d: calls llvm.amdgcn.image.sample.d.cl.1d, which takes a
; trailing float %clamp after the two half derivatives and the float coordinate.
; No register shuffling is expected: gfx1010 and gfx1100 use the contiguous
; address v[0:3], gfx1200 uses the list [v0, v1, v2, v3].
; The expected opcode is image_sample_d_cl_g16 with dmask:0xf and dim 1D.
141define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
142; GFX10-LABEL: sample_d_cl_1d:
143; GFX10:       ; %bb.0: ; %main_body
144; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
145; GFX10-NEXT:    s_waitcnt vmcnt(0)
146; GFX10-NEXT:    ; return to shader part epilog
147;
148; GFX11-LABEL: sample_d_cl_1d:
149; GFX11:       ; %bb.0: ; %main_body
150; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
151; GFX11-NEXT:    s_waitcnt vmcnt(0)
152; GFX11-NEXT:    ; return to shader part epilog
153;
154; GFX12-LABEL: sample_d_cl_1d:
155; GFX12:       ; %bb.0: ; %main_body
156; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
157; GFX12-NEXT:    s_wait_samplecnt 0x0
158; GFX12-NEXT:    ; return to shader part epilog
159main_body:
160  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
161  ret <4 x float> %v
162}
163
; sample_d_cl_2d: calls llvm.amdgcn.image.sample.d.cl.2d with four half
; derivatives, two float coordinates and a trailing float %clamp, dmask 15.
; Every target expects two v_perm_b32 (selector 0x5040100) ahead of
; image_sample_d_cl_g16 with dim 2D. The address list is [v0, v2, v4, v5, v6]
; on gfx1010 and gfx1100, and [v0, v2, v4, v[5:6]] on gfx1200.
164define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
165; GFX10-LABEL: sample_d_cl_2d:
166; GFX10:       ; %bb.0: ; %main_body
167; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
168; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
169; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
170; GFX10-NEXT:    s_waitcnt vmcnt(0)
171; GFX10-NEXT:    ; return to shader part epilog
172;
173; GFX11-LABEL: sample_d_cl_2d:
174; GFX11:       ; %bb.0: ; %main_body
175; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
176; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
177; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
178; GFX11-NEXT:    s_waitcnt vmcnt(0)
179; GFX11-NEXT:    ; return to shader part epilog
180;
181; GFX12-LABEL: sample_d_cl_2d:
182; GFX12:       ; %bb.0: ; %main_body
183; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
184; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
185; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
186; GFX12-NEXT:    s_wait_samplecnt 0x0
187; GFX12-NEXT:    ; return to shader part epilog
188main_body:
189  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
190  ret <4 x float> %v
191}
192
; sample_c_d_cl_1d: calls llvm.amdgcn.image.sample.c.d.cl.1d with a leading
; float %zcompare, two half derivatives, a float coordinate and a trailing
; float %clamp, dmask 15.
; No register shuffling is expected: gfx1010 and gfx1100 use the contiguous
; address v[0:4], gfx1200 uses the list [v0, v1, v2, v[3:4]].
; The expected opcode is image_sample_c_d_cl_g16 with dim 1D.
193define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
194; GFX10-LABEL: sample_c_d_cl_1d:
195; GFX10:       ; %bb.0: ; %main_body
196; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
197; GFX10-NEXT:    s_waitcnt vmcnt(0)
198; GFX10-NEXT:    ; return to shader part epilog
199;
200; GFX11-LABEL: sample_c_d_cl_1d:
201; GFX11:       ; %bb.0: ; %main_body
202; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
203; GFX11-NEXT:    s_waitcnt vmcnt(0)
204; GFX11-NEXT:    ; return to shader part epilog
205;
206; GFX12-LABEL: sample_c_d_cl_1d:
207; GFX12:       ; %bb.0: ; %main_body
208; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
209; GFX12-NEXT:    s_wait_samplecnt 0x0
210; GFX12-NEXT:    ; return to shader part epilog
211main_body:
212  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
213  ret <4 x float> %v
214}
215
; sample_c_d_cl_2d: calls llvm.amdgcn.image.sample.c.d.cl.2d with float
; %zcompare, four half derivatives, two float coordinates and float %clamp.
; Expected address forms differ per target:
;   gfx1010 - two v_mov_b32 copies plus two v_perm_b32, then one contiguous
;             v[2:7] address range;
;   gfx1100 - two v_perm_b32, then the list [v0, v1, v3, v5, v[6:7]];
;   gfx1200 - two v_perm_b32, then the list [v0, v1, v3, v[5:7]].
; All three expect image_sample_c_d_cl_g16 with dmask:0xf and dim 2D.
216define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
217; GFX10-LABEL: sample_c_d_cl_2d:
218; GFX10:       ; %bb.0: ; %main_body
219; GFX10-NEXT:    v_mov_b32_e32 v8, v2
220; GFX10-NEXT:    v_mov_b32_e32 v2, v0
221; GFX10-NEXT:    v_perm_b32 v4, v4, v3, 0x5040100
222; GFX10-NEXT:    v_perm_b32 v3, v8, v1, 0x5040100
223; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
224; GFX10-NEXT:    s_waitcnt vmcnt(0)
225; GFX10-NEXT:    ; return to shader part epilog
226;
227; GFX11-LABEL: sample_c_d_cl_2d:
228; GFX11:       ; %bb.0: ; %main_body
229; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
230; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
231; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
232; GFX11-NEXT:    s_waitcnt vmcnt(0)
233; GFX11-NEXT:    ; return to shader part epilog
234;
235; GFX12-LABEL: sample_c_d_cl_2d:
236; GFX12:       ; %bb.0: ; %main_body
237; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
238; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
239; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v[5:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
240; GFX12-NEXT:    s_wait_samplecnt 0x0
241; GFX12-NEXT:    ; return to shader part epilog
242main_body:
243  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
244  ret <4 x float> %v
245}
246
; sample_c_d_o_2darray_V1: single-channel variant. Calls the
; image.sample.c.d.o.2darray intrinsic with i32 %offset, float %zcompare, four
; half derivatives and three floats (%s, %t, %slice); dmask is 4 and the
; function returns one float, so the expected destination is the single
; register v0 with dmask:0x4.
; Expected address forms differ per target:
;   gfx1010 - four v_mov_b32 copies plus two v_perm_b32, then v[2:8];
;   gfx1100 - two v_perm_b32, then [v0, v1, v2, v4, v[6:8]];
;   gfx1200 - two v_perm_b32, then [v0, v1, v2, v[5:8]].
; NOTE(review): the intrinsic name used here ends in ".f16.f32.f32" although it
; returns float, while the two-channel sibling test uses ".v2f32.f16.f32".
; Presumably the IR reader remangles the name on load -- confirm before
; normalising it, and change the matching declaration at the same time.
247define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
248; GFX10-LABEL: sample_c_d_o_2darray_V1:
249; GFX10:       ; %bb.0: ; %main_body
250; GFX10-NEXT:    v_mov_b32_e32 v9, v3
251; GFX10-NEXT:    v_mov_b32_e32 v10, v2
252; GFX10-NEXT:    v_mov_b32_e32 v3, v1
253; GFX10-NEXT:    v_mov_b32_e32 v2, v0
254; GFX10-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
255; GFX10-NEXT:    v_perm_b32 v4, v9, v10, 0x5040100
256; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
257; GFX10-NEXT:    s_waitcnt vmcnt(0)
258; GFX10-NEXT:    ; return to shader part epilog
259;
260; GFX11-LABEL: sample_c_d_o_2darray_V1:
261; GFX11:       ; %bb.0: ; %main_body
262; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
263; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
264; GFX11-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
265; GFX11-NEXT:    s_waitcnt vmcnt(0)
266; GFX11-NEXT:    ; return to shader part epilog
267;
268; GFX12-LABEL: sample_c_d_o_2darray_V1:
269; GFX12:       ; %bb.0: ; %main_body
270; GFX12-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
271; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
272; GFX12-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
273; GFX12-NEXT:    s_wait_samplecnt 0x0
274; GFX12-NEXT:    ; return to shader part epilog
275main_body:
276  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
277  ret float %v
278}
279
; sample_c_d_o_2darray_V2: two-channel variant. Takes the same operands as the
; single-channel test (i32 %offset, float %zcompare, four half derivatives,
; floats %s, %t, %slice) but passes dmask 6 and returns <2 x float>, so the
; expected destination is v[0:1] with dmask:0x6.
; Expected address forms differ per target:
;   gfx1010 - four v_mov_b32 copies plus two v_perm_b32, then v[2:8];
;   gfx1100 - two v_perm_b32, then [v0, v1, v2, v4, v[6:8]];
;   gfx1200 - two v_perm_b32, then [v0, v1, v2, v[5:8]].
280define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
281; GFX10-LABEL: sample_c_d_o_2darray_V2:
282; GFX10:       ; %bb.0: ; %main_body
283; GFX10-NEXT:    v_mov_b32_e32 v9, v3
284; GFX10-NEXT:    v_mov_b32_e32 v10, v2
285; GFX10-NEXT:    v_mov_b32_e32 v3, v1
286; GFX10-NEXT:    v_mov_b32_e32 v2, v0
287; GFX10-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
288; GFX10-NEXT:    v_perm_b32 v4, v9, v10, 0x5040100
289; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
290; GFX10-NEXT:    s_waitcnt vmcnt(0)
291; GFX10-NEXT:    ; return to shader part epilog
292;
293; GFX11-LABEL: sample_c_d_o_2darray_V2:
294; GFX11:       ; %bb.0: ; %main_body
295; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
296; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
297; GFX11-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
298; GFX11-NEXT:    s_waitcnt vmcnt(0)
299; GFX11-NEXT:    ; return to shader part epilog
300;
301; GFX12-LABEL: sample_c_d_o_2darray_V2:
302; GFX12:       ; %bb.0: ; %main_body
303; GFX12-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
304; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
305; GFX12-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
306; GFX12-NEXT:    s_wait_samplecnt 0x0
307; GFX12-NEXT:    ; return to shader part epilog
308main_body:
309  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
310  ret <2 x float> %v
311}
312
; Declarations of the image-sample intrinsics called by the tests in this file.
; Each takes an i32 dmask first and ends with the <8 x i32> resource, the
; <4 x i32> sampler, an i1 and two i32 operands; the half/float operands in
; between are the derivatives and coordinates. All use attribute group #1.
313declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
314declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
315declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
316declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
317declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
318declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
319declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
320declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
321declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
322
; The two 2darray declarations differ only in return type (float versus
; <2 x float>).
; NOTE(review): the float-returning one is spelled ".f16.f32.f32" rather than
; following the ".v2f32.f16.f32" pattern of its sibling; presumably the IR
; reader remangles it on load -- confirm, and keep the call site in sync if the
; name is ever normalised.
323declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
324declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
325
; Attribute groups. Within the visible file only #1 is referenced (by every
; intrinsic declaration); #0 and #2 are defined but not attached to anything
; shown here.
326attributes #0 = { nounwind }
327attributes #1 = { nounwind readonly }
328attributes #2 = { nounwind readnone }
329